Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion plugin/action/hash/normalize/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ We support a set of patterns out of the box.
| 11 | uuid | `<uuid>` | 7c1811ed-e98f-4c9c-a9f9-58c757ff494f |
| 12 | hash | `<hash>` | 48757ec9f04efe7faacec8722f3476339b125a6b6172b8a69ff3aa329e0bd0ff<br>a94a8fe5ccb19ba61c4c0873d391e987982fbbd3<br>098f6bcd4621d373cade4e832627b4f6 |
| 13 | datetime | `<datetime>` | 2025-01-13T10:20:40.999999Z<br>2025-01-13T10:20:40+04:00<br>2025-01-13 10:20:40<br>2025-01-13<br>10:20:40 |
| 14 | ip | `<ip>` | 1.2.3.4<br>01.102.103.104 |
| 14 | ip | `<ip>` | **IPv4:** 1.2.3.4<br>**IPv6:** 2001:db8:3333:4444:5555:6666:1.2.3.4 |
| 15 | duration | `<duration>` | -1m5s<br>1w2d3h4m5s6ms7us8ns |
| 16 | hex | `<hex>` | 0x13eb85e69dfbc0758b12acdaae36287d<br>0X553026A59C |
| 17 | float | `<float>` | 100.23<br>-4.56 |
Expand Down
99 changes: 96 additions & 3 deletions plugin/action/hash/normalize/token_normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package normalize
import (
"errors"
"fmt"
"net"
"slices"
"strings"

Expand Down Expand Up @@ -208,7 +209,12 @@ func initTokens(lexer *lexmachine.Lexer,
addTokens := func(patterns []TokenPattern) {
for _, p := range patterns {
if p.mask == 0 || builtinPatterns&p.mask != 0 {
lexer.Add([]byte(p.RE), newToken(p.Placeholder))
switch p.mask {
case pIp:
lexer.Add([]byte(p.RE), newIpToken(p.Placeholder))
default:
lexer.Add([]byte(p.RE), newToken(p.Placeholder))
}
}
}
}
Expand Down Expand Up @@ -264,6 +270,84 @@ func newToken(placeholder string) lexmachine.Action {
}
}

// newIpToken returns a lexer action that recognizes IP addresses (IPv4,
// IPv6, and IPv6 with an embedded IPv4 tail), optionally followed by a
// ":port" suffix and/or a single trailing colon that is excluded from the
// emitted token.
//
// The lexer's own IP regexp (see builtinTokenPatterns) is deliberately
// loose, so this action re-scans the full run of IP-like characters and
// validates it with [net.ParseIP]. Returning (nil, nil) skips a non-IP
// match without raising a scanner error.
func newIpToken(placeholder string) lexmachine.Action {
	return func(s *lexmachine.Scanner, m *machines.Match) (any, error) {
		// skip `\w<match>\w` — the match is embedded in a larger word
		if m.TC > 0 && isWord(s.Text[m.TC-1]) ||
			m.TC+len(m.Bytes) < len(s.Text) && isWord(s.Text[m.TC+len(m.Bytes)]) {
			return nil, nil
		}

		// Extend the match to the full run of IP-like characters;
		// the lexer's own pattern matching can stop short of the token end.
		begin, end := m.TC, m.TC
		for end < len(s.Text) && isIPChar(s.Text[end]) {
			end++
		}

		// tok builds the result token for the half-open range [begin, e).
		tok := func(e int) (any, error) {
			return token{
				placeholder: placeholder,
				begin:       begin,
				end:         e,
			}, nil
		}

		candidate := string(s.Text[begin:end])

		// classic ip (IPv4+IPv6)
		if net.ParseIP(candidate) != nil {
			return tok(end)
		}

		// Address with a single trailing colon (e.g. "1.2.3.4:" or
		// "2001:db8:::"): the colon is not part of the token.
		trimmedCandidate := strings.TrimSuffix(candidate, ":")
		if trimmedCandidate != candidate && net.ParseIP(trimmedCandidate) != nil {
			return tok(end - 1)
		}

		// "host:port" is only unambiguous for IPv4 — an unbracketed IPv6
		// address itself contains two or more colons — so only try
		// SplitHostPort when the trimmed candidate has fewer than two.
		if strings.Count(trimmedCandidate, ":") < 2 {
			// IPv4:port
			if host, _, err := net.SplitHostPort(candidate); err == nil && net.ParseIP(host) != nil {
				return tok(end)
			}
			// IPv4:port with a trailing colon
			if host, _, err := net.SplitHostPort(trimmedCandidate); err == nil && net.ParseIP(host) != nil {
				return tok(end - 1)
			}
		}
		return nil, nil
	}
}

func (n *tokenNormalizer) normalizeByScanner(out []byte, scanner *lexmachine.Scanner) []byte {
prevEnd := 0
for tokRaw, err, eos := scanner.Next(); !eos; tokRaw, err, eos = scanner.Next() {
Expand Down Expand Up @@ -457,6 +541,13 @@ func isWord(c byte) bool {
c == '_'
}

// isIPChar reports whether c may appear in an IPv4/IPv6 address:
// a decimal digit, a hex digit (either case), a dot, or a colon.
func isIPChar(c byte) bool {
	switch {
	case '0' <= c && c <= '9':
		return true
	case 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		return true
	default:
		return c == '.' || c == ':'
	}
}

// [lexmachine] pkg doesn't support 'exactly' re syntax (a{3}, a{3,6}),
// so we use [strings.Repeat] instead
var builtinTokenPatterns = []TokenPattern{
Expand Down Expand Up @@ -523,9 +614,11 @@ var builtinTokenPatterns = []TokenPattern{
mask: pDatetime,
},
{
// IPv4 only
Placeholder: placeholderByPattern[pIp],
RE: strings.TrimSuffix(strings.Repeat(`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.`, 4), `\.`),
RE: fmt.Sprintf(`%s|%s`,
strings.TrimSuffix(strings.Repeat(`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.`, 4), `\.`),
`[0-9a-fA-F:]*:[0-9a-fA-F:]*`,
),

mask: pIp,
},
Expand Down
39 changes: 26 additions & 13 deletions plugin/action/hash/normalize/token_normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,27 +280,40 @@ func TestTokenNormalizerBuiltin(t *testing.T) {
name: "ip",
inputs: []string{
"some 1.2.3.4 here",
"some 01.102.103.104 here",
"some 101.102.103.104 here",
"some 10.234.121.44:34850 here",

// IPv6 Normal
//"some 2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here",
//"some :: here",
//"some 2001:db8:: here",
//"some ::1234:5678 here",
//"some 2001:0db8:0001:0000:0000:0ab9:C0A8:0102 here",
//"some 2001:db8::1234:5678 here",
"some 2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here",
"some :: here",
"some ::1 here",
"some 2001:db8:: here",
"some ::1234:5678 here",
"some 2001:0db8:0001:0000:0000:0ab9:C0A8:0102 here",
"some 2001:db8::1234:5678 here",

// IPv6 Dual
//"some 2001:db8:3333:4444:5555:6666:1.2.3.4 here",
//"some ::11.22.33.44 here",
//"some 2001:db8::123.123.123.123 here",
//"some ::1234:5678:91.123.4.56 here",
//"some ::1234:5678:1.2.3.4 here",
//"some 2001:db8::1234:5678:5.6.7.8 here",
"some 2001:db8:3333:4444:5555:6666:1.2.3.4 here",
"some ::11.22.33.44 here",
"some 2001:db8::123.123.123.123 here",
"some ::1234:5678:91.123.4.56 here",
"some ::1234:5678:1.2.3.4 here",
"some 2001:db8::1234:5678:5.6.7.8 here",
},
patterns: "ip",
want: "some <ip> here",
},
{
name: "ip_with_colon",
inputs: []string{
"some 10.234.121.44:34850: here",
"some 10.234.121.44: here",
"some 2001:db8:3333:4444:5555:6666:1.2.3.4: here",
"some ::11.22.33.44: here",
},
patterns: "ip",
want: "some <ip>: here",
},
{
name: "duration",
inputs: []string{
Expand Down
Loading