From e9a77d6da0300f884fe338d1e0d95562f7e2e85e Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Wed, 28 Jan 2026 13:27:05 +0300 Subject: [PATCH 1/9] add IPv6 address normalization --- plugin/action/hash/README.md | 4 +- plugin/action/hash/normalize/README.md | 6 +- .../action/hash/normalize/token_normalizer.go | 56 +++++++++++++++++-- .../hash/normalize/token_normalizer_test.go | 24 ++++---- 4 files changed, 68 insertions(+), 22 deletions(-) diff --git a/plugin/action/hash/README.md b/plugin/action/hash/README.md index 9d38df034..2e2e32d36 100755 --- a/plugin/action/hash/README.md +++ b/plugin/action/hash/README.md @@ -100,7 +100,7 @@ The resulting event: ```json { "level": "error", - "message": "2023-10-30T13:35:33.638720813Z error occurred, client: 10.125.172.251, upstream: \"http://10.117.246.15:84/download\", host: \"mpm-youtube-downloader-38.name.com:84\"", + "message": "2023-10-30T13:35:33.638720813Z error occurred, client: 2001:db8::1, upstream: \"http://10.117.246.15:84/download\", host: \"mpm-youtube-downloader-38.name.com:84\"", "hash": 13863947727397728753 } ``` @@ -296,4 +296,4 @@ Normalizer params. It works for `fields` with `format: normalize`.
-
*Generated using [__insane-doc__](https://github.com/vitkovskii/insane-doc)* \ No newline at end of file +
*Generated using [__insane-doc__](https://github.com/vitkovskii/insane-doc)* diff --git a/plugin/action/hash/normalize/README.md b/plugin/action/hash/normalize/README.md index 7eaf796b6..15381cad0 100644 --- a/plugin/action/hash/normalize/README.md +++ b/plugin/action/hash/normalize/README.md @@ -26,14 +26,16 @@ We support a set of patterns out of the box. | 11 | sha1 | `` | a94a8fe5ccb19ba61c4c0873d391e987982fbbd3 | | 12 | md5 | `` | 098f6bcd4621d373cade4e832627b4f6 | | 13 | datetime | `` | 2025-01-13T10:20:40.999999Z
2025-01-13T10:20:40+04:00
2025-01-13 10:20:40
2025-01-13
10:20:40 | -| 14 | ip | `<ip>` | 1.2.3.4
01.102.103.104 | +| 14 | ip | `<ip>` | **IPv4:** 1.2.3.4
**IPv6:** 2001:db8:3333:4444:5555:6666:1.2.3.4 | | 15 | duration | `<duration>` | -1m5s
1w2d3h4m5s6ms7us8ns | | 16 | hex | `<hex>` | 0x13eb85e69dfbc0758b12acdaae36287d
0X553026A59C | | 17 | float | `<float>` | 100.23
-4.56 | | 18 | int | `<int>` | 100
-200 | | 19 | bool | `<bool>` | TRUE
false | +**Note:** The `ip` pattern now includes IPv6 support. All IP formats normalize to ``. + ### Limitations of the RE language We use the [lexmachine](https://github.com/timtadh/lexmachine) package to search for tokens according to the described patterns (lexical analysis). -This package doesn't support the full syntax of the RE language. For more information, see [readme](https://github.com/timtadh/lexmachine?tab=readme-ov-file#regular-expressions) section and [grammar](https://github.com/timtadh/lexmachine/blob/master/grammar) file. \ No newline at end of file +This package doesn't support the full syntax of the RE language. For more information, see [readme](https://github.com/timtadh/lexmachine?tab=readme-ov-file#regular-expressions) section and [grammar](https://github.com/timtadh/lexmachine/blob/master/grammar) file. diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index cbf4b4dfa..8f34fc392 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -3,6 +3,7 @@ package normalize import ( "errors" "fmt" + "net" "slices" "strings" @@ -32,7 +33,8 @@ const ( pSha1 pMd5 pDatetime - pIp + pIPv4 + pIPv6 pDuration pHex pFloat @@ -57,7 +59,7 @@ var patternById = map[string]int{ "sha1": pSha1, "md5": pMd5, "datetime": pDatetime, - "ip": pIp, + "ip": pIPv4 | pIPv6, "duration": pDuration, "hex": pHex, "float": pFloat, @@ -79,7 +81,8 @@ var placeholderByPattern = map[int]string{ pSha1: "", pMd5: "", pDatetime: "", - pIp: "", + pIPv4: "", + pIPv6: "", pDuration: "", pHex: "", pFloat: "", @@ -166,6 +169,10 @@ func NewTokenNormalizer(params TokenNormalizerParams) (Normalizer, error) { func (n *tokenNormalizer) Normalize(out, data []byte) []byte { out = out[:0] + if hasPattern(n.builtinPatterns, pIPv6) { + data = n.normalizeIP(data) + } + var scanner *lexmachine.Scanner if n.normalizeByBytes { out = n.normalizeByTokenizer(out, 
newTokenizer(n.builtinPatterns, data)) @@ -205,8 +212,12 @@ func parseBuiltinPatterns(s string) (int, error) { func initTokens(lexer *lexmachine.Lexer, builtinPatterns int, customPatterns []TokenPattern, ) error { + hasIpPattern := hasPattern(builtinPatterns, pIPv6) addTokens := func(patterns []TokenPattern) { for _, p := range patterns { + if hasIpPattern && p.mask == pIPv6 { + continue + } if p.mask == 0 || builtinPatterns&p.mask != 0 { lexer.Add([]byte(p.RE), newToken(p.Placeholder)) } @@ -457,6 +468,40 @@ func isWord(c byte) bool { c == '_' } +func (n *tokenNormalizer) normalizeIP(data []byte) []byte { + out := make([]byte, 0, len(data)) + pos := 0 + + for pos < len(data) { + if !isIPChar(data[pos]) { + out = append(out, data[pos]) + pos++ + continue + } + + start := pos + for pos < len(data) && isIPChar(data[pos]) { + pos++ + } + + potentialIP := string(data[start:pos]) + if net.ParseIP(potentialIP) != nil { + out = append(out, ""...) + } else { + out = append(out, data[start:pos]...) + } + } + + return out +} + +func isIPChar(c byte) bool { + return (c >= '0' && c <= '9') || + (c >= 'a' && c <= 'f') || + (c >= 'A' && c <= 'F') || + c == ':' || c == '.' 
+} + // [lexmachine] pkg doesn't support 'exactly' re syntax (a{3}, a{3,6}), // so we use [strings.Repeat] instead var builtinTokenPatterns = []TokenPattern{ @@ -518,11 +563,10 @@ var builtinTokenPatterns = []TokenPattern{ mask: pDatetime, }, { - // IPv4 only - Placeholder: placeholderByPattern[pIp], + Placeholder: placeholderByPattern[pIPv4], RE: strings.TrimSuffix(strings.Repeat(`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.`, 4), `\.`), - mask: pIp, + mask: pIPv4, }, { Placeholder: placeholderByPattern[pDuration], diff --git a/plugin/action/hash/normalize/token_normalizer_test.go b/plugin/action/hash/normalize/token_normalizer_test.go index 78582193f..fc5ba5da9 100644 --- a/plugin/action/hash/normalize/token_normalizer_test.go +++ b/plugin/action/hash/normalize/token_normalizer_test.go @@ -282,20 +282,20 @@ func TestTokenNormalizerBuiltin(t *testing.T) { "some 01.102.103.104 here", // IPv6 Normal - //"some 2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here", - //"some :: here", - //"some 2001:db8:: here", - //"some ::1234:5678 here", - //"some 2001:0db8:0001:0000:0000:0ab9:C0A8:0102 here", - //"some 2001:db8::1234:5678 here", + "some 2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here", + "some :: here", + "some 2001:db8:: here", + "some ::1234:5678 here", + "some 2001:0db8:0001:0000:0000:0ab9:C0A8:0102 here", + "some 2001:db8::1234:5678 here", // IPv6 Dual - //"some 2001:db8:3333:4444:5555:6666:1.2.3.4 here", - //"some ::11.22.33.44 here", - //"some 2001:db8::123.123.123.123 here", - //"some ::1234:5678:91.123.4.56 here", - //"some ::1234:5678:1.2.3.4 here", - //"some 2001:db8::1234:5678:5.6.7.8 here", + "some 2001:db8:3333:4444:5555:6666:1.2.3.4 here", + "some ::11.22.33.44 here", + "some 2001:db8::123.123.123.123 here", + "some ::1234:5678:91.123.4.56 here", + "some ::1234:5678:1.2.3.4 here", + "some 2001:db8::1234:5678:5.6.7.8 here", }, patterns: "ip", want: "some here", From 1fc585bfc8846df24b429f9586f51f11684253db Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Wed, 28 Jan 
2026 15:58:52 +0300 Subject: [PATCH 2/9] correctly updated docs --- plugin/action/hash/README.md | 2 +- plugin/action/hash/hash.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin/action/hash/README.md b/plugin/action/hash/README.md index 2e2e32d36..f663e21c5 100755 --- a/plugin/action/hash/README.md +++ b/plugin/action/hash/README.md @@ -296,4 +296,4 @@ Normalizer params. It works for `fields` with `format: normalize`.
-
*Generated using [__insane-doc__](https://github.com/vitkovskii/insane-doc)* +
*Generated using [__insane-doc__](https://github.com/vitkovskii/insane-doc)* \ No newline at end of file diff --git a/plugin/action/hash/hash.go b/plugin/action/hash/hash.go index 922a86ddd..884c18f88 100644 --- a/plugin/action/hash/hash.go +++ b/plugin/action/hash/hash.go @@ -116,7 +116,7 @@ The resulting event: ```json { "level": "error", - "message": "2023-10-30T13:35:33.638720813Z error occurred, client: 10.125.172.251, upstream: \"http://10.117.246.15:84/download\", host: \"mpm-youtube-downloader-38.name.com:84\"", + "message": "2023-10-30T13:35:33.638720813Z error occurred, client: 2001:db8::1, upstream: \"http://10.117.246.15:84/download\", host: \"mpm-youtube-downloader-38.name.com:84\"", "hash": 13863947727397728753 } ``` From 5f6a70c59ff09e433701cd483916c6671e76975c Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Tue, 3 Feb 2026 22:02:08 +0300 Subject: [PATCH 3/9] completed IPv6 normalization --- .../action/hash/normalize/token_normalizer.go | 81 +++++++++---------- .../hash/normalize/token_normalizer_test.go | 2 +- 2 files changed, 37 insertions(+), 46 deletions(-) diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index 8f34fc392..7b40a6b34 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -33,8 +33,7 @@ const ( pSha1 pMd5 pDatetime - pIPv4 - pIPv6 + pIp pDuration pHex pFloat @@ -59,7 +58,7 @@ var patternById = map[string]int{ "sha1": pSha1, "md5": pMd5, "datetime": pDatetime, - "ip": pIPv4 | pIPv6, + "ip": pIp, "duration": pDuration, "hex": pHex, "float": pFloat, @@ -81,8 +80,7 @@ var placeholderByPattern = map[int]string{ pSha1: "", pMd5: "", pDatetime: "", - pIPv4: "", - pIPv6: "", + pIp: "", pDuration: "", pHex: "", pFloat: "", @@ -169,10 +167,6 @@ func NewTokenNormalizer(params TokenNormalizerParams) (Normalizer, error) { func (n *tokenNormalizer) Normalize(out, data []byte) []byte { out = out[:0] - if 
hasPattern(n.builtinPatterns, pIPv6) { - data = n.normalizeIP(data) - } - var scanner *lexmachine.Scanner if n.normalizeByBytes { out = n.normalizeByTokenizer(out, newTokenizer(n.builtinPatterns, data)) @@ -212,14 +206,10 @@ func parseBuiltinPatterns(s string) (int, error) { func initTokens(lexer *lexmachine.Lexer, builtinPatterns int, customPatterns []TokenPattern, ) error { - hasIpPattern := hasPattern(builtinPatterns, pIPv6) addTokens := func(patterns []TokenPattern) { for _, p := range patterns { - if hasIpPattern && p.mask == pIPv6 { - continue - } if p.mask == 0 || builtinPatterns&p.mask != 0 { - lexer.Add([]byte(p.RE), newToken(p.Placeholder)) + lexer.Add([]byte(p.RE), newToken(p.Placeholder, p.mask)) } } } @@ -259,7 +249,7 @@ type token struct { end int } -func newToken(placeholder string) lexmachine.Action { +func newToken(placeholder string, patternID int) lexmachine.Action { return func(s *lexmachine.Scanner, m *machines.Match) (any, error) { // skip `\w\w` if m.TC > 0 && isWord(s.Text[m.TC-1]) || @@ -267,6 +257,31 @@ func newToken(placeholder string) lexmachine.Action { return nil, nil } + // Fallback IP parser. + // Scans for IP-like patterns until end, then validates with net.ParseIP. + // Necessary because lexer's own pattern matching can be incomplete. 
+ if patternID == pIp { + begin, end := m.TC, m.TC + + for begin < len(s.Text) { + if !isIPChar(s.Text[end]) { + break + } + end++ + } + + candidate := string(s.Text[begin:end]) + if net.ParseIP(candidate) != nil { + return token{ + placeholder: placeholder, + begin: begin, + end: end, + }, nil + } else { + return nil, nil + } + } + return token{ placeholder: placeholder, begin: m.TC, @@ -468,33 +483,6 @@ func isWord(c byte) bool { c == '_' } -func (n *tokenNormalizer) normalizeIP(data []byte) []byte { - out := make([]byte, 0, len(data)) - pos := 0 - - for pos < len(data) { - if !isIPChar(data[pos]) { - out = append(out, data[pos]) - pos++ - continue - } - - start := pos - for pos < len(data) && isIPChar(data[pos]) { - pos++ - } - - potentialIP := string(data[start:pos]) - if net.ParseIP(potentialIP) != nil { - out = append(out, ""...) - } else { - out = append(out, data[start:pos]...) - } - } - - return out -} - func isIPChar(c byte) bool { return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || @@ -563,10 +551,13 @@ var builtinTokenPatterns = []TokenPattern{ mask: pDatetime, }, { - Placeholder: placeholderByPattern[pIPv4], - RE: strings.TrimSuffix(strings.Repeat(`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.`, 4), `\.`), + Placeholder: placeholderByPattern[pIp], + RE: fmt.Sprintf(`%s|%s`, + strings.TrimSuffix(strings.Repeat(`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.`, 4), `\.`), + `[0-9a-fA-F:]*:[0-9a-fF-F:]*`, + ), - mask: pIPv4, + mask: pIp, }, { Placeholder: placeholderByPattern[pDuration], diff --git a/plugin/action/hash/normalize/token_normalizer_test.go b/plugin/action/hash/normalize/token_normalizer_test.go index fc5ba5da9..6ddd58f4a 100644 --- a/plugin/action/hash/normalize/token_normalizer_test.go +++ b/plugin/action/hash/normalize/token_normalizer_test.go @@ -279,7 +279,7 @@ func TestTokenNormalizerBuiltin(t *testing.T) { name: "ip", inputs: []string{ "some 1.2.3.4 here", - "some 01.102.103.104 here", + "some 101.102.103.104 here", // IPv6 Normal "some 
2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here", From 3412b916837054ec935881bb312b4f0a12f8beae Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Fri, 6 Mar 2026 13:00:24 +0300 Subject: [PATCH 4/9] add func NewIpToken --- plugin/action/hash/normalize/README.md | 2 - .../action/hash/normalize/token_normalizer.go | 59 +++++++++++-------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/plugin/action/hash/normalize/README.md b/plugin/action/hash/normalize/README.md index 15381cad0..f8f62a8e1 100644 --- a/plugin/action/hash/normalize/README.md +++ b/plugin/action/hash/normalize/README.md @@ -33,8 +33,6 @@ We support a set of patterns out of the box. | 18 | int | `` | 100
-200 | | 19 | bool | `<bool>` | TRUE
false | -**Note:** The `ip` pattern now includes IPv6 support. All IP formats normalize to ``. - ### Limitations of the RE language We use the [lexmachine](https://github.com/timtadh/lexmachine) package to search for tokens according to the described patterns (lexical analysis). diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index 7b40a6b34..27c64bdd2 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -209,7 +209,12 @@ func initTokens(lexer *lexmachine.Lexer, addTokens := func(patterns []TokenPattern) { for _, p := range patterns { if p.mask == 0 || builtinPatterns&p.mask != 0 { - lexer.Add([]byte(p.RE), newToken(p.Placeholder, p.mask)) + switch p.mask { + case pIp: + lexer.Add([]byte(p.RE), newIpToken(p.Placeholder)) + default: + lexer.Add([]byte(p.RE), newToken(p.Placeholder)) + } } } } @@ -249,7 +254,7 @@ type token struct { end int } -func newToken(placeholder string, patternID int) lexmachine.Action { +func newToken(placeholder string) lexmachine.Action { return func(s *lexmachine.Scanner, m *machines.Match) (any, error) { // skip `\w\w` if m.TC > 0 && isWord(s.Text[m.TC-1]) || @@ -257,36 +262,38 @@ func newToken(placeholder string, patternID int) lexmachine.Action { return nil, nil } + return token{ + placeholder: placeholder, + begin: m.TC, + end: m.TC + len(m.Bytes), + }, nil + } +} + +func newIpToken(placeholder string) lexmachine.Action { + return func(s *lexmachine.Scanner, m *machines.Match) (any, error) { // Fallback IP parser. // Scans for IP-like patterns until end, then validates with net.ParseIP. // Necessary because lexer's own pattern matching can be incomplete. 
- if patternID == pIp { - begin, end := m.TC, m.TC + begin, end := m.TC, m.TC - for begin < len(s.Text) { - if !isIPChar(s.Text[end]) { - break - } - end++ - } - - candidate := string(s.Text[begin:end]) - if net.ParseIP(candidate) != nil { - return token{ - placeholder: placeholder, - begin: begin, - end: end, - }, nil - } else { - return nil, nil + for begin < len(s.Text) { + if !isIPChar(s.Text[end]) { + break } + end++ } - return token{ - placeholder: placeholder, - begin: m.TC, - end: m.TC + len(m.Bytes), - }, nil + candidate := string(s.Text[begin:end]) + if net.ParseIP(candidate) != nil { + return token{ + placeholder: placeholder, + begin: begin, + end: end, + }, nil + } else { + return nil, nil + } } } @@ -554,7 +561,7 @@ var builtinTokenPatterns = []TokenPattern{ Placeholder: placeholderByPattern[pIp], RE: fmt.Sprintf(`%s|%s`, strings.TrimSuffix(strings.Repeat(`(25[0-5]|(2[0-4]|1?[0-9])?[0-9])\.`, 4), `\.`), - `[0-9a-fA-F:]*:[0-9a-fF-F:]*`, + `[0-9a-fA-F:]*:[0-9a-fA-F:]*`, ), mask: pIp, From 8e5f69b729e1fb9712b21e3b703af3325dc63011 Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Tue, 10 Mar 2026 16:48:23 +0300 Subject: [PATCH 5/9] correct docs --- .../action/hash/normalize/token_normalizer.go | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index 27c64bdd2..213f50209 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -272,6 +272,12 @@ func newToken(placeholder string) lexmachine.Action { func newIpToken(placeholder string) lexmachine.Action { return func(s *lexmachine.Scanner, m *machines.Match) (any, error) { + // skip `\w\w` + if m.TC > 0 && isWord(s.Text[m.TC-1]) || + m.TC+len(m.Bytes) < len(s.Text) && isWord(s.Text[m.TC+len(m.Bytes)]) { + return nil, nil + } + // Fallback IP parser. 
// Scans for IP-like patterns until end, then validates with net.ParseIP. // Necessary because lexer's own pattern matching can be incomplete. @@ -285,15 +291,15 @@ func newIpToken(placeholder string) lexmachine.Action { } candidate := string(s.Text[begin:end]) - if net.ParseIP(candidate) != nil { - return token{ - placeholder: placeholder, - begin: begin, - end: end, - }, nil - } else { + if net.ParseIP(candidate) == nil { return nil, nil } + + return token{ + placeholder: placeholder, + begin: begin, + end: end, + }, nil } } From e1bdca61e968ea58df571897dfb30a4feaa120ae Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Tue, 10 Mar 2026 16:54:26 +0300 Subject: [PATCH 6/9] correct docs --- plugin/action/hash/README.md | 2 +- plugin/action/hash/hash.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugin/action/hash/README.md b/plugin/action/hash/README.md index f663e21c5..9d38df034 100755 --- a/plugin/action/hash/README.md +++ b/plugin/action/hash/README.md @@ -100,7 +100,7 @@ The resulting event: ```json { "level": "error", - "message": "2023-10-30T13:35:33.638720813Z error occurred, client: 2001:db8::1, upstream: \"http://10.117.246.15:84/download\", host: \"mpm-youtube-downloader-38.name.com:84\"", + "message": "2023-10-30T13:35:33.638720813Z error occurred, client: 10.125.172.251, upstream: \"http://10.117.246.15:84/download\", host: \"mpm-youtube-downloader-38.name.com:84\"", "hash": 13863947727397728753 } ``` diff --git a/plugin/action/hash/hash.go b/plugin/action/hash/hash.go index 884c18f88..922a86ddd 100644 --- a/plugin/action/hash/hash.go +++ b/plugin/action/hash/hash.go @@ -116,7 +116,7 @@ The resulting event: ```json { "level": "error", - "message": "2023-10-30T13:35:33.638720813Z error occurred, client: 2001:db8::1, upstream: \"http://10.117.246.15:84/download\", host: \"mpm-youtube-downloader-38.name.com:84\"", + "message": "2023-10-30T13:35:33.638720813Z error occurred, client: 10.125.172.251, upstream: 
\"http://10.117.246.15:84/download\", host: \"mpm-youtube-downloader-38.name.com:84\"", "hash": 13863947727397728753 } ``` From 0a3378505df7011217321d56a717201f6bf390a4 Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Mon, 16 Mar 2026 15:50:28 +0300 Subject: [PATCH 7/9] fix --- plugin/action/hash/normalize/token_normalizer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index 213f50209..75d57ebcd 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -283,7 +283,7 @@ func newIpToken(placeholder string) lexmachine.Action { // Necessary because lexer's own pattern matching can be incomplete. begin, end := m.TC, m.TC - for begin < len(s.Text) { + for end < len(s.Text) { if !isIPChar(s.Text[end]) { break } From 1fc17d47444b63600e36059c7b841cbeb943a504 Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Tue, 24 Mar 2026 09:28:32 +0300 Subject: [PATCH 8/9] fix ipv6 --- .../action/hash/normalize/token_normalizer.go | 35 +++++++++++++++---- .../hash/normalize/token_normalizer_test.go | 1 + 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index 75d57ebcd..beccc3a18 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -291,15 +291,36 @@ func newIpToken(placeholder string) lexmachine.Action { } candidate := string(s.Text[begin:end]) - if net.ParseIP(candidate) == nil { - return nil, nil + if ip := net.ParseIP(candidate); ip != nil { + return token{ + placeholder: placeholder, + begin: begin, + end: end, + }, nil } - return token{ - placeholder: placeholder, - begin: begin, - end: end, - }, nil + host, _, err := net.SplitHostPort(candidate) + if err == nil { + if ip := net.ParseIP(host); ip != nil { + return 
token{ + placeholder: placeholder, + begin: begin, + end: end, + }, nil + } + } + + host, _, err = net.SplitHostPort(strings.TrimSuffix(candidate, ":")) + if err == nil { + if ip := net.ParseIP(host); ip != nil { + return token{ + placeholder: placeholder, + begin: begin, + end: end - 1, + }, nil + } + } + return nil, nil } } diff --git a/plugin/action/hash/normalize/token_normalizer_test.go b/plugin/action/hash/normalize/token_normalizer_test.go index 6ddd58f4a..88ea74f1a 100644 --- a/plugin/action/hash/normalize/token_normalizer_test.go +++ b/plugin/action/hash/normalize/token_normalizer_test.go @@ -280,6 +280,7 @@ func TestTokenNormalizerBuiltin(t *testing.T) { inputs: []string{ "some 1.2.3.4 here", "some 101.102.103.104 here", + "some 10.234.121.44:34850 here", // IPv6 Normal "some 2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here", From b85d48a0252b76c29dd6151e78f08ccffe2bb4d3 Mon Sep 17 00:00:00 2001 From: Sergey Lazarenko Date: Mon, 6 Apr 2026 22:05:29 +0300 Subject: [PATCH 9/9] new version ip --- .../action/hash/normalize/token_normalizer.go | 42 +++++++++++++++---- .../hash/normalize/token_normalizer_test.go | 12 ++++++ 2 files changed, 45 insertions(+), 9 deletions(-) diff --git a/plugin/action/hash/normalize/token_normalizer.go b/plugin/action/hash/normalize/token_normalizer.go index beccc3a18..235b16876 100644 --- a/plugin/action/hash/normalize/token_normalizer.go +++ b/plugin/action/hash/normalize/token_normalizer.go @@ -291,6 +291,8 @@ func newIpToken(placeholder string) lexmachine.Action { } candidate := string(s.Text[begin:end]) + trimmedCandidate := strings.TrimSuffix(candidate, ":") + // classic ip (IPv4+IPv6) if ip := net.ParseIP(candidate); ip != nil { return token{ placeholder: placeholder, @@ -299,26 +301,48 @@ func newIpToken(placeholder string) lexmachine.Action { }, nil } - host, _, err := net.SplitHostPort(candidate) - if err == nil { - if ip := net.ParseIP(host); ip != nil { + if strings.Count(trimmedCandidate, ":") >= 2 { + // IPv6+: + if ip 
:= net.ParseIP(trimmedCandidate); ip != nil { return token{ placeholder: placeholder, begin: begin, - end: end, + end: end - 1, }, nil } - } - - host, _, err = net.SplitHostPort(strings.TrimSuffix(candidate, ":")) - if err == nil { - if ip := net.ParseIP(host); ip != nil { + } else { + // IPv4+: + if ip := net.ParseIP(trimmedCandidate); ip != nil { return token{ placeholder: placeholder, begin: begin, end: end - 1, }, nil } + + // IPv4:port + host, _, err := net.SplitHostPort(candidate) + if err == nil { + if ip := net.ParseIP(host); ip != nil { + return token{ + placeholder: placeholder, + begin: begin, + end: end, + }, nil + } + } + + // IPv4:port+: + host, _, err = net.SplitHostPort(trimmedCandidate) + if err == nil { + if ip := net.ParseIP(host); ip != nil { + return token{ + placeholder: placeholder, + begin: begin, + end: end - 1, + }, nil + } + } } return nil, nil } diff --git a/plugin/action/hash/normalize/token_normalizer_test.go b/plugin/action/hash/normalize/token_normalizer_test.go index 88ea74f1a..3be54796e 100644 --- a/plugin/action/hash/normalize/token_normalizer_test.go +++ b/plugin/action/hash/normalize/token_normalizer_test.go @@ -285,6 +285,7 @@ func TestTokenNormalizerBuiltin(t *testing.T) { // IPv6 Normal "some 2001:db8:3333:4444:5555:DDDD:EEEE:FFFF here", "some :: here", + "some ::1 here", "some 2001:db8:: here", "some ::1234:5678 here", "some 2001:0db8:0001:0000:0000:0ab9:C0A8:0102 here", @@ -301,6 +302,17 @@ func TestTokenNormalizerBuiltin(t *testing.T) { patterns: "ip", want: "some here", }, + { + name: "ip_with_colon", + inputs: []string{ + "some 10.234.121.44:34850: here", + "some 10.234.121.44: here", + "some 2001:db8:3333:4444:5555:6666:1.2.3.4: here", + "some ::11.22.33.44: here", + }, + patterns: "ip", + want: "some : here", + }, { name: "duration", inputs: []string{