diff --git a/internal/utils/runes/runes.go b/internal/utils/runes/runes.go
index 31df974..b13fd13 100644
--- a/internal/utils/runes/runes.go
+++ b/internal/utils/runes/runes.go
@@ -2,6 +2,40 @@
 
 package runes
 
+// ContainsAnyWordRunes reports whether s contains any entry of words (each given as a rune slice), delegating each check to ContainsWordRunes; it returns false when s or words is empty.
+func ContainsAnyWordRunes(s string, words [][]rune, isCaseInsensitive bool) bool {
+	var allRunes = []rune(s)
+	if len(allRunes) == 0 || len(words) == 0 {
+		return false
+	}
+
+	var lastRune rune  // first rune of the last word that went through a full scan of s
+	var lastIndex = -2 // -2: not started, -1: not found, >=0: rune index
+	for _, wordRunes := range words {
+		if len(wordRunes) == 0 {
+			continue
+		}
+
+		if lastIndex > -2 && lastRune == wordRunes[0] { // same first rune as the last fully scanned word (exact rune comparison, regardless of isCaseInsensitive)
+			if lastIndex >= 0 { // otherwise the earlier scan reported no index, so this word is skipped without scanning
+				result, _ := ContainsWordRunes(allRunes[lastIndex:], wordRunes, isCaseInsensitive) // resume from the index remembered from the earlier scan
+				if result {
+					return true
+				}
+			}
+			continue
+		} else {
+			result, firstIndex := ContainsWordRunes(allRunes, wordRunes, isCaseInsensitive) // full scan; presumably firstIndex is where the word's first rune first matches - confirm in ContainsWordRunes
+			lastIndex = firstIndex
+			if result {
+				return true
+			}
+		}
+		lastRune = wordRunes[0]
+	}
+	return false
+}
+
 // ContainsAnyWord 检查字符串是否包含任一单词
 func ContainsAnyWord(s string, words []string, isCaseInsensitive bool) bool {
 	var allRunes = []rune(s)
diff --git a/internal/utils/runes/runes_test.go b/internal/utils/runes/runes_test.go
index f978e57..b5ec1ac 100644
--- a/internal/utils/runes/runes_test.go
+++ b/internal/utils/runes/runes_test.go
@@ -27,6 +27,7 @@ func TestContainsAnyWord(t *testing.T) {
 	a.IsFalse(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, false))
 	a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how"}, true))
 	a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, true))
+	a.IsTrue(runes.ContainsAnyWord("How-are you?", []string{"how", "ok"}, true)) // NOTE(review): exercises ContainsAnyWord; no assertion visible in this patch covers ContainsAnyWordRunes
 }
 
 func TestContainsAnyWord_Sort(t *testing.T) {
@@ -100,6 +101,12 @@ func BenchmarkContainsAnyWord(b *testing.B) {
 
 	var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n")
 	sort.Strings(words)
+
+	var wordRunes = [][]rune{} // NOTE(review): not read by any line this patch shows for BenchmarkContainsAnyWord - looks like leftover setup; confirm
+	for _, word := range words {
+		wordRunes = append(wordRunes, []rune(word))
+	}
+
 	b.ResetTimer()
 
 	b.RunParallel(func(pb *testing.PB) {
@@ -109,6 +116,27 @@ func BenchmarkContainsAnyWord(b *testing.B) {
 	})
 }
 
+func BenchmarkContainsAnyWordRunes(b *testing.B) { // benchmarks ContainsAnyWordRunes against a fixed User-Agent string and a sorted word list
+	runtime.GOMAXPROCS(4)
+
+	var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n")
+	sort.Strings(words)
+
+	var wordRunes = [][]rune{} // rune form of words, converted before the timer is reset
+	for _, word := range words {
+		wordRunes = append(wordRunes, []rune(word))
+	}
+
+	b.ResetTimer()
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			_ = runes.ContainsAnyWordRunes("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0", wordRunes, true)
+		}
+	})
+}
+
+
 func BenchmarkContainsAnyWord_Regexp(b *testing.B) {
 	runtime.GOMAXPROCS(4)
 	var reg = regexp.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|"))
diff --git a/internal/waf/rule.go b/internal/waf/rule.go
index e33df9e..0fcef92 100644
--- a/internal/waf/rule.go
+++ b/internal/waf/rule.go
@@ -53,6 +53,7 @@ type Rule struct {
 
 	ipRangeListValue *values.IPRangeList
 	stringValues     []string
+	stringValueRunes [][]rune // rune form of stringValues, filled in Init
 	ipList           *values.StringList
 
 	floatValue float64
@@ -97,6 +98,11 @@ func (this *Rule) Init() error {
 			if this.Operator == RuleOperatorContainsAnyWord || this.Operator == RuleOperatorContainsAllWords || this.Operator == RuleOperatorNotContainsAnyWord {
 				sort.Strings(this.stringValues)
 			}
+
+			this.stringValueRunes = [][]rune{} // rebuilt from stringValues (after the sort above, when it applies) so Test can hand ready-made rune slices to ContainsAnyWordRunes
+			for _, line := range this.stringValues {
+				this.stringValueRunes = append(this.stringValueRunes, []rune(line))
+			}
 		}
 	case RuleOperatorMatch:
 		var v = this.Value
@@ -567,11 +573,11 @@ func (this *Rule) Test(value any) bool {
 		}
 		return false
 	case RuleOperatorContainsAnyWord:
-		return runes.ContainsAnyWord(this.stringifyValue(value), this.stringValues, this.IsCaseInsensitive)
+		return runes.ContainsAnyWordRunes(this.stringifyValue(value), this.stringValueRunes, this.IsCaseInsensitive)
 	case RuleOperatorContainsAllWords:
 		return runes.ContainsAllWords(this.stringifyValue(value), this.stringValues, this.IsCaseInsensitive)
 	case RuleOperatorNotContainsAnyWord:
-		return !runes.ContainsAnyWord(this.stringifyValue(value), this.stringValues, this.IsCaseInsensitive)
+		return !runes.ContainsAnyWordRunes(this.stringifyValue(value), this.stringValueRunes, this.IsCaseInsensitive)
 	case RuleOperatorContainsSQLInjection:
 		if value == nil {
 			return false