diff --git a/internal/utils/runes/runes.go b/internal/utils/runes/runes.go index d82824b..31df974 100644 --- a/internal/utils/runes/runes.go +++ b/internal/utils/runes/runes.go @@ -9,10 +9,30 @@ func ContainsAnyWord(s string, words []string, isCaseInsensitive bool) bool { return false } + var lastRune rune // last searching rune in s + var lastIndex = -2 // -2: not started, -1: not found, >=0: rune index for _, word := range words { - if ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive) { - return true + var wordRunes = []rune(word) + if len(wordRunes) == 0 { + continue } + + if lastIndex > -2 && lastRune == wordRunes[0] { + if lastIndex >= 0 { + result, _ := ContainsWordRunes(allRunes[lastIndex:], wordRunes, isCaseInsensitive) + if result { + return true + } + } + continue + } else { + result, firstIndex := ContainsWordRunes(allRunes, wordRunes, isCaseInsensitive) + lastIndex = firstIndex + if result { + return true + } + } + lastRune = wordRunes[0] } return false } @@ -25,7 +45,7 @@ func ContainsAllWords(s string, words []string, isCaseInsensitive bool) bool { } for _, word := range words { - if !ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive) { + if result, _ := ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive); !result { return false } } @@ -33,16 +53,22 @@ func ContainsAllWords(s string, words []string, isCaseInsensitive bool) bool { } // ContainsWordRunes 检查字符列表是否包含某个单词子字符列表 -func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) bool { +func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) (result bool, firstIndex int) { + firstIndex = -1 + var l = len(subRunes) if l == 0 { - return false + return false, 0 } var al = len(allRunes) for index, r := range allRunes { if EqualRune(r, subRunes[0], isCaseInsensitive) && (index == 0 || !isChar(allRunes[index-1]) /**boundary check **/) { + if firstIndex < 0 { + firstIndex = index + } + var found = true if l > 1 { for i := 1; i < l; i++ { @@ -56,12 +82,12 @@ func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) // check after charset if found && (al <= index+l || !isChar(allRunes[index+l]) /**boundary check **/) { - return true + return true, firstIndex } } } - return false + return false, firstIndex } // ContainsSubRunes 检查字符列表是否包含某个子子字符列表 diff --git a/internal/utils/runes/runes_test.go b/internal/utils/runes/runes_test.go index da2fd4e..f978e57 100644 --- a/internal/utils/runes/runes_test.go +++ b/internal/utils/runes/runes_test.go @@ -3,9 +3,13 @@ package runes_test import ( + "github.com/TeaOSLab/EdgeNode/internal/re" "github.com/TeaOSLab/EdgeNode/internal/utils/runes" "github.com/iwind/TeaGo/assert" + "regexp" "runtime" + "sort" + "strings" "testing" ) @@ -25,6 +29,11 @@ func TestContainsAnyWord(t *testing.T) { a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, true)) } +func TestContainsAnyWord_Sort(t *testing.T) { + var a = assert.NewAssertion(t) + a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"abc", "ant", "arm", "Hit", "Hi", "Pet", "pie", "are"}, false)) +} + func TestContainsWordRunes(t *testing.T) { var a = assert.NewAssertion(t) a.IsFalse(runes.ContainsWordRunes([]rune(""), []rune("How"), true)) @@ -81,7 +90,45 @@ func BenchmarkContainsWordRunes(b *testing.B) { b.RunParallel(func(pb *testing.PB) { for pb.Next() { - _ = runes.ContainsWordRunes([]rune("How are you"), []rune("YOU"), true) + _, _ = runes.ContainsWordRunes([]rune("How are you"), []rune("YOU"), true) + } + }) +} + +func BenchmarkContainsAnyWord(b *testing.B) { + runtime.GOMAXPROCS(4) + + var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n") + sort.Strings(words) + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _ = runes.ContainsAnyWord("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0", words, true) + } + }) +} + +func BenchmarkContainsAnyWord_Regexp(b *testing.B) { + runtime.GOMAXPROCS(4) + var reg = regexp.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|")) + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _ = reg.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0") + } + }) +} + +func BenchmarkContainsAnyWord_Re(b *testing.B) { + runtime.GOMAXPROCS(4) + var reg = re.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|")) + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + _ = reg.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0") } }) }