mirror of
https://github.com/TeaOSLab/EdgeNode.git
synced 2025-11-15 17:40:27 +08:00
提升单词匹配性能
This commit is contained in:
@@ -9,11 +9,31 @@ func ContainsAnyWord(s string, words []string, isCaseInsensitive bool) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
var lastRune rune // last searching rune in s
|
||||
var lastIndex = -2 // -2: not started, -1: not found, >=0: rune index
|
||||
for _, word := range words {
|
||||
if ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive) {
|
||||
var wordRunes = []rune(word)
|
||||
if len(wordRunes) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
if lastIndex > -2 && lastRune == wordRunes[0] {
|
||||
if lastIndex >= 0 {
|
||||
result, _ := ContainsWordRunes(allRunes[lastIndex:], wordRunes, isCaseInsensitive)
|
||||
if result {
|
||||
return true
|
||||
}
|
||||
}
|
||||
continue
|
||||
} else {
|
||||
result, firstIndex := ContainsWordRunes(allRunes, wordRunes, isCaseInsensitive)
|
||||
lastIndex = firstIndex
|
||||
if result {
|
||||
return true
|
||||
}
|
||||
}
|
||||
lastRune = wordRunes[0]
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -25,7 +45,7 @@ func ContainsAllWords(s string, words []string, isCaseInsensitive bool) bool {
|
||||
}
|
||||
|
||||
for _, word := range words {
|
||||
if !ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive) {
|
||||
if result, _ := ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive); !result {
|
||||
return false
|
||||
}
|
||||
}
|
||||
@@ -33,16 +53,22 @@ func ContainsAllWords(s string, words []string, isCaseInsensitive bool) bool {
|
||||
}
|
||||
|
||||
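// ContainsWordRunes reports whether allRunes contains subRunes as a word:
// the match must start at index 0 or after a rune for which isChar is false,
// and must end at the end of allRunes or before a rune for which isChar is false.
// firstIndex is the index of the first boundary-preceded rune equal to subRunes[0],
// or -1 when there is none (0 is returned when subRunes is empty).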
|
||||
func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) bool {
|
||||
func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) (result bool, firstIndex int) {
|
||||
firstIndex = -1
|
||||
|
||||
var l = len(subRunes)
|
||||
if l == 0 {
|
||||
return false
|
||||
return false, 0
|
||||
}
|
||||
|
||||
var al = len(allRunes)
|
||||
|
||||
for index, r := range allRunes {
|
||||
if EqualRune(r, subRunes[0], isCaseInsensitive) && (index == 0 || !isChar(allRunes[index-1]) /**boundary check **/) {
|
||||
if firstIndex < 0 {
|
||||
firstIndex = index
|
||||
}
|
||||
|
||||
var found = true
|
||||
if l > 1 {
|
||||
for i := 1; i < l; i++ {
|
||||
@@ -56,12 +82,12 @@ func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool)
|
||||
|
||||
// check after charset
|
||||
if found && (al <= index+l || !isChar(allRunes[index+l]) /**boundary check **/) {
|
||||
return true
|
||||
return true, firstIndex
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
return false, firstIndex
|
||||
}
|
||||
|
||||
// ContainsSubRunes checks whether the rune list contains the given sub-rune list
|
||||
|
||||
@@ -3,9 +3,13 @@
|
||||
package runes_test
|
||||
|
||||
import (
|
||||
"github.com/TeaOSLab/EdgeNode/internal/re"
|
||||
"github.com/TeaOSLab/EdgeNode/internal/utils/runes"
|
||||
"github.com/iwind/TeaGo/assert"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
@@ -25,6 +29,11 @@ func TestContainsAnyWord(t *testing.T) {
|
||||
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, true))
|
||||
}
|
||||
|
||||
// TestContainsAnyWord_Sort asserts that ContainsAnyWord returns true for
// "How are you?" when the word list contains runs of entries that share the
// same first rune ("abc"/"ant"/"arm" and "Hit"/"Hi"). The call passes false
// for isCaseInsensitive, and "are" (the last entry) is the only listed word
// that occurs in the input.
func TestContainsAnyWord_Sort(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
// NOTE(review): presumably exercises the same-first-rune shortcut in ContainsAnyWord — confirm.
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"abc", "ant", "arm", "Hit", "Hi", "Pet", "pie", "are"}, false))
|
||||
}
|
||||
|
||||
func TestContainsWordRunes(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
a.IsFalse(runes.ContainsWordRunes([]rune(""), []rune("How"), true))
|
||||
@@ -81,7 +90,45 @@ func BenchmarkContainsWordRunes(b *testing.B) {
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_ = runes.ContainsWordRunes([]rune("How are you"), []rune("YOU"), true)
|
||||
_, _ = runes.ContainsWordRunes([]rune("How are you"), []rune("YOU"), true)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// BenchmarkContainsAnyWord measures runes.ContainsAnyWord in parallel, matching
// a sorted list of 13 client/tool names against a browser User-Agent string
// with isCaseInsensitive set to true.
func BenchmarkContainsAnyWord(b *testing.B) {
|
||||
// Sets GOMAXPROCS to 4; the previous value is discarded and not restored.
runtime.GOMAXPROCS(4)
|
||||
|
||||
var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n")
|
||||
// Sorting places words that start with the same rune next to each other.
sort.Strings(words)
|
||||
// Exclude the setup above from the measured time.
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
// None of the listed words occurs in this input, so the call cannot return early on a match.
_ = runes.ContainsAnyWord("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0", words, true)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// BenchmarkContainsAnyWord_Regexp measures the standard library regexp package
// on the same word list and User-Agent string as BenchmarkContainsAnyWord.
// The pattern is a case-insensitive ("(?i)") alternation of the words joined
// with "|"; it has no word-boundary anchors, so it matches substrings.
func BenchmarkContainsAnyWord_Regexp(b *testing.B) {
|
||||
// Sets GOMAXPROCS to 4; the previous value is discarded and not restored.
runtime.GOMAXPROCS(4)
|
||||
// Compiled once, outside the timed loop.
var reg = regexp.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|"))
|
||||
// Exclude the setup above from the measured time.
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_ = reg.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// BenchmarkContainsAnyWord_Re mirrors BenchmarkContainsAnyWord_Regexp but
// compiles the same "(?i)" alternation pattern with the project's internal
// re package (re.MustCompile) instead of the standard library regexp package.
// NOTE(review): the matching semantics of re.MustCompile are not visible here — confirm against the re package.
func BenchmarkContainsAnyWord_Re(b *testing.B) {
|
||||
// Sets GOMAXPROCS to 4; the previous value is discarded and not restored.
runtime.GOMAXPROCS(4)
|
||||
// Compiled once, outside the timed loop.
var reg = re.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|"))
|
||||
// Exclude the setup above from the measured time.
b.ResetTimer()
|
||||
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
_ = reg.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user