mirror of
https://github.com/TeaOSLab/EdgeNode.git
synced 2025-11-15 17:40:27 +08:00
提升单词匹配性能
This commit is contained in:
@@ -9,10 +9,30 @@ func ContainsAnyWord(s string, words []string, isCaseInsensitive bool) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var lastRune rune // last searching rune in s
|
||||||
|
var lastIndex = -2 // -2: not started, -1: not found, >=0: rune index
|
||||||
for _, word := range words {
|
for _, word := range words {
|
||||||
if ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive) {
|
var wordRunes = []rune(word)
|
||||||
return true
|
if len(wordRunes) == 0 {
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if lastIndex > -2 && lastRune == wordRunes[0] {
|
||||||
|
if lastIndex >= 0 {
|
||||||
|
result, _ := ContainsWordRunes(allRunes[lastIndex:], wordRunes, isCaseInsensitive)
|
||||||
|
if result {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
} else {
|
||||||
|
result, firstIndex := ContainsWordRunes(allRunes, wordRunes, isCaseInsensitive)
|
||||||
|
lastIndex = firstIndex
|
||||||
|
if result {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastRune = wordRunes[0]
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@@ -25,7 +45,7 @@ func ContainsAllWords(s string, words []string, isCaseInsensitive bool) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, word := range words {
|
for _, word := range words {
|
||||||
if !ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive) {
|
if result, _ := ContainsWordRunes(allRunes, []rune(word), isCaseInsensitive); !result {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -33,16 +53,22 @@ func ContainsAllWords(s string, words []string, isCaseInsensitive bool) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ContainsWordRunes 检查字符列表是否包含某个单词子字符列表
|
// ContainsWordRunes 检查字符列表是否包含某个单词子字符列表
|
||||||
func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) bool {
|
func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool) (result bool, firstIndex int) {
|
||||||
|
firstIndex = -1
|
||||||
|
|
||||||
var l = len(subRunes)
|
var l = len(subRunes)
|
||||||
if l == 0 {
|
if l == 0 {
|
||||||
return false
|
return false, 0
|
||||||
}
|
}
|
||||||
|
|
||||||
var al = len(allRunes)
|
var al = len(allRunes)
|
||||||
|
|
||||||
for index, r := range allRunes {
|
for index, r := range allRunes {
|
||||||
if EqualRune(r, subRunes[0], isCaseInsensitive) && (index == 0 || !isChar(allRunes[index-1]) /**boundary check **/) {
|
if EqualRune(r, subRunes[0], isCaseInsensitive) && (index == 0 || !isChar(allRunes[index-1]) /**boundary check **/) {
|
||||||
|
if firstIndex < 0 {
|
||||||
|
firstIndex = index
|
||||||
|
}
|
||||||
|
|
||||||
var found = true
|
var found = true
|
||||||
if l > 1 {
|
if l > 1 {
|
||||||
for i := 1; i < l; i++ {
|
for i := 1; i < l; i++ {
|
||||||
@@ -56,12 +82,12 @@ func ContainsWordRunes(allRunes []rune, subRunes []rune, isCaseInsensitive bool)
|
|||||||
|
|
||||||
// check after charset
|
// check after charset
|
||||||
if found && (al <= index+l || !isChar(allRunes[index+l]) /**boundary check **/) {
|
if found && (al <= index+l || !isChar(allRunes[index+l]) /**boundary check **/) {
|
||||||
return true
|
return true, firstIndex
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false
|
return false, firstIndex
|
||||||
}
|
}
|
||||||
|
|
||||||
// ContainsSubRunes 检查字符列表是否包含某个子子字符列表
|
// ContainsSubRunes 检查字符列表是否包含某个子子字符列表
|
||||||
|
|||||||
@@ -3,9 +3,13 @@
|
|||||||
package runes_test
|
package runes_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/TeaOSLab/EdgeNode/internal/re"
|
||||||
"github.com/TeaOSLab/EdgeNode/internal/utils/runes"
|
"github.com/TeaOSLab/EdgeNode/internal/utils/runes"
|
||||||
"github.com/iwind/TeaGo/assert"
|
"github.com/iwind/TeaGo/assert"
|
||||||
|
"regexp"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -25,6 +29,11 @@ func TestContainsAnyWord(t *testing.T) {
|
|||||||
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, true))
|
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, true))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestContainsAnyWord_Sort(t *testing.T) {
|
||||||
|
var a = assert.NewAssertion(t)
|
||||||
|
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"abc", "ant", "arm", "Hit", "Hi", "Pet", "pie", "are"}, false))
|
||||||
|
}
|
||||||
|
|
||||||
func TestContainsWordRunes(t *testing.T) {
|
func TestContainsWordRunes(t *testing.T) {
|
||||||
var a = assert.NewAssertion(t)
|
var a = assert.NewAssertion(t)
|
||||||
a.IsFalse(runes.ContainsWordRunes([]rune(""), []rune("How"), true))
|
a.IsFalse(runes.ContainsWordRunes([]rune(""), []rune("How"), true))
|
||||||
@@ -81,7 +90,45 @@ func BenchmarkContainsWordRunes(b *testing.B) {
|
|||||||
|
|
||||||
b.RunParallel(func(pb *testing.PB) {
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
for pb.Next() {
|
for pb.Next() {
|
||||||
_ = runes.ContainsWordRunes([]rune("How are you"), []rune("YOU"), true)
|
_, _ = runes.ContainsWordRunes([]rune("How are you"), []rune("YOU"), true)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkContainsAnyWord(b *testing.B) {
|
||||||
|
runtime.GOMAXPROCS(4)
|
||||||
|
|
||||||
|
var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n")
|
||||||
|
sort.Strings(words)
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
|
for pb.Next() {
|
||||||
|
_ = runes.ContainsAnyWord("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0", words, true)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkContainsAnyWord_Regexp(b *testing.B) {
|
||||||
|
runtime.GOMAXPROCS(4)
|
||||||
|
var reg = regexp.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|"))
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
|
for pb.Next() {
|
||||||
|
_ = reg.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkContainsAnyWord_Re(b *testing.B) {
|
||||||
|
runtime.GOMAXPROCS(4)
|
||||||
|
var reg = re.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|"))
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
b.RunParallel(func(pb *testing.PB) {
|
||||||
|
for pb.Next() {
|
||||||
|
_ = reg.MatchString("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0")
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user