优化WAF单词匹配性能

This commit is contained in:
GoEdgeLab
2023-12-09 19:19:29 +08:00
parent df7174c892
commit 3e2c5e876d
3 changed files with 70 additions and 2 deletions

View File

@@ -2,6 +2,40 @@
package runes
// ContainsAnyWordRunes 直接使用rune检查字符串是否包含任一单词
func ContainsAnyWordRunes(s string, words [][]rune, isCaseInsensitive bool) bool {
var allRunes = []rune(s)
if len(allRunes) == 0 || len(words) == 0 {
return false
}
var lastRune rune // last searching rune in s
var lastIndex = -2 // -2: not started, -1: not found, >=0: rune index
for _, wordRunes := range words {
if len(wordRunes) == 0 {
continue
}
if lastIndex > -2 && lastRune == wordRunes[0] {
if lastIndex >= 0 {
result, _ := ContainsWordRunes(allRunes[lastIndex:], wordRunes, isCaseInsensitive)
if result {
return true
}
}
continue
} else {
result, firstIndex := ContainsWordRunes(allRunes, wordRunes, isCaseInsensitive)
lastIndex = firstIndex
if result {
return true
}
}
lastRune = wordRunes[0]
}
return false
}
// ContainsAnyWord 检查字符串是否包含任一单词
func ContainsAnyWord(s string, words []string, isCaseInsensitive bool) bool {
var allRunes = []rune(s)

View File

@@ -27,6 +27,7 @@ func TestContainsAnyWord(t *testing.T) {
a.IsFalse(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, false))
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how"}, true))
a.IsTrue(runes.ContainsAnyWord("How are you?", []string{"how", "ok"}, true))
a.IsTrue(runes.ContainsAnyWord("How-are you?", []string{"how", "ok"}, true))
}
func TestContainsAnyWord_Sort(t *testing.T) {
@@ -100,6 +101,12 @@ func BenchmarkContainsAnyWord(b *testing.B) {
var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n")
sort.Strings(words)
var wordRunes = [][]rune{}
for _, word := range words {
wordRunes = append(wordRunes, []rune(word))
}
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
@@ -109,6 +116,27 @@ func BenchmarkContainsAnyWord(b *testing.B) {
})
}
func BenchmarkContainsAnyWordRunes(b *testing.B) {
runtime.GOMAXPROCS(4)
var words = strings.Split("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n")
sort.Strings(words)
var wordRunes = [][]rune{}
for _, word := range words {
wordRunes = append(wordRunes, []rune(word))
}
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
_ = runes.ContainsAnyWordRunes("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_0_0) AppleWebKit/500.00 (KHTML, like Gecko) Chrome/100.0.0.0", wordRunes, true)
}
})
}
func BenchmarkContainsAnyWord_Regexp(b *testing.B) {
runtime.GOMAXPROCS(4)
var reg = regexp.MustCompile("(?i)" + strings.ReplaceAll("python\npycurl\nhttp-client\nhttpclient\napachebench\nnethttp\nhttp_request\njava\nperl\nruby\nscrapy\nphp\nrust", "\n", "|"))

View File

@@ -53,6 +53,7 @@ type Rule struct {
ipRangeListValue *values.IPRangeList
stringValues []string
stringValueRunes [][]rune
ipList *values.StringList
floatValue float64
@@ -97,6 +98,11 @@ func (this *Rule) Init() error {
if this.Operator == RuleOperatorContainsAnyWord || this.Operator == RuleOperatorContainsAllWords || this.Operator == RuleOperatorNotContainsAnyWord {
sort.Strings(this.stringValues)
}
this.stringValueRunes = [][]rune{}
for _, line := range this.stringValues {
this.stringValueRunes = append(this.stringValueRunes, []rune(line))
}
}
case RuleOperatorMatch:
var v = this.Value
@@ -567,11 +573,11 @@ func (this *Rule) Test(value any) bool {
}
return false
case RuleOperatorContainsAnyWord:
return runes.ContainsAnyWord(this.stringifyValue(value), this.stringValues, this.IsCaseInsensitive)
return runes.ContainsAnyWordRunes(this.stringifyValue(value), this.stringValueRunes, this.IsCaseInsensitive)
case RuleOperatorContainsAllWords:
return runes.ContainsAllWords(this.stringifyValue(value), this.stringValues, this.IsCaseInsensitive)
case RuleOperatorNotContainsAnyWord:
return !runes.ContainsAnyWord(this.stringifyValue(value), this.stringValues, this.IsCaseInsensitive)
return !runes.ContainsAnyWordRunes(this.stringifyValue(value), this.stringValueRunes, this.IsCaseInsensitive)
case RuleOperatorContainsSQLInjection:
if value == nil {
return false