对WAF正则缓存增加命中率检查

This commit is contained in:
刘祥超
2023-10-12 20:10:30 +08:00
parent adb0069c59
commit 3aa68b5ffc
4 changed files with 309 additions and 5 deletions

View File

@@ -0,0 +1,161 @@
// Copyright 2023 GoEdge CDN goedge.cdn@gmail.com. All rights reserved. Official site: https://goedge.cn .
package cachehits
import (
"github.com/TeaOSLab/EdgeNode/internal/goman"
"github.com/TeaOSLab/EdgeNode/internal/utils"
"github.com/TeaOSLab/EdgeNode/internal/utils/fasttime"
"sync"
"sync/atomic"
"time"
)
// countSamples is the number of cached entries a category must exceed before
// Stat.IsGood evaluates its hit ratio; below that, the category is treated as good.
const countSamples = 10_000
// Item holds the statistics collected for a single category.
type Item struct {
	// countHits is incremented atomically by Stat.IncreaseHit.
	countHits uint64
	// countCached starts at 1 when the item is created and is incremented
	// atomically by Stat.IncreaseCached.
	countCached uint64
	// timestamp is the Unix time in seconds at which the item was created or last reset.
	timestamp int64
	// isGood and isBad store the verdict computed by Stat.IsGood. Once either
	// is set, the counters above are no longer updated.
	isGood bool
	isBad bool
}
// Stat tracks, per category, how many entries were cached and how many cache
// hits followed, so callers can ask whether caching a category is worthwhile.
type Stat struct {
	// goodRatio is the minimum value of countHits*100/countCached for a
	// category to be judged good.
	goodRatio uint64
	// maxItems bounds the size of itemMap; IncreaseCached evicts an arbitrary
	// entry when the bound is exceeded.
	maxItems int
	itemMap map[string]*Item // category => *Item
	// mu guards itemMap and the fields of its items.
	mu *sync.RWMutex
	// ticker drives the periodic reset of old items in init.
	ticker *time.Ticker
}
// NewStat creates a Stat and starts its background reset loop.
//
// goodRatio is the minimum countHits*100/countCached value for a category to
// be judged good by IsGood; 0 selects the default of 5.
//
// The item limit is utils.SystemMemoryGB() * 10_000, falling back to 100_000
// when that product is not positive. The reset loop is started through
// goman.New and is driven by a 24-hour ticker that is never stopped in this
// file, so the goroutine lives as long as the process.
func NewStat(goodRatio uint64) *Stat {
	const (
		defaultGoodRatio = 5
		itemsPerGB       = 10_000
		fallbackMaxItems = 100_000
	)

	if goodRatio == 0 {
		goodRatio = defaultGoodRatio
	}

	var limit = utils.SystemMemoryGB() * itemsPerGB
	if limit <= 0 {
		limit = fallbackMaxItems
	}

	var result = new(Stat)
	result.goodRatio = goodRatio
	result.maxItems = limit
	result.itemMap = map[string]*Item{}
	result.mu = &sync.RWMutex{}
	result.ticker = time.NewTicker(24 * time.Hour)

	goman.New(func() {
		result.init()
	})

	return result
}
// init runs the periodic maintenance loop: on every tick it resets each item
// whose timestamp is more than seven days old, clearing its verdict so the
// category is sampled again. It returns only when the ticker channel is closed.
func (this *Stat) init() {
	// Items older than this many seconds are reset.
	const maxAge = 7 * 24 * 86400

	for range this.ticker.C {
		var currentTime = fasttime.Now().Unix()

		// The loop mutates item fields, so it needs the exclusive lock:
		// IncreaseCached, IncreaseHit and IsGood touch the same fields while
		// holding only the shared lock.
		this.mu.Lock()
		for _, item := range this.itemMap {
			if item.timestamp < currentTime-maxAge {
				// reset
				item.countHits = 0
				item.countCached = 1
				item.timestamp = currentTime
				item.isGood = false
				item.isBad = false
			}
		}
		this.mu.Unlock()
	}
}
// IncreaseCached records that one more entry was written to the cache for
// category.
//
// For a known category the counter is bumped under the shared lock, unless a
// verdict (isGood/isBad) has already been reached, in which case nothing
// changes. For an unknown category a new item is created with countCached = 1;
// if the map is already at maxItems, one arbitrary entry is evicted first.
func (this *Stat) IncreaseCached(category string) {
	// Fast path: the category is already tracked.
	this.mu.RLock()
	var item = this.itemMap[category]
	if item != nil {
		if !item.isGood && !item.isBad {
			atomic.AddUint64(&item.countCached, 1)
		}
		this.mu.RUnlock()
		return
	}
	this.mu.RUnlock()

	this.mu.Lock()
	defer this.mu.Unlock()

	// Another goroutine may have inserted the category between RUnlock and
	// Lock; re-check so its item (and the counts already recorded on it) is
	// not replaced by a fresh one.
	item = this.itemMap[category]
	if item != nil {
		if !item.isGood && !item.isBad {
			atomic.AddUint64(&item.countCached, 1)
		}
		return
	}

	// Keep the map within maxItems: make room before inserting.
	if len(this.itemMap) >= this.maxItems {
		// remove one randomly (map iteration order is unspecified)
		for k := range this.itemMap {
			delete(this.itemMap, k)
			break
		}
	}

	this.itemMap[category] = &Item{
		countHits:   0,
		countCached: 1,
		timestamp:   fasttime.Now().Unix(),
	}
}
// IncreaseHit records one cache hit for category.
//
// Unknown categories are ignored (no item is created here), and so are
// categories that already carry a verdict (isGood or isBad).
func (this *Stat) IncreaseHit(category string) {
	this.mu.RLock()

	var found = this.itemMap[category]
	var stillSampling = found != nil && !found.isGood && !found.isBad
	if stillSampling {
		atomic.AddUint64(&found.countHits, 1)
	}

	this.mu.RUnlock()
}
// IsGood reports whether caching is considered worthwhile for category.
//
// It returns true for unknown categories and for categories whose countCached
// has not yet exceeded countSamples. Once enough samples exist, the verdict
// countHits*100/countCached >= goodRatio is computed once, stored on the item
// (isGood or isBad) and returned by all later calls.
func (this *Stat) IsGood(category string) bool {
	// Fast path under the shared lock: answer from an existing verdict, or
	// find out whether one has to be computed.
	this.mu.RLock()
	var item = this.itemMap[category]
	var result = true
	var needsVerdict = false
	if item != nil {
		if item.isBad {
			result = false
		} else if !item.isGood {
			// The counter is updated atomically by writers holding the same
			// shared lock, so it must be loaded atomically here.
			needsVerdict = atomic.LoadUint64(&item.countCached) > countSamples
		}
	}
	this.mu.RUnlock()

	if !needsVerdict {
		return result
	}

	// Slow path: storing the verdict writes to the item, which requires the
	// exclusive lock because other goroutines read isGood/isBad under RLock.
	this.mu.Lock()
	defer this.mu.Unlock()

	// The state may have changed while no lock was held (eviction, reset or a
	// verdict stored by another goroutine), so re-check everything.
	item = this.itemMap[category]
	if item == nil {
		return true
	}
	if item.isBad {
		return false
	}
	if item.isGood {
		return true
	}
	if item.countCached <= countSamples {
		return true
	}

	// countCached > countSamples > 0 here, so the division is safe.
	var isGood = item.countHits*100/item.countCached >= this.goodRatio
	if isGood {
		item.isGood = true
	} else {
		item.isBad = true
	}
	return isGood
}
// Len returns the number of categories currently tracked.
func (this *Stat) Len() int {
	this.mu.RLock()
	var size = len(this.itemMap)
	this.mu.RUnlock()

	return size
}

View File

@@ -0,0 +1,107 @@
// Copyright 2023 GoEdge CDN goedge.cdn@gmail.com. All rights reserved. Official site: https://goedge.cn .
package cachehits_test
import (
"github.com/TeaOSLab/EdgeNode/internal/utils/cachehits"
"github.com/TeaOSLab/EdgeNode/internal/utils/testutils"
"github.com/iwind/TeaGo/assert"
"github.com/iwind/TeaGo/rands"
"github.com/iwind/TeaGo/types"
"runtime"
"strconv"
"testing"
"time"
)
// TestNewStat checks the verdicts returned by IsGood for category "a" after
// different numbers of cached entries and hits, and that a category that only
// received a hit ("b") is still reported as good.
func TestNewStat(t *testing.T) {
	var a = assert.NewAssertion(t)

	// feed records `cached` cache writes followed by `hits` cache hits for category "a".
	var feed = func(stat *cachehits.Stat, cached int, hits int) {
		for n := 0; n < cached; n++ {
			stat.IncreaseCached("a")
		}
		for n := 0; n < hits; n++ {
			stat.IncreaseHit("a")
		}
	}

	// Far below the sample threshold.
	{
		var stat = cachehits.NewStat(20)
		feed(stat, 1000, 0)
		a.IsTrue(stat.IsGood("a"))
	}

	// 10000 cached entries, 500 hits, ratio 5.
	{
		var stat = cachehits.NewStat(5)
		feed(stat, 10000, 500)
		stat.IncreaseHit("b") // empty
		a.IsTrue(stat.IsGood("a"))
		a.IsTrue(stat.IsGood("b"))
	}

	// 10000 cached entries, 1000 hits, ratio 10.
	{
		var stat = cachehits.NewStat(10)
		feed(stat, 10000, 1000)
		stat.IncreaseHit("b") // empty
		a.IsTrue(stat.IsGood("a"))
		a.IsTrue(stat.IsGood("b"))
	}

	// 10001 cached entries, 499 hits, ratio 5: expected to be rejected.
	{
		var stat = cachehits.NewStat(5)
		feed(stat, 10001, 499)
		a.IsFalse(stat.IsGood("a"))
	}
}
// TestNewStat_Memory inserts ten million distinct categories, waits a minute
// and logs how many are tracked. It only runs when
// testutils.IsSingleTesting() returns true.
func TestNewStat_Memory(t *testing.T) {
	if !testutils.IsSingleTesting() {
		return
	}

	const totalCategories = 10_000_000

	var stat = cachehits.NewStat(20)
	for n := 0; n < totalCategories; n++ {
		var category = "a" + types.String(n)
		stat.IncreaseCached(category)
	}

	time.Sleep(time.Minute)

	t.Log(stat.Len())
}
// BenchmarkStat measures IncreaseCached, IncreaseHit and IsGood running in
// parallel against a Stat that was pre-filled with one million "a"-prefixed
// categories. The timed loop uses random numeric keys without that prefix.
func BenchmarkStat(b *testing.B) {
	runtime.GOMAXPROCS(4)

	var stat = cachehits.NewStat(5)
	for n := 0; n < 1_000_000; n++ {
		stat.IncreaseCached("a" + types.String(n))
	}

	// visit performs one round of operations on a random category.
	var visit = func() {
		var category = strconv.Itoa(rands.Int(0, 100_000))
		stat.IncreaseCached(category)
		if rands.Int(0, 3) == 0 {
			stat.IncreaseHit(category)
		}
		_ = stat.IsGood(category)
	}

	b.ResetTimer()

	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			visit()
		}
	})
}

View File

@@ -1,14 +1,24 @@
package utils
import (
teaconst "github.com/TeaOSLab/EdgeNode/internal/const"
"github.com/TeaOSLab/EdgeNode/internal/re"
"github.com/TeaOSLab/EdgeNode/internal/ttlcache"
"github.com/TeaOSLab/EdgeNode/internal/utils/cachehits"
"github.com/TeaOSLab/EdgeNode/internal/utils/fasttime"
"github.com/cespare/xxhash"
"strconv"
)
// cache stores regexp match results keyed by regexp id and input hash; the
// match functions in this file write 1 for a match and 0 otherwise.
var cache = ttlcache.NewCache[int8]()

// cacheHits collects per-regexp cache hit statistics. It is assigned in init
// only when teaconst.IsMain is true and is nil otherwise.
var cacheHits *cachehits.Stat
// init creates the shared hit statistics (good ratio 5) when teaconst.IsMain
// is set; otherwise cacheHits is left nil.
//
// NOTE(review): MatchStringCache and MatchBytesCache call cacheHits.IsGood
// unconditionally, which would dereference a nil *Stat when teaconst.IsMain is
// false. Confirm those functions are never reached in that mode.
func init() {
	if teaconst.IsMain {
		cacheHits = cachehits.NewStat(5)
	}
}
const (
maxCacheDataSize = 1024
@@ -29,15 +39,18 @@ func MatchStringCache(regex *re.Regexp, s string, cacheLife CacheLife) bool {
return false
}
var regIdString = regex.IdString()
// 如果长度超过一定数量,大概率是不能重用的
if cacheLife <= 0 || len(s) > maxCacheDataSize {
if cacheLife <= 0 || len(s) > maxCacheDataSize || !cacheHits.IsGood(regIdString) {
return regex.MatchString(s)
}
var hash = xxhash.Sum64String(s)
var key = regex.IdString() + "@" + strconv.FormatUint(hash, 10)
var key = regIdString + "@" + strconv.FormatUint(hash, 10)
var item = cache.Read(key)
if item != nil {
cacheHits.IncreaseHit(regIdString)
return item.Value == 1
}
var b = regex.MatchString(s)
@@ -46,6 +59,7 @@ func MatchStringCache(regex *re.Regexp, s string, cacheLife CacheLife) bool {
} else {
cache.Write(key, 0, fasttime.Now().Unix()+cacheLife)
}
cacheHits.IncreaseCached(regIdString)
return b
}
@@ -55,15 +69,18 @@ func MatchBytesCache(regex *re.Regexp, byteSlice []byte, cacheLife CacheLife) bo
return false
}
var regIdString = regex.IdString()
// 如果长度超过一定数量,大概率是不能重用的
if cacheLife <= 0 || len(byteSlice) > maxCacheDataSize {
if cacheLife <= 0 || len(byteSlice) > maxCacheDataSize || !cacheHits.IsGood(regIdString) {
return regex.Match(byteSlice)
}
var hash = xxhash.Sum64(byteSlice)
var key = regex.IdString() + "@" + strconv.FormatUint(hash, 10)
var key = regIdString + "@" + strconv.FormatUint(hash, 10)
var item = cache.Read(key)
if item != nil {
cacheHits.IncreaseHit(regIdString)
return item.Value == 1
}
if item != nil {
@@ -75,5 +92,6 @@ func MatchBytesCache(regex *re.Regexp, byteSlice []byte, cacheLife CacheLife) bo
} else {
cache.Write(key, 0, fasttime.Now().Unix()+cacheLife)
}
cacheHits.IncreaseCached(regIdString)
return b
}

View File

@@ -2,7 +2,9 @@ package utils_test
import (
"github.com/TeaOSLab/EdgeNode/internal/re"
"github.com/TeaOSLab/EdgeNode/internal/utils/testutils"
"github.com/TeaOSLab/EdgeNode/internal/waf/utils"
"github.com/iwind/TeaGo/rands"
"net/http"
"regexp"
"runtime"
@@ -27,7 +29,11 @@ func TestMatchBytesCache(t *testing.T) {
}
func TestMatchRemoteCache(t *testing.T) {
client := http.Client{}
if !testutils.IsSingleTesting() {
return
}
var client = http.Client{}
for i := 0; i < 200_0000; i++ {
req, err := http.NewRequest(http.MethodGet, "http://192.168.2.30:8882/?arg="+strconv.Itoa(i), nil)
if err != nil {
@@ -63,6 +69,18 @@ func BenchmarkMatchStringCache(b *testing.B) {
}
}
// BenchmarkMatchStringCache_LowHit measures MatchStringCache on a single
// goroutine with inputs made of 'A' repeated a random number of times, as
// drawn by rands.Int(0, 100).
func BenchmarkMatchStringCache_LowHit(b *testing.B) {
	runtime.GOMAXPROCS(1)

	var regex = re.MustCompile(`(?iU)\b(eval|system|exec|execute|passthru|shell_exec|phpinfo)\b`)
	//b.Log(regex.Keywords())

	for remaining := b.N; remaining > 0; remaining-- {
		var size = rands.Int(0, 100)
		var input = strings.Repeat("A", size)
		_ = utils.MatchStringCache(regex, input, utils.CacheShortLife)
	}
}
func BenchmarkMatchStringCache_WithoutCache(b *testing.B) {
runtime.GOMAXPROCS(1)