diff --git a/internal/utils/cachehits/stat.go b/internal/utils/cachehits/stat.go
new file mode 100644
index 0000000..0c907d3
--- /dev/null
+++ b/internal/utils/cachehits/stat.go
@@ -0,0 +1,224 @@
+// Copyright 2023 GoEdge CDN goedge.cdn@gmail.com. All rights reserved. Official site: https://goedge.cn .
+
+package cachehits
+
+import (
+	"github.com/TeaOSLab/EdgeNode/internal/goman"
+	"github.com/TeaOSLab/EdgeNode/internal/utils"
+	"github.com/TeaOSLab/EdgeNode/internal/utils/fasttime"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// countSamples is the number of cache writes a category must exceed before
+// its hit ratio is evaluated and its verdict is frozen.
+const countSamples = 10_000
+
+// Item holds the counters and the frozen verdict of one category.
+//
+// countHits and countCached are incremented atomically while the owning
+// Stat's read lock is held; every other write happens under its write lock.
+type Item struct {
+	countHits   uint64
+	countCached uint64
+	timestamp   int64
+
+	isGood bool
+	isBad  bool
+}
+
+// Stat records, per category, how many values were written to a cache and how
+// many lookups hit it, so callers can stop caching categories that rarely hit.
+//
+// All methods are safe for concurrent use. The exported methods also accept a
+// nil *Stat, which behaves like an empty Stat: every category is good.
+type Stat struct {
+	goodRatio uint64
+	maxItems  int
+
+	itemMap map[string]*Item // category => *Item
+	mu      *sync.RWMutex
+
+	ticker *time.Ticker
+}
+
+// NewStat creates a Stat and starts its background reset loop.
+//
+// goodRatio is the minimum hit percentage (hits*100/cached) a category needs
+// to be considered good; 0 selects the default of 5.
+func NewStat(goodRatio uint64) *Stat {
+	if goodRatio == 0 {
+		goodRatio = 5
+	}
+
+	var maxItems = utils.SystemMemoryGB() * 10_000
+	if maxItems <= 0 {
+		maxItems = 100_000
+	}
+
+	var stat = &Stat{
+		goodRatio: goodRatio,
+		itemMap:   map[string]*Item{},
+		mu:        &sync.RWMutex{},
+		ticker:    time.NewTicker(24 * time.Hour),
+		maxItems:  maxItems,
+	}
+
+	goman.New(func() {
+		stat.init()
+	})
+	return stat
+}
+
+// init runs the daily reset loop: every item not reset for 7 days gets its
+// counters and verdict cleared so the category is sampled again.
+func (this *Stat) init() {
+	for range this.ticker.C {
+		var currentTime = fasttime.Now().Unix()
+
+		// The write lock is required: the verdict flags and timestamp are plain
+		// fields that the other methods read while holding the read lock.
+		this.mu.Lock()
+		for _, item := range this.itemMap {
+			if item.timestamp < currentTime-7*24*86400 {
+				// reset
+				atomic.StoreUint64(&item.countHits, 0)
+				atomic.StoreUint64(&item.countCached, 1)
+				item.timestamp = currentTime
+				item.isGood = false
+				item.isBad = false
+			}
+		}
+		this.mu.Unlock()
+	}
+}
+
+// IncreaseCached records one cache write for category, creating the item on
+// first use. Categories whose verdict is already frozen are left untouched.
+func (this *Stat) IncreaseCached(category string) {
+	if this == nil {
+		return
+	}
+
+	this.mu.RLock()
+	var item = this.itemMap[category]
+	if item != nil {
+		if !item.isGood && !item.isBad {
+			atomic.AddUint64(&item.countCached, 1)
+		}
+		this.mu.RUnlock()
+		return
+	}
+	this.mu.RUnlock()
+
+	this.mu.Lock()
+	defer this.mu.Unlock()
+
+	// Another goroutine may have inserted the category between the two locks;
+	// count on its item instead of replacing it and losing its counters.
+	item = this.itemMap[category]
+	if item != nil {
+		if !item.isGood && !item.isBad {
+			atomic.AddUint64(&item.countCached, 1)
+		}
+		return
+	}
+
+	if len(this.itemMap) >= this.maxItems {
+		// remove one randomly so the map never holds more than maxItems
+		for k := range this.itemMap {
+			delete(this.itemMap, k)
+			break
+		}
+	}
+
+	this.itemMap[category] = &Item{
+		countHits:   0,
+		countCached: 1,
+		timestamp:   fasttime.Now().Unix(),
+	}
+}
+
+// IncreaseHit records one cache hit for category. Unknown categories and
+// categories whose verdict is already frozen are ignored.
+func (this *Stat) IncreaseHit(category string) {
+	if this == nil {
+		return
+	}
+
+	this.mu.RLock()
+	defer this.mu.RUnlock()
+
+	var item = this.itemMap[category]
+	if item == nil || item.isGood || item.isBad {
+		return
+	}
+	atomic.AddUint64(&item.countHits, 1)
+}
+
+// IsGood reports whether caching is still worthwhile for category.
+//
+// It returns true for unknown categories and for categories that have not yet
+// exceeded countSamples cache writes. Once a category has enough samples its
+// verdict (hits*100/cached >= goodRatio) is computed once and frozen until
+// the background loop resets the item.
+func (this *Stat) IsGood(category string) bool {
+	if this == nil {
+		return true
+	}
+
+	// Fast path: verdict already frozen, or not enough samples yet.
+	this.mu.RLock()
+	var item = this.itemMap[category]
+	if item == nil {
+		this.mu.RUnlock()
+		return true
+	}
+	if item.isBad {
+		this.mu.RUnlock()
+		return false
+	}
+	if item.isGood || atomic.LoadUint64(&item.countCached) <= countSamples {
+		this.mu.RUnlock()
+		return true
+	}
+	this.mu.RUnlock()
+
+	// Slow path, taken about once per category: freezing the verdict writes
+	// plain fields, so it needs the write lock. Everything is re-checked
+	// because the item may have changed while no lock was held.
+	this.mu.Lock()
+	defer this.mu.Unlock()
+
+	item = this.itemMap[category]
+	if item == nil {
+		return true
+	}
+	if item.isBad {
+		return false
+	}
+	if item.isGood || item.countCached <= countSamples {
+		return true
+	}
+
+	var isGood = item.countHits*100/item.countCached >= this.goodRatio
+	if isGood {
+		item.isGood = true
+	} else {
+		item.isBad = true
+	}
+	return isGood
+}
+
+// Len returns the number of categories currently tracked.
+func (this *Stat) Len() int {
+	if this == nil {
+		return 0
+	}
+
+	this.mu.RLock()
+	defer this.mu.RUnlock()
+
+	return len(this.itemMap)
+}
diff --git a/internal/utils/cachehits/stat_test.go b/internal/utils/cachehits/stat_test.go
new file mode 100644
index 0000000..e031051
--- /dev/null
+++ b/internal/utils/cachehits/stat_test.go
@@ -0,0 +1,150 @@
+// Copyright 2023 GoEdge CDN goedge.cdn@gmail.com. All rights reserved. Official site: https://goedge.cn .
+
+package cachehits_test
+
+import (
+	"github.com/TeaOSLab/EdgeNode/internal/utils/cachehits"
+	"github.com/TeaOSLab/EdgeNode/internal/utils/testutils"
+	"github.com/iwind/TeaGo/assert"
+	"github.com/iwind/TeaGo/rands"
+	"github.com/iwind/TeaGo/types"
+	"runtime"
+	"strconv"
+	"sync"
+	"testing"
+	"time"
+)
+
+// TestNewStat checks the verdict on both sides of the sample-count threshold
+// and of the good-ratio boundary.
+func TestNewStat(t *testing.T) {
+	var a = assert.NewAssertion(t)
+
+	{
+		var stat = cachehits.NewStat(20)
+		for i := 0; i < 1000; i++ {
+			stat.IncreaseCached("a")
+		}
+
+		a.IsTrue(stat.IsGood("a"))
+	}
+
+	{
+		var stat = cachehits.NewStat(5)
+		for i := 0; i < 10000; i++ {
+			stat.IncreaseCached("a")
+		}
+		for i := 0; i < 500; i++ {
+			stat.IncreaseHit("a")
+		}
+
+		stat.IncreaseHit("b") // empty
+
+		a.IsTrue(stat.IsGood("a"))
+		a.IsTrue(stat.IsGood("b"))
+	}
+
+	{
+		var stat = cachehits.NewStat(10)
+		for i := 0; i < 10000; i++ {
+			stat.IncreaseCached("a")
+		}
+		for i := 0; i < 1000; i++ {
+			stat.IncreaseHit("a")
+		}
+
+		stat.IncreaseHit("b") // empty
+
+		a.IsTrue(stat.IsGood("a"))
+		a.IsTrue(stat.IsGood("b"))
+	}
+
+	{
+		var stat = cachehits.NewStat(5)
+		for i := 0; i < 10001; i++ {
+			stat.IncreaseCached("a")
+		}
+		for i := 0; i < 499; i++ {
+			stat.IncreaseHit("a")
+		}
+
+		a.IsFalse(stat.IsGood("a"))
+	}
+}
+
+// TestStat_Nil checks that a nil *Stat can be used without panicking.
+func TestStat_Nil(t *testing.T) {
+	var a = assert.NewAssertion(t)
+
+	var stat *cachehits.Stat
+	stat.IncreaseCached("a")
+	stat.IncreaseHit("a")
+
+	a.IsTrue(stat.IsGood("a"))
+	a.IsTrue(stat.Len() == 0)
+}
+
+// TestStat_Concurrent hammers one category from several goroutines; run it
+// with -race to check the locking.
+func TestStat_Concurrent(t *testing.T) {
+	var a = assert.NewAssertion(t)
+
+	var stat = cachehits.NewStat(5)
+	var wg = sync.WaitGroup{}
+	for i := 0; i < 4; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for j := 0; j < 5_000; j++ {
+				stat.IncreaseCached("a")
+				stat.IncreaseHit("a")
+				_ = stat.IsGood("a")
+			}
+		}()
+	}
+	wg.Wait()
+
+	a.IsTrue(stat.Len() == 1)
+	a.IsTrue(stat.IsGood("a"))
+}
+
+// TestNewStat_Memory inserts 10 million categories and then sleeps so memory
+// usage can be inspected by hand; it only runs in single-test mode.
+func TestNewStat_Memory(t *testing.T) {
+	if !testutils.IsSingleTesting() {
+		return
+	}
+
+	var stat = cachehits.NewStat(20)
+	for i := 0; i < 10_000_000; i++ {
+		stat.IncreaseCached("a" + types.String(i))
+	}
+
+	time.Sleep(60 * time.Second)
+
+	t.Log(stat.Len())
+}
+
+// BenchmarkStat measures mixed IncreaseCached/IncreaseHit/IsGood calls running
+// in parallel.
+func BenchmarkStat(b *testing.B) {
+	runtime.GOMAXPROCS(4)
+
+	var stat = cachehits.NewStat(5)
+	for i := 0; i < 1_000_000; i++ {
+		stat.IncreaseCached("a" + types.String(i))
+	}
+
+	b.ResetTimer()
+
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			var key = strconv.Itoa(rands.Int(0, 100_000))
+			stat.IncreaseCached(key)
+			if rands.Int(0, 3) == 0 {
+				stat.IncreaseHit(key)
+			}
+			_ = stat.IsGood(key)
+		}
+	})
+}
diff --git a/internal/waf/utils/utils.go b/internal/waf/utils/utils.go
index
ac85432..8bf060a 100644
--- a/internal/waf/utils/utils.go
+++ b/internal/waf/utils/utils.go
@@ -1,14 +1,28 @@
 package utils
 
 import (
+	teaconst "github.com/TeaOSLab/EdgeNode/internal/const"
 	"github.com/TeaOSLab/EdgeNode/internal/re"
 	"github.com/TeaOSLab/EdgeNode/internal/ttlcache"
+	"github.com/TeaOSLab/EdgeNode/internal/utils/cachehits"
 	"github.com/TeaOSLab/EdgeNode/internal/utils/fasttime"
 	"github.com/cespare/xxhash"
 	"strconv"
 )
 
 var cache = ttlcache.NewCache[int8]()
+var cacheHits *cachehits.Stat
+
+// init creates the shared hit statistics with a good ratio of 5, but only
+// when teaconst.IsMain is set; otherwise cacheHits stays nil.
+// NOTE(review): MatchStringCache and MatchBytesCache call cacheHits without a
+// nil check - confirm they never run while teaconst.IsMain is false.
+func init() {
+	if !teaconst.IsMain {
+		return
+	}
+	cacheHits = cachehits.NewStat(5)
+}
 
 const (
 	maxCacheDataSize = 1024
@@ -29,15 +43,18 @@ func MatchStringCache(regex *re.Regexp, s string, cacheLife CacheLife) bool {
 		return false
 	}
 
+	var regIdString = regex.IdString()
+
 	// 如果长度超过一定数量,大概率是不能重用的
-	if cacheLife <= 0 || len(s) > maxCacheDataSize {
+	if cacheLife <= 0 || len(s) > maxCacheDataSize || !cacheHits.IsGood(regIdString) {
 		return regex.MatchString(s)
 	}
 
 	var hash = xxhash.Sum64String(s)
-	var key = regex.IdString() + "@" + strconv.FormatUint(hash, 10)
+	var key = regIdString + "@" + strconv.FormatUint(hash, 10)
 	var item = cache.Read(key)
 	if item != nil {
+		cacheHits.IncreaseHit(regIdString)
 		return item.Value == 1
 	}
 	var b = regex.MatchString(s)
@@ -46,6 +63,7 @@ func MatchStringCache(regex *re.Regexp, s string, cacheLife CacheLife) bool {
 	} else {
 		cache.Write(key, 0, fasttime.Now().Unix()+cacheLife)
 	}
+	cacheHits.IncreaseCached(regIdString)
 	return b
 }
 
@@ -55,15 +73,18 @@ func MatchBytesCache(regex *re.Regexp, byteSlice []byte, cacheLife CacheLife) bo
 		return false
 	}
 
+	var regIdString = regex.IdString()
+
 	// 如果长度超过一定数量,大概率是不能重用的
-	if cacheLife <= 0 || len(byteSlice) > maxCacheDataSize {
+	if cacheLife <= 0 || len(byteSlice) > maxCacheDataSize || !cacheHits.IsGood(regIdString) {
 		return regex.Match(byteSlice)
 	}
 
 	var hash = xxhash.Sum64(byteSlice)
-	var key = regex.IdString() + "@" + strconv.FormatUint(hash, 10)
+	var key = regIdString + "@" + strconv.FormatUint(hash, 10)
 	var item = cache.Read(key)
 	if item != nil {
+		cacheHits.IncreaseHit(regIdString)
 		return item.Value == 1
 	}
 	if item != nil {
@@ -75,5 +96,6 @@ func MatchBytesCache(regex *re.Regexp, byteSlice []byte, cacheLife CacheLife) bo
 	} else {
 		cache.Write(key, 0, fasttime.Now().Unix()+cacheLife)
 	}
+	cacheHits.IncreaseCached(regIdString)
 	return b
 }
diff --git a/internal/waf/utils/utils_test.go b/internal/waf/utils/utils_test.go
index 3aa9a29..9bb53dc 100644
--- a/internal/waf/utils/utils_test.go
+++ b/internal/waf/utils/utils_test.go
@@ -2,7 +2,9 @@ package utils_test
 
 import (
 	"github.com/TeaOSLab/EdgeNode/internal/re"
+	"github.com/TeaOSLab/EdgeNode/internal/utils/testutils"
 	"github.com/TeaOSLab/EdgeNode/internal/waf/utils"
+	"github.com/iwind/TeaGo/rands"
 	"net/http"
 	"regexp"
 	"runtime"
@@ -27,7 +29,11 @@ func TestMatchBytesCache(t *testing.T) {
 }
 
 func TestMatchRemoteCache(t *testing.T) {
-	client := http.Client{}
+	if !testutils.IsSingleTesting() {
+		return
+	}
+
+	var client = http.Client{}
 	for i := 0; i < 200_0000; i++ {
 		req, err := http.NewRequest(http.MethodGet, "http://192.168.2.30:8882/?arg="+strconv.Itoa(i), nil)
 		if err != nil {
@@ -63,6 +69,22 @@ func BenchmarkMatchStringCache(b *testing.B) {
 	}
 }
 
+// BenchmarkMatchStringCache_LowHit measures MatchStringCache when almost no
+// input repeats, so cached results are practically never hit.
+func BenchmarkMatchStringCache_LowHit(b *testing.B) {
+	runtime.GOMAXPROCS(1)
+
+	var regex = re.MustCompile(`(?iU)\b(eval|system|exec|execute|passthru|shell_exec|phpinfo)\b`)
+	//b.Log(regex.Keywords())
+
+	for i := 0; i < b.N; i++ {
+		// The iteration number makes every input unique; the random length alone
+		// yields only about 101 distinct strings, which would nearly always hit.
+		var data = strings.Repeat("A", rands.Int(0, 100)) + strconv.Itoa(i)
+		_ = utils.MatchStringCache(regex, data, utils.CacheShortLife)
+	}
+}
+
 func BenchmarkMatchStringCache_WithoutCache(b *testing.B) {
 	runtime.GOMAXPROCS(1)
 