diff --git a/internal/stats/user_agent_parser.go b/internal/stats/user_agent_parser.go
index f740264..680e642 100644
--- a/internal/stats/user_agent_parser.go
+++ b/internal/stats/user_agent_parser.go
@@ -3,38 +3,54 @@
 package stats
 
 import (
+	"github.com/TeaOSLab/EdgeNode/internal/goman"
 	"github.com/TeaOSLab/EdgeNode/internal/utils"
 	"github.com/TeaOSLab/EdgeNode/internal/utils/fnv"
+	syncutils "github.com/TeaOSLab/EdgeNode/internal/utils/sync"
 	"github.com/mssola/useragent"
 	"sync"
+	"time"
 )
 
 var SharedUserAgentParser = NewUserAgentParser()
 
+// userAgentShardingCount is the number of cache shards; each shard has its own map and lock.
+const userAgentShardingCount = 8
+
 // UserAgentParser UserAgent解析器
 type UserAgentParser struct {
-	parser *useragent.UserAgent
+	// cacheMaps[i] is only accessed while shard i of mu is held.
+	cacheMaps [userAgentShardingCount]map[uint64]UserAgentParserResult
+	pool      *sync.Pool
+	mu        *syncutils.RWMutex
 
-	cacheMap1     map[uint64]UserAgentParserResult
-	cacheMap2     map[uint64]UserAgentParserResult
 	maxCacheItems int
-
-	cacheCursor int
-	locker      sync.RWMutex
+	gcTicker      *time.Ticker // drives the periodic GC started by init
+	gcIndex       int          // next shard that GC will empty
+	gcLocker      sync.Mutex   // serializes GC runs and guards gcIndex
 }
 
+// NewUserAgentParser creates a parser with empty cache shards and starts its periodic GC via init.
 func NewUserAgentParser() *UserAgentParser {
 	var parser = &UserAgentParser{
-		parser:      &useragent.UserAgent{},
-		cacheMap1:   map[uint64]UserAgentParserResult{},
-		cacheMap2:   map[uint64]UserAgentParserResult{},
-		cacheCursor: 0,
+		pool: &sync.Pool{
+			New: func() any {
+				return &useragent.UserAgent{}
+			},
+		},
+		cacheMaps: [userAgentShardingCount]map[uint64]UserAgentParserResult{},
+		mu:        syncutils.NewRWMutex(userAgentShardingCount),
+	}
+
+	for i := 0; i < userAgentShardingCount; i++ {
+		parser.cacheMaps[i] = map[uint64]UserAgentParserResult{}
 	}
 
 	parser.init()
 	return parser
 }
 
+// init sets maxCacheItems and starts a goroutine that runs GC every 5 seconds.
 func (this *UserAgentParser) init() {
 	var maxCacheItems = 10_000
 	var systemMemory = utils.SystemMemoryGB()
@@ -46,8 +62,16 @@ func (this *UserAgentParser) init() {
 		maxCacheItems = 20_000
 	}
 	this.maxCacheItems = maxCacheItems
+
+	this.gcTicker = time.NewTicker(5 * time.Second)
+	goman.New(func() {
+		for range this.gcTicker.C {
+			this.GC()
+		}
+	})
 }
 
+// Parse parses userAgent into OS, browser and mobile info, caching results per shard.
 func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResult) {
 	// 限制长度
 	if len(userAgent) == 0 || len(userAgent) > 256 {
@@ -55,28 +79,23 @@ func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResu
 	}
 
 	var userAgentKey = fnv.HashString(userAgent)
+	var shardingIndex = int(userAgentKey % userAgentShardingCount)
 
-	this.locker.RLock()
-	cacheResult, ok := this.cacheMap1[userAgentKey]
+	this.mu.RLock(shardingIndex)
+	cacheResult, ok := this.cacheMaps[shardingIndex][userAgentKey]
 	if ok {
-		this.locker.RUnlock()
+		this.mu.RUnlock(shardingIndex)
 		return cacheResult
 	}
+	this.mu.RUnlock(shardingIndex)
 
-	cacheResult, ok = this.cacheMap2[userAgentKey]
-	if ok {
-		this.locker.RUnlock()
-		return cacheResult
-	}
-	this.locker.RUnlock()
-
-	this.locker.Lock()
-	defer this.locker.Unlock()
-
-	this.parser.Parse(userAgent)
-	result.OS = this.parser.OSInfo()
-	result.BrowserName, result.BrowserVersion = this.parser.Browser()
-	result.IsMobile = this.parser.Mobile()
+	// No lock is held while parsing; concurrent misses on the same key each parse and store the same result.
+	var parser = this.pool.Get().(*useragent.UserAgent)
+	parser.Parse(userAgent)
+	result.OS = parser.OSInfo()
+	result.BrowserName, result.BrowserVersion = parser.Browser()
+	result.IsMobile = parser.Mobile()
+	this.pool.Put(parser)
 
 	// 忽略特殊字符
 	if len(result.BrowserName) > 0 {
@@ -87,19 +106,50 @@ func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResu
 		}
 	}
 
-	if this.cacheCursor == 0 {
-		this.cacheMap1[userAgentKey] = result
-		if len(this.cacheMap1) >= this.maxCacheItems {
-			this.cacheCursor = 1
-			this.cacheMap2 = map[uint64]UserAgentParserResult{}
-		}
-	} else {
-		this.cacheMap2[userAgentKey] = result
-		if len(this.cacheMap2) >= this.maxCacheItems {
-			this.cacheCursor = 0
-			this.cacheMap1 = map[uint64]UserAgentParserResult{}
-		}
-	}
+	this.mu.Lock(shardingIndex)
+	this.cacheMaps[shardingIndex][userAgentKey] = result
+	this.mu.Unlock(shardingIndex)
 
 	return
 }
+
+// MaxCacheItems returns the cache size limit that GC enforces.
+func (this *UserAgentParser) MaxCacheItems() int {
+	return this.maxCacheItems
+}
+
+// Len returns the number of cached results summed over all shards.
+// Shards are read-locked one at a time, so the sum is not an atomic snapshot.
+func (this *UserAgentParser) Len() int {
+	var total = 0
+	for i := 0; i < userAgentShardingCount; i++ {
+		this.mu.RLock(i)
+		total += len(this.cacheMaps[i])
+		this.mu.RUnlock(i)
+	}
+	return total
+}
+
+// GC evicts cached results when their total exceeds maxCacheItems.
+// It empties whole shards in round-robin order, continuing from where the
+// previous run stopped, until the remaining total fits the limit.
+func (this *UserAgentParser) GC() {
+	// gcIndex is not protected by the shard locks, so runs are serialized here:
+	// GC is exported and may be called while the ticker goroutine is inside it.
+	this.gcLocker.Lock()
+	defer this.gcLocker.Unlock()
+
+	var total = this.Len()
+
+	// One pass over the shards empties the cache completely, so the loop is
+	// bounded instead of relying only on total to terminate.
+	for i := 0; i < userAgentShardingCount && total > this.maxCacheItems; i++ {
+		var shardingIndex = this.gcIndex
+		this.gcIndex = (shardingIndex + 1) % userAgentShardingCount
+
+		this.mu.Lock(shardingIndex)
+		total -= len(this.cacheMaps[shardingIndex])
+		this.cacheMaps[shardingIndex] = map[uint64]UserAgentParserResult{}
+		this.mu.Unlock(shardingIndex)
+	}
+}
diff --git a/internal/stats/user_agent_parser_test.go b/internal/stats/user_agent_parser_test.go
index 40929f5..c0d62da 100644
--- a/internal/stats/user_agent_parser_test.go
+++ b/internal/stats/user_agent_parser_test.go
@@ -1,17 +1,21 @@
 // Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
 
-package stats
+package stats_test
 
 import (
+	"github.com/TeaOSLab/EdgeNode/internal/stats"
+	"github.com/TeaOSLab/EdgeNode/internal/utils/testutils"
+	"github.com/iwind/TeaGo/assert"
 	"github.com/iwind/TeaGo/rands"
 	"github.com/iwind/TeaGo/types"
 	"runtime"
 	"runtime/debug"
 	"testing"
+	"time"
 )
 
 func TestUserAgentParser_Parse(t *testing.T) {
-	var parser = NewUserAgentParser()
+	var parser = stats.NewUserAgentParser()
 	for i := 0; i < 4; i++ {
 		t.Log(parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/1"))
 		t.Log(parser.Parse("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"))
@@ -19,7 +23,7 @@ func TestUserAgentParser_Parse(t *testing.T) {
 }
 
 func TestUserAgentParser_Parse_Unknown(t *testing.T) {
-	var parser = NewUserAgentParser()
+	var parser = stats.NewUserAgentParser()
 	t.Log(parser.Parse("Mozilla/5.0 (Wind 10.0; WOW64; rv:49.0) Apple/537.36 (KHTML, like Gecko) Chr/88.0.4324.96 Sa/537.36 Test/1"))
 	t.Log(parser.Parse(""))
 }
@@ -28,10 +32,10 @@ func TestUserAgentParser_Memory(t *testing.T) {
 	var stat1 = &runtime.MemStats{}
 	runtime.ReadMemStats(stat1)
 
-	var parser = NewUserAgentParser()
+	var parser = stats.NewUserAgentParser()
 
 	for i := 0; i < 1_000_000; i++ {
-		parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
+		parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
 	}
 
 	runtime.GC()
@@ -40,32 +44,82 @@ func TestUserAgentParser_Memory(t *testing.T) {
 	var stat2 = &runtime.MemStats{}
 	runtime.ReadMemStats(stat2)
 
-	t.Log("max cache items:", parser.maxCacheItems)
-	t.Log("cache1:", len(parser.cacheMap1), "cache2:", len(parser.cacheMap2), "cache3:", (stat2.HeapInuse-stat1.HeapInuse)/1024/1024, "MB")
+	t.Log("max cache items:", parser.MaxCacheItems())
+	t.Log("cache:", parser.Len(), "usage:", (stat2.HeapInuse-stat1.HeapInuse)>>20, "MB")
 }
 
-func BenchmarkUserAgentParser_Parse(b *testing.B) {
-	var parser = NewUserAgentParser()
-	for i := 0; i < b.N; i++ {
-		parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
+// TestNewUserAgentParser_GC fills the cache, waits for the background GC and logs what is left; it returns early unless testutils.IsSingleTesting() is true.
+func TestNewUserAgentParser_GC(t *testing.T) {
+	if !testutils.IsSingleTesting() {
+		return
 	}
-	b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
-}
 
-func BenchmarkUserAgentParser_Parse2(b *testing.B) {
-	var parser = NewUserAgentParser()
-	for i := 0; i < b.N; i++ {
+	var parser = stats.NewUserAgentParser()
+
+	for i := 0; i < 1_000_000; i++ {
 		parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
 	}
-	b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
+
+	time.Sleep(60 * time.Second) // wait to gc
+	t.Log(parser.Len(), "cache items")
 }
 
-func BenchmarkUserAgentParser_Parse3(b *testing.B) {
-	var parser = NewUserAgentParser()
+// TestNewUserAgentParser_Mobile checks that iPhone and Android User-Agents are reported as mobile.
+func TestNewUserAgentParser_Mobile(t *testing.T) {
+	var a = assert.NewAssertion(t)
+	var parser = stats.NewUserAgentParser()
+	for _, userAgent := range []string{
+		"Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148",
+		"Mozilla/5.0 (Linux; U; Android 2.2.1; en-us; Nexus One Build/FRG83) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
+	} {
+		a.IsTrue(parser.Parse(userAgent).IsMobile)
+	}
+}
+
+// BenchmarkUserAgentParser_Parse_Many_LimitCPU parses in parallel with up to 1,000,000 distinct User-Agents on 4 procs; GOMAXPROCS is restored on return.
+func BenchmarkUserAgentParser_Parse_Many_LimitCPU(b *testing.B) {
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4))
+
+	var parser = stats.NewUserAgentParser()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
+		}
+	})
+	b.Log(parser.Len())
+}
+
+// BenchmarkUserAgentParser_Parse_Many parses in parallel with up to 1,000,000 distinct User-Agents.
+func BenchmarkUserAgentParser_Parse_Many(b *testing.B) {
+	var parser = stats.NewUserAgentParser()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
+		}
+	})
+	b.Log(parser.Len())
+}
+
+// BenchmarkUserAgentParser_Parse_Few_LimitCPU parses in parallel with up to 100,000 distinct User-Agents on 4 procs; GOMAXPROCS is restored on return.
+func BenchmarkUserAgentParser_Parse_Few_LimitCPU(b *testing.B) {
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(4))
+
+	var parser = stats.NewUserAgentParser()
 	b.RunParallel(func(pb *testing.PB) {
 		for pb.Next() {
 			parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
 		}
 	})
-	b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
+	b.Log(parser.Len())
+}
+
+// BenchmarkUserAgentParser_Parse_Few parses in parallel with up to 100,000 distinct User-Agents.
+func BenchmarkUserAgentParser_Parse_Few(b *testing.B) {
+	var parser = stats.NewUserAgentParser()
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
+		}
+	})
+	b.Log(parser.Len())
 }