提升UA解析性能(2-4倍)

This commit is contained in:
GoEdgeLab
2024-01-12 16:30:32 +08:00
parent 035f6fb7aa
commit 1afb565a48
2 changed files with 149 additions and 60 deletions

View File

@@ -3,38 +3,51 @@
package stats
import (
"github.com/TeaOSLab/EdgeNode/internal/goman"
"github.com/TeaOSLab/EdgeNode/internal/utils"
"github.com/TeaOSLab/EdgeNode/internal/utils/fnv"
syncutils "github.com/TeaOSLab/EdgeNode/internal/utils/sync"
"github.com/mssola/useragent"
"sync"
"time"
)
// SharedUserAgentParser is a package-level parser instance created at package
// initialization via NewUserAgentParser.
var SharedUserAgentParser = NewUserAgentParser()
// userAgentShardingCount is the number of cache shards: it sizes the cacheMaps
// array, is passed to syncutils.NewRWMutex, and is the modulus Parse applies to
// the User-Agent hash to pick a shard.
const userAgentShardingCount = 8
// UserAgentParser parses User-Agent strings and caches the results, keyed by
// the fnv hash of the string.
type UserAgentParser struct {
// parser is a single useragent.UserAgent instance that Parse calls while
// holding locker.
parser *useragent.UserAgent
// cacheMaps holds one result map per shard; Parse selects the shard with
// hash % userAgentShardingCount.
cacheMaps [userAgentShardingCount]map[uint64]UserAgentParserResult
// pool hands out *useragent.UserAgent values; Parse takes one with Get and
// returns it with Put after reading the result.
pool *sync.Pool
// mu is the sharded read/write lock; Parse, Len and GC lock it by shard index
// around their cacheMaps accesses.
mu *syncutils.RWMutex
// cacheMap1 and cacheMap2 are a pair of caches; Parse looks a key up in both
// and writes to the one selected by cacheCursor.
cacheMap1 map[uint64]UserAgentParserResult
cacheMap2 map[uint64]UserAgentParserResult
// maxCacheItems is the cache size limit assigned in init.
maxCacheItems int
// cacheCursor selects the map that receives new entries: 0 means cacheMap1,
// any other value means cacheMap2.
cacheCursor int
// locker is held by Parse around its accesses to parser, cacheMap1, cacheMap2
// and cacheCursor.
locker sync.RWMutex
// gcTicker is created in init with a 5 second period; each tick calls GC.
gcTicker *time.Ticker
// gcIndex is the shard index GC clears next; GC advances it round-robin.
gcIndex int
}
// NewUserAgentParser creates a parser with empty caches, a pool of reusable
// useragent.UserAgent values and a lock sharded userAgentShardingCount ways,
// then runs init on it before returning it.
func NewUserAgentParser() *UserAgentParser {
	var instance = new(UserAgentParser)

	instance.parser = new(useragent.UserAgent)
	instance.cacheMap1 = make(map[uint64]UserAgentParserResult)
	instance.cacheMap2 = make(map[uint64]UserAgentParserResult)
	// cacheCursor keeps its zero value, so cacheMap1 is the first write target.

	instance.pool = &sync.Pool{
		New: func() any {
			return new(useragent.UserAgent)
		},
	}
	instance.mu = syncutils.NewRWMutex(userAgentShardingCount)

	// Every shard needs its own non-nil map before the first write.
	for index := range instance.cacheMaps {
		instance.cacheMaps[index] = make(map[uint64]UserAgentParserResult)
	}

	instance.init()

	return instance
}
// 初始化
func (this *UserAgentParser) init() {
var maxCacheItems = 10_000
var systemMemory = utils.SystemMemoryGB()
@@ -46,8 +59,16 @@ func (this *UserAgentParser) init() {
maxCacheItems = 20_000
}
this.maxCacheItems = maxCacheItems
this.gcTicker = time.NewTicker(5 * time.Second)
goman.New(func() {
for range this.gcTicker.C {
this.GC()
}
})
}
// Parse 解析UserAgent
func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResult) {
// 限制长度
if len(userAgent) == 0 || len(userAgent) > 256 {
@@ -55,28 +76,22 @@ func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResu
}
var userAgentKey = fnv.HashString(userAgent)
var shardingIndex = int(userAgentKey % userAgentShardingCount)
this.locker.RLock()
cacheResult, ok := this.cacheMap1[userAgentKey]
this.mu.RLock(shardingIndex)
cacheResult, ok := this.cacheMaps[shardingIndex][userAgentKey]
if ok {
this.locker.RUnlock()
this.mu.RUnlock(shardingIndex)
return cacheResult
}
this.mu.RUnlock(shardingIndex)
cacheResult, ok = this.cacheMap2[userAgentKey]
if ok {
this.locker.RUnlock()
return cacheResult
}
this.locker.RUnlock()
this.locker.Lock()
defer this.locker.Unlock()
this.parser.Parse(userAgent)
result.OS = this.parser.OSInfo()
result.BrowserName, result.BrowserVersion = this.parser.Browser()
result.IsMobile = this.parser.Mobile()
var parser = this.pool.Get().(*useragent.UserAgent)
parser.Parse(userAgent)
result.OS = parser.OSInfo()
result.BrowserName, result.BrowserVersion = parser.Browser()
result.IsMobile = parser.Mobile()
this.pool.Put(parser)
// 忽略特殊字符
if len(result.BrowserName) > 0 {
@@ -87,19 +102,45 @@ func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResu
}
}
if this.cacheCursor == 0 {
this.cacheMap1[userAgentKey] = result
if len(this.cacheMap1) >= this.maxCacheItems {
this.cacheCursor = 1
this.cacheMap2 = map[uint64]UserAgentParserResult{}
}
} else {
this.cacheMap2[userAgentKey] = result
if len(this.cacheMap2) >= this.maxCacheItems {
this.cacheCursor = 0
this.cacheMap1 = map[uint64]UserAgentParserResult{}
}
}
this.mu.Lock(shardingIndex)
this.cacheMaps[shardingIndex][userAgentKey] = result
this.mu.Unlock(shardingIndex)
return
}
// MaxCacheItems returns maxCacheItems, the cache size limit that GC compares
// the total number of cached entries against.
func (this *UserAgentParser) MaxCacheItems() int {
return this.maxCacheItems
}
// Len returns the number of cached results summed over all shards.
// Each shard is measured under its own read lock, one shard at a time, so the
// sum is not a single atomic snapshot of the whole cache.
func (this *UserAgentParser) Len() int {
	var count int
	for index := range this.cacheMaps {
		this.mu.RLock(index)
		count += len(this.cacheMaps[index])
		this.mu.RUnlock(index)
	}
	return count
}
// GC evicts cached results when the cache holds more than maxCacheItems entries.
//
// It takes the current total from Len and then empties whole shards, starting
// at gcIndex and advancing round-robin, until the running total is within the
// limit. It does nothing when the total is already within the limit.
//
// NOTE(review): gcIndex is read before the shard lock is taken, so callers are
// presumably expected not to run GC concurrently with itself — confirm.
func (this *UserAgentParser) GC() {
	var total = this.Len()

	// The total is a snapshot taken before any write lock is held. Cap the loop
	// at one pass over the shards so it always terminates, even if shards were
	// emptied after the snapshot and the subtraction below can no longer bring
	// the total down to the limit.
	for i := 0; i < userAgentShardingCount && total > this.maxCacheItems; i++ {
		var shardingIndex = this.gcIndex

		this.mu.Lock(shardingIndex)
		total -= len(this.cacheMaps[shardingIndex])
		this.cacheMaps[shardingIndex] = map[uint64]UserAgentParserResult{}
		this.gcIndex = (shardingIndex + 1) % userAgentShardingCount
		this.mu.Unlock(shardingIndex)
	}
}

View File

@@ -1,17 +1,21 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package stats
package stats_test
import (
"github.com/TeaOSLab/EdgeNode/internal/stats"
"github.com/TeaOSLab/EdgeNode/internal/utils/testutils"
"github.com/iwind/TeaGo/assert"
"github.com/iwind/TeaGo/rands"
"github.com/iwind/TeaGo/types"
"runtime"
"runtime/debug"
"testing"
"time"
)
func TestUserAgentParser_Parse(t *testing.T) {
var parser = NewUserAgentParser()
var parser = stats.NewUserAgentParser()
for i := 0; i < 4; i++ {
t.Log(parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/1"))
t.Log(parser.Parse("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"))
@@ -19,7 +23,7 @@ func TestUserAgentParser_Parse(t *testing.T) {
}
func TestUserAgentParser_Parse_Unknown(t *testing.T) {
var parser = NewUserAgentParser()
var parser = stats.NewUserAgentParser()
t.Log(parser.Parse("Mozilla/5.0 (Wind 10.0; WOW64; rv:49.0) Apple/537.36 (KHTML, like Gecko) Chr/88.0.4324.96 Sa/537.36 Test/1"))
t.Log(parser.Parse(""))
}
@@ -28,10 +32,10 @@ func TestUserAgentParser_Memory(t *testing.T) {
var stat1 = &runtime.MemStats{}
runtime.ReadMemStats(stat1)
var parser = NewUserAgentParser()
var parser = stats.NewUserAgentParser()
for i := 0; i < 1_000_000; i++ {
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
}
runtime.GC()
@@ -40,32 +44,76 @@ func TestUserAgentParser_Memory(t *testing.T) {
var stat2 = &runtime.MemStats{}
runtime.ReadMemStats(stat2)
t.Log("max cache items:", parser.maxCacheItems)
t.Log("cache1:", len(parser.cacheMap1), "cache2:", len(parser.cacheMap2), "cache3:", (stat2.HeapInuse-stat1.HeapInuse)/1024/1024, "MB")
t.Log("max cache items:", parser.MaxCacheItems())
t.Log("cache:", parser.Len(), "usage:", (stat2.HeapInuse-stat1.HeapInuse)>>20, "MB")
}
func BenchmarkUserAgentParser_Parse(b *testing.B) {
var parser = NewUserAgentParser()
for i := 0; i < b.N; i++ {
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
func TestNewUserAgentParser_GC(t *testing.T) {
if !testutils.IsSingleTesting() {
return
}
b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
}
func BenchmarkUserAgentParser_Parse2(b *testing.B) {
var parser = NewUserAgentParser()
for i := 0; i < b.N; i++ {
var parser = stats.NewUserAgentParser()
for i := 0; i < 1_000_000; i++ {
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
}
b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
time.Sleep(60 * time.Second) // wait to gc
t.Log(parser.Len(), "cache items")
}
func BenchmarkUserAgentParser_Parse3(b *testing.B) {
var parser = NewUserAgentParser()
// TestNewUserAgentParser_Mobile asserts that an iPhone and an Android
// User-Agent are both reported with IsMobile set.
func TestNewUserAgentParser_Mobile(t *testing.T) {
	var a = assert.NewAssertion(t)
	var parser = stats.NewUserAgentParser()

	var mobileUserAgents = []string{
		"Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148",
		"Mozilla/5.0 (Linux; U; Android 2.2.1; en-us; Nexus One Build/FRG83) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
	}

	for index := range mobileUserAgents {
		var result = parser.Parse(mobileUserAgents[index])
		a.IsTrue(result.IsMobile)
	}
}
// BenchmarkUserAgentParser_Parse_Many_LimitCPU measures parallel Parse calls
// with GOMAXPROCS limited to 4, using User-Agent strings whose numeric suffix
// is drawn with rands.Int(0, 1_000_000). It logs the cache size afterwards.
func BenchmarkUserAgentParser_Parse_Many_LimitCPU(b *testing.B) {
	// GOMAXPROCS is process-wide and returns the previous setting; restore it on
	// return so the limit does not outlive this benchmark.
	var previousProcs = runtime.GOMAXPROCS(4)
	defer runtime.GOMAXPROCS(previousProcs)

	var parser = stats.NewUserAgentParser()

	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
		}
	})

	b.Log(parser.Len())
}
// BenchmarkUserAgentParser_Parse_Many measures parallel Parse calls using
// User-Agent strings whose numeric suffix is drawn with
// rands.Int(0, 1_000_000), then logs the cache size.
func BenchmarkUserAgentParser_Parse_Many(b *testing.B) {
	const userAgentPrefix = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/"

	var parser = stats.NewUserAgentParser()

	var worker = func(pb *testing.PB) {
		for pb.Next() {
			var suffix = types.String(rands.Int(0, 1_000_000))
			parser.Parse(userAgentPrefix + suffix)
		}
	}
	b.RunParallel(worker)

	b.Log(parser.Len())
}
// BenchmarkUserAgentParser_Parse_Few_LimitCPU measures parallel Parse calls
// with GOMAXPROCS limited to 4, using User-Agent strings whose numeric suffix
// is drawn with rands.Int(0, 100_000). It logs the cache size afterwards.
func BenchmarkUserAgentParser_Parse_Few_LimitCPU(b *testing.B) {
	// GOMAXPROCS is process-wide and returns the previous setting; restore it on
	// return so the limit does not outlive this benchmark.
	var previousProcs = runtime.GOMAXPROCS(4)
	defer runtime.GOMAXPROCS(previousProcs)

	var parser = stats.NewUserAgentParser()

	b.RunParallel(func(pb *testing.PB) {
		for pb.Next() {
			parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
		}
	})

	// The cache size is read through the exported Len accessor; the parser's
	// fields are unexported and not reachable from package stats_test.
	b.Log(parser.Len())
}
// BenchmarkUserAgentParser_Parse_Few measures parallel Parse calls using
// User-Agent strings whose numeric suffix is drawn with rands.Int(0, 100_000),
// then logs the cache size.
func BenchmarkUserAgentParser_Parse_Few(b *testing.B) {
	const userAgentPrefix = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/"

	var parser = stats.NewUserAgentParser()

	var worker = func(pb *testing.PB) {
		for pb.Next() {
			var suffix = types.String(rands.Int(0, 100_000))
			parser.Parse(userAgentPrefix + suffix)
		}
	}
	b.RunParallel(worker)

	b.Log(parser.Len())
}