mirror of
https://github.com/TeaOSLab/EdgeNode.git
synced 2025-11-03 15:00:26 +08:00
提升UA解析性能(2-4倍)
This commit is contained in:
@@ -3,38 +3,51 @@
|
||||
package stats
|
||||
|
||||
import (
|
||||
"github.com/TeaOSLab/EdgeNode/internal/goman"
|
||||
"github.com/TeaOSLab/EdgeNode/internal/utils"
|
||||
"github.com/TeaOSLab/EdgeNode/internal/utils/fnv"
|
||||
syncutils "github.com/TeaOSLab/EdgeNode/internal/utils/sync"
|
||||
"github.com/mssola/useragent"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SharedUserAgentParser is the package-level parser instance. It is built at
// package initialization, and NewUserAgentParser calls init, which starts the
// ticker goroutine that runs GC.
var SharedUserAgentParser = NewUserAgentParser()
|
||||
|
||||
// userAgentShardingCount is the number of cache shards: it sizes the
// cacheMaps array and is passed to syncutils.NewRWMutex for the per-shard lock.
const userAgentShardingCount = 8
|
||||
|
||||
// UserAgentParser parses User-Agent strings and caches the results keyed by
// a hash of the string.
//
// NOTE(review): this listing shows the sharded-cache fields (cacheMaps, pool,
// mu, gcTicker, gcIndex) together with the two-map fields (parser, cacheMap1,
// cacheMap2, cacheCursor, locker). It looks like both sides of a diff rendered
// without +/- markers — confirm which set is live.
|
||||
type UserAgentParser struct {
|
||||
// Single parser instance; Parse calls it while holding locker.
parser *useragent.UserAgent
|
||||
// Result cache split into shards; Parse selects the shard with
// hash % userAgentShardingCount.
cacheMaps [userAgentShardingCount]map[uint64]UserAgentParserResult
|
||||
// Pool of *useragent.UserAgent values that Parse borrows for an uncached string.
pool *sync.Pool
|
||||
// Per-shard read/write lock, indexed by the same shard number as cacheMaps.
mu *syncutils.RWMutex
|
||||
|
||||
// cacheMap1 and cacheMap2 are filled alternately: when the active one
// reaches maxCacheItems, the other is replaced by an empty map and becomes active.
cacheMap1 map[uint64]UserAgentParserResult
|
||||
cacheMap2 map[uint64]UserAgentParserResult
|
||||
// Cache size limit, assigned in init; GC clears shards while the total exceeds it.
maxCacheItems int
|
||||
|
||||
// Selects the active map of the pair: 0 means cacheMap1, otherwise cacheMap2.
cacheCursor int
|
||||
// Guards parser, cacheMap1, cacheMap2 and cacheCursor in Parse.
locker sync.RWMutex
|
||||
// Ticker created in init with a 5-second period; each tick calls GC.
gcTicker *time.Ticker
|
||||
// Index of the next shard GC will clear; advanced round-robin.
gcIndex int
|
||||
}
|
||||
|
||||
// NewUserAgentParser creates a parser with all caches allocated and then
// calls init, which sets the cache limit and starts the GC ticker goroutine.
//
// NOTE(review): the literal below initializes both the two-map fields
// (parser, cacheMap1, cacheMap2, cacheCursor) and the sharded fields (pool,
// cacheMaps, mu); this looks like both sides of a diff shown together — confirm.
|
||||
func NewUserAgentParser() *UserAgentParser {
|
||||
var parser = &UserAgentParser{
|
||||
parser: &useragent.UserAgent{},
|
||||
cacheMap1: map[uint64]UserAgentParserResult{},
|
||||
cacheMap2: map[uint64]UserAgentParserResult{},
|
||||
cacheCursor: 0,
|
||||
pool: &sync.Pool{
|
||||
// New supplies a fresh useragent.UserAgent when the pool has none to hand out.
New: func() any {
|
||||
return &useragent.UserAgent{}
|
||||
},
|
||||
},
|
||||
cacheMaps: [userAgentShardingCount]map[uint64]UserAgentParserResult{},
|
||||
mu: syncutils.NewRWMutex(userAgentShardingCount),
|
||||
}
|
||||
|
||||
// The array literal above leaves every element a nil map; give each shard
// an empty map so Parse can store into it.
for i := 0; i < userAgentShardingCount; i++ {
|
||||
parser.cacheMaps[i] = map[uint64]UserAgentParserResult{}
|
||||
}
|
||||
|
||||
parser.init()
|
||||
return parser
|
||||
}
|
||||
|
||||
// 初始化
|
||||
func (this *UserAgentParser) init() {
|
||||
var maxCacheItems = 10_000
|
||||
var systemMemory = utils.SystemMemoryGB()
|
||||
@@ -46,8 +59,16 @@ func (this *UserAgentParser) init() {
|
||||
maxCacheItems = 20_000
|
||||
}
|
||||
this.maxCacheItems = maxCacheItems
|
||||
|
||||
this.gcTicker = time.NewTicker(5 * time.Second)
|
||||
goman.New(func() {
|
||||
for range this.gcTicker.C {
|
||||
this.GC()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Parse 解析UserAgent
|
||||
func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResult) {
|
||||
// 限制长度
|
||||
if len(userAgent) == 0 || len(userAgent) > 256 {
|
||||
@@ -55,28 +76,22 @@ func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResu
|
||||
}
|
||||
|
||||
var userAgentKey = fnv.HashString(userAgent)
|
||||
var shardingIndex = int(userAgentKey % userAgentShardingCount)
|
||||
|
||||
this.locker.RLock()
|
||||
cacheResult, ok := this.cacheMap1[userAgentKey]
|
||||
this.mu.RLock(shardingIndex)
|
||||
cacheResult, ok := this.cacheMaps[shardingIndex][userAgentKey]
|
||||
if ok {
|
||||
this.locker.RUnlock()
|
||||
this.mu.RUnlock(shardingIndex)
|
||||
return cacheResult
|
||||
}
|
||||
this.mu.RUnlock(shardingIndex)
|
||||
|
||||
cacheResult, ok = this.cacheMap2[userAgentKey]
|
||||
if ok {
|
||||
this.locker.RUnlock()
|
||||
return cacheResult
|
||||
}
|
||||
this.locker.RUnlock()
|
||||
|
||||
this.locker.Lock()
|
||||
defer this.locker.Unlock()
|
||||
|
||||
this.parser.Parse(userAgent)
|
||||
result.OS = this.parser.OSInfo()
|
||||
result.BrowserName, result.BrowserVersion = this.parser.Browser()
|
||||
result.IsMobile = this.parser.Mobile()
|
||||
var parser = this.pool.Get().(*useragent.UserAgent)
|
||||
parser.Parse(userAgent)
|
||||
result.OS = parser.OSInfo()
|
||||
result.BrowserName, result.BrowserVersion = parser.Browser()
|
||||
result.IsMobile = parser.Mobile()
|
||||
this.pool.Put(parser)
|
||||
|
||||
// 忽略特殊字符
|
||||
if len(result.BrowserName) > 0 {
|
||||
@@ -87,19 +102,45 @@ func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResu
|
||||
}
|
||||
}
|
||||
|
||||
if this.cacheCursor == 0 {
|
||||
this.cacheMap1[userAgentKey] = result
|
||||
if len(this.cacheMap1) >= this.maxCacheItems {
|
||||
this.cacheCursor = 1
|
||||
this.cacheMap2 = map[uint64]UserAgentParserResult{}
|
||||
}
|
||||
} else {
|
||||
this.cacheMap2[userAgentKey] = result
|
||||
if len(this.cacheMap2) >= this.maxCacheItems {
|
||||
this.cacheCursor = 0
|
||||
this.cacheMap1 = map[uint64]UserAgentParserResult{}
|
||||
}
|
||||
}
|
||||
this.mu.Lock(shardingIndex)
|
||||
this.cacheMaps[shardingIndex][userAgentKey] = result
|
||||
this.mu.Unlock(shardingIndex)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// MaxCacheItems 读取能容纳的缓存最大数量
|
||||
func (this *UserAgentParser) MaxCacheItems() int {
|
||||
return this.maxCacheItems
|
||||
}
|
||||
|
||||
// Len 读取当前缓存数量
|
||||
func (this *UserAgentParser) Len() int {
|
||||
var total = 0
|
||||
for i := 0; i < userAgentShardingCount; i++ {
|
||||
this.mu.RLock(i)
|
||||
total += len(this.cacheMaps[i])
|
||||
this.mu.RUnlock(i)
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// GC 回收多余的缓存
|
||||
func (this *UserAgentParser) GC() {
|
||||
var total = this.Len()
|
||||
if total > this.maxCacheItems {
|
||||
for {
|
||||
var shardingIndex = this.gcIndex
|
||||
|
||||
this.mu.Lock(shardingIndex)
|
||||
total -= len(this.cacheMaps[shardingIndex])
|
||||
this.cacheMaps[shardingIndex] = map[uint64]UserAgentParserResult{}
|
||||
this.gcIndex = (this.gcIndex + 1) % userAgentShardingCount
|
||||
this.mu.Unlock(shardingIndex)
|
||||
|
||||
if total <= this.maxCacheItems {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,17 +1,21 @@
|
||||
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
|
||||
|
||||
package stats
|
||||
package stats_test
|
||||
|
||||
import (
|
||||
"github.com/TeaOSLab/EdgeNode/internal/stats"
|
||||
"github.com/TeaOSLab/EdgeNode/internal/utils/testutils"
|
||||
"github.com/iwind/TeaGo/assert"
|
||||
"github.com/iwind/TeaGo/rands"
|
||||
"github.com/iwind/TeaGo/types"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestUserAgentParser_Parse(t *testing.T) {
|
||||
var parser = NewUserAgentParser()
|
||||
var parser = stats.NewUserAgentParser()
|
||||
for i := 0; i < 4; i++ {
|
||||
t.Log(parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/1"))
|
||||
t.Log(parser.Parse("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"))
|
||||
@@ -19,7 +23,7 @@ func TestUserAgentParser_Parse(t *testing.T) {
|
||||
}
|
||||
|
||||
// TestUserAgentParser_Parse_Unknown logs what Parse returns for a user agent
// whose product tokens are truncated ("Wind", "Chr", "Sa") and for the empty
// string. It makes no assertions; the results are only written to the test log.
//
// NOTE(review): parser is declared twice below (unqualified and
// stats-qualified constructor); this looks like the old and new line of a
// diff shown together — confirm which one is live.
func TestUserAgentParser_Parse_Unknown(t *testing.T) {
|
||||
var parser = NewUserAgentParser()
|
||||
var parser = stats.NewUserAgentParser()
|
||||
t.Log(parser.Parse("Mozilla/5.0 (Wind 10.0; WOW64; rv:49.0) Apple/537.36 (KHTML, like Gecko) Chr/88.0.4324.96 Sa/537.36 Test/1"))
|
||||
// Empty input takes the length guard at the top of Parse.
t.Log(parser.Parse(""))
|
||||
}
|
||||
@@ -28,10 +32,10 @@ func TestUserAgentParser_Memory(t *testing.T) {
|
||||
var stat1 = &runtime.MemStats{}
|
||||
runtime.ReadMemStats(stat1)
|
||||
|
||||
var parser = NewUserAgentParser()
|
||||
var parser = stats.NewUserAgentParser()
|
||||
|
||||
for i := 0; i < 1_000_000; i++ {
|
||||
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
|
||||
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
|
||||
}
|
||||
|
||||
runtime.GC()
|
||||
@@ -40,32 +44,76 @@ func TestUserAgentParser_Memory(t *testing.T) {
|
||||
var stat2 = &runtime.MemStats{}
|
||||
runtime.ReadMemStats(stat2)
|
||||
|
||||
t.Log("max cache items:", parser.maxCacheItems)
|
||||
t.Log("cache1:", len(parser.cacheMap1), "cache2:", len(parser.cacheMap2), "cache3:", (stat2.HeapInuse-stat1.HeapInuse)/1024/1024, "MB")
|
||||
t.Log("max cache items:", parser.MaxCacheItems())
|
||||
t.Log("cache:", parser.Len(), "usage:", (stat2.HeapInuse-stat1.HeapInuse)>>20, "MB")
|
||||
}
|
||||
|
||||
func BenchmarkUserAgentParser_Parse(b *testing.B) {
|
||||
var parser = NewUserAgentParser()
|
||||
for i := 0; i < b.N; i++ {
|
||||
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
|
||||
func TestNewUserAgentParser_GC(t *testing.T) {
|
||||
if !testutils.IsSingleTesting() {
|
||||
return
|
||||
}
|
||||
b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
|
||||
}
|
||||
|
||||
func BenchmarkUserAgentParser_Parse2(b *testing.B) {
|
||||
var parser = NewUserAgentParser()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var parser = stats.NewUserAgentParser()
|
||||
|
||||
for i := 0; i < 1_000_000; i++ {
|
||||
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
|
||||
}
|
||||
b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
|
||||
|
||||
time.Sleep(60 * time.Second) // wait to gc
|
||||
t.Log(parser.Len(), "cache items")
|
||||
}
|
||||
|
||||
func BenchmarkUserAgentParser_Parse3(b *testing.B) {
|
||||
var parser = NewUserAgentParser()
|
||||
func TestNewUserAgentParser_Mobile(t *testing.T) {
|
||||
var a = assert.NewAssertion(t)
|
||||
var parser = stats.NewUserAgentParser()
|
||||
for _, userAgent := range []string{
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 12_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148",
|
||||
"Mozilla/5.0 (Linux; U; Android 2.2.1; en-us; Nexus One Build/FRG83) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
|
||||
} {
|
||||
a.IsTrue(parser.Parse(userAgent).IsMobile)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkUserAgentParser_Parse_Many_LimitCPU(b *testing.B) {
|
||||
runtime.GOMAXPROCS(4)
|
||||
|
||||
var parser = stats.NewUserAgentParser()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
|
||||
}
|
||||
})
|
||||
b.Log(parser.Len())
|
||||
}
|
||||
|
||||
func BenchmarkUserAgentParser_Parse_Many(b *testing.B) {
|
||||
var parser = stats.NewUserAgentParser()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 1_000_000)))
|
||||
}
|
||||
})
|
||||
b.Log(parser.Len())
|
||||
}
|
||||
|
||||
func BenchmarkUserAgentParser_Parse_Few_LimitCPU(b *testing.B) {
|
||||
runtime.GOMAXPROCS(4)
|
||||
|
||||
var parser = stats.NewUserAgentParser()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
|
||||
}
|
||||
})
|
||||
b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
|
||||
b.Log(parser.Len())
|
||||
}
|
||||
|
||||
func BenchmarkUserAgentParser_Parse_Few(b *testing.B) {
|
||||
var parser = stats.NewUserAgentParser()
|
||||
b.RunParallel(func(pb *testing.PB) {
|
||||
for pb.Next() {
|
||||
parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
|
||||
}
|
||||
})
|
||||
b.Log(parser.Len())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user