优化UserAgent解析

This commit is contained in:
GoEdgeLab
2022-01-01 21:47:59 +08:00
parent be412f5740
commit e67b56f685
4 changed files with 148 additions and 5 deletions

View File

@@ -15,7 +15,6 @@ import (
"github.com/iwind/TeaGo/maps" "github.com/iwind/TeaGo/maps"
"github.com/iwind/TeaGo/types" "github.com/iwind/TeaGo/types"
timeutil "github.com/iwind/TeaGo/utils/time" timeutil "github.com/iwind/TeaGo/utils/time"
"github.com/mssola/user_agent"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@@ -177,7 +176,7 @@ func (this *HTTPRequestStatManager) AddFirewallRuleGroupId(serverId int64, firew
// Loop 单个循环 // Loop 单个循环
func (this *HTTPRequestStatManager) Loop() error { func (this *HTTPRequestStatManager) Loop() error {
timeout := time.NewTimer(10 * time.Minute) // 执行的最大时间 timeout := time.NewTimer(10 * time.Minute) // 执行的最大时间
userAgentParser := &user_agent.UserAgent{} userAgentParser := NewUserAgentParser()
Loop: Loop:
for { for {
select { select {
@@ -223,8 +222,8 @@ Loop:
serverId := userAgentString[:atIndex] serverId := userAgentString[:atIndex]
userAgent := userAgentString[atIndex+1:] userAgent := userAgentString[atIndex+1:]
userAgentParser.Parse(userAgent) var result = userAgentParser.Parse(userAgent)
osInfo := userAgentParser.OSInfo() var osInfo = result.os
if len(osInfo.Name) > 0 { if len(osInfo.Name) > 0 {
dotIndex := strings.Index(osInfo.Version, ".") dotIndex := strings.Index(osInfo.Version, ".")
if dotIndex > -1 { if dotIndex > -1 {
@@ -233,7 +232,7 @@ Loop:
this.systemMap[serverId+"@"+osInfo.Name+"@"+osInfo.Version]++ this.systemMap[serverId+"@"+osInfo.Name+"@"+osInfo.Version]++
} }
browser, browserVersion := userAgentParser.Browser() var browser, browserVersion = result.browserName, result.browserVersion
if len(browser) > 0 { if len(browser) > 0 {
dotIndex := strings.Index(browserVersion, ".") dotIndex := strings.Index(browserVersion, ".")
if dotIndex > -1 { if dotIndex > -1 {

View File

@@ -0,0 +1,82 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package stats
import (
"github.com/TeaOSLab/EdgeNode/internal/utils"
"github.com/mssola/user_agent"
)
// UserAgentParser parses User-Agent strings and caches the results.
//
// It is intended for single-goroutine use only: the parser state and the
// cache maps are accessed without any locking.
type UserAgentParser struct {
	// parser is the underlying third-party parser, reused for every call.
	parser *user_agent.UserAgent

	// cacheMap1 and cacheMap2 are the two cache generations, keyed by the
	// raw User-Agent string.
	cacheMap1, cacheMap2 map[string]UserAgentParserResult

	// maxCacheItems is the size at which the map currently being written
	// is considered full; cacheCursor selects that map (0 for cacheMap1,
	// any other value for cacheMap2).
	maxCacheItems, cacheCursor int
}
// NewUserAgentParser returns a ready-to-use parser with two empty cache maps
// and a cache size limit selected by init.
func NewUserAgentParser() *UserAgentParser {
	var p = new(UserAgentParser)
	p.parser = &user_agent.UserAgent{}
	p.cacheMap1 = make(map[string]UserAgentParserResult)
	p.cacheMap2 = make(map[string]UserAgentParserResult)
	// cacheCursor keeps its zero value, so writes start in cacheMap1.
	p.init()
	return p
}
// init selects maxCacheItems from the value returned by
// utils.SystemMemoryGB (presumably the machine's memory in GB - confirm
// against that helper): 40,000 for >= 16, 30,000 for >= 8, 20,000 for >= 4,
// and 10,000 otherwise.
func (this *UserAgentParser) init() {
	var memoryGB = utils.SystemMemoryGB()

	switch {
	case memoryGB >= 16:
		this.maxCacheItems = 40_000
	case memoryGB >= 8:
		this.maxCacheItems = 30_000
	case memoryGB >= 4:
		this.maxCacheItems = 20_000
	default:
		this.maxCacheItems = 10_000
	}
}
// Parse returns the OS info, browser name and browser version for userAgent.
//
// Empty strings and strings longer than 256 bytes are not parsed and yield
// the zero result. Other results are cached: lookups consult both cache maps,
// while new entries go to the map selected by cacheCursor. When that map
// reaches maxCacheItems the cursor flips to the other map, which is replaced
// with an empty one, so older entries are dropped a whole map at a time.
func (this *UserAgentParser) Parse(userAgent string) (result UserAgentParserResult) {
	// Limit the accepted length.
	var size = len(userAgent)
	if size == 0 || size > 256 {
		return result
	}

	// Try both cache generations before doing any parsing work.
	if cached, found := this.cacheMap1[userAgent]; found {
		return cached
	}
	if cached, found := this.cacheMap2[userAgent]; found {
		return cached
	}

	this.parser.Parse(userAgent)
	result = UserAgentParserResult{os: this.parser.OSInfo()}
	result.browserName, result.browserVersion = this.parser.Browser()

	switch this.cacheCursor {
	case 0:
		this.cacheMap1[userAgent] = result
		if len(this.cacheMap1) >= this.maxCacheItems {
			// cacheMap1 is full: start writing into a fresh cacheMap2.
			this.cacheCursor = 1
			this.cacheMap2 = map[string]UserAgentParserResult{}
		}
	default:
		this.cacheMap2[userAgent] = result
		if len(this.cacheMap2) >= this.maxCacheItems {
			// cacheMap2 is full: start writing into a fresh cacheMap1.
			this.cacheCursor = 0
			this.cacheMap1 = map[string]UserAgentParserResult{}
		}
	}

	return result
}

View File

@@ -0,0 +1,11 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package stats
import "github.com/mssola/user_agent"
// UserAgentParserResult holds the values extracted from one User-Agent string.
type UserAgentParserResult struct {
	// os is the operating system information reported by the user_agent parser.
	os user_agent.OSInfo

	// browserName and browserVersion are the browser values reported by the
	// user_agent parser.
	browserName, browserVersion string
}

View File

@@ -0,0 +1,51 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package stats
import (
"github.com/iwind/TeaGo/rands"
"github.com/iwind/TeaGo/types"
"runtime"
"runtime/debug"
"testing"
)
// TestUserAgentParser_Parse logs the parse results for two Chrome
// User-Agent strings (Windows and macOS).
func TestUserAgentParser_Parse(t *testing.T) {
	var parser = NewUserAgentParser()

	var userAgents = []string{
		"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/1",
		"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
	}
	for _, userAgent := range userAgents {
		t.Log(parser.Parse(userAgent))
	}
}
// TestUserAgentParser_Parse_Unknown logs the parse results for a mangled
// User-Agent string and for the empty string.
func TestUserAgentParser_Parse_Unknown(t *testing.T) {
	var parser = NewUserAgentParser()

	var userAgents = []string{
		"Mozilla/5.0 (Wind 10.0; WOW64; rv:49.0) Apple/537.36 (KHTML, like Gecko) Chr/88.0.4324.96 Sa/537.36 Test/1",
		"",
	}
	for _, userAgent := range userAgents {
		t.Log(parser.Parse(userAgent))
	}
}
// TestUserAgentParser_Memory parses one million User-Agent strings with a
// random numeric suffix and logs the cache sizes together with the change in
// in-use heap memory measured around the run.
func TestUserAgentParser_Memory(t *testing.T) {
	var stat1 = &runtime.MemStats{}
	runtime.ReadMemStats(stat1)

	var parser = NewUserAgentParser()
	for i := 0; i < 1_000_000; i++ {
		parser.Parse("Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/" + types.String(rands.Int(0, 100_000)))
	}

	// Collect garbage first so the second sample mostly reflects live data.
	runtime.GC()
	debug.FreeOSMemory()

	var stat2 = &runtime.MemStats{}
	runtime.ReadMemStats(stat2)

	// HeapInuse is a uint64: subtract as signed values so that a heap which
	// shrank is reported as a negative delta instead of wrapping around.
	var heapDeltaMB = (int64(stat2.HeapInuse) - int64(stat1.HeapInuse)) / 1024 / 1024

	t.Log("max cache items:", parser.maxCacheItems)
	t.Log("cache1:", len(parser.cacheMap1), "cache2:", len(parser.cacheMap2), "heap in use delta:", heapDeltaMB, "MB")
}
// BenchmarkUserAgentParser_Parse measures Parse on User-Agent strings whose
// numeric suffix is drawn via rands.Int(0, 40000), then logs the final sizes
// of both cache maps.
func BenchmarkUserAgentParser_Parse(b *testing.B) {
	const prefix = "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.96 Safari/537.36 Test/"

	var parser = NewUserAgentParser()
	for remaining := b.N; remaining > 0; remaining-- {
		var suffix = types.String(rands.Int(0, 40000))
		parser.Parse(prefix + suffix)
	}

	b.Log(len(parser.cacheMap1), len(parser.cacheMap2))
}