增加自动Agent识别

This commit is contained in:
GoEdgeLab
2022-12-22 11:38:59 +08:00
parent d618636cf5
commit cdd268178c
15 changed files with 794 additions and 3 deletions

View File

@@ -12,5 +12,5 @@ func (this *HTTPRequest) doStat() {
// 内置的统计
stats.SharedHTTPRequestStatManager.AddRemoteAddr(this.ReqServer.Id, this.requestRemoteAddr(true), this.writer.SentBodyBytes(), this.isAttack)
stats.SharedHTTPRequestStatManager.AddUserAgent(this.ReqServer.Id, this.requestHeader("User-Agent"))
stats.SharedHTTPRequestStatManager.AddUserAgent(this.ReqServer.Id, this.requestHeader("User-Agent"), this.remoteAddr)
}

View File

@@ -25,7 +25,8 @@ import (
"github.com/TeaOSLab/EdgeNode/internal/stats"
"github.com/TeaOSLab/EdgeNode/internal/trackers"
"github.com/TeaOSLab/EdgeNode/internal/utils"
_ "github.com/TeaOSLab/EdgeNode/internal/utils/clock" // 触发时钟更新
_ "github.com/TeaOSLab/EdgeNode/internal/utils/agents" // 引入Agent管理器
_ "github.com/TeaOSLab/EdgeNode/internal/utils/clock" // 触发时钟更新
"github.com/TeaOSLab/EdgeNode/internal/utils/jsonutils"
"github.com/TeaOSLab/EdgeNode/internal/waf"
"github.com/andybalholm/brotli"

View File

@@ -50,6 +50,7 @@ type RPCClient struct {
SSLCertRPC pb.SSLCertServiceClient
ScriptRPC pb.ScriptServiceClient
UserRPC pb.UserServiceClient
ClientAgentIPRPC pb.ClientAgentIPServiceClient
}
func NewRPCClient(apiConfig *configs.APIConfig) (*RPCClient, error) {
@@ -83,6 +84,7 @@ func NewRPCClient(apiConfig *configs.APIConfig) (*RPCClient, error) {
client.SSLCertRPC = pb.NewSSLCertServiceClient(client)
client.ScriptRPC = pb.NewScriptServiceClient(client)
client.UserRPC = pb.NewUserServiceClient(client)
client.ClientAgentIPRPC = pb.NewClientAgentIPServiceClient(client)
err := client.init()
if err != nil {

View File

@@ -11,6 +11,7 @@ import (
"github.com/TeaOSLab/EdgeNode/internal/rpc"
"github.com/TeaOSLab/EdgeNode/internal/trackers"
"github.com/TeaOSLab/EdgeNode/internal/utils"
"github.com/TeaOSLab/EdgeNode/internal/utils/agents"
"github.com/TeaOSLab/EdgeNode/internal/waf"
"github.com/iwind/TeaGo/Tea"
"github.com/iwind/TeaGo/maps"
@@ -146,11 +147,16 @@ func (this *HTTPRequestStatManager) AddRemoteAddr(serverId int64, remoteAddr str
}
// AddUserAgent 添加UserAgent
func (this *HTTPRequestStatManager) AddUserAgent(serverId int64, userAgent string) {
func (this *HTTPRequestStatManager) AddUserAgent(serverId int64, userAgent string, ip string) {
if len(userAgent) == 0 {
return
}
// 是否包含一些知名Agent
if len(userAgent) > 0 && len(ip) > 0 && agents.IsAgentFromUserAgent(userAgent) {
agents.SharedQueue.Push(ip)
}
select {
case this.userAgentChan <- strconv.FormatInt(serverId, 10) + "@" + userAgent:
default:

View File

@@ -0,0 +1,39 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents
import (
"regexp"
"strings"
)
// Agent describes one known crawler and the rules used to recognise it.
type Agent struct {
	Code     string   // identifier returned for a matched PTR name
	Keywords []string // substrings searched for in a User-Agent header

	suffixes []string       // PTR host name suffixes
	reg      *regexp.Regexp // optional pattern tried when no suffix matches
}

// NewAgent builds an Agent from its code, PTR suffixes, optional PTR pattern
// and User-Agent keywords. Any of suffixes, reg and keywords may be nil.
func NewAgent(code string, suffixes []string, reg *regexp.Regexp, keywords []string) *Agent {
	var agent = new(Agent)
	agent.Code = code
	agent.suffixes = suffixes
	agent.reg = reg
	agent.Keywords = keywords
	return agent
}

// Match reports whether the PTR name belongs to this agent: it returns true
// when ptr ends with one of the suffixes, otherwise falls back to the regular
// expression if one was supplied.
func (this *Agent) Match(ptr string) bool {
	for i := range this.suffixes {
		if strings.HasSuffix(ptr, this.suffixes[i]) {
			return true
		}
	}
	return this.reg != nil && this.reg.MatchString(ptr)
}

View File

@@ -0,0 +1,9 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents
// AgentIP is one stored association between a client IP and an agent code.
type AgentIP struct {
// Id is the numeric id of the record.
Id int64
// IP is the client IP address in text form.
IP string
// AgentCode is the code of the agent the IP was attributed to.
AgentCode string
}

View File

@@ -0,0 +1,31 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents
import "strings"
// AllAgents is the built-in list of recognised agents. Each entry is created
// with NewAgent(code, PTR suffixes, PTR regexp, User-Agent keywords); no entry
// currently supplies a regexp.
// NOTE(review): the "youdao" suffix matches every host under 163.com — confirm
// that this breadth is intended.
var AllAgents = []*Agent{
NewAgent("baidu", []string{".baidu.com."}, nil, []string{"Baidu"}),
NewAgent("google", []string{".googlebot.com."}, nil, []string{"Google"}),
NewAgent("bing", []string{".search.msn.com."}, nil, []string{"bingbot"}),
NewAgent("sogou", []string{".sogou.com."}, nil, []string{"Sogou"}),
NewAgent("youdao", []string{".163.com."}, nil, []string{"Youdao"}),
NewAgent("yahoo", []string{".yahoo.com."}, nil, []string{"Yahoo"}),
NewAgent("bytedance", []string{".bytedance.com."}, nil, []string{"Bytespider"}),
NewAgent("sm", []string{".sm.cn."}, nil, []string{"YisouSpider"}),
NewAgent("yandex", []string{".yandex.com.", ".yndx.net."}, nil, []string{"Yandex"}),
NewAgent("semrush", []string{".semrush.com."}, nil, []string{"SEMrush"}),
}
// IsAgentFromUserAgent reports whether the User-Agent string contains a
// keyword of any agent listed in AllAgents. The comparison is a
// case-sensitive substring search.
func IsAgentFromUserAgent(userAgent string) bool {
	for _, candidate := range AllAgents {
		for _, keyword := range candidate.Keywords {
			if strings.Contains(userAgent, keyword) {
				return true
			}
		}
	}
	return false
}

View File

@@ -0,0 +1,19 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents_test
import (
"github.com/TeaOSLab/EdgeNode/internal/utils/agents"
"testing"
)
// TestIsAgentFromUserAgent checks keyword detection against one crawler
// User-Agent and one ordinary User-Agent, failing on an unexpected result
// instead of only logging it.
func TestIsAgentFromUserAgent(t *testing.T) {
	for _, c := range []struct {
		userAgent string
		expected  bool
	}{
		{"Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1 Mobile Safari/10600.6.3 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)", true},
		{"Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;)", false},
	} {
		var got = agents.IsAgentFromUserAgent(c.userAgent)
		t.Log(got)
		if got != c.expected {
			t.Fatalf("IsAgentFromUserAgent(%q) = %v, expected %v", c.userAgent, got, c.expected)
		}
	}
}
// BenchmarkIsAgentFromUserAgent measures the lookup for a User-Agent that
// matches none of the known keywords, so every keyword is scanned.
func BenchmarkIsAgentFromUserAgent(b *testing.B) {
	const userAgent = "Mozilla/5.0 (Linux;u;Android 4.2.2;zh-cn;) AppleWebKit/534.46 (KHTML,like Gecko) Version/5.1 Mobile Safari/10600.6.3 (compatible; Yaho)"

	for n := 0; n < b.N; n++ {
		agents.IsAgentFromUserAgent(userAgent)
	}
}

156
internal/utils/agents/db.go Normal file
View File

@@ -0,0 +1,156 @@
// Copyright 2021 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
package agents
import (
"database/sql"
"errors"
"github.com/TeaOSLab/EdgeNode/internal/events"
"github.com/TeaOSLab/EdgeNode/internal/remotelogs"
"github.com/iwind/TeaGo/Tea"
"github.com/iwind/TeaGo/types"
_ "github.com/mattn/go-sqlite3"
"log"
"os"
"path/filepath"
)
const (
// tableAgentIPs is the name of the table that stores agent IP records.
tableAgentIPs = "agentIPs"
)
// DB wraps the local database that stores agent IP records.
type DB struct {
// db is the open handle; it stays nil until Init succeeds.
db *sql.DB
// path is the database file path passed to NewDB.
path string
// prepared statements created by Init
insertAgentIPStmt *sql.Stmt
listAgentIPsStmt *sql.Stmt
}
// NewDB creates a DB for the given file path and registers a handler that
// closes it on events.EventQuit. The database is not opened until Init is
// called.
func NewDB(path string) *DB {
	var instance = &DB{}
	instance.path = path

	events.On(events.EventQuit, func() {
		_ = instance.Close()
	})

	return instance
}
// Init creates the parent directory when needed, opens the database file,
// creates the agent IP table if it does not exist and prepares the statements
// used by InsertAgentIP and ListAgentIPs.
//
// The receiver's fields are only populated when every step succeeds. On any
// failure after the handle has been opened, the handle and any statement
// prepared so far are closed before the error is returned, so a failed Init
// does not leak them.
func (this *DB) Init() error {
	// make sure the directory exists
	var dir = filepath.Dir(this.path)
	_, err := os.Stat(dir)
	if err != nil {
		err = os.MkdirAll(dir, 0777)
		if err != nil {
			return err
		}
		remotelogs.Println("DB", "create database dir '"+dir+"'")
	}

	// TODO think about the data safety of data.db
	db, err := sql.Open("sqlite3", "file:"+this.path+"?cache=shared&mode=rwc&_journal_mode=WAL")
	if err != nil {
		return err
	}
	db.SetMaxOpenConns(1)

	// release everything created below unless initialization completes
	var insertStmt, listStmt *sql.Stmt
	var done = false
	defer func() {
		if done {
			return
		}
		for _, stmt := range []*sql.Stmt{insertStmt, listStmt} {
			if stmt != nil {
				_ = stmt.Close()
			}
		}
		_ = db.Close()
	}()

	/**_, err = db.Exec("VACUUM")
	if err != nil {
		return err
	}**/

	_, err = db.Exec(`CREATE TABLE IF NOT EXISTS "` + tableAgentIPs + `" (
"id" integer NOT NULL PRIMARY KEY AUTOINCREMENT,
"ip" varchar(64),
"agentCode" varchar(128)
);`)
	if err != nil {
		return err
	}

	// prepared statements for agent ip records
	insertStmt, err = db.Prepare(`INSERT INTO "` + tableAgentIPs + `" ("id", "ip", "agentCode") VALUES (?, ?, ?)`)
	if err != nil {
		return err
	}

	listStmt, err = db.Prepare(`SELECT "id", "ip", "agentCode" FROM "` + tableAgentIPs + `" ORDER BY "id" ASC LIMIT ? OFFSET ?`)
	if err != nil {
		return err
	}

	this.insertAgentIPStmt = insertStmt
	this.listAgentIPsStmt = listStmt
	this.db = db
	done = true

	return nil
}
// InsertAgentIP stores one agent IP record with an explicit id. It returns an
// error when the database has not been initialized or when the insert
// statement fails.
func (this *DB) InsertAgentIP(ipId int64, ip string, agentCode string) error {
	if this.db == nil {
		return errors.New("db should not be nil")
	}

	this.log("InsertAgentIP", "id:", ipId, "ip:", ip, "agent:", agentCode)

	_, execErr := this.insertAgentIPStmt.Exec(ipId, ip, agentCode)
	return execErr
}
// ListAgentIPs returns up to size records ordered by id, skipping the first
// offset records. It returns an error when the database has not been
// initialized, when the query or a row scan fails, or when row iteration stops
// early because of an error; no partial result is returned in those cases.
func (this *DB) ListAgentIPs(offset int64, size int64) (agentIPs []*AgentIP, err error) {
	if this.db == nil {
		return nil, errors.New("db should not be nil")
	}

	// the statement takes LIMIT first, then OFFSET
	rows, err := this.listAgentIPsStmt.Query(size, offset)
	if err != nil {
		return nil, err
	}
	defer func() {
		_ = rows.Close()
	}()

	for rows.Next() {
		var agentIP = &AgentIP{}
		err = rows.Scan(&agentIP.Id, &agentIP.IP, &agentIP.AgentCode)
		if err != nil {
			return nil, err
		}
		agentIPs = append(agentIPs, agentIP)
	}

	// Next returns false both at the end of the result set and on failure,
	// so the iteration error has to be checked explicitly
	err = rows.Err()
	if err != nil {
		return nil, err
	}

	return agentIPs, nil
}
// Close releases the prepared statements and then the database handle.
// It is a no-op when the database was never initialized. Errors from closing
// the statements are ignored; the result of closing the handle is returned.
func (this *DB) Close() error {
	if this.db == nil {
		return nil
	}

	if this.insertAgentIPStmt != nil {
		_ = this.insertAgentIPStmt.Close()
	}
	if this.listAgentIPsStmt != nil {
		_ = this.listAgentIPsStmt.Close()
	}

	return this.db.Close()
}
// log prints the arguments through the standard logger, wrapping the first
// one in square brackets as a tag. Nothing is printed outside testing mode or
// when no argument is given.
func (this *DB) log(args ...any) {
	if !Tea.IsTesting() || len(args) == 0 {
		return
	}

	var tag = "[" + types.String(args[0]) + "]"
	args[0] = tag
	log.Println(args...)
}

View File

@@ -0,0 +1,54 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents
import (
"github.com/TeaOSLab/EdgeNode/internal/zero"
"sync"
)
// IPCacheMap is a bounded set of IP strings that drops its oldest entry when
// a new one is added at full capacity.
type IPCacheMap struct {
// m holds the members for lookups.
m map[string]zero.Zero
// list keeps the members in insertion order; the first element is removed first.
list []string
// locker guards m and list.
locker sync.RWMutex

// maxLen is the maximum number of entries kept.
maxLen int
}
// NewIPCacheMap creates an empty cache holding at most maxLen entries.
// A non-positive maxLen falls back to 65535.
func NewIPCacheMap(maxLen int) *IPCacheMap {
	var capacity = maxLen
	if capacity < 1 {
		capacity = 65535
	}

	var cache = &IPCacheMap{maxLen: capacity}
	cache.m = map[string]zero.Zero{}
	return cache
}
// Add inserts the IP into the cache. An IP that is already present is left
// untouched (its position is not refreshed). When the cache is full, the
// oldest entry is removed first.
func (this *IPCacheMap) Add(ip string) {
	this.locker.Lock()
	defer this.locker.Unlock()

	// already cached
	if _, exists := this.m[ip]; exists {
		return
	}

	// full: evict the oldest entry
	if len(this.list) >= this.maxLen {
		var oldest = this.list[0]
		this.list = this.list[1:]
		delete(this.m, oldest)
	}

	// store the new entry
	this.list = append(this.list, ip)
	this.m[ip] = zero.Zero{}
}
// Contains reports whether the IP is currently in the cache.
func (this *IPCacheMap) Contains(ip string) bool {
	this.locker.RLock()
	_, found := this.m[ip]
	this.locker.RUnlock()

	return found
}

View File

@@ -0,0 +1,33 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents
import (
"github.com/iwind/TeaGo/logs"
"testing"
)
// TestNewIPCacheMap fills a cache of capacity 3 and verifies, after every
// step, both the membership and the insertion order: the oldest entry must be
// evicted when the cache is full, and re-adding an existing entry must change
// nothing.
func TestNewIPCacheMap(t *testing.T) {
	var cacheMap = NewIPCacheMap(3)

	// verify dumps the current state and compares it with the expected order
	var verify = func(expected ...string) {
		t.Helper()

		logs.PrintAsJSON(cacheMap.m, t)
		logs.PrintAsJSON(cacheMap.list, t)

		if len(cacheMap.m) != len(expected) || len(cacheMap.list) != len(expected) {
			t.Fatalf("expected %d entries, got map=%d list=%d", len(expected), len(cacheMap.m), len(cacheMap.list))
		}
		for index, ip := range expected {
			if cacheMap.list[index] != ip {
				t.Fatalf("expected list[%d] to be %q, got %q", index, ip, cacheMap.list[index])
			}
			if !cacheMap.Contains(ip) {
				t.Fatalf("expected cache to contain %q", ip)
			}
		}
	}

	t.Log("====")
	cacheMap.Add("1")
	cacheMap.Add("2")
	verify("1", "2")

	t.Log("====")
	cacheMap.Add("3")
	verify("1", "2", "3")

	// capacity reached: "1" is the oldest entry and must be dropped
	t.Log("====")
	cacheMap.Add("4")
	verify("2", "3", "4")
	if cacheMap.Contains("1") {
		t.Fatal("expected \"1\" to be evicted")
	}

	// adding an existing entry must not evict or reorder anything
	t.Log("====")
	cacheMap.Add("3")
	verify("2", "3", "4")
}

View File

@@ -0,0 +1,200 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents
import (
"github.com/TeaOSLab/EdgeCommon/pkg/rpc/pb"
"github.com/TeaOSLab/EdgeNode/internal/events"
"github.com/TeaOSLab/EdgeNode/internal/goman"
"github.com/TeaOSLab/EdgeNode/internal/remotelogs"
"github.com/TeaOSLab/EdgeNode/internal/rpc"
"github.com/iwind/TeaGo/Tea"
"sync"
"time"
)
// SharedManager is the package-level Manager instance; this file's init
// starts it once events.EventLoaded fires.
var SharedManager = NewManager()
func init() {
events.On(events.EventLoaded, func() {
goman.New(func() {
SharedManager.Start()
})
})
}
// Manager keeps the known "ip => agent code" associations in memory, loading
// them from the local database and from the API.
type Manager struct {
// ipMap maps an IP to its agent code.
ipMap map[string]string // ip => agentCode
// locker guards ipMap.
locker sync.RWMutex

// db is the local database; it is set by SetDB or loadDB.
db *DB

// lastId is the largest record id seen so far; Loop sends it to the API to
// request only newer records. It is read and written without holding locker.
lastId int64
}
// NewManager creates a Manager with an empty IP map and no database attached.
func NewManager() *Manager {
	var manager = &Manager{}
	manager.ipMap = map[string]string{}
	return manager
}
// SetDB attaches the database used by Load and Loop, replacing any database
// set before.
func (this *Manager) SetDB(db *DB) {
	this.db = db
}
// Start opens the local database, loads the stored records into memory,
// fetches newer records from the API and then keeps fetching every 30 seconds.
// It returns only when the database cannot be opened; otherwise it blocks
// forever, so it must run in its own goroutine.
//
// Connection errors from the API are logged at debug level and all other
// errors at error level, for both the initial and the periodic fetches.
func (this *Manager) Start() {
	remotelogs.Println("AGENT_MANAGER", "starting ...")

	err := this.loadDB()
	if err != nil {
		remotelogs.Error("AGENT_MANAGER", "load database failed: "+err.Error())
		return
	}

	// load from the local database; a failure here is not fatal because the
	// records can still be fetched from the API
	err = this.Load()
	if err != nil {
		remotelogs.Error("AGENT_MANAGER", "load failed: "+err.Error())
	}

	// fetch reports a failed retrieval at the level matching its cause
	var fetch = func() {
		var loopErr = this.LoopAll()
		if loopErr == nil {
			return
		}
		if rpc.IsConnError(loopErr) {
			remotelogs.Debug("AGENT_MANAGER", "retrieve latest agent ip failed: "+loopErr.Error())
		} else {
			remotelogs.Error("AGENT_MANAGER", "retrieve latest agent ip failed: "+loopErr.Error())
		}
	}

	// fetch from the API first
	fetch()

	// then fetch periodically
	var ticker = time.NewTicker(30 * time.Second)
	for range ticker.C {
		fetch()
	}
}
// Load reads every record from the local database, in pages of 10000, into
// the in-memory map and raises lastId to the largest id seen. It stops at the
// first empty page or at the first database error.
func (this *Manager) Load() error {
	const pageSize int64 = 10000

	for offset := int64(0); ; offset += pageSize {
		page, err := this.db.ListAgentIPs(offset, pageSize)
		if err != nil {
			return err
		}
		if len(page) == 0 {
			return nil
		}

		for _, record := range page {
			this.locker.Lock()
			this.ipMap[record.IP] = record.AgentCode
			this.locker.Unlock()

			if this.lastId < record.Id {
				this.lastId = record.Id
			}
		}
	}
}
// LoopAll calls Loop repeatedly until it reports that no more data is
// available or returns an error.
func (this *Manager) LoopAll() error {
	for {
		more, err := this.Loop()
		switch {
		case err != nil:
			return err
		case !more:
			return nil
		}
	}
}
// Loop fetches one batch (at most 10000 records) of agent IPs whose id is
// greater than lastId from the API. Records that carry a client agent are
// written to the local database and to the in-memory map; every record
// advances lastId. hasNext is true when the batch was not empty, meaning the
// caller should ask again.
func (this *Manager) Loop() (hasNext bool, err error) {
	client, err := rpc.SharedRPC()
	if err != nil {
		return false, err
	}

	var ctx = client.Context()
	var req = &pb.ListClientAgentIPsAfterIdRequest{
		Id:   this.lastId,
		Size: 10000,
	}
	resp, err := client.ClientAgentIPRPC.ListClientAgentIPsAfterId(ctx, req)
	if err != nil {
		return false, err
	}
	if len(resp.ClientAgentIPs) == 0 {
		return false, nil
	}

	for _, item := range resp.ClientAgentIPs {
		if item.ClientAgent != nil {
			// write to the database
			err = this.db.InsertAgentIP(item.Id, item.Ip, item.ClientAgent.Code)
			if err != nil {
				return false, err
			}

			// write to the map
			this.locker.Lock()
			this.ipMap[item.Ip] = item.ClientAgent.Code
			this.locker.Unlock()
		}

		// move the cursor forward, also for records without an agent
		if this.lastId < item.Id {
			this.lastId = item.Id
		}
	}

	return true, nil
}
// AddIP records the agent code for an IP in memory, overwriting any previous
// value. Nothing is written to the database.
func (this *Manager) AddIP(ip string, agentCode string) {
	this.locker.Lock()
	defer this.locker.Unlock()

	this.ipMap[ip] = agentCode
}
// LookupIP returns the agent code recorded for the IP, or an empty string
// when the IP is unknown.
func (this *Manager) LookupIP(ip string) (agentCode string) {
	this.locker.RLock()
	agentCode = this.ipMap[ip]
	this.locker.RUnlock()

	return agentCode
}
// ContainsIP reports whether an agent code has been recorded for the IP.
func (this *Manager) ContainsIP(ip string) bool {
	this.locker.RLock()
	_, found := this.ipMap[ip]
	this.locker.RUnlock()

	return found
}
// loadDB opens the database at "<Tea.Root>/data/agents.db" and attaches it to
// the manager. The manager is left unchanged when initialization fails.
func (this *Manager) loadDB() error {
	var agentDB = NewDB(Tea.Root + "/data/agents.db")
	if err := agentDB.Init(); err != nil {
		return err
	}

	this.db = agentDB
	return nil
}

View File

@@ -0,0 +1,32 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents_test
import (
"github.com/TeaOSLab/EdgeNode/internal/utils/agents"
"github.com/iwind/TeaGo/Tea"
_ "github.com/iwind/TeaGo/bootstrap"
"testing"
)
// TestNewManager loads the local database into a fresh manager, runs a single
// fetch from the API and logs the agent code found for a sample IP.
func TestNewManager(t *testing.T) {
	var db = agents.NewDB(Tea.Root + "/data/agents.db")
	if err := db.Init(); err != nil {
		t.Fatal(err)
	}

	var manager = agents.NewManager()
	manager.SetDB(db)

	if err := manager.Load(); err != nil {
		t.Fatal(err)
	}

	if _, err := manager.Loop(); err != nil {
		t.Fatal(err)
	}

	t.Log(manager.LookupIP("192.168.3.100"))
}

View File

@@ -0,0 +1,133 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents
import (
"github.com/TeaOSLab/EdgeCommon/pkg/rpc/pb"
"github.com/TeaOSLab/EdgeNode/internal/events"
"github.com/TeaOSLab/EdgeNode/internal/goman"
"github.com/TeaOSLab/EdgeNode/internal/remotelogs"
"github.com/TeaOSLab/EdgeNode/internal/rpc"
"github.com/iwind/TeaGo/Tea"
"net"
)
func init() {
events.On(events.EventLoaded, func() {
goman.New(func() {
SharedQueue.Start()
})
})
}
// SharedQueue is the package-level Queue instance; this file's init starts
// it once events.EventLoaded fires.
var SharedQueue = NewQueue()
// Queue receives client IPs and processes them one by one in Start.
type Queue struct {
// c carries the IPs waiting to be processed.
c chan string // chan ip
// cacheMap remembers IPs that were already pushed, so Push can skip them.
cacheMap *IPCacheMap
}
// NewQueue creates a queue with a 128-slot channel and a cache of up to 65535
// recently pushed IPs.
func NewQueue() *Queue {
	var queue = &Queue{}
	queue.c = make(chan string, 128)
	queue.cacheMap = NewIPCacheMap(65535)
	return queue
}
// Start processes queued IPs one at a time until the channel is closed.
// Processing errors are not reported; they are only written to the debug log
// in testing mode.
func (this *Queue) Start() {
	for ip := range this.c {
		if err := this.Process(ip); err != nil && Tea.IsTesting() {
			remotelogs.Debug("SharedParseQueue", err.Error())
		}
	}
}
// Push adds the IP to the processing queue without blocking.
//
// An IP that was already queued recently is skipped. The IP is remembered only
// once it has actually been placed on the queue: when the queue is full the IP
// is dropped for now, and because it is not remembered a later request from
// the same IP can queue it again instead of being suppressed.
func (this *Queue) Push(ip string) {
	// already queued or processed recently
	if this.cacheMap.Contains(ip) {
		return
	}

	// add to the queue
	select {
	case this.c <- ip:
		this.cacheMap.Add(ip)
	default:
		// queue is full: give up this time and let a later request retry
	}
}
// Process resolves the PTR name of the IP, matches it against the known
// agents and, on a match, records the association locally and reports it to
// the API. It returns nil without doing anything when the IP is already known,
// has no usable PTR name, or matches no agent.
func (this *Queue) Process(ip string) error {
	// already stored
	if SharedManager.ContainsIP(ip) {
		return nil
	}

	ptr, err := this.ParseIP(ip)
	if err != nil {
		return err
	}
	switch ptr {
	case "", ".":
		return nil
	}

	//remotelogs.Debug("AGENT", ip+" => "+ptr)

	var code = this.ParsePtr(ptr)
	if code == "" {
		return nil
	}

	// record locally first
	SharedManager.AddIP(ip, code)

	// then report to the API
	var req = &pb.CreateClientAgentIPsRequest{
		AgentIPs: []*pb.CreateClientAgentIPsRequest_AgentIPInfo{
			{
				AgentCode: code,
				Ip:        ip,
				Ptr:       ptr,
			},
		},
	}

	client, err := rpc.SharedRPC()
	if err != nil {
		return err
	}

	_, err = client.ClientAgentIPRPC.CreateClientAgentIPs(client.Context(), req)
	return err
}
// ParseIP performs a reverse lookup of the IP and returns the first name
// found. It returns an empty string without an error when ip is empty or the
// lookup yields no names.
func (this *Queue) ParseIP(ip string) (ptr string, err error) {
	if ip == "" {
		return "", nil
	}

	names, err := net.LookupAddr(ip)
	switch {
	case err != nil:
		return "", err
	case len(names) == 0:
		return "", nil
	}

	return names[0], nil
}
// ParsePtr returns the code of the first agent in AllAgents whose rules match
// the PTR name, or an empty string when none matches.
func (this *Queue) ParsePtr(ptr string) (agentCode string) {
	for index := range AllAgents {
		if candidate := AllAgents[index]; candidate.Match(ptr) {
			return candidate.Code
		}
	}
	return ""
}

View File

@@ -0,0 +1,76 @@
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved. Official site: https://goedge.cn .
package agents_test
import (
"github.com/TeaOSLab/EdgeNode/internal/utils/agents"
"github.com/iwind/TeaGo/assert"
_ "github.com/iwind/TeaGo/bootstrap"
"testing"
"time"
)
// TestParseQueue_Process starts a queue in the background, pushes one IP and
// waits a moment to give the queue time to process it.
func TestParseQueue_Process(t *testing.T) {
	const pause = 1 * time.Second

	var queue = agents.NewQueue()
	go queue.Start()

	time.Sleep(pause)
	queue.Push("220.181.13.100")
	time.Sleep(pause)
}
// TestParseQueue_ParseIP logs the reverse lookup result, or the lookup error,
// for a handful of sample IPs.
func TestParseQueue_ParseIP(t *testing.T) {
	var queue = agents.NewQueue()
	var samples = []string{
		"192.168.1.100",
		"42.120.160.1",
		"42.236.10.98",
		"124.115.0.100",
	}

	for index := range samples {
		var ip = samples[index]
		ptr, err := queue.ParseIP(ip)
		if err == nil {
			t.Log(ip, "=>", ptr)
		} else {
			t.Log(ip, "=>", err)
		}
	}
}
// TestParseQueue_ParsePtr asserts that each sample PTR name resolves to the
// expected agent code.
func TestParseQueue_ParsePtr(t *testing.T) {
	var a = assert.NewAssertion(t)
	var queue = agents.NewQueue()

	var samples = []struct {
		ptr  string
		code string
	}{
		{"baiduspider-220-181-108-101.crawl.baidu.com.", "baidu"},
		{"crawl-66-249-71-219.googlebot.com.", "google"},
		{"msnbot-40-77-167-31.search.msn.com.", "bing"},
		{"sogouspider-49-7-20-129.crawl.sogou.com.", "sogou"},
		{"m13102.mail.163.com.", "youdao"},
		{"yeurosport.pat1.tc2.yahoo.com.", "yahoo"},
		{"shenmaspider-42-120-160-1.crawl.sm.cn.", "sm"},
		{"93-158-161-39.spider.yandex.com.", "yandex"},
		{"25.bl.bot.semrush.com.", "semrush"},
	}

	for _, sample := range samples {
		a.IsTrue(queue.ParsePtr(sample.ptr) == sample.code)
	}
}
// BenchmarkQueue_ParsePtr measures ParsePtr over one sample PTR name per
// suffix rule. The sample table is rebuilt on every iteration.
func BenchmarkQueue_ParsePtr(b *testing.B) {
	var queue = agents.NewQueue()

	for n := 0; n < b.N; n++ {
		var samples = [][]string{
			{"baiduspider-220-181-108-101.crawl.baidu.com.", "baidu"},
			{"crawl-66-249-71-219.googlebot.com.", "google"},
			{"msnbot-40-77-167-31.search.msn.com.", "bing"},
			{"sogouspider-49-7-20-129.crawl.sogou.com.", "sogou"},
			{"m13102.mail.163.com.", "youdao"},
			{"yeurosport.pat1.tc2.yahoo.com.", "yahoo"},
			{"shenmaspider-42-120-160-1.crawl.sm.cn.", "sm"},
			{"93-158-161-39.spider.yandex.com.", "yandex"},
			{"93.158.164.218-red.dhcp.yndx.net.", "yandex"},
			{"25.bl.bot.semrush.com.", "semrush"},
		}
		for index := range samples {
			queue.ParsePtr(samples[index][0])
		}
	}
}