更好地从访问日志中删除非UTF-8字符内容

This commit is contained in:
GoEdgeLab
2023-04-09 17:50:54 +08:00
parent ab3e309ea7
commit 67e18b723b
2 changed files with 28 additions and 2 deletions

View File

@@ -11,6 +11,7 @@ import (
"google.golang.org/grpc/status" "google.golang.org/grpc/status"
"strings" "strings"
"time" "time"
"unicode/utf8"
) )
var sharedHTTPAccessLogQueue = NewHTTPAccessLogQueue() var sharedHTTPAccessLogQueue = NewHTTPAccessLogQueue()
@@ -139,6 +140,7 @@ Loop:
// ToValidUTF8 处理访问日志中的非UTF-8字节 // ToValidUTF8 处理访问日志中的非UTF-8字节
func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) { func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) {
accessLog.RemoteAddr = utils.ToValidUTF8string(accessLog.RemoteAddr)
accessLog.RemoteUser = utils.ToValidUTF8string(accessLog.RemoteUser) accessLog.RemoteUser = utils.ToValidUTF8string(accessLog.RemoteUser)
accessLog.RequestURI = utils.ToValidUTF8string(accessLog.RequestURI) accessLog.RequestURI = utils.ToValidUTF8string(accessLog.RequestURI)
accessLog.RequestPath = utils.ToValidUTF8string(accessLog.RequestPath) accessLog.RequestPath = utils.ToValidUTF8string(accessLog.RequestPath)
@@ -147,7 +149,12 @@ func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) {
accessLog.Host = utils.ToValidUTF8string(accessLog.Host) accessLog.Host = utils.ToValidUTF8string(accessLog.Host)
accessLog.Hostname = utils.ToValidUTF8string(accessLog.Hostname) accessLog.Hostname = utils.ToValidUTF8string(accessLog.Hostname)
for _, v := range accessLog.SentHeader { for k, v := range accessLog.SentHeader {
if !utf8.ValidString(k) {
delete(accessLog.SentHeader, k)
continue
}
for index, s := range v.Values { for index, s := range v.Values {
v.Values[index] = utils.ToValidUTF8string(s) v.Values[index] = utils.ToValidUTF8string(s)
} }
@@ -159,13 +166,21 @@ func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) {
accessLog.ContentType = utils.ToValidUTF8string(accessLog.ContentType) accessLog.ContentType = utils.ToValidUTF8string(accessLog.ContentType)
for k, c := range accessLog.Cookie { for k, c := range accessLog.Cookie {
if !utf8.ValidString(k) {
delete(accessLog.Cookie, k)
continue
}
accessLog.Cookie[k] = utils.ToValidUTF8string(c) accessLog.Cookie[k] = utils.ToValidUTF8string(c)
} }
accessLog.Args = utils.ToValidUTF8string(accessLog.Args) accessLog.Args = utils.ToValidUTF8string(accessLog.Args)
accessLog.QueryString = utils.ToValidUTF8string(accessLog.QueryString) accessLog.QueryString = utils.ToValidUTF8string(accessLog.QueryString)
for _, v := range accessLog.Header { for k, v := range accessLog.Header {
if !utf8.ValidString(k) {
delete(accessLog.Header, k)
continue
}
for index, s := range v.Values { for index, s := range v.Values {
v.Values[index] = utils.ToValidUTF8string(s) v.Values[index] = utils.ToValidUTF8string(s)
} }

View File

@@ -17,6 +17,7 @@ import (
"strings" "strings"
"testing" "testing"
"time" "time"
"unicode/utf8"
) )
func TestHTTPAccessLogQueue_Push(t *testing.T) { func TestHTTPAccessLogQueue_Push(t *testing.T) {
@@ -135,6 +136,16 @@ func TestHTTPAccessLogQueue_Memory(t *testing.T) {
time.Sleep(5 * time.Second) time.Sleep(5 * time.Second)
} }
// TestUTF8_IsValid checks that utf8.ValidString accepts plain ASCII text and
// rejects a string containing bytes that cannot be decoded as UTF-8.
func TestUTF8_IsValid(t *testing.T) {
	if !utf8.ValidString("abc") {
		t.Fatal("expected \"abc\" to be reported as valid UTF-8")
	}

	// Bytes 0x00..0xFD in ascending order: 0x80 follows 0x7F as a lone
	// continuation byte, so the resulting string is not valid UTF-8.
	const byteCount = 254
	nonUTF8Bytes := make([]byte, 0, byteCount)
	for i := 0; i < byteCount; i++ {
		nonUTF8Bytes = append(nonUTF8Bytes, byte(i))
	}
	if utf8.ValidString(string(nonUTF8Bytes)) {
		t.Fatal("expected the byte sequence 0x00..0xFD to be reported as invalid UTF-8")
	}
}
func BenchmarkHTTPAccessLogQueue_ToValidUTF8(b *testing.B) { func BenchmarkHTTPAccessLogQueue_ToValidUTF8(b *testing.B) {
runtime.GOMAXPROCS(1) runtime.GOMAXPROCS(1)