更好地从访问日志中删除非UTF-8字符内容

This commit is contained in:
GoEdgeLab
2023-04-09 17:50:54 +08:00
parent ab3e309ea7
commit 67e18b723b
2 changed files with 28 additions and 2 deletions

View File

@@ -11,6 +11,7 @@ import (
"google.golang.org/grpc/status"
"strings"
"time"
"unicode/utf8"
)
var sharedHTTPAccessLogQueue = NewHTTPAccessLogQueue()
@@ -139,6 +140,7 @@ Loop:
// ToValidUTF8 处理访问日志中的非UTF-8字节
func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) {
accessLog.RemoteAddr = utils.ToValidUTF8string(accessLog.RemoteAddr)
accessLog.RemoteUser = utils.ToValidUTF8string(accessLog.RemoteUser)
accessLog.RequestURI = utils.ToValidUTF8string(accessLog.RequestURI)
accessLog.RequestPath = utils.ToValidUTF8string(accessLog.RequestPath)
@@ -147,7 +149,12 @@ func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) {
accessLog.Host = utils.ToValidUTF8string(accessLog.Host)
accessLog.Hostname = utils.ToValidUTF8string(accessLog.Hostname)
for _, v := range accessLog.SentHeader {
for k, v := range accessLog.SentHeader {
if !utf8.ValidString(k) {
delete(accessLog.SentHeader, k)
continue
}
for index, s := range v.Values {
v.Values[index] = utils.ToValidUTF8string(s)
}
@@ -159,13 +166,21 @@ func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) {
accessLog.ContentType = utils.ToValidUTF8string(accessLog.ContentType)
for k, c := range accessLog.Cookie {
if !utf8.ValidString(k) {
delete(accessLog.Cookie, k)
continue
}
accessLog.Cookie[k] = utils.ToValidUTF8string(c)
}
accessLog.Args = utils.ToValidUTF8string(accessLog.Args)
accessLog.QueryString = utils.ToValidUTF8string(accessLog.QueryString)
for _, v := range accessLog.Header {
for k, v := range accessLog.Header {
if !utf8.ValidString(k) {
delete(accessLog.Header, k)
continue
}
for index, s := range v.Values {
v.Values[index] = utils.ToValidUTF8string(s)
}

View File

@@ -17,6 +17,7 @@ import (
"strings"
"testing"
"time"
"unicode/utf8"
)
func TestHTTPAccessLogQueue_Push(t *testing.T) {
@@ -135,6 +136,16 @@ func TestHTTPAccessLogQueue_Memory(t *testing.T) {
time.Sleep(5 * time.Second)
}
func TestUTF8_IsValid(t *testing.T) {
t.Log(utf8.ValidString("abc"))
var noneUTF8Bytes = []byte{}
for i := 0; i < 254; i++ {
noneUTF8Bytes = append(noneUTF8Bytes, uint8(i))
}
t.Log(utf8.ValidString(string(noneUTF8Bytes)))
}
func BenchmarkHTTPAccessLogQueue_ToValidUTF8(b *testing.B) {
runtime.GOMAXPROCS(1)