From 67e18b723b0c0db6be9eb055a340c6949f51c362 Mon Sep 17 00:00:00 2001 From: GoEdgeLab Date: Sun, 9 Apr 2023 17:50:54 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E5=A5=BD=E5=9C=B0=E4=BB=8E=E8=AE=BF?= =?UTF-8?q?=E9=97=AE=E6=97=A5=E5=BF=97=E4=B8=AD=E5=88=A0=E9=99=A4=E9=9D=9E?= =?UTF-8?q?UTF-8=E5=AD=97=E7=AC=A6=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/nodes/http_access_log_queue.go | 19 +++++++++++++++++-- internal/nodes/http_access_log_queue_test.go | 11 +++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/internal/nodes/http_access_log_queue.go b/internal/nodes/http_access_log_queue.go index e03fcaa..3c7ed32 100644 --- a/internal/nodes/http_access_log_queue.go +++ b/internal/nodes/http_access_log_queue.go @@ -11,6 +11,7 @@ import ( "google.golang.org/grpc/status" "strings" "time" + "unicode/utf8" ) var sharedHTTPAccessLogQueue = NewHTTPAccessLogQueue() @@ -139,6 +140,7 @@ Loop: // ToValidUTF8 处理访问日志中的非UTF-8字节 func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) { + accessLog.RemoteAddr = utils.ToValidUTF8string(accessLog.RemoteAddr) accessLog.RemoteUser = utils.ToValidUTF8string(accessLog.RemoteUser) accessLog.RequestURI = utils.ToValidUTF8string(accessLog.RequestURI) accessLog.RequestPath = utils.ToValidUTF8string(accessLog.RequestPath) @@ -147,7 +149,12 @@ func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) { accessLog.Host = utils.ToValidUTF8string(accessLog.Host) accessLog.Hostname = utils.ToValidUTF8string(accessLog.Hostname) - for _, v := range accessLog.SentHeader { + for k, v := range accessLog.SentHeader { + if !utf8.ValidString(k) { + delete(accessLog.SentHeader, k) + continue + } + for index, s := range v.Values { v.Values[index] = utils.ToValidUTF8string(s) } @@ -159,13 +166,21 @@ func (this *HTTPAccessLogQueue) ToValidUTF8(accessLog *pb.HTTPAccessLog) { accessLog.ContentType = utils.ToValidUTF8string(accessLog.ContentType) for k, c := range accessLog.Cookie { + if !utf8.ValidString(k) { + delete(accessLog.Cookie, k) + continue + } accessLog.Cookie[k] = utils.ToValidUTF8string(c) } accessLog.Args = utils.ToValidUTF8string(accessLog.Args) accessLog.QueryString = utils.ToValidUTF8string(accessLog.QueryString) - for _, v := range accessLog.Header { + for k, v := range accessLog.Header { + if !utf8.ValidString(k) { + delete(accessLog.Header, k) + continue + } for index, s := range v.Values { v.Values[index] = utils.ToValidUTF8string(s) } diff --git a/internal/nodes/http_access_log_queue_test.go b/internal/nodes/http_access_log_queue_test.go index d2d81fa..a4ba9a9 100644 --- a/internal/nodes/http_access_log_queue_test.go +++ b/internal/nodes/http_access_log_queue_test.go @@ -17,6 +17,7 @@ import ( "strings" "testing" "time" + "unicode/utf8" ) func TestHTTPAccessLogQueue_Push(t *testing.T) { @@ -135,6 +136,16 @@ func TestHTTPAccessLogQueue_Memory(t *testing.T) { time.Sleep(5 * time.Second) } +func TestUTF8_IsValid(t *testing.T) { + t.Log(utf8.ValidString("abc")) + + var noneUTF8Bytes = []byte{} + for i := 0; i < 254; i++ { + noneUTF8Bytes = append(noneUTF8Bytes, uint8(i)) + } + t.Log(utf8.ValidString(string(noneUTF8Bytes))) +} + func BenchmarkHTTPAccessLogQueue_ToValidUTF8(b *testing.B) { runtime.GOMAXPROCS(1)