From 511e0d409f0380e4a12885db3bf79243dc0a79be Mon Sep 17 00:00:00 2001 From: GoEdgeLab Date: Fri, 7 Apr 2023 19:19:53 +0800 Subject: [PATCH] =?UTF-8?q?URL=E8=B7=B3=E8=BD=AC=E6=97=B6=E9=BB=98?= =?UTF-8?q?=E8=AE=A4=E5=AF=B9=E6=90=9C=E7=B4=A2=E5=BC=95=E6=93=8E=E8=AE=BF?= =?UTF-8?q?=E9=97=AE=E4=BD=BF=E7=94=A8301=EF=BC=8C=E4=BB=A5=E6=8F=90?= =?UTF-8?q?=E5=8D=87SEO=E6=95=88=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/nodes/http_request_host_redirect.go | 47 +++++++++---------- internal/nodes/http_request_redirect_https.go | 10 ++-- internal/nodes/http_request_utils.go | 6 ++- 3 files changed, 32 insertions(+), 31 deletions(-) diff --git a/internal/nodes/http_request_host_redirect.go b/internal/nodes/http_request_host_redirect.go index 4850569..af745bf 100644 --- a/internal/nodes/http_request_host_redirect.go +++ b/internal/nodes/http_request_host_redirect.go @@ -25,6 +25,16 @@ func (this *HTTPRequest) doHostRedirect() (blocked bool) { if !u.MatchRequest(this.Format) { continue } + + var status = u.Status + if status <= 0 { + if searchEngineRegex.MatchString(this.RawReq.UserAgent()) { + status = http.StatusMovedPermanently + } else { + status = http.StatusTemporaryRedirect + } + } + if len(u.Type) == 0 || u.Type == serverconfigs.HTTPHostRedirectTypeURL { if u.MatchPrefix { // 匹配前缀 if strings.HasPrefix(fullURL, u.BeforeURL) { @@ -38,11 +48,8 @@ func (this *HTTPRequest) doHostRedirect() (blocked bool) { return false } - if u.Status <= 0 { - u.Status = http.StatusTemporaryRedirect - } - this.processResponseHeaders(this.writer.Header(), u.Status) - http.Redirect(this.RawWriter, this.RawReq, afterURL, u.Status) + this.processResponseHeaders(this.writer.Header(), status) + http.Redirect(this.RawWriter, this.RawReq, afterURL, status) return true } } else if u.MatchRegexp { // 正则匹配 @@ -83,11 +90,8 @@ func (this *HTTPRequest) doHostRedirect() (blocked bool) { } } - if u.Status <= 0 { - u.Status = http.StatusTemporaryRedirect - } - this.processResponseHeaders(this.writer.Header(), u.Status) - http.Redirect(this.RawWriter, this.RawReq, afterURL, u.Status) + this.processResponseHeaders(this.writer.Header(), status) + http.Redirect(this.RawWriter, this.RawReq, afterURL, status) return true } else { // 精准匹配 if fullURL == u.RealBeforeURL() { @@ -104,11 +108,8 @@ func (this *HTTPRequest) doHostRedirect() (blocked bool) { } } - if u.Status <= 0 { - u.Status = http.StatusTemporaryRedirect - } - this.processResponseHeaders(this.writer.Header(), u.Status) - http.Redirect(this.RawWriter, this.RawReq, afterURL, u.Status) + this.processResponseHeaders(this.writer.Header(), status) + http.Redirect(this.RawWriter, this.RawReq, afterURL, status) return true } } @@ -142,10 +143,8 @@ func (this *HTTPRequest) doHostRedirect() (blocked bool) { // 终止匹配 return false } - if u.Status <= 0 { - u.Status = http.StatusTemporaryRedirect - } - this.processResponseHeaders(this.writer.Header(), u.Status) + + this.processResponseHeaders(this.writer.Header(), status) // 参数 var qIndex = strings.Index(this.uri, "?") @@ -153,7 +152,7 @@ func (this *HTTPRequest) doHostRedirect() (blocked bool) { afterURL += this.uri[qIndex:] } - http.Redirect(this.RawWriter, this.RawReq, afterURL, u.Status) + http.Redirect(this.RawWriter, this.RawReq, afterURL, status) return true } } else if u.Type == serverconfigs.HTTPHostRedirectTypePort { @@ -200,11 +199,9 @@ func (this *HTTPRequest) doHostRedirect() (blocked bool) { // 终止匹配 return false } - if u.Status <= 0 { - u.Status = http.StatusTemporaryRedirect - } - this.processResponseHeaders(this.writer.Header(), u.Status) - http.Redirect(this.RawWriter, this.RawReq, afterURL, u.Status) + + this.processResponseHeaders(this.writer.Header(), status) + http.Redirect(this.RawWriter, this.RawReq, afterURL, status) return true } } diff --git a/internal/nodes/http_request_redirect_https.go b/internal/nodes/http_request_redirect_https.go index 3415d85..723c1a3 100644 --- a/internal/nodes/http_request_redirect_https.go +++ b/internal/nodes/http_request_redirect_https.go @@ -8,7 +8,7 @@ import ( ) func (this *HTTPRequest) doRedirectToHTTPS(redirectToHTTPSConfig *serverconfigs.HTTPRedirectToHTTPSConfig) (shouldBreak bool) { - host := this.RawReq.Host + var host = this.RawReq.Host // 检查域名是否匹配 if !redirectToHTTPSConfig.MatchDomain(host) { @@ -22,7 +22,7 @@ func (this *HTTPRequest) doRedirectToHTTPS(redirectToHTTPSConfig *serverconfigs. host = redirectToHTTPSConfig.Host } } else if redirectToHTTPSConfig.Port > 0 { - lastIndex := strings.LastIndex(host, ":") + var lastIndex = strings.LastIndex(host, ":") if lastIndex > 0 { host = host[:lastIndex] } @@ -30,18 +30,18 @@ func (this *HTTPRequest) doRedirectToHTTPS(redirectToHTTPSConfig *serverconfigs. host = host + ":" + strconv.Itoa(redirectToHTTPSConfig.Port) } } else { - lastIndex := strings.LastIndex(host, ":") + var lastIndex = strings.LastIndex(host, ":") if lastIndex > 0 { host = host[:lastIndex] } } - statusCode := http.StatusMovedPermanently + var statusCode = http.StatusMovedPermanently if redirectToHTTPSConfig.Status > 0 { statusCode = redirectToHTTPSConfig.Status } - newURL := "https://" + host + this.RawReq.RequestURI + var newURL = "https://" + host + this.RawReq.RequestURI this.processResponseHeaders(this.writer.Header(), statusCode) http.Redirect(this.writer, this.RawReq, newURL, statusCode) diff --git a/internal/nodes/http_request_utils.go b/internal/nodes/http_request_utils.go index 4c2b874..555d663 100644 --- a/internal/nodes/http_request_utils.go +++ b/internal/nodes/http_request_utils.go @@ -15,7 +15,11 @@ import ( "sync/atomic" ) -// 其中的每个括号里的内容都在被引用,不能轻易修改 +// 搜索引擎和爬虫正则 +var searchEngineRegex = regexp.MustCompile(`(?i)(60spider|adldxbot|adsbot-google|applebot|admantx|alexa|baidu|bingbot|bingpreview|facebookexternalhit|googlebot|proximic|slurp|sogou|twitterbot|yandex)`) +var spiderRegexp = regexp.MustCompile(`(?i)(python|pycurl|http-client|httpclient|apachebench|nethttp|http_request|java|perl|ruby|scrapy|php|rust)`) + +// 内容范围正则,其中的每个括号里的内容都在被引用,不能轻易修改 var contentRangeRegexp = regexp.MustCompile(`^bytes (\d+)-(\d+)/(\d+|\*)`) // 分解Range