2021-01-10 22:35:34 +08:00
|
|
|
|
package nodes
|
|
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
|
"crypto/rand"
|
|
|
|
|
|
"fmt"
|
|
|
|
|
|
"io"
|
2021-12-14 21:27:24 +08:00
|
|
|
|
"net/http"
|
2023-06-16 09:56:37 +08:00
|
|
|
|
"net/url"
|
2022-03-03 19:36:28 +08:00
|
|
|
|
"regexp"
|
2021-01-10 22:35:34 +08:00
|
|
|
|
"strconv"
|
|
|
|
|
|
"strings"
|
2021-12-02 11:30:47 +08:00
|
|
|
|
"sync/atomic"
|
2024-07-27 15:42:50 +08:00
|
|
|
|
|
|
|
|
|
|
teaconst "github.com/TeaOSLab/EdgeNode/internal/const"
|
|
|
|
|
|
"github.com/TeaOSLab/EdgeNode/internal/utils/fasttime"
|
|
|
|
|
|
"github.com/TeaOSLab/EdgeNode/internal/utils/ranges"
|
|
|
|
|
|
"github.com/iwind/TeaGo/types"
|
2021-01-10 22:35:34 +08:00
|
|
|
|
)
|
|
|
|
|
|
|
2023-04-07 19:19:53 +08:00
|
|
|
|
// 搜索引擎和爬虫正则
|
|
|
|
|
|
// searchEngineRegex matches, case-insensitively and anywhere in the input, any
// of the listed search-engine / crawler name fragments (googlebot, bingbot,
// baidu, yandex, ...).
// NOTE(review): presumably applied to the User-Agent header — confirm against callers.
var searchEngineRegex = regexp.MustCompile(`(?i)(60spider|adldxbot|adsbot-google|applebot|admantx|alexa|baidu|bingbot|bingpreview|facebookexternalhit|googlebot|proximic|slurp|sogou|twitterbot|yandex)`)
|
|
|
|
|
|
// spiderRegexp matches, case-insensitively and anywhere in the input, name
// fragments of common HTTP client libraries and scripting tools (python,
// httpclient, apachebench, scrapy, ...).
// NOTE(review): presumably used to flag non-browser clients by User-Agent — confirm against callers.
var spiderRegexp = regexp.MustCompile(`(?i)(python|pycurl|http-client|httpclient|apachebench|nethttp|http_request|java|perl|ruby|scrapy|php|rust)`)
|
|
|
|
|
|
|
|
|
|
|
|
// 内容范围正则,其中的每个括号里的内容都在被引用,不能轻易修改
|
2022-03-04 11:51:59 +08:00
|
|
|
|
// contentRangeRegexp matches the beginning of a Content-Range value of the form
// "bytes <first>-<last>/<size>", where <size> is a decimal number or "*".
// Capture groups: 1 = first, 2 = last, 3 = size. The groups are referenced by
// index, so do not change or reorder them casually.
var contentRangeRegexp = regexp.MustCompile(`^bytes (\d+)-(\d+)/(\d+|\*)`)
|
2022-03-03 19:36:28 +08:00
|
|
|
|
|
2023-06-16 09:56:37 +08:00
|
|
|
|
// URL协议前缀
|
|
|
|
|
|
// urlSchemeRegexp matches a leading "http://", "https://" or "ftp://" scheme
// prefix, ignoring case.
var urlSchemeRegexp = regexp.MustCompile("^(?i)(http|https|ftp)://")
|
|
|
|
|
|
|
2021-01-10 22:35:34 +08:00
|
|
|
|
// 分解Range
|
2022-03-03 19:36:28 +08:00
|
|
|
|
func httpRequestParseRangeHeader(rangeValue string) (result []rangeutils.Range, ok bool) {
|
2021-01-10 22:35:34 +08:00
|
|
|
|
// 参考RFC:https://tools.ietf.org/html/rfc7233
|
|
|
|
|
|
index := strings.Index(rangeValue, "=")
|
|
|
|
|
|
if index == -1 {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
unit := rangeValue[:index]
|
|
|
|
|
|
if unit != "bytes" {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2022-03-03 19:36:28 +08:00
|
|
|
|
var rangeSetString = rangeValue[index+1:]
|
2021-01-10 22:35:34 +08:00
|
|
|
|
if len(rangeSetString) == 0 {
|
|
|
|
|
|
ok = true
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2022-03-03 19:36:28 +08:00
|
|
|
|
var pieces = strings.Split(rangeSetString, ", ")
|
2021-01-10 22:35:34 +08:00
|
|
|
|
for _, piece := range pieces {
|
2022-03-03 19:36:28 +08:00
|
|
|
|
index = strings.Index(piece, "-")
|
2021-01-10 22:35:34 +08:00
|
|
|
|
if index == -1 {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
first := piece[:index]
|
2021-01-13 12:52:38 +08:00
|
|
|
|
firstInt := int64(-1)
|
2021-01-10 22:35:34 +08:00
|
|
|
|
|
|
|
|
|
|
var err error
|
|
|
|
|
|
last := piece[index+1:]
|
2021-01-13 12:52:38 +08:00
|
|
|
|
var lastInt = int64(-1)
|
2021-01-10 22:35:34 +08:00
|
|
|
|
|
|
|
|
|
|
if len(first) > 0 {
|
2021-01-13 12:52:38 +08:00
|
|
|
|
firstInt, err = strconv.ParseInt(first, 10, 64)
|
2021-01-10 22:35:34 +08:00
|
|
|
|
if err != nil {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if len(last) > 0 {
|
2021-01-13 12:52:38 +08:00
|
|
|
|
lastInt, err = strconv.ParseInt(last, 10, 64)
|
2021-01-10 22:35:34 +08:00
|
|
|
|
if err != nil {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
if lastInt < firstInt {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
} else {
|
|
|
|
|
|
if len(last) == 0 {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2021-01-13 12:52:38 +08:00
|
|
|
|
lastInt, err = strconv.ParseInt(last, 10, 64)
|
2021-01-10 22:35:34 +08:00
|
|
|
|
if err != nil {
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
lastInt = -lastInt
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2022-03-03 19:36:28 +08:00
|
|
|
|
result = append(result, [2]int64{firstInt, lastInt})
|
2021-01-10 22:35:34 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
ok = true
|
|
|
|
|
|
return
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// httpRequestReadRange streams the bytes of the inclusive range [start, end]
// from reader to callback.
//
// reader must also implement io.Seeker; it is positioned at start and then read
// repeatedly into buf. After each read, callback receives buf together with the
// number of leading bytes in it that belong to the range; the final chunk is
// trimmed so that nothing past end is delivered. buf is reused between calls.
//
// Returns:
//   - (true, nil) when the range was delivered up to end, or the reader hit
//     io.EOF first (in which case fewer bytes than requested were delivered);
//   - (false, nil) when the arguments are unusable (negative offsets,
//     end < start, empty buf), reader is not seekable, or the seek fails;
//   - (false, err) when reading fails or callback returns an error.
func httpRequestReadRange(reader io.Reader, buf []byte, start int64, end int64, callback func(buf []byte, n int) error) (ok bool, err error) {
	// An inverted range would hand callback a non-positive length, and an
	// empty buffer would make the read loop spin without progress.
	if start < 0 || end < 0 || end < start || len(buf) == 0 {
		return false, nil
	}

	seeker, isSeeker := reader.(io.Seeker)
	if !isSeeker {
		return false, nil
	}

	// A failed seek is reported as "not ok" rather than as an error.
	if _, seekErr := seeker.Seek(start, io.SeekStart); seekErr != nil {
		return false, nil
	}

	// offset is the absolute position just past the last byte read so far.
	var offset = start
	for {
		n, readErr := reader.Read(buf)
		if n > 0 {
			offset += int64(n)
			if end < offset {
				// This chunk reaches or passes end: drop the surplus tail
				// (offset-end-1 bytes) and stop.
				if callbackErr := callback(buf, n-int(offset-end-1)); callbackErr != nil {
					return false, callbackErr
				}
				return true, nil
			}
			if callbackErr := callback(buf, n); callbackErr != nil {
				return false, callbackErr
			}
		}

		if readErr != nil {
			if readErr == io.EOF {
				return true, nil
			}
			return false, readErr
		}
	}
}
|
|
|
|
|
|
|
2022-03-03 19:36:28 +08:00
|
|
|
|
// 分解Content-Range
|
2022-03-04 11:51:59 +08:00
|
|
|
|
func httpRequestParseContentRangeHeader(contentRange string) (start int64, total int64) {
|
2022-03-03 19:36:28 +08:00
|
|
|
|
var matches = contentRangeRegexp.FindStringSubmatch(contentRange)
|
2022-03-04 11:51:59 +08:00
|
|
|
|
if len(matches) < 4 {
|
|
|
|
|
|
return -1, -1
|
2022-03-03 19:36:28 +08:00
|
|
|
|
}
|
2022-03-04 11:51:59 +08:00
|
|
|
|
|
|
|
|
|
|
start = types.Int64(matches[1])
|
|
|
|
|
|
var sizeString = matches[3]
|
|
|
|
|
|
if sizeString != "*" {
|
|
|
|
|
|
total = types.Int64(sizeString)
|
|
|
|
|
|
}
|
|
|
|
|
|
return
|
2022-03-03 19:36:28 +08:00
|
|
|
|
}
|
|
|
|
|
|
|
2021-01-10 22:35:34 +08:00
|
|
|
|
// httpRequestGenBoundary returns a random multipart boundary: 8 bytes read from
// crypto/rand and rendered as 16 lowercase hexadecimal characters.
// It is modelled on the boundary generator of the standard mime/multipart
// package and panics if the random source cannot be read.
func httpRequestGenBoundary() string {
	var raw [8]byte
	if _, err := io.ReadFull(rand.Reader, raw[:]); err != nil {
		panic(err)
	}
	var boundary = fmt.Sprintf("%x", raw[:])
	return boundary
}
|
2021-12-02 11:30:47 +08:00
|
|
|
|
|
2022-03-03 19:36:28 +08:00
|
|
|
|
// httpRequestParseBoundary extracts the value of the "boundary=" parameter from
// a Content-Type header value.
//
// The value may be given as a quoted string (boundary="abc"), in which case the
// text between the quotes is returned, or as a bare token, which ends at the
// next ";" and has surrounding whitespace removed. It returns "" when there is
// no "boundary=" parameter. The parameter name is matched case-sensitively.
func httpRequestParseBoundary(contentType string) string {
	const delim = "boundary="

	var boundaryIndex = strings.Index(contentType, delim)
	if boundaryIndex < 0 {
		return ""
	}
	var boundary = contentType[boundaryIndex+len(delim):]

	// Quoted form: take everything up to the closing quote, so that the quotes
	// themselves never end up in the boundary and a ";" inside them is kept.
	if strings.HasPrefix(boundary, `"`) {
		if closingIndex := strings.Index(boundary[1:], `"`); closingIndex >= 0 {
			return boundary[1 : 1+closingIndex]
		}
	}

	// Bare token (or unterminated quote): stop at the next parameter.
	if semicolonIndex := strings.Index(boundary, ";"); semicolonIndex >= 0 {
		boundary = boundary[:semicolonIndex]
	}
	return strings.TrimSpace(boundary)
}
|
|
|
|
|
|
|
2021-12-14 21:27:24 +08:00
|
|
|
|
// httpStatusIsRedirect reports whether statusCode is one of the redirect
// statuses 301, 302, 303, 307 or 308.
func httpStatusIsRedirect(statusCode int) bool {
	switch statusCode {
	case http.StatusMovedPermanently,
		http.StatusFound,
		http.StatusSeeOther,
		http.StatusTemporaryRedirect,
		http.StatusPermanentRedirect:
		return true
	default:
		return false
	}
}
|
|
|
|
|
|
|
2021-12-02 11:30:47 +08:00
|
|
|
|
// 生成请求ID
|
|
|
|
|
|
var httpRequestTimestamp int64
|
|
|
|
|
|
var httpRequestId int32 = 1_000_000
|
|
|
|
|
|
|
|
|
|
|
|
func httpRequestNextId() string {
|
2023-04-08 12:47:04 +08:00
|
|
|
|
unixTime, unixTimeString := fasttime.Now().UnixMilliString()
|
2021-12-02 11:30:47 +08:00
|
|
|
|
if unixTime > httpRequestTimestamp {
|
|
|
|
|
|
atomic.StoreInt32(&httpRequestId, 1_000_000)
|
|
|
|
|
|
httpRequestTimestamp = unixTime
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2022-08-07 11:18:16 +08:00
|
|
|
|
// timestamp + nodeId + requestId
|
2022-08-07 11:12:29 +08:00
|
|
|
|
return unixTimeString + teaconst.NodeIdString + strconv.Itoa(int(atomic.AddInt32(&httpRequestId, 1)))
|
2021-12-18 19:17:40 +08:00
|
|
|
|
}
|
2021-12-29 10:57:15 +08:00
|
|
|
|
|
|
|
|
|
|
// httpAcceptEncoding reports whether encoding is listed as acceptable in an
// Accept-Encoding header value.
//
// acceptEncodings is the raw header value, e.g. "gzip, deflate;q=0.5, br".
// Coding names are compared case-insensitively. An entry whose first parameter
// is a quality value of zero ("gzip;q=0") means the coding is explicitly
// refused and does not count as a match; a missing or unparsable quality value
// is treated as acceptable. The "*" wildcard is not expanded: it only matches
// when encoding itself is "*".
func httpAcceptEncoding(acceptEncodings string, encoding string) bool {
	if len(acceptEncodings) == 0 {
		return false
	}

	for _, piece := range strings.Split(acceptEncodings, ",") {
		var coding = piece
		var params = ""
		if paramsIndex := strings.Index(piece, ";"); paramsIndex >= 0 {
			coding = piece[:paramsIndex]
			params = strings.TrimSpace(piece[paramsIndex+1:])
		}

		if !strings.EqualFold(strings.TrimSpace(coding), encoding) {
			continue
		}

		// "q=0" marks the coding as not acceptable.
		if len(params) > 2 && (params[0] == 'q' || params[0] == 'Q') && params[1] == '=' {
			quality, err := strconv.ParseFloat(strings.TrimSpace(params[2:]), 64)
			if err == nil && quality <= 0 {
				continue
			}
		}

		return true
	}

	return false
}
|
2023-04-07 15:09:06 +08:00
|
|
|
|
|
|
|
|
|
|
// httpRedirect replies to req with a redirect to url using status code.
//
// If the response has no Content-Type yet, one is set before delegating to
// http.Redirect, so that http.Redirect does not write its default body
// containing a link to the target.
func httpRedirect(writer http.ResponseWriter, req *http.Request, url string, code int) {
	var header = writer.Header()
	if header.Get("Content-Type") == "" {
		// a preset Content-Type keeps the link page from being emitted
		header.Set("Content-Type", "text/html; charset=utf-8")
	}

	http.Redirect(writer, req, url, code)
}
|
2023-06-16 09:56:37 +08:00
|
|
|
|
|
|
|
|
|
|
// 分析URL中的Host部分
|
|
|
|
|
|
func httpParseHost(urlString string) (host string, err error) {
|
|
|
|
|
|
if !urlSchemeRegexp.MatchString(urlString) {
|
|
|
|
|
|
urlString = "https://" + urlString
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
u, err := url.Parse(urlString)
|
|
|
|
|
|
if err != nil && u != nil {
|
|
|
|
|
|
return "", err
|
|
|
|
|
|
}
|
|
|
|
|
|
return u.Host, nil
|
|
|
|
|
|
}
|