mirror of
https://github.com/TeaOSLab/EdgeNode.git
synced 2025-11-03 23:20:25 +08:00
299 lines
5.8 KiB
Go
299 lines
5.8 KiB
Go
// Copyright 2022 Liuxiangchao iwind.liu@gmail.com. All rights reserved.
|
|
|
|
package re
|
|
|
|
import (
|
|
"github.com/iwind/TeaGo/types"
|
|
"regexp"
|
|
"regexp/syntax"
|
|
"strings"
|
|
"sync/atomic"
|
|
)
|
|
|
|
// Package-level helpers shared by every Regexp in this package.
var (
	// prefixReg matches a leading flag group such as (?i) or (?is) and
	// captures the flag letters.
	prefixReg = regexp.MustCompile(`^\(\?([\w\s]+)\)`) // (?x)

	// braceZeroReg matches a repetition of exactly zero at the start of the
	// text, e.g. {0}.
	braceZeroReg = regexp.MustCompile(`^{\s*0*\s*}`) // {0}

	// braceZeroReg2 matches the start of a repetition whose minimum is zero,
	// e.g. {0, x}.
	braceZeroReg2 = regexp.MustCompile(`^{\s*0*\s*,`) // {0, x}

	// lastId is the most recently issued Regexp id; it is advanced atomically
	// by Regexp.init.
	lastId uint64
)
|
|
|
|
type Regexp struct {
|
|
exp string
|
|
rawRegexp *regexp.Regexp
|
|
|
|
isStrict bool
|
|
isCaseInsensitive bool
|
|
keywords []string
|
|
keywordsMap RuneMap
|
|
|
|
id uint64
|
|
idString string
|
|
}
|
|
|
|
func MustCompile(exp string) *Regexp {
|
|
var reg = &Regexp{
|
|
exp: exp,
|
|
rawRegexp: regexp.MustCompile(exp),
|
|
}
|
|
reg.init()
|
|
return reg
|
|
}
|
|
|
|
func Compile(exp string) (*Regexp, error) {
|
|
reg, err := regexp.Compile(exp)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return NewRegexp(reg), nil
|
|
}
|
|
|
|
func NewRegexp(rawRegexp *regexp.Regexp) *Regexp {
|
|
var reg = &Regexp{
|
|
exp: rawRegexp.String(),
|
|
rawRegexp: rawRegexp,
|
|
}
|
|
reg.init()
|
|
return reg
|
|
}
|
|
|
|
func (this *Regexp) init() {
|
|
this.id = atomic.AddUint64(&lastId, 1)
|
|
this.idString = "re:" + types.String(this.id)
|
|
|
|
if len(this.exp) == 0 {
|
|
return
|
|
}
|
|
|
|
var exp = strings.TrimSpace(this.exp)
|
|
|
|
// 去掉前面的(?...)
|
|
if prefixReg.MatchString(exp) {
|
|
var matches = prefixReg.FindStringSubmatch(exp)
|
|
var modifiers = matches[1]
|
|
if strings.Contains(modifiers, "i") {
|
|
this.isCaseInsensitive = true
|
|
}
|
|
exp = exp[len(matches[0]):]
|
|
}
|
|
|
|
var keywords = this.ParseKeywords(exp)
|
|
|
|
var filteredKeywords = []string{}
|
|
var minLength = 1
|
|
var isValid = true
|
|
for _, keyword := range keywords {
|
|
if len(keyword) <= minLength {
|
|
isValid = false
|
|
break
|
|
}
|
|
}
|
|
if isValid {
|
|
filteredKeywords = keywords
|
|
}
|
|
|
|
this.keywords = filteredKeywords
|
|
if len(filteredKeywords) > 0 {
|
|
this.keywordsMap = NewRuneTree(filteredKeywords)
|
|
}
|
|
}
|
|
|
|
func (this *Regexp) Keywords() []string {
|
|
return this.keywords
|
|
}
|
|
|
|
func (this *Regexp) Raw() *regexp.Regexp {
|
|
return this.rawRegexp
|
|
}
|
|
|
|
func (this *Regexp) IsCaseInsensitive() bool {
|
|
return this.isCaseInsensitive
|
|
}
|
|
|
|
func (this *Regexp) MatchString(s string) bool {
|
|
if this.keywordsMap != nil {
|
|
var b = this.keywordsMap.Lookup(s, this.isCaseInsensitive)
|
|
if !b {
|
|
return false
|
|
}
|
|
if this.isStrict {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return this.rawRegexp.MatchString(s)
|
|
}
|
|
|
|
func (this *Regexp) Match(s []byte) bool {
|
|
if this.keywordsMap != nil {
|
|
var b = this.keywordsMap.Lookup(string(s), this.isCaseInsensitive)
|
|
if !b {
|
|
return false
|
|
}
|
|
if this.isStrict {
|
|
return true
|
|
}
|
|
}
|
|
return this.rawRegexp.Match(s)
|
|
}
|
|
|
|
func (this *Regexp) FindStringSubmatch(s string) []string {
|
|
return this.rawRegexp.FindStringSubmatch(s)
|
|
}
|
|
|
|
// ParseKeywords 提取表达式中的关键词
|
|
func (this *Regexp) ParseKeywords(exp string) (keywords []string) {
|
|
if len(exp) == 0 {
|
|
return nil
|
|
}
|
|
|
|
reg, err := syntax.Parse(exp, syntax.Perl)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
|
|
if len(reg.Sub) == 0 {
|
|
var keywordRunes = this.parseKeyword(reg.String())
|
|
if len(keywordRunes) > 0 {
|
|
keywords = append(keywords, string(keywordRunes))
|
|
}
|
|
return
|
|
}
|
|
if len(reg.Sub) == 1 {
|
|
if reg.Op == syntax.OpStar || reg.Op == syntax.OpQuest || reg.Op == syntax.OpRepeat {
|
|
return nil
|
|
}
|
|
return this.ParseKeywords(reg.Sub[0].String())
|
|
}
|
|
|
|
const maxComposedKeywords = 32
|
|
|
|
switch reg.Op {
|
|
case syntax.OpConcat:
|
|
var prevKeywords = []string{}
|
|
var isStarted bool
|
|
for _, sub := range reg.Sub {
|
|
if sub.String() == `\b` {
|
|
if isStarted {
|
|
break
|
|
}
|
|
continue
|
|
}
|
|
if sub.Op != syntax.OpLiteral && sub.Op != syntax.OpCapture && sub.Op != syntax.OpAlternate {
|
|
if isStarted {
|
|
break
|
|
}
|
|
continue
|
|
}
|
|
var subKeywords = this.ParseKeywords(sub.String())
|
|
if len(subKeywords) > 0 {
|
|
if !isStarted {
|
|
prevKeywords = subKeywords
|
|
isStarted = true
|
|
} else {
|
|
for _, prevKeyword := range prevKeywords {
|
|
for _, subKeyword := range subKeywords {
|
|
keywords = append(keywords, prevKeyword+subKeyword)
|
|
|
|
// 限制不能超出最大关键词
|
|
if len(keywords) > maxComposedKeywords {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
prevKeywords = keywords
|
|
}
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
if len(prevKeywords) > 0 && len(keywords) == 0 {
|
|
keywords = prevKeywords
|
|
}
|
|
case syntax.OpAlternate:
|
|
for _, sub := range reg.Sub {
|
|
var subKeywords = this.ParseKeywords(sub.String())
|
|
if len(subKeywords) == 0 {
|
|
keywords = nil
|
|
return
|
|
}
|
|
keywords = append(keywords, subKeywords...)
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func (this *Regexp) IdString() string {
|
|
return this.idString
|
|
}
|
|
|
|
func (this *Regexp) parseKeyword(subExp string) (result []rune) {
|
|
if len(subExp) == 0 {
|
|
return nil
|
|
}
|
|
|
|
// 去除开始和结尾的()
|
|
if subExp[0] == '(' && subExp[len(subExp)-1] == ')' {
|
|
subExp = subExp[1 : len(subExp)-1]
|
|
if len(subExp) == 0 {
|
|
return
|
|
}
|
|
}
|
|
|
|
var runes = []rune(subExp)
|
|
|
|
for index, r := range runes {
|
|
if r == '[' || r == '{' || r == '.' || r == '+' || r == '$' {
|
|
if index == 0 {
|
|
return
|
|
}
|
|
if runes[index-1] != '\\' {
|
|
if r == '{' && (braceZeroReg.MatchString(subExp[index:])) || braceZeroReg2.MatchString(subExp[index:]) { // r {0, ...}
|
|
if len(result) == 0 {
|
|
return nil
|
|
}
|
|
return result[:len(result)-1]
|
|
}
|
|
|
|
return
|
|
}
|
|
}
|
|
if r == '?' || r == '*' {
|
|
if index == 0 {
|
|
return
|
|
}
|
|
if runes[index-1] != '\\' {
|
|
if len(result) > 0 {
|
|
return result[:len(result)-1]
|
|
}
|
|
return
|
|
}
|
|
}
|
|
|
|
if (r == 'n' || r == 't' || r == 'a' || r == 'f' || r == 'r' || r == 'v' || r == 'x') && index > 0 && runes[index-1] == '\\' {
|
|
switch r {
|
|
case 'n':
|
|
r = '\n'
|
|
case 't':
|
|
r = '\t'
|
|
case 'f':
|
|
r = '\f'
|
|
case 'r':
|
|
r = '\r'
|
|
case 'v':
|
|
r = '\v'
|
|
case 'a':
|
|
r = '\a'
|
|
case 'x':
|
|
return
|
|
}
|
|
}
|
|
|
|
if r == '\\' {
|
|
continue
|
|
}
|
|
result = append(result, r)
|
|
}
|
|
|
|
return
|
|
}
|