diff --git a/internal/re/regexp.go b/internal/re/regexp.go
index a42e9f0..1a2075f 100644
--- a/internal/re/regexp.go
+++ b/internal/re/regexp.go
@@ -164,6 +164,8 @@ func (this *Regexp) ParseKeywords(exp string) (keywords []string) {
 		return this.ParseKeywords(reg.Sub[0].String())
 	}
 
+	const maxComposedKeywords = 32
+
 	switch reg.Op {
 	case syntax.OpConcat:
 		var prevKeywords = []string{}
@@ -190,6 +192,11 @@ func (this *Regexp) ParseKeywords(exp string) (keywords []string) {
 					for _, prevKeyword := range prevKeywords {
 						for _, subKeyword := range subKeywords {
 							keywords = append(keywords, prevKeyword+subKeyword)
+
+							// 限制不能超出最大关键词
+							if len(keywords) > maxComposedKeywords {
+								return nil
+							}
 						}
 					}
 					prevKeywords = keywords
diff --git a/internal/re/regexp_test.go b/internal/re/regexp_test.go
index 56d10f6..695e0e9 100644
--- a/internal/re/regexp_test.go
+++ b/internal/re/regexp_test.go
@@ -58,59 +58,6 @@ func TestRegexp_ParseKeywords(t *testing.T) {
 }
 
 func TestRegexp_Special(t *testing.T) {
-	var unescape = func(v string) string {
-		//replace urlencoded characters
-
-		var chars = [][2]string{
-			{`\s`, `(\s|%09|%0A|\+)`},
-			{`\(`, `(\(|%28)`},
-			{`=`, `(=|%3D)`},
-			{`<`, `(<|%3C)`},
-			{`\*`, `(\*|%2A)`},
-			{`\\`, `(\\|%2F)`},
-			{`!`, `(!|%21)`},
-			{`/`, `(/|%2F)`},
-			{`;`, `(;|%3B)`},
-			{`\+`, `(\+|%20)`},
-		}
-
-		for _, c := range chars {
-			if !strings.Contains(v, c[0]) {
-				continue
-			}
-			var pieces = strings.Split(v, c[0])
-
-			// 修复piece中错误的\
-			for pieceIndex, piece := range pieces {
-				var l = len(piece)
-				if l == 0 {
-					continue
-				}
-				if piece[l-1] != '\\' {
-					continue
-				}
-
-				// 计算\的数量
-				var countBackSlashes = 0
-				for i := l - 1; i >= 0; i-- {
-					if piece[i] == '\\' {
-						countBackSlashes++
-					} else {
-						break
-					}
-				}
-				if countBackSlashes%2 == 1 {
-					// 去掉最后一个
-					pieces[pieceIndex] = piece[:len(piece)-1]
-				}
-			}
-
-			v = strings.Join(pieces, c[1])
-		}
-
-		return v
-	}
-
 	for _, s := range []string{
 		`\\s`,
 		`\s\W`,
@@ -121,7 +68,7 @@ func TestRegexp_Special(t *testing.T) {
 		`aaaa\\\=\W`,
 		`aaaa\\\\=\W`,
 	} {
-		var es = unescape(s)
+		var es = testUnescape(t, s)
 		t.Log(s, "=>", es)
 		_, err := re.Compile(es)
 		if err != nil {
@@ -130,6 +77,17 @@ func TestRegexp_Special(t *testing.T) {
 	}
 }
 
+func TestRegexp_Special2(t *testing.T) {
+	r, err := re.Compile(testUnescape(t, `/api/ios/a
+/api/ios/b
+/api/ios/c
+/report`))
+	if err != nil {
+		t.Fatal(err)
+	}
+	t.Log(r.Keywords())
+}
+
 func TestRegexp_ParseKeywords2(t *testing.T) {
 	var a = assert.NewAssertion(t)
 
@@ -232,6 +190,14 @@ func BenchmarkRegexp_MatchString_VS_FindSubString2(b *testing.B) {
 	}
 }
 
+func TestSplitAndJoin(t *testing.T) {
+	var pieces = strings.Split(`/api/ios/a
+/api/ios/b
+/api/ios/c
+/report`, "/")
+	t.Log(strings.Join(pieces, `(/|%2F)`))
+}
+
 func testCompareStrings(s1 []string, s2 []string) bool {
 	if len(s1) != len(s2) {
 		return false
@@ -243,3 +209,55 @@ func testCompareStrings(s1 []string, s2 []string) bool {
 	}
 	return true
 }
+
+func testUnescape(t *testing.T, v string) string {
+	// replace urlencoded characters
+	var unescapeChars = [][2]string{
+		{`\s`, `(\s|%09|%0A|\+)`},
+		{`\(`, `(\(|%28)`},
+		{`=`, `(=|%3D)`},
+		{`<`, `(<|%3C)`},
+		{`\*`, `(\*|%2A)`},
+		{`\\`, `(\\|%2F)`},
+		{`!`, `(!|%21)`},
+		{`/`, `(/|%2F)`},
+		{`;`, `(;|%3B)`},
+		{`\+`, `(\+|%20)`},
+	}
+
+	for _, c := range unescapeChars {
+		if !strings.Contains(v, c[0]) {
+			continue
+		}
+		var pieces = strings.Split(v, c[0])
+
+		// 修复piece中错误的\
+		for pieceIndex, piece := range pieces {
+			var l = len(piece)
+			if l == 0 {
+				continue
+			}
+			if piece[l-1] != '\\' {
+				continue
+			}
+
+			// 计算\的数量
+			var countBackSlashes = 0
+			for i := l - 1; i >= 0; i-- {
+				if piece[i] == '\\' {
+					countBackSlashes++
+				} else {
+					break
+				}
+			}
+			if countBackSlashes%2 == 1 {
+				// 去掉最后一个
+				pieces[pieceIndex] = piece[:len(piece)-1]
+			}
+		}
+
+		v = strings.Join(pieces, c[1])
+	}
+
+	return v
+}