diff --git a/internal/re/regexp_test.go b/internal/re/regexp_test.go
index 302c6de..56d10f6 100644
--- a/internal/re/regexp_test.go
+++ b/internal/re/regexp_test.go
@@ -57,6 +57,72 @@ func TestRegexp_ParseKeywords(t *testing.T) {
 	}
 }
 
+func TestRegexp_Special(t *testing.T) {
+	// Local copy of the token table used by waf.Rule.unescape: every key is a
+	// single literal character or a two-byte backslash escape, and every value
+	// is an alternation that also matches the URL-encoded spellings.
+	var chars = map[string]string{
+		`\s`: `(\s|%09|%0A|(\+|%20))`,
+		`\(`: `(\(|%28)`,
+		`=`:  `(=|%3D)`,
+		`<`:  `(<|%3C)`,
+		`\*`: `(\*|%2A)`,
+		`\\`: `(\\|%5C|%2F)`,
+		`!`:  `(!|%21)`,
+		`/`:  `(/|%2F)`,
+		`;`:  `(;|%3B)`,
+		`\+`: `(\+|%20)`,
+	}
+
+	// unescape rewrites v one token at a time, so a backslash always binds to
+	// the character right after it.
+	var unescape = func(v string) string {
+		var result strings.Builder
+		result.Grow(len(v))
+		for i := 0; i < len(v); {
+			var token = v[i : i+1]
+			if v[i] == '\\' && i+1 < len(v) {
+				token = v[i : i+2]
+			}
+			i += len(token)
+
+			replacement, found := chars[token]
+			if !found && len(token) == 2 {
+				// An escaped literal such as `\=` means the same as `=`.
+				replacement, found = chars[token[1:]]
+			}
+			if found {
+				result.WriteString(replacement)
+			} else {
+				result.WriteString(token)
+			}
+		}
+		return result.String()
+	}
+
+	for _, s := range []string{
+		`\\s`,
+		`\s\W`,
+		`aaaa/\W`,
+		`aaaa\/\W`,
+		`aaaa\=\W`,
+		`aaaa\\=\W`,
+		`aaaa\\\=\W`,
+		`aaaa\\\\=\W`,
+		// An escaped backslash directly before ( or * must not be read as the
+		// escapes \( or \*.
+		`\\(a)`,
+		`a\\*`,
+	} {
+		var es = unescape(s)
+		t.Log(s, "=>", es)
+		_, err := re.Compile(es)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
 func TestRegexp_ParseKeywords2(t *testing.T) {
 	var a = assert.NewAssertion(t)
 
diff --git a/internal/waf/rule.go b/internal/waf/rule.go
index dd927a4..8d1491d 100644
--- a/internal/waf/rule.go
+++ b/internal/waf/rule.go
@@ -590,18 +590,53 @@ func (this *Rule) SetCheckpointFinder(finder func(prefix string) checkpoints.Che
 	this.checkpointFinder = finder
 }
 
+// unescapeChars maps a regular expression token to an alternation that also
+// matches the URL-encoded spellings of that token. Every key is a single
+// literal character or a two-byte backslash escape.
+var unescapeChars = map[string]string{
+	// `\s` used to gain %20 from a later replacement of its own `\+`
+	// alternative; the nested group spells that result out directly.
+	`\s`: `(\s|%09|%0A|(\+|%20))`,
+	`\(`: `(\(|%28)`,
+	`=`:  `(=|%3D)`,
+	`<`:  `(<|%3C)`,
+	`\*`: `(\*|%2A)`,
+	// %5C is the encoding of a backslash; %2F is kept so that existing
+	// rules keep matching what they matched before.
+	`\\`: `(\\|%5C|%2F)`,
+	`!`:  `(!|%21)`,
+	`/`:  `(/|%2F)`,
+	`;`:  `(;|%3B)`,
+	`\+`: `(\+|%20)`,
+}
+
 func (this *Rule) unescape(v string) string {
-	//replace urlencoded characters
-	v = strings.Replace(v, `\s`, `(\s|%09|%0A|\+)`, -1)
-	v = strings.Replace(v, `\(`, `(\(|%28)`, -1)
-	v = strings.Replace(v, `=`, `(=|%3D)`, -1)
-	v = strings.Replace(v, `<`, `(<|%3C)`, -1)
-	v = strings.Replace(v, `\*`, `(\*|%2A)`, -1)
-	v = strings.Replace(v, `\\`, `(\\|%2F)`, -1)
-	v = strings.Replace(v, `!`, `(!|%21)`, -1)
-	v = strings.Replace(v, `/`, `(/|%2F)`, -1)
-	v = strings.Replace(v, `;`, `(;|%3B)`, -1)
-	v = strings.Replace(v, `\+`, `(\+|%20)`, -1)
+	// Replace tokens with alternations that also match their URL-encoded
+	// spellings. The pattern is read one token at a time, so a backslash always
+	// binds to the character right after it (in `\\s` the escape is `\\` and
+	// the `s` stays a literal), and text inserted by one replacement is never
+	// rewritten by another.
+	var result strings.Builder
+	result.Grow(len(v))
+	for i := 0; i < len(v); {
+		var token = v[i : i+1]
+		if v[i] == '\\' && i+1 < len(v) {
+			token = v[i : i+2]
+		}
+		i += len(token)
+
+		replacement, found := unescapeChars[token]
+		if !found && len(token) == 2 {
+			// An escaped literal such as `\=` means the same as `=`.
+			replacement, found = unescapeChars[token[1:]]
+		}
+		if found {
+			result.WriteString(replacement)
+		} else {
+			result.WriteString(token)
+		}
+	}
+
-	return v
+	return result.String()
 }
 
diff --git a/internal/waf/utils/utils.go b/internal/waf/utils/utils.go
index 975d0c3..7a7e0c7 100644
--- a/internal/waf/utils/utils.go
+++ b/internal/waf/utils/utils.go
@@ -13,6 +13,10 @@ var cache = ttlcache.NewCache()
 
 // MatchStringCache 正则表达式匹配字符串,并缓存结果
 func MatchStringCache(regex *re.Regexp, s string) bool {
+	if regex == nil {
+		return false
+	}
+
 	// 如果长度超过4096,大概率是不能重用的
 	if len(s) > 4096 {
 		return regex.MatchString(s)
@@ -35,6 +39,10 @@ func MatchStringCache(regex *re.Regexp, s string) bool {
 
 // MatchBytesCache 正则表达式匹配字节slice,并缓存结果
 func MatchBytesCache(regex *re.Regexp, byteSlice []byte) bool {
+	if regex == nil {
+		return false
+	}
+
 	// 如果长度超过4096,大概率是不能重用的
 	if len(byteSlice) > 4096 {
 		return regex.Match(byteSlice)