refactor: 移除antlr4减小包体积&ai助手优化

This commit is contained in:
meilin.huang
2026-05-08 20:45:13 +08:00
parent 3768cef62d
commit f23b243fc5
154 changed files with 13054 additions and 396804 deletions

View File

@@ -1,11 +1,5 @@
package base
import (
"fmt"
"github.com/antlr4-go/antlr/v4"
)
// SingleSQL is a separate SQL split from multi-SQL.
type SingleSQL struct {
Text string
@@ -49,51 +43,6 @@ func (e *SyntaxError) Error() string {
return e.Message
}
// ParseErrorListener is a custom error listener for the PLSQL parser.
// Its SyntaxError method stores the first reported syntax error in Err.
type ParseErrorListener struct {
// BaseLine is added to the line number reported to SyntaxError.
BaseLine int
// Err holds the first recorded syntax error; it stays nil until one is reported.
Err *SyntaxError
}
// SyntaxError records the first syntax error it is called with; once l.Err is
// set, later calls are ignored. The stored line number is line + l.BaseLine.
// When the offending token is an *antlr.CommonToken, the message also carries
// a snippet of the input surrounding that token.
func (l *ParseErrorListener) SyntaxError(_ antlr.Recognizer, token any, line, column int, msg string, _ antlr.RecognitionException) {
if l.Err == nil {
errMessage := ""
if token, ok := token.(*antlr.CommonToken); ok {
stream := token.GetInputStream()
// Start the snippet up to 40 positions before the token, clamped to the stream start.
start := token.GetStart() - 40
if start < 0 {
start = 0
}
// End the snippet at the token's stop position, clamped to the stream end.
stop := token.GetStop()
if stop >= stream.Size() {
stop = stream.Size() - 1
}
errMessage = fmt.Sprintf("related text: %s", stream.GetTextFromInterval(antlr.NewInterval(start, stop)))
}
l.Err = &SyntaxError{
Line: line + l.BaseLine,
Column: column,
Message: fmt.Sprintf("Syntax error at line %d:%d \n%s", line+l.BaseLine, column, errMessage),
}
}
}
// ReportAmbiguity reports an ambiguity by forwarding all arguments unchanged
// to antlr.ConsoleErrorListenerINSTANCE; the listener keeps no state for it.
func (*ParseErrorListener) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs *antlr.ATNConfigSet) {
antlr.ConsoleErrorListenerINSTANCE.ReportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
}
// ReportAttemptingFullContext reports an attempting-full-context event by
// forwarding all arguments unchanged to antlr.ConsoleErrorListenerINSTANCE.
func (*ParseErrorListener) ReportAttemptingFullContext(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, conflictingAlts *antlr.BitSet, configs *antlr.ATNConfigSet) {
antlr.ConsoleErrorListenerINSTANCE.ReportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
}
// ReportContextSensitivity reports a context sensitivity by forwarding all
// arguments unchanged to antlr.ConsoleErrorListenerINSTANCE.
func (*ParseErrorListener) ReportContextSensitivity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex, prediction int, configs *antlr.ATNConfigSet) {
antlr.ConsoleErrorListenerINSTANCE.ReportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
}
func FilterEmptySQL(list []SingleSQL) []SingleSQL {
var result []SingleSQL
for _, sql := range list {

View File

@@ -0,0 +1,340 @@
package base
import (
"strings"
"mayfly-go/internal/db/dbm/sqlparser/tokenizer"
)
// Lexer provides lexical-analysis helper methods over a token list and
// contains no dialect-specific logic.
type Lexer struct {
// SQL is the original SQL text handed to NewLexer.
SQL string
// Tokens is the token list produced by the tokenizer for SQL.
Tokens []tokenizer.Token
// Pos is the index into Tokens of the current token.
Pos int
// Length is len(Tokens) as captured by NewLexer.
Length int
}
// NewLexer tokenizes sql with the given dialect configuration and returns a
// Lexer positioned at the first token.
func NewLexer(sql string, cfg tokenizer.DialectConfig) *Lexer {
	tokens := tokenizer.New(sql, cfg).Tokens
	lx := &Lexer{SQL: sql, Tokens: tokens}
	lx.Pos = 0
	lx.Length = len(tokens)
	return lx
}
// Current returns the token at the cursor. Once the cursor has moved past the
// end of the token list, the last token (expected to be EOF) is returned.
func (l *Lexer) Current() tokenizer.Token {
	idx := l.Pos
	if idx >= l.Length {
		idx = l.Length - 1 // clamp to the trailing token
	}
	return l.Tokens[idx]
}
// Peek returns the token offset positions away from the cursor without
// moving the cursor. Positions past the end yield the last token.
func (l *Lexer) Peek(offset int) tokenizer.Token {
	if target := l.Pos + offset; target < l.Length {
		return l.Tokens[target]
	}
	return l.Tokens[l.Length-1]
}
// Consume returns the current token and advances the cursor by one. When the
// cursor is already past the end, the last token is returned and the cursor
// is left untouched.
func (l *Lexer) Consume() tokenizer.Token {
	if l.Pos < l.Length {
		consumed := l.Tokens[l.Pos]
		l.Pos++
		return consumed
	}
	return l.Tokens[l.Length-1]
}
// ExpectValue consumes and returns the current token when its value equals
// val; otherwise it returns the current token without advancing.
func (l *Lexer) ExpectValue(val string) tokenizer.Token {
	if l.Current().Value != val {
		return l.Current()
	}
	return l.Consume()
}
// SkipSemicolons consumes every consecutive ";" token at the cursor.
func (l *Lexer) SkipSemicolons() {
	for {
		if l.Current().Value != ";" {
			return
		}
		l.Consume()
	}
}
// SkipToNextStatement advances the cursor to just after the next ";" token,
// or to EOF when no ";" remains.
func (l *Lexer) SkipToNextStatement() {
	for {
		cur := l.Current()
		if cur.IsEOF() || cur.Value == ";" {
			break
		}
		l.Consume()
	}
	// Step over the statement terminator itself, if that is where we stopped.
	if l.Current().Value == ";" {
		l.Consume()
	}
}
// SkipParentheses skips a balanced pair of parentheses together with
// everything between them. It does nothing unless the current token is "(".
// If EOF is reached before the group closes, the cursor stops at EOF.
func (l *Lexer) SkipParentheses() {
	if l.Current().Value != "(" {
		return
	}
	l.Consume() // the opening "("

	depth := 1
	for depth > 0 {
		cur := l.Current()
		if cur.IsEOF() {
			return
		}
		switch cur.Value {
		case "(":
			depth++
		case ")":
			depth--
		}
		l.Consume()
	}
}
// SkipExpr skips a whole expression (used for WHERE, HAVING and similar
// clauses). Parenthesized groups are skipped as a unit; scanning stops at a
// top-level ",", at any token for which IsExprEnd reports true, or at EOF.
func (l *Lexer) SkipExpr() {
	for {
		cur := l.Current()
		switch {
		case cur.IsEOF():
			return
		case cur.Value == "(":
			l.SkipParentheses()
		case cur.Value == ",", l.IsExprEnd():
			return
		default:
			l.Consume()
		}
	}
}
// SkipGroupByExpr skips a GROUP BY expression list. Unlike SkipExpr, commas
// are consumed so a comma-separated list is skipped as a whole. Scanning
// stops at a token for which IsExprEnd reports true, at ";", or at EOF.
func (l *Lexer) SkipGroupByExpr() {
	for {
		cur := l.Current()
		switch {
		case cur.IsEOF():
			return
		case cur.Value == "(":
			l.SkipParentheses()
		case l.IsExprEnd(), cur.Value == ";":
			return
		default:
			l.Consume()
		}
	}
}
// SkipOrderByExpr skips an ORDER BY expression list, including separating
// commas and ASC/DESC modifiers. Scanning stops at a token for which
// IsExprEnd reports true, at ";", or at EOF; parenthesized groups are
// skipped as a unit.
func (l *Lexer) SkipOrderByExpr() {
	for {
		cur := l.Current()
		switch {
		case cur.IsEOF():
			return
		case cur.Value == ",", cur.IsKeyword("ASC", "DESC"):
			l.Consume()
		case l.IsExprEnd(), cur.Value == ";":
			return
		case cur.Value == "(":
			l.SkipParentheses()
		default:
			l.Consume()
		}
	}
}
// IsExprEnd reports whether the current token terminates an expression: one
// of the listed clause/join keywords, ";" or ")".
func (l *Lexer) IsExprEnd() bool {
	cur := l.Current()
	if cur.IsKeyword("FROM", "WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET",
		"UNION", "INTO", "SET", "VALUES", "ON", "USING", "FOR", "RETURNING",
		"LEFT", "RIGHT", "INNER", "OUTER", "CROSS", "NATURAL", "FULL", "JOIN") {
		return true
	}
	return cur.Value == ";" || cur.Value == ")"
}
// IsFromClauseEnd reports whether the current token terminates a FROM
// clause: one of the listed clause keywords, ";" or ")".
func (l *Lexer) IsFromClauseEnd() bool {
	cur := l.Current()
	if cur.IsKeyword("WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET",
		"UNION", "INTO", "FOR", "RETURNING") {
		return true
	}
	return cur.Value == ";" || cur.Value == ")"
}
// IsSelectClauseEnd reports whether the current token marks the end of a
// SELECT column list: one of the listed clause keywords, ";" or ")".
func (l *Lexer) IsSelectClauseEnd() bool {
	cur := l.Current()
	if cur.IsKeyword("FROM", "WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET", "UNION", "INTO", "FOR") {
		return true
	}
	return cur.Value == ";" || cur.Value == ")"
}
// IsJoinStart reports whether the current token is a keyword that begins a
// JOIN clause (a join modifier or STRAIGHT_JOIN).
// NOTE(review): a bare JOIN keyword is not in this list; presumably callers
// test for it separately — confirm.
func (l *Lexer) IsJoinStart() bool {
	return l.Current().IsKeyword("LEFT", "RIGHT", "INNER", "OUTER", "NATURAL", "CROSS", "FULL", "STRAIGHT_JOIN")
}
// TextFrom returns the raw SQL text spanning from the token at index start
// through the token at the cursor. A trailing EOF token is replaced by the
// token before it. An empty string is returned when start is past the token
// list or the resulting span is empty.
func (l *Lexer) TextFrom(start int) string {
	if start >= l.Length {
		return ""
	}

	last := l.Pos
	if last >= l.Length {
		last = l.Length - 1
	}
	if last < start {
		last = start
	}

	from, to := l.Tokens[start], l.Tokens[last]
	if to.Type == tokenizer.TokenEOF && last > 0 {
		// EOF carries no text of its own; end at the preceding token.
		to = l.Tokens[last-1]
	}
	if to.End > from.Pos {
		return l.SQL[from.Pos:to.End]
	}
	return ""
}
// TextFromExclusive returns the raw SQL text spanning from the token at
// index start up to, but not including, the token at the cursor. When the
// cursor has not moved past start, the span collapses to the start token
// itself. A trailing EOF token is replaced by the token before it.
func (l *Lexer) TextFromExclusive(start int) string {
	if start >= l.Length {
		return ""
	}

	last := l.Pos - 1
	if last < start {
		last = start
	}

	from, to := l.Tokens[start], l.Tokens[last]
	if to.Type == tokenizer.TokenEOF && last > 0 {
		// EOF carries no text of its own; end at the preceding token.
		to = l.Tokens[last-1]
	}
	if to.End > from.Pos {
		return l.SQL[from.Pos:to.End]
	}
	return ""
}
// ParseInt builds a non-negative integer from the ASCII digits found in s.
// Every non-digit character is skipped rather than reported, so a sign or
// separator is silently dropped (for example "-5" yields 5).
func (l *Lexer) ParseInt(s string) int {
	total := 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c < '0' || c > '9' {
			continue
		}
		total = total*10 + int(c-'0')
	}
	return total
}
// Unquote strips one pair of surrounding identifier quotes (backticks or
// double quotes) from s. Strings that are not wrapped in a matching pair are
// returned unchanged.
func (l *Lexer) Unquote(s string) string {
	n := len(s)
	if n < 2 {
		return s
	}
	first, last := s[0], s[n-1]
	if first != last {
		return s
	}
	switch first {
	case '`', '"':
		return s[1 : n-1]
	}
	return s
}
// SplitDotParts splits a dotted identifier on "." while treating dots inside
// backtick- or double-quoted sections as literal text. Each part is trimmed
// of surrounding whitespace and keeps its quotes. A doubled quote character
// inside a quoted section is emitted once and does not close the section.
// A trailing empty part is dropped.
func (l *Lexer) SplitDotParts(text string) []string {
	var (
		segments []string
		buf      strings.Builder
		quote    byte // active quote character; 0 while outside quotes
	)
	for i := 0; i < len(text); i++ {
		c := text[i]
		switch {
		case quote != 0:
			buf.WriteByte(c)
			if c != quote {
				continue
			}
			if i+1 < len(text) && text[i+1] == quote {
				i++ // escaped quote: skip its second half and stay quoted
				continue
			}
			quote = 0
		case c == '.':
			segments = append(segments, strings.TrimSpace(buf.String()))
			buf.Reset()
		case c == '`' || c == '"':
			quote = c
			buf.WriteByte(c)
		default:
			buf.WriteByte(c)
		}
	}
	if buf.Len() > 0 {
		segments = append(segments, strings.TrimSpace(buf.String()))
	}
	return segments
}
// SplitIdentifiers splits text into whitespace-separated parts.
//
// Any run of ASCII whitespace (space, tab, newline, carriage return, vertical
// tab, form feed) acts as a single separator, so column text that spans
// several lines is split the same way as single-line text. Empty parts are
// never produced; the result is nil when text holds no non-whitespace bytes.
// Quotes are not interpreted.
func (l *Lexer) SplitIdentifiers(text string) []string {
	var parts []string
	start := -1 // byte index where the current part began; -1 while between parts
	for i := 0; i < len(text); i++ {
		switch text[i] {
		case ' ', '\t', '\n', '\r', '\v', '\f':
			if start >= 0 {
				parts = append(parts, text[start:i])
				start = -1
			}
		default:
			if start < 0 {
				start = i
			}
		}
	}
	if start >= 0 {
		parts = append(parts, text[start:])
	}
	return parts
}
// ExtractColumnName extracts the bare column name from text: surrounding
// whitespace is trimmed, any qualifier before the last "." (as split by
// SplitDotParts) is dropped, and identifier quotes are removed with Unquote.
// Blank input yields "".
func (l *Lexer) ExtractColumnName(text string) string {
	trimmed := strings.TrimSpace(text)
	if len(trimmed) == 0 {
		return ""
	}
	name := trimmed
	if segs := l.SplitDotParts(trimmed); len(segs) > 1 {
		name = segs[len(segs)-1]
	}
	return l.Unquote(name)
}
// ExtractColumnAndAlias splits a select-list item into its column name and
// alias. The alias is "" when none is recognised.
//
// Two forms are recognised:
//   - "expr AS alias": the last stand-alone AS keyword (any letter case,
//     delimited by ASCII whitespace on both sides) separates column and alias.
//   - "expr alias": the last whitespace-separated part is taken as the alias,
//     but only when the text before it contains no "." and is either free of
//     "(" or ends with ")".
//
// The column part is reduced with ExtractColumnName; the alias is returned
// trimmed but otherwise as written (quotes are kept).
func (l *Lexer) ExtractColumnAndAlias(text string) (string, string) {
	// The keyword is located on the original bytes. Upper-casing a copy and
	// reusing its offsets is unsafe because case mapping can change the byte
	// length (some Unicode letters, or invalid UTF-8 replaced by U+FFFD).
	if idx := lastASKeywordIndex(text); idx >= 0 {
		colPart := strings.TrimSpace(text[:idx])
		aliasPart := strings.TrimSpace(text[idx+4:])
		return l.ExtractColumnName(colPart), aliasPart
	}

	// Trim first so the last part is guaranteed to be a suffix of the text
	// even when the input carries trailing whitespace.
	trimmed := strings.TrimSpace(text)
	parts := l.SplitIdentifiers(trimmed)
	if len(parts) >= 2 {
		lastPart := parts[len(parts)-1]
		beforeLast := strings.TrimSpace(trimmed[:len(trimmed)-len(lastPart)])
		if !strings.Contains(beforeLast, ".") {
			if !strings.Contains(beforeLast, "(") || strings.HasSuffix(beforeLast, ")") {
				return l.ExtractColumnName(beforeLast), strings.TrimSpace(lastPart)
			}
		}
	}
	return l.ExtractColumnName(text), ""
}

// lastASKeywordIndex returns the byte index of the whitespace character that
// precedes the last whitespace-delimited AS keyword in text, or -1 when there
// is none. Matching is ASCII case-insensitive and never alters offsets.
func lastASKeywordIndex(text string) int {
	isSpace := func(c byte) bool {
		return c == ' ' || c == '\t' || c == '\n' || c == '\r'
	}
	for i := len(text) - 4; i >= 0; i-- {
		if isSpace(text[i]) &&
			(text[i+1]|0x20) == 'a' &&
			(text[i+2]|0x20) == 's' &&
			isSpace(text[i+3]) {
			return i
		}
	}
	return -1
}
// TrimTrailingComma trims surrounding whitespace from s and removes at most
// one trailing comma, together with any whitespace that preceded it.
func TrimTrailingComma(s string) string {
	trimmed := strings.TrimSpace(s)
	return strings.TrimSpace(strings.TrimSuffix(trimmed, ","))
}