refactor: 移除antlr4减小包体积&ai助手优化

2026-05-18 17:05:21 +08:00 · 2026-05-08 20:45:13 +08:00
parent 3768cef62d
commit f23b243fc5
154 changed files with 13054 additions and 396804 deletions
--- a/server/internal/db/dbm/sqlparser/base/base.go
+++ b/server/internal/db/dbm/sqlparser/base/base.go
@@ -1,11 +1,5 @@
 package base

-import (
-	"fmt"
-
-	"github.com/antlr4-go/antlr/v4"
-)
-
 // SingleSQL is a separate SQL split from multi-SQL.
 type SingleSQL struct {
 	Text string
@@ -49,51 +43,6 @@ func (e *SyntaxError) Error() string {
 	return e.Message
 }

-// ParseErrorListener is a custom error listener for PLSQL parser.
-type ParseErrorListener struct {
-	BaseLine int
-	Err      *SyntaxError
-}
-
-// SyntaxError returns the errors.
-func (l *ParseErrorListener) SyntaxError(_ antlr.Recognizer, token any, line, column int, msg string, _ antlr.RecognitionException) {
-	if l.Err == nil {
-		errMessage := ""
-		if token, ok := token.(*antlr.CommonToken); ok {
-			stream := token.GetInputStream()
-			start := token.GetStart() - 40
-			if start < 0 {
-				start = 0
-			}
-			stop := token.GetStop()
-			if stop >= stream.Size() {
-				stop = stream.Size() - 1
-			}
-			errMessage = fmt.Sprintf("related text: %s", stream.GetTextFromInterval(antlr.NewInterval(start, stop)))
-		}
-		l.Err = &SyntaxError{
-			Line:    line + l.BaseLine,
-			Column:  column,
-			Message: fmt.Sprintf("Syntax error at line %d:%d \n%s", line+l.BaseLine, column, errMessage),
-		}
-	}
-}
-
-// ReportAmbiguity reports an ambiguity.
-func (*ParseErrorListener) ReportAmbiguity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, exact bool, ambigAlts *antlr.BitSet, configs *antlr.ATNConfigSet) {
-	antlr.ConsoleErrorListenerINSTANCE.ReportAmbiguity(recognizer, dfa, startIndex, stopIndex, exact, ambigAlts, configs)
-}
-
-// ReportAttemptingFullContext reports an attempting full context.
-func (*ParseErrorListener) ReportAttemptingFullContext(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex int, conflictingAlts *antlr.BitSet, configs *antlr.ATNConfigSet) {
-	antlr.ConsoleErrorListenerINSTANCE.ReportAttemptingFullContext(recognizer, dfa, startIndex, stopIndex, conflictingAlts, configs)
-}
-
-// ReportContextSensitivity reports a context sensitivity.
-func (*ParseErrorListener) ReportContextSensitivity(recognizer antlr.Parser, dfa *antlr.DFA, startIndex, stopIndex, prediction int, configs *antlr.ATNConfigSet) {
-	antlr.ConsoleErrorListenerINSTANCE.ReportContextSensitivity(recognizer, dfa, startIndex, stopIndex, prediction, configs)
-}
-
 func FilterEmptySQL(list []SingleSQL) []SingleSQL {
 	var result []SingleSQL
 	for _, sql := range list {
--- a/server/internal/db/dbm/sqlparser/base/lexer.go
+++ b/server/internal/db/dbm/sqlparser/base/lexer.go
@@ -0,0 +1,340 @@
+package base
+
+import (
+	"strings"
+
+	"mayfly-go/internal/db/dbm/sqlparser/tokenizer"
+)
+
+// Lexer 提供词法分析辅助方法，不包含方言相关逻辑
+type Lexer struct {
+	SQL    string
+	Tokens []tokenizer.Token
+	Pos    int
+	Length int
+}
+
+// NewLexer 创建基础词法分析器
+func NewLexer(sql string, cfg tokenizer.DialectConfig) *Lexer {
+	tok := tokenizer.New(sql, cfg)
+	return &Lexer{
+		SQL:    sql,
+		Tokens: tok.Tokens,
+		Pos:    0,
+		Length: len(tok.Tokens),
+	}
+}
+
+// Current 返回当前 token
+func (l *Lexer) Current() tokenizer.Token {
+	if l.Pos >= l.Length {
+		return l.Tokens[l.Length-1] // EOF
+	}
+	return l.Tokens[l.Pos]
+}
+
+// Peek 预览指定偏移位置的 token
+func (l *Lexer) Peek(offset int) tokenizer.Token {
+	idx := l.Pos + offset
+	if idx >= l.Length {
+		return l.Tokens[l.Length-1]
+	}
+	return l.Tokens[idx]
+}
+
+// Consume 消费当前 token 并前进
+func (l *Lexer) Consume() tokenizer.Token {
+	if l.Pos >= l.Length {
+		return l.Tokens[l.Length-1]
+	}
+	tok := l.Tokens[l.Pos]
+	l.Pos++
+	return tok
+}
+
+// ExpectValue 如果当前 token 值匹配则消费，否则返回当前 token
+func (l *Lexer) ExpectValue(val string) tokenizer.Token {
+	if l.Current().Value == val {
+		return l.Consume()
+	}
+	return l.Current()
+}
+
+// SkipSemicolons 跳过分号
+func (l *Lexer) SkipSemicolons() {
+	for l.Current().Value == ";" {
+		l.Consume()
+	}
+}
+
+// SkipToNextStatement 跳过到下一个语句
+func (l *Lexer) SkipToNextStatement() {
+	for !l.Current().IsEOF() && l.Current().Value != ";" {
+		l.Consume()
+	}
+	if l.Current().Value == ";" {
+		l.Consume()
+	}
+}
+
+// SkipParentheses 跳过一对括号及其内容
+func (l *Lexer) SkipParentheses() {
+	if l.Current().Value != "(" {
+		return
+	}
+	l.Consume() // (
+	depth := 1
+	for !l.Current().IsEOF() && depth > 0 {
+		if l.Current().Value == "(" {
+			depth++
+		} else if l.Current().Value == ")" {
+			depth--
+		}
+		l.Consume()
+	}
+}
+
+// SkipExpr 跳过整个表达式（用于 WHERE, HAVING 等）
+func (l *Lexer) SkipExpr() {
+	for !l.Current().IsEOF() {
+		tok := l.Current()
+		if tok.Value == "(" {
+			l.SkipParentheses()
+			continue
+		}
+		if tok.Value == "," || l.IsExprEnd() {
+			break
+		}
+		l.Consume()
+	}
+}
+
+// SkipGroupByExpr 跳过 GROUP BY 表达式（允许逗号分隔）
+func (l *Lexer) SkipGroupByExpr() {
+	for !l.Current().IsEOF() {
+		tok := l.Current()
+		if tok.Value == "(" {
+			l.SkipParentheses()
+			continue
+		}
+		if l.IsExprEnd() || tok.Value == ";" {
+			break
+		}
+		l.Consume()
+	}
+}
+
+// SkipOrderByExpr 跳过 ORDER BY 表达式
+func (l *Lexer) SkipOrderByExpr() {
+	for !l.Current().IsEOF() {
+		if l.Current().Value == "," {
+			l.Consume()
+			continue
+		}
+		if l.Current().IsKeyword("ASC", "DESC") {
+			l.Consume()
+			continue
+		}
+		if l.IsExprEnd() || l.Current().Value == ";" {
+			break
+		}
+		if l.Current().Value == "(" {
+			l.SkipParentheses()
+			continue
+		}
+		l.Consume()
+	}
+}
+
+// IsExprEnd 判断表达式是否结束
+func (l *Lexer) IsExprEnd() bool {
+	tok := l.Current()
+	return tok.IsKeyword("FROM", "WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET",
+		"UNION", "INTO", "SET", "VALUES", "ON", "USING", "FOR", "RETURNING",
+		"LEFT", "RIGHT", "INNER", "OUTER", "CROSS", "NATURAL", "FULL", "JOIN") ||
+		tok.Value == ";" || tok.Value == ")"
+}
+
+// IsFromClauseEnd 判断 FROM 子句是否结束
+func (l *Lexer) IsFromClauseEnd() bool {
+	tok := l.Current()
+	return tok.IsKeyword("WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET",
+		"UNION", "INTO", "FOR", "RETURNING") || tok.Value == ";" || tok.Value == ")"
+}
+
+// IsSelectClauseEnd 判断是否到达 SELECT 子句末尾
+func (l *Lexer) IsSelectClauseEnd() bool {
+	tok := l.Current()
+	return tok.IsKeyword("FROM", "WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET", "UNION", "INTO", "FOR") ||
+		tok.Value == ";" || tok.Value == ")"
+}
+
+// IsJoinStart 判断是否为 JOIN 起始
+func (l *Lexer) IsJoinStart() bool {
+	tok := l.Current()
+	return tok.IsKeyword("LEFT", "RIGHT", "INNER", "OUTER", "NATURAL", "CROSS", "FULL", "STRAIGHT_JOIN")
+}
+
+// TextFrom 返回从 start 到当前位置的原始 SQL 文本
+func (l *Lexer) TextFrom(start int) string {
+	if start >= l.Length {
+		return ""
+	}
+	end := l.Pos
+	if end >= l.Length {
+		end = l.Length - 1
+	}
+	if end < start {
+		end = start
+	}
+	startTok := l.Tokens[start]
+	endTok := l.Tokens[end]
+	if endTok.Type == tokenizer.TokenEOF && end > 0 {
+		endTok = l.Tokens[end-1]
+	}
+	if endTok.End <= startTok.Pos {
+		return ""
+	}
+	return l.SQL[startTok.Pos:endTok.End]
+}
+
+// TextFromExclusive 返回从 start 到当前位置之前（不包含当前 token）的原始 SQL 文本
+func (l *Lexer) TextFromExclusive(start int) string {
+	if start >= l.Length {
+		return ""
+	}
+	end := l.Pos - 1
+	if end < start {
+		end = start
+	}
+	startTok := l.Tokens[start]
+	endTok := l.Tokens[end]
+	if endTok.Type == tokenizer.TokenEOF && end > 0 {
+		endTok = l.Tokens[end-1]
+	}
+	if endTok.End <= startTok.Pos {
+		return ""
+	}
+	return l.SQL[startTok.Pos:endTok.End]
+}
+
+// ParseInt 解析整数，忽略错误
+func (l *Lexer) ParseInt(s string) int {
+	n := 0
+	for _, ch := range s {
+		if ch >= '0' && ch <= '9' {
+			n = n*10 + int(ch-'0')
+		}
+	}
+	return n
+}
+
+// Unquote 去除标识符引号
+func (l *Lexer) Unquote(s string) string {
+	if len(s) >= 2 {
+		if (s[0] == '`' && s[len(s)-1] == '`') ||
+			(s[0] == '"' && s[len(s)-1] == '"') {
+			return s[1 : len(s)-1]
+		}
+	}
+	return s
+}
+
+// SplitDotParts 按点分割标识符，考虑引号
+func (l *Lexer) SplitDotParts(text string) []string {
+	var parts []string
+	var current strings.Builder
+	inQuote := false
+	var quoteChar byte = 0
+	for i := 0; i < len(text); i++ {
+		ch := text[i]
+		if inQuote {
+			current.WriteByte(ch)
+			if ch == quoteChar {
+				if i+1 < len(text) && text[i+1] == quoteChar {
+					i++
+				} else {
+					inQuote = false
+					quoteChar = 0
+				}
+			}
+		} else if ch == '.' {
+			parts = append(parts, strings.TrimSpace(current.String()))
+			current.Reset()
+		} else if ch == '`' || ch == '"' {
+			inQuote = true
+			quoteChar = ch
+			current.WriteByte(ch)
+		} else {
+			current.WriteByte(ch)
+		}
+	}
+	if current.Len() > 0 {
+		parts = append(parts, strings.TrimSpace(current.String()))
+	}
+	return parts
+}
+
+// SplitIdentifiers 按空白分割标识符
+func (l *Lexer) SplitIdentifiers(text string) []string {
+	var parts []string
+	var current strings.Builder
+	for i := 0; i < len(text); i++ {
+		ch := text[i]
+		if ch == ' ' || ch == '\t' {
+			if current.Len() > 0 {
+				parts = append(parts, current.String())
+				current.Reset()
+			}
+		} else {
+			current.WriteByte(ch)
+		}
+	}
+	if current.Len() > 0 {
+		parts = append(parts, current.String())
+	}
+	return parts
+}
+
+// ExtractColumnName 从文本中提取列名（不带表前缀）
+func (l *Lexer) ExtractColumnName(text string) string {
+	text = strings.TrimSpace(text)
+	if text == "" {
+		return ""
+	}
+	parts := l.SplitDotParts(text)
+	if len(parts) >= 2 {
+		return l.Unquote(parts[len(parts)-1])
+	}
+	return l.Unquote(text)
+}
+
+// ExtractColumnAndAlias 从列文本中提取列名和别名
+func (l *Lexer) ExtractColumnAndAlias(text string) (string, string) {
+	upper := strings.ToUpper(text)
+	if idx := strings.LastIndex(upper, " AS "); idx >= 0 {
+		colPart := strings.TrimSpace(text[:idx])
+		aliasPart := strings.TrimSpace(text[idx+4:])
+		return l.ExtractColumnName(colPart), aliasPart
+	}
+	parts := l.SplitIdentifiers(text)
+	if len(parts) >= 2 {
+		lastPart := parts[len(parts)-1]
+		beforeLast := strings.TrimSpace(text[:len(text)-len(lastPart)])
+		if !strings.Contains(beforeLast, ".") {
+			if !strings.Contains(beforeLast, "(") || strings.HasSuffix(beforeLast, ")") {
+				return l.ExtractColumnName(beforeLast), strings.TrimSpace(lastPart)
+			}
+		}
+	}
+	return l.ExtractColumnName(text), ""
+}
+
+// TrimTrailingComma 去除尾部逗号
+func TrimTrailingComma(s string) string {
+	s = strings.TrimSpace(s)
+	if strings.HasSuffix(s, ",") {
+		return strings.TrimSpace(s[:len(s)-1])
+	}
+	return s
+}