Files
mayfly-go/server/internal/db/dbm/sqlparser/base/lexer.go
2026-05-08 20:45:13 +08:00

341 lines
7.7 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package base
import (
"strings"
"mayfly-go/internal/db/dbm/sqlparser/tokenizer"
)
// Lexer provides dialect-independent lexing helpers on top of a token stream:
// a read cursor, skipping utilities and text-extraction helpers.
type Lexer struct {
// SQL is the original statement text; TextFrom and TextFromExclusive slice
// it using the Pos/End offsets stored on the tokens.
SQL string
// Tokens is the token stream produced by the tokenizer in NewLexer.
// NOTE(review): the accessors treat the last element as the EOF token —
// confirm the tokenizer always appends one.
Tokens []tokenizer.Token
// Pos is the index into Tokens of the current (not yet consumed) token.
Pos int
// Length is len(Tokens) as captured by NewLexer.
Length int
}
// NewLexer tokenizes sql with the given dialect configuration and returns a
// Lexer positioned on the first token.
func NewLexer(sql string, cfg tokenizer.DialectConfig) *Lexer {
	tokens := tokenizer.New(sql, cfg).Tokens

	lx := new(Lexer) // Pos starts at its zero value, i.e. the first token
	lx.SQL = sql
	lx.Tokens = tokens
	lx.Length = len(tokens)
	return lx
}
// Current returns the token under the cursor without consuming it. Once the
// cursor has moved past the end of the stream, the last token is returned
// (the original code labels it as EOF).
func (l *Lexer) Current() tokenizer.Token {
	idx := l.Pos
	if idx >= l.Length {
		idx = l.Length - 1
	}
	return l.Tokens[idx]
}
// Peek returns the token at Pos+offset without moving the cursor.
//
// The looked-up index is clamped to the token stream: positions past the end
// yield the last token, and positions before the start (possible with a
// negative offset) yield the first token instead of panicking with an
// out-of-range index.
func (l *Lexer) Peek(offset int) tokenizer.Token {
	idx := l.Pos + offset
	switch {
	case idx < 0:
		// Looking back past the beginning of the stream.
		idx = 0
	case idx >= l.Length:
		// Looking ahead past the end: fall back to the last token.
		idx = l.Length - 1
	}
	return l.Tokens[idx]
}
// Consume returns the current token and advances the cursor by one. When the
// cursor is already past the end, the last token is returned and the cursor
// is left untouched.
func (l *Lexer) Consume() tokenizer.Token {
	if l.Pos < l.Length {
		tok := l.Tokens[l.Pos]
		l.Pos++
		return tok
	}
	return l.Tokens[l.Length-1]
}
// ExpectValue consumes and returns the current token when its Value equals
// val. Otherwise the cursor stays where it is and the current token is
// returned unchanged.
func (l *Lexer) ExpectValue(val string) tokenizer.Token {
	cur := l.Current()
	if cur.Value != val {
		return cur
	}
	return l.Consume()
}
// SkipSemicolons consumes every consecutive ";" token at the cursor.
func (l *Lexer) SkipSemicolons() {
	for {
		if l.Current().Value != ";" {
			return
		}
		l.Consume()
	}
}
// SkipToNextStatement discards tokens up to and including the next ";".
// If EOF is reached first, the cursor simply stops there.
func (l *Lexer) SkipToNextStatement() {
	for {
		cur := l.Current()
		if cur.IsEOF() || cur.Value == ";" {
			break
		}
		l.Consume()
	}
	// Swallow the terminating semicolon, if that is what stopped the scan.
	if l.Current().Value == ";" {
		l.Consume()
	}
}
// SkipParentheses consumes a balanced parenthesized group, including nested
// groups, when the current token is "(". It does nothing otherwise. If EOF is
// hit before the group is closed, the cursor stops at EOF.
func (l *Lexer) SkipParentheses() {
	if l.Current().Value != "(" {
		return
	}
	l.Consume() // the opening "("

	// open counts the parentheses still waiting for their ")".
	for open := 1; open > 0 && !l.Current().IsEOF(); l.Consume() {
		switch l.Current().Value {
		case "(":
			open++
		case ")":
			open--
		}
	}
}
// SkipExpr skips a single expression (used for WHERE, HAVING and similar
// clauses). Parenthesized groups are skipped as a unit. The scan stops at a
// top-level comma, at any token for which IsExprEnd reports true, or at EOF.
func (l *Lexer) SkipExpr() {
	for {
		cur := l.Current()
		if cur.IsEOF() {
			return
		}
		switch {
		case cur.Value == "(":
			l.SkipParentheses()
		case cur.Value == "," || l.IsExprEnd():
			return
		default:
			l.Consume()
		}
	}
}
// SkipGroupByExpr skips a GROUP BY expression list. Unlike SkipExpr, commas
// do not stop the scan, so comma-separated items are skipped together.
// Parenthesized groups are skipped as a unit; the scan stops when IsExprEnd
// reports true, at ";" or at EOF.
func (l *Lexer) SkipGroupByExpr() {
	for {
		cur := l.Current()
		if cur.IsEOF() {
			return
		}
		switch {
		case cur.Value == "(":
			l.SkipParentheses()
		case l.IsExprEnd() || cur.Value == ";":
			return
		default:
			l.Consume()
		}
	}
}
// SkipOrderByExpr skips an ORDER BY expression list, including the commas
// between items and any ASC/DESC modifiers. Parenthesized groups are skipped
// as a unit; the scan stops when IsExprEnd reports true, at ";" or at EOF.
func (l *Lexer) SkipOrderByExpr() {
	for !l.Current().IsEOF() {
		cur := l.Current()
		switch {
		case cur.Value == ",", cur.IsKeyword("ASC", "DESC"):
			// Separators and sort directions belong to the list.
			l.Consume()
		case l.IsExprEnd() || cur.Value == ";":
			return
		case cur.Value == "(":
			l.SkipParentheses()
		default:
			l.Consume()
		}
	}
}
// IsExprEnd reports whether the current token terminates an expression: one
// of the clause/join keywords listed below, a ";" or a ")".
func (l *Lexer) IsExprEnd() bool {
	cur := l.Current()
	if cur.IsKeyword(
		"FROM", "WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET",
		"UNION", "INTO", "SET", "VALUES", "ON", "USING", "FOR", "RETURNING",
		"LEFT", "RIGHT", "INNER", "OUTER", "CROSS", "NATURAL", "FULL", "JOIN",
	) {
		return true
	}
	switch cur.Value {
	case ";", ")":
		return true
	}
	return false
}
// IsFromClauseEnd reports whether the current token ends a FROM clause: one
// of the follow-up clause keywords listed below, a ";" or a ")".
func (l *Lexer) IsFromClauseEnd() bool {
	cur := l.Current()
	if cur.IsKeyword(
		"WHERE", "GROUP", "HAVING", "ORDER", "LIMIT", "OFFSET",
		"UNION", "INTO", "FOR", "RETURNING",
	) {
		return true
	}
	switch cur.Value {
	case ";", ")":
		return true
	}
	return false
}
// IsSelectClauseEnd reports whether the current token ends the SELECT column
// list: one of the clause keywords listed below, a ";" or a ")".
func (l *Lexer) IsSelectClauseEnd() bool {
	cur := l.Current()
	if cur.IsKeyword(
		"FROM", "WHERE", "GROUP", "HAVING", "ORDER",
		"LIMIT", "OFFSET", "UNION", "INTO", "FOR",
	) {
		return true
	}
	switch cur.Value {
	case ";", ")":
		return true
	}
	return false
}
// IsJoinStart reports whether the current token is one of the keywords that
// can open a join: a join modifier (LEFT, RIGHT, INNER, OUTER, NATURAL,
// CROSS, FULL) or STRAIGHT_JOIN. A bare JOIN keyword is not in this list.
func (l *Lexer) IsJoinStart() bool {
	return l.Current().IsKeyword(
		"LEFT", "RIGHT", "INNER", "OUTER", "NATURAL", "CROSS", "FULL", "STRAIGHT_JOIN",
	)
}
// TextFrom returns the raw SQL text spanning from the token at index start up
// to and including the current token. If the range would end on the EOF
// token, it ends on the token before it instead. An empty string is returned
// when start is at or beyond the end of the stream, or when the resulting
// byte range is empty.
func (l *Lexer) TextFrom(start int) string {
	if start >= l.Length {
		return ""
	}

	// Index of the last token to include: the cursor, kept within
	// [start, Length-1].
	last := l.Pos
	if last > l.Length-1 {
		last = l.Length - 1
	}
	if last < start {
		last = start
	}

	first, final := l.Tokens[start], l.Tokens[last]
	if last > 0 && final.Type == tokenizer.TokenEOF {
		final = l.Tokens[last-1]
	}

	if final.End > first.Pos {
		return l.SQL[first.Pos:final.End]
	}
	return ""
}
// TextFromExclusive returns the raw SQL text spanning from the token at index
// start up to the token just before the cursor (the current token is
// excluded). When nothing after start has been consumed yet, the end is
// clamped to start, so the text of the start token itself is returned. If the
// range would end on the EOF token, it ends on the token before it instead.
// An empty string is returned when start is at or beyond the end of the
// stream, or when the resulting byte range is empty.
func (l *Lexer) TextFromExclusive(start int) string {
	if start >= l.Length {
		return ""
	}

	// Index of the last consumed token, never before start.
	last := l.Pos - 1
	if last < start {
		last = start
	}

	first, final := l.Tokens[start], l.Tokens[last]
	if last > 0 && final.Type == tokenizer.TokenEOF {
		final = l.Tokens[last-1]
	}

	if final.End > first.Pos {
		return l.SQL[first.Pos:final.End]
	}
	return ""
}
// ParseInt converts the ASCII digits found in s into a non-negative int.
//
// Parsing is best-effort and never fails: every character that is not an
// ASCII digit (including a sign or a decimal point) is ignored, and an input
// without digits yields 0. Values too large for int saturate at the maximum
// int instead of silently wrapping around to an unrelated or negative number.
func (l *Lexer) ParseInt(s string) int {
	const maxInt = int(^uint(0) >> 1)

	n := 0
	for _, ch := range s {
		if ch < '0' || ch > '9' {
			continue
		}
		digit := int(ch - '0')
		// n*10+digit would exceed maxInt: clamp rather than overflow.
		if n > (maxInt-digit)/10 {
			return maxInt
		}
		n = n*10 + digit
	}
	return n
}
// Unquote strips one pair of surrounding identifier quotes from s. Only
// matching backticks or matching double quotes are removed; any other input,
// including strings shorter than two bytes, is returned unchanged.
func (l *Lexer) Unquote(s string) string {
	if len(s) < 2 {
		return s
	}
	first, last := s[0], s[len(s)-1]
	if first == last && (first == '`' || first == '"') {
		return s[1 : len(s)-1]
	}
	return s
}
// SplitDotParts splits a possibly qualified identifier on "." while leaving
// dots inside backtick- or double-quoted sections alone. Each part keeps its
// surrounding quotes and is trimmed of surrounding whitespace. Inside a
// quoted section a doubled quote character is treated as an escaped quote and
// collapsed to a single one. Text left after the last dot is added only when
// it is non-empty, so a trailing dot does not produce an empty final part.
func (l *Lexer) SplitDotParts(text string) []string {
	var (
		segments []string
		buf      strings.Builder
		open     byte // active quote character; 0 while outside quotes
	)
	flush := func() {
		segments = append(segments, strings.TrimSpace(buf.String()))
		buf.Reset()
	}

	for pos := 0; pos < len(text); pos++ {
		c := text[pos]
		switch {
		case open != 0:
			buf.WriteByte(c)
			if c != open {
				break
			}
			if pos+1 < len(text) && text[pos+1] == open {
				// Escaped quote: keep one copy, skip the second.
				pos++
			} else {
				open = 0
			}
		case c == '.':
			flush()
		default:
			if c == '`' || c == '"' {
				open = c
			}
			buf.WriteByte(c)
		}
	}

	if buf.Len() > 0 {
		flush()
	}
	return segments
}
// SplitIdentifiers splits text into its whitespace-separated fields.
//
// All ASCII whitespace counts as a separator (space, tab, newline, carriage
// return, vertical tab, form feed), so text taken from multi-line SQL is
// split correctly and fields never carry stray line breaks. Runs of
// whitespace produce no empty fields; input without any field yields a nil
// slice. Quotes are not interpreted.
func (l *Lexer) SplitIdentifiers(text string) []string {
	var parts []string
	start := -1 // start index of the field being scanned, -1 between fields
	for i := 0; i < len(text); i++ {
		switch text[i] {
		case ' ', '\t', '\n', '\r', '\v', '\f':
			if start >= 0 {
				parts = append(parts, text[start:i])
				start = -1
			}
		default:
			if start < 0 {
				start = i
			}
		}
	}
	if start >= 0 {
		parts = append(parts, text[start:])
	}
	return parts
}
// ExtractColumnName returns the bare column name contained in text: the input
// is trimmed, any qualifier in front of the last dot-separated part (as split
// by SplitDotParts) is dropped, and surrounding identifier quotes are removed
// with Unquote. Blank input yields an empty string.
func (l *Lexer) ExtractColumnName(text string) string {
	name := strings.TrimSpace(text)
	if name == "" {
		return ""
	}
	if segs := l.SplitDotParts(name); len(segs) > 1 {
		name = segs[len(segs)-1]
	}
	return l.Unquote(name)
}
// ExtractColumnAndAlias splits the text of one select-list item into its
// column name and alias. The alias is empty when none is found.
//
// Explicit aliases: the last whitespace-delimited AS keyword (any letter
// case; space, tab or line break on either side) separates column and alias.
// An AS nested inside parentheses or quotes — for example the one in
// CAST(x AS INT) — is not treated as the alias keyword. If the quotes or
// parentheses in text are unbalanced, nesting cannot be trusted and the last
// AS found anywhere is used.
//
// Implicit aliases ("expr alias"): the last whitespace-separated field is
// taken as the alias when the text before it contains no "." and is either
// free of "(" or ends with ")".
//
// The column part is always normalised through ExtractColumnName.
func (l *Lexer) ExtractColumnAndAlias(text string) (string, string) {
	isSpace := func(b byte) bool {
		return b == ' ' || b == '\t' || b == '\n' || b == '\r'
	}

	// Scan the bytes of text itself (not an upper-cased copy, whose byte
	// length can differ for some Unicode letters) so that the index found is
	// always valid for slicing text.
	topLevel, anywhere := -1, -1
	depth := 0
	var quote byte // active quote character; 0 while outside quotes
	for i := 0; i < len(text); i++ {
		ch := text[i]
		switch {
		case quote != 0:
			if ch == quote {
				quote = 0
			}
		case ch == '\'' || ch == '"' || ch == '`':
			quote = ch
		case ch == '(':
			depth++
		case ch == ')':
			if depth > 0 {
				depth--
			}
		}
		// OR-ing with 0x20 lower-cases an ASCII letter.
		if isSpace(ch) && i+3 < len(text) &&
			(text[i+1]|0x20) == 'a' && (text[i+2]|0x20) == 's' && isSpace(text[i+3]) {
			anywhere = i
			if quote == 0 && depth == 0 {
				topLevel = i
			}
		}
	}

	asIdx := topLevel
	if quote != 0 || depth != 0 {
		asIdx = anywhere
	}
	if asIdx >= 0 {
		colPart := strings.TrimSpace(text[:asIdx])
		aliasPart := strings.TrimSpace(text[asIdx+4:]) // skip blank + "AS" + blank
		return l.ExtractColumnName(colPart), aliasPart
	}

	// The alias is cut off the end of the string by length, which is only
	// correct when the string has no trailing whitespace.
	trimmed := strings.TrimSpace(text)
	parts := l.SplitIdentifiers(trimmed)
	if len(parts) >= 2 {
		lastPart := parts[len(parts)-1]
		beforeLast := strings.TrimSpace(trimmed[:len(trimmed)-len(lastPart)])
		if !strings.Contains(beforeLast, ".") &&
			(!strings.Contains(beforeLast, "(") || strings.HasSuffix(beforeLast, ")")) {
			return l.ExtractColumnName(beforeLast), strings.TrimSpace(lastPart)
		}
	}
	return l.ExtractColumnName(text), ""
}
// TrimTrailingComma trims surrounding whitespace from s and removes a single
// trailing comma, together with any whitespace that preceded it. Only one
// comma is removed.
func TrimTrailingComma(s string) string {
	withoutComma := strings.TrimSuffix(strings.TrimSpace(s), ",")
	return strings.TrimSpace(withoutComma)
}