mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	Add Tabular Diff for CSV files (#14661)
Implements request #14320. The rendering of CSV files now matches the diff style. * Moved CSV logic into base package. * Added method to create a tabular diff. * Added CSV compare context. * Added CSV diff template. * Use new table style in CSV markup. * Added file size limit for CSV rendering. * Display CSV parser errors in diff. * Lazy read single file. * Lazy read rows for full diff. * Added unit tests for various CSV changes.
This commit is contained in:
		@@ -6,24 +6,20 @@ package markup
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"encoding/csv"
 | 
			
		||||
	"html"
 | 
			
		||||
	"io"
 | 
			
		||||
	"regexp"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"strconv"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/csv"
 | 
			
		||||
	"code.gitea.io/gitea/modules/markup"
 | 
			
		||||
	"code.gitea.io/gitea/modules/util"
 | 
			
		||||
	"code.gitea.io/gitea/modules/setting"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// quoteRegexp matches a quoted section: an opening single or double quote,
// one or more characters of any kind (newlines included, matched lazily), and
// the next single or double quote. The opening and closing quote characters
// are not required to be the same kind.
var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
 | 
			
		||||
 | 
			
		||||
func init() {
 | 
			
		||||
	markup.RegisterParser(Parser{})
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Parser implements markup.Parser for csv files
type Parser struct {
}
 | 
			
		||||
 | 
			
		||||
@@ -38,11 +34,35 @@ func (Parser) Extensions() []string {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Render implements markup.Parser
 | 
			
		||||
func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte {
 | 
			
		||||
	rd := csv.NewReader(bytes.NewReader(rawBytes))
 | 
			
		||||
	rd.Comma = p.bestDelimiter(rawBytes)
 | 
			
		||||
func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte {
 | 
			
		||||
	var tmpBlock bytes.Buffer
 | 
			
		||||
	tmpBlock.WriteString(`<table class="table">`)
 | 
			
		||||
 | 
			
		||||
	if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) {
 | 
			
		||||
		tmpBlock.WriteString("<pre>")
 | 
			
		||||
		tmpBlock.WriteString(html.EscapeString(string(rawBytes)))
 | 
			
		||||
		tmpBlock.WriteString("</pre>")
 | 
			
		||||
		return tmpBlock.Bytes()
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	rd := csv.CreateReaderAndGuessDelimiter(rawBytes)
 | 
			
		||||
 | 
			
		||||
	writeField := func(element, class, field string) {
 | 
			
		||||
		tmpBlock.WriteString("<")
 | 
			
		||||
		tmpBlock.WriteString(element)
 | 
			
		||||
		if len(class) > 0 {
 | 
			
		||||
			tmpBlock.WriteString(" class=\"")
 | 
			
		||||
			tmpBlock.WriteString(class)
 | 
			
		||||
			tmpBlock.WriteString("\"")
 | 
			
		||||
		}
 | 
			
		||||
		tmpBlock.WriteString(">")
 | 
			
		||||
		tmpBlock.WriteString(html.EscapeString(field))
 | 
			
		||||
		tmpBlock.WriteString("</")
 | 
			
		||||
		tmpBlock.WriteString(element)
 | 
			
		||||
		tmpBlock.WriteString(">")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	tmpBlock.WriteString(`<table class="data-table">`)
 | 
			
		||||
	row := 1
 | 
			
		||||
	for {
 | 
			
		||||
		fields, err := rd.Read()
 | 
			
		||||
		if err == io.EOF {
 | 
			
		||||
@@ -52,62 +72,19 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		tmpBlock.WriteString("<tr>")
 | 
			
		||||
		element := "td"
 | 
			
		||||
		if row == 1 {
 | 
			
		||||
			element = "th"
 | 
			
		||||
		}
 | 
			
		||||
		writeField(element, "line-num", strconv.Itoa(row))
 | 
			
		||||
		for _, field := range fields {
 | 
			
		||||
			tmpBlock.WriteString("<td>")
 | 
			
		||||
			tmpBlock.WriteString(html.EscapeString(field))
 | 
			
		||||
			tmpBlock.WriteString("</td>")
 | 
			
		||||
			writeField(element, "", field)
 | 
			
		||||
		}
 | 
			
		||||
		tmpBlock.WriteString("</tr>")
 | 
			
		||||
 | 
			
		||||
		row++
 | 
			
		||||
	}
 | 
			
		||||
	tmpBlock.WriteString("</table>")
 | 
			
		||||
 | 
			
		||||
	return tmpBlock.Bytes()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// bestDelimiter scores the input CSV data against delimiters, and returns the best match.
 | 
			
		||||
// Reads at most 10k bytes & 10 lines.
 | 
			
		||||
func (p Parser) bestDelimiter(data []byte) rune {
 | 
			
		||||
	maxLines := 10
 | 
			
		||||
	maxBytes := util.Min(len(data), 1e4)
 | 
			
		||||
	text := string(data[:maxBytes])
 | 
			
		||||
	text = quoteRegexp.ReplaceAllLiteralString(text, "")
 | 
			
		||||
	lines := strings.SplitN(text, "\n", maxLines+1)
 | 
			
		||||
	lines = lines[:util.Min(maxLines, len(lines))]
 | 
			
		||||
 | 
			
		||||
	delimiters := []rune{',', ';', '\t', '|'}
 | 
			
		||||
	bestDelim := delimiters[0]
 | 
			
		||||
	bestScore := 0.0
 | 
			
		||||
	for _, delim := range delimiters {
 | 
			
		||||
		score := p.scoreDelimiter(lines, delim)
 | 
			
		||||
		if score > bestScore {
 | 
			
		||||
			bestScore = score
 | 
			
		||||
			bestDelim = delim
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return bestDelim
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV
 | 
			
		||||
func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) {
 | 
			
		||||
	countTotal := 0
 | 
			
		||||
	countLineMax := 0
 | 
			
		||||
	linesNotEqual := 0
 | 
			
		||||
 | 
			
		||||
	for _, line := range lines {
 | 
			
		||||
		if len(line) == 0 {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		countLine := strings.Count(line, string(delim))
 | 
			
		||||
		countTotal += countLine
 | 
			
		||||
		if countLine != countLineMax {
 | 
			
		||||
			if countLineMax != 0 {
 | 
			
		||||
				linesNotEqual++
 | 
			
		||||
			}
 | 
			
		||||
			countLineMax = util.Max(countLine, countLineMax)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines)))
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -13,14 +13,10 @@ import (
 | 
			
		||||
func TestRenderCSV(t *testing.T) {
 | 
			
		||||
	var parser Parser
 | 
			
		||||
	var kases = map[string]string{
 | 
			
		||||
		"a":                         "<table class=\"table\"><tr><td>a</td></tr></table>",
 | 
			
		||||
		"1,2":                       "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
 | 
			
		||||
		"1;2":                       "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
 | 
			
		||||
		"1\t2":                      "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
 | 
			
		||||
		"1|2":                       "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
 | 
			
		||||
		"1,2,3;4,5,6;7,8,9\na;b;c":  "<table class=\"table\"><tr><td>1,2,3</td><td>4,5,6</td><td>7,8,9</td></tr><tr><td>a</td><td>b</td><td>c</td></tr></table>",
 | 
			
		||||
		"\"1,2,3,4\";\"a\nb\"\nc;d": "<table class=\"table\"><tr><td>1,2,3,4</td><td>a\nb</td></tr><tr><td>c</td><td>d</td></tr></table>",
 | 
			
		||||
		"<br/>":                     "<table class=\"table\"><tr><td><br/></td></tr></table>",
 | 
			
		||||
		"a":        "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>a</th></tr></table>",
 | 
			
		||||
		"1,2":      "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>1</th><th>2</th></tr></table>",
 | 
			
		||||
		"1;2\n3;4": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>1</th><th>2</th></tr><tr><td class=\"line-num\">2</td><td>3</td><td>4</td></tr></table>",
 | 
			
		||||
		"<br/>":    "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th><br/></th></tr></table>",
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for k, v := range kases {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user