mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	Convert EOL to UNIX-style to render MD properly (#8925)
* Convert EOL to UNIX-style to render MD properly
* Update modules/markup/markdown/markdown.go
  Co-Authored-By: zeripath <art27@cantab.net>
* Fix lint optimization
* Check for empty content before conversion
* Update modules/util/util.go
  Co-Authored-By: zeripath <art27@cantab.net>
* Improved checks and tests
* Add paragraph render test
* Improve speed even more, improve tests
* Small improvement by @gary-kim
* Fix test for DOS
* More improvements
* Restart CI
This commit is contained in:
		
				
					committed by
					
						
						Antoine GIRARD
					
				
			
			
				
	
			
			
			
						parent
						
							cda8de2004
						
					
				
				
					commit
					7b97e04555
				
			@@ -157,7 +157,8 @@ func RenderRaw(body []byte, urlPrefix string, wikiMarkdown bool) []byte {
 | 
			
		||||
		exts |= blackfriday.HardLineBreak
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	body = blackfriday.Run(body, blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts))
 | 
			
		||||
	// Need to normalize EOL to UNIX LF to have consistent results in rendering
 | 
			
		||||
	body = blackfriday.Run(util.NormalizeEOL(body), blackfriday.WithRenderer(renderer), blackfriday.WithExtensions(exts))
 | 
			
		||||
	return markup.SanitizeBytes(body)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -294,3 +294,25 @@ func TestTotal_RenderString(t *testing.T) {
 | 
			
		||||
		assert.Equal(t, testCases[i+1], line)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestRender_RenderParagraphs(t *testing.T) {
 | 
			
		||||
	test := func(t *testing.T, str string, cnt int) {
 | 
			
		||||
		unix := []byte(str)
 | 
			
		||||
		res := string(RenderRaw(unix, "", false))
 | 
			
		||||
		assert.Equal(t, strings.Count(res, "<p"), cnt)
 | 
			
		||||
 | 
			
		||||
		mac := []byte(strings.ReplaceAll(str, "\n", "\r"))
 | 
			
		||||
		res = string(RenderRaw(mac, "", false))
 | 
			
		||||
		assert.Equal(t, strings.Count(res, "<p"), cnt)
 | 
			
		||||
 | 
			
		||||
		dos := []byte(strings.ReplaceAll(str, "\n", "\r\n"))
 | 
			
		||||
		res = string(RenderRaw(dos, "", false))
 | 
			
		||||
		assert.Equal(t, strings.Count(res, "<p"), cnt)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	test(t, "\nOne\nTwo\nThree", 1)
 | 
			
		||||
	test(t, "\n\nOne\nTwo\nThree", 1)
 | 
			
		||||
	test(t, "\n\nOne\nTwo\nThree\n\n\n", 1)
 | 
			
		||||
	test(t, "A\n\nB\nC\n", 2)
 | 
			
		||||
	test(t, "A\n\n\nB\nC\n", 2)
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@
 | 
			
		||||
package util
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"strings"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -63,3 +64,39 @@ func Min(a, b int) int {
 | 
			
		||||
// IsEmptyString reports whether s is empty or consists solely of white space,
// as defined by strings.TrimSpace.
func IsEmptyString(s string) bool {
	trimmed := strings.TrimSpace(s)
	return trimmed == ""
}
 | 
			
		||||
 | 
			
		||||
// NormalizeEOL rewrites every Windows (CRLF) and classic Mac (CR) line ending
// in input as a UNIX line feed (LF).
//
// When input contains no carriage return it is returned as-is, without being
// copied; otherwise the result is a newly allocated slice and input is left
// unmodified.
func NormalizeEOL(input []byte) []byte {
	cr := bytes.IndexByte(input, '\r')
	if cr < 0 {
		return input
	}

	// The result can never be longer than the input, so this capacity is
	// enough for every append below.
	out := make([]byte, 0, len(input))
	rest := input
	for cr >= 0 {
		// Keep everything before the CR and emit a single LF in its place.
		out = append(out, rest[:cr]...)
		out = append(out, '\n')
		rest = rest[cr+1:]

		// A LF directly after the CR is the second half of a CRLF pair and
		// has already been accounted for.
		if len(rest) > 0 && rest[0] == '\n' {
			rest = rest[1:]
		}
		cr = bytes.IndexByte(rest, '\r')
	}

	// Whatever remains holds no CR and is copied through verbatim.
	return append(out, rest...)
}
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@
 | 
			
		||||
package util
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"strings"
 | 
			
		||||
	"testing"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/setting"
 | 
			
		||||
@@ -94,3 +95,61 @@ func TestIsEmptyString(t *testing.T) {
 | 
			
		||||
		assert.Equal(t, v.expected, IsEmptyString(v.s))
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func Test_NormalizeEOL(t *testing.T) {
 | 
			
		||||
	data1 := []string{
 | 
			
		||||
		"",
 | 
			
		||||
		"This text starts with empty lines",
 | 
			
		||||
		"another",
 | 
			
		||||
		"",
 | 
			
		||||
		"",
 | 
			
		||||
		"",
 | 
			
		||||
		"Some other empty lines in the middle",
 | 
			
		||||
		"more.",
 | 
			
		||||
		"And more.",
 | 
			
		||||
		"Ends with empty lines too.",
 | 
			
		||||
		"",
 | 
			
		||||
		"",
 | 
			
		||||
		"",
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	data2 := []string{
 | 
			
		||||
		"This text does not start with empty lines",
 | 
			
		||||
		"another",
 | 
			
		||||
		"",
 | 
			
		||||
		"",
 | 
			
		||||
		"",
 | 
			
		||||
		"Some other empty lines in the middle",
 | 
			
		||||
		"more.",
 | 
			
		||||
		"And more.",
 | 
			
		||||
		"Ends without EOLtoo.",
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	buildEOLData := func(data []string, eol string) []byte {
 | 
			
		||||
		return []byte(strings.Join(data, eol))
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	dos := buildEOLData(data1, "\r\n")
 | 
			
		||||
	unix := buildEOLData(data1, "\n")
 | 
			
		||||
	mac := buildEOLData(data1, "\r")
 | 
			
		||||
 | 
			
		||||
	assert.Equal(t, unix, NormalizeEOL(dos))
 | 
			
		||||
	assert.Equal(t, unix, NormalizeEOL(mac))
 | 
			
		||||
	assert.Equal(t, unix, NormalizeEOL(unix))
 | 
			
		||||
 | 
			
		||||
	dos = buildEOLData(data2, "\r\n")
 | 
			
		||||
	unix = buildEOLData(data2, "\n")
 | 
			
		||||
	mac = buildEOLData(data2, "\r")
 | 
			
		||||
 | 
			
		||||
	assert.Equal(t, unix, NormalizeEOL(dos))
 | 
			
		||||
	assert.Equal(t, unix, NormalizeEOL(mac))
 | 
			
		||||
	assert.Equal(t, unix, NormalizeEOL(unix))
 | 
			
		||||
 | 
			
		||||
	assert.Equal(t, []byte("one liner"), NormalizeEOL([]byte("one liner")))
 | 
			
		||||
	assert.Equal(t, []byte("\n"), NormalizeEOL([]byte("\n")))
 | 
			
		||||
	assert.Equal(t, []byte("\ntwo liner"), NormalizeEOL([]byte("\ntwo liner")))
 | 
			
		||||
	assert.Equal(t, []byte("two liner\n"), NormalizeEOL([]byte("two liner\n")))
 | 
			
		||||
	assert.Equal(t, []byte{}, NormalizeEOL([]byte{}))
 | 
			
		||||
 | 
			
		||||
	assert.Equal(t, []byte("mix\nand\nmatch\n."), NormalizeEOL([]byte("mix\r\nand\rmatch\n.")))
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user