mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	Fix several render issues (#14986)
* Fix an issue with panics related to attributes * Wrap goldmark render in a recovery function * Reduce memory use in render emoji * Use a pipe for rendering goldmark - still needs more work and a limiter Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: Lauris BH <lauris@nix.lv>
This commit is contained in:
		@@ -30,6 +30,9 @@ var (
 | 
			
		||||
	// aliasMap provides a map of the alias to its emoji data.
 | 
			
		||||
	aliasMap map[string]int
 | 
			
		||||
 | 
			
		||||
	// emptyReplacer is the string replacer for emoji codes.
 | 
			
		||||
	emptyReplacer *strings.Replacer
 | 
			
		||||
 | 
			
		||||
	// codeReplacer is the string replacer for emoji codes.
 | 
			
		||||
	codeReplacer *strings.Replacer
 | 
			
		||||
 | 
			
		||||
@@ -49,6 +52,7 @@ func loadMap() {
 | 
			
		||||
 | 
			
		||||
		// process emoji codes and aliases
 | 
			
		||||
		codePairs := make([]string, 0)
 | 
			
		||||
		emptyPairs := make([]string, 0)
 | 
			
		||||
		aliasPairs := make([]string, 0)
 | 
			
		||||
 | 
			
		||||
		// sort from largest to small so we match combined emoji first
 | 
			
		||||
@@ -64,6 +68,7 @@ func loadMap() {
 | 
			
		||||
			// setup codes
 | 
			
		||||
			codeMap[e.Emoji] = i
 | 
			
		||||
			codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
 | 
			
		||||
			emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
 | 
			
		||||
 | 
			
		||||
			// setup aliases
 | 
			
		||||
			for _, a := range e.Aliases {
 | 
			
		||||
@@ -77,6 +82,7 @@ func loadMap() {
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// create replacers
 | 
			
		||||
		emptyReplacer = strings.NewReplacer(emptyPairs...)
 | 
			
		||||
		codeReplacer = strings.NewReplacer(codePairs...)
 | 
			
		||||
		aliasReplacer = strings.NewReplacer(aliasPairs...)
 | 
			
		||||
	})
 | 
			
		||||
@@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
 | 
			
		||||
	return aliasReplacer.Replace(s)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type rememberSecondWriteWriter struct {
 | 
			
		||||
	pos        int
 | 
			
		||||
	idx        int
 | 
			
		||||
	end        int
 | 
			
		||||
	writecount int
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
 | 
			
		||||
	n.writecount++
 | 
			
		||||
	if n.writecount == 2 {
 | 
			
		||||
		n.idx = n.pos
 | 
			
		||||
		n.end = n.pos + len(p)
 | 
			
		||||
	}
 | 
			
		||||
	n.pos += len(p)
 | 
			
		||||
	return len(p), nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
 | 
			
		||||
	n.writecount++
 | 
			
		||||
	if n.writecount == 2 {
 | 
			
		||||
		n.idx = n.pos
 | 
			
		||||
		n.end = n.pos + len(s)
 | 
			
		||||
	}
 | 
			
		||||
	n.pos += len(s)
 | 
			
		||||
	return len(s), nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
 | 
			
		||||
func FindEmojiSubmatchIndex(s string) []int {
 | 
			
		||||
	loadMap()
 | 
			
		||||
	found := make(map[int]int)
 | 
			
		||||
	keys := make([]int, 0)
 | 
			
		||||
	secondWriteWriter := rememberSecondWriteWriter{}
 | 
			
		||||
 | 
			
		||||
	//see if there are any emoji in string before looking for position of specific ones
 | 
			
		||||
	//no performance difference when there is a match but 10x faster when there are not
 | 
			
		||||
	if s == ReplaceCodes(s) {
 | 
			
		||||
	// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
 | 
			
		||||
	// we can be lazy here.
 | 
			
		||||
	//
 | 
			
		||||
	// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
 | 
			
		||||
	// submatch is simply the second thing that is written to WriteString in the writer.
 | 
			
		||||
	//
 | 
			
		||||
	// Therefore we can simply take the index of the second write as our first emoji
 | 
			
		||||
	//
 | 
			
		||||
	// FIXME: just copy the trie implementation from strings.NewReplacer
 | 
			
		||||
	_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
 | 
			
		||||
 | 
			
		||||
	// if we wrote less than twice then we never "replaced"
 | 
			
		||||
	if secondWriteWriter.writecount < 2 {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// get index of first emoji occurrence while also checking for longest combination
 | 
			
		||||
	for j := range GemojiData {
 | 
			
		||||
		i := strings.Index(s, GemojiData[j].Emoji)
 | 
			
		||||
		if i != -1 {
 | 
			
		||||
			if _, ok := found[i]; !ok {
 | 
			
		||||
				if len(keys) == 0 || i < keys[0] {
 | 
			
		||||
					found[i] = j
 | 
			
		||||
					keys = []int{i}
 | 
			
		||||
				}
 | 
			
		||||
				if i == 0 {
 | 
			
		||||
					break
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(keys) > 0 {
 | 
			
		||||
		index := keys[0]
 | 
			
		||||
		return []int{index, index + len(GemojiData[found[index]].Emoji)}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return nil
 | 
			
		||||
	return []int{secondWriteWriter.idx, secondWriteWriter.end}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,8 @@ package emoji
 | 
			
		||||
import (
 | 
			
		||||
	"reflect"
 | 
			
		||||
	"testing"
 | 
			
		||||
 | 
			
		||||
	"github.com/stretchr/testify/assert"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
func TestDumpInfo(t *testing.T) {
 | 
			
		||||
@@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestFindEmojiSubmatchIndex(t *testing.T) {
 | 
			
		||||
	type testcase struct {
 | 
			
		||||
		teststring string
 | 
			
		||||
		expected   []int
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	testcases := []testcase{
 | 
			
		||||
		{
 | 
			
		||||
			"\U0001f44d",
 | 
			
		||||
			[]int{0, len("\U0001f44d")},
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			"\U0001f44d +1 \U0001f44d \U0001f37a",
 | 
			
		||||
			[]int{0, 4},
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			" \U0001f44d",
 | 
			
		||||
			[]int{1, 1 + len("\U0001f44d")},
 | 
			
		||||
		},
 | 
			
		||||
		{
 | 
			
		||||
			string([]byte{'\u0001'}) + "\U0001f44d",
 | 
			
		||||
			[]int{1, 1 + len("\U0001f44d")},
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, kase := range testcases {
 | 
			
		||||
		actual := FindEmojiSubmatchIndex(kase.teststring)
 | 
			
		||||
		assert.Equal(t, kase.expected, actual)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -313,41 +313,27 @@ func RenderEmoji(
 | 
			
		||||
	return ctx.postProcess(rawHTML)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
 | 
			
		||||
var nulCleaner = strings.NewReplacer("\000", "")
 | 
			
		||||
 | 
			
		||||
func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 | 
			
		||||
	if ctx.procs == nil {
 | 
			
		||||
		ctx.procs = defaultProcessors
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// give a generous extra 50 bytes
 | 
			
		||||
	res := make([]byte, 0, len(rawHTML)+50)
 | 
			
		||||
 | 
			
		||||
	res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
 | 
			
		||||
	// prepend "<html><body>"
 | 
			
		||||
	res = append(res, "<html><body>"...)
 | 
			
		||||
	_, _ = res.WriteString("<html><body>")
 | 
			
		||||
 | 
			
		||||
	// Strip out nuls - they're always invalid
 | 
			
		||||
	start := bytes.IndexByte(rawHTML, '\000')
 | 
			
		||||
	if start >= 0 {
 | 
			
		||||
		res = append(res, rawHTML[:start]...)
 | 
			
		||||
		start++
 | 
			
		||||
		for start < len(rawHTML) {
 | 
			
		||||
			end := bytes.IndexByte(rawHTML[start:], '\000')
 | 
			
		||||
			if end < 0 {
 | 
			
		||||
				res = append(res, rawHTML[start:]...)
 | 
			
		||||
				break
 | 
			
		||||
			} else if end > 0 {
 | 
			
		||||
				res = append(res, rawHTML[start:start+end]...)
 | 
			
		||||
			}
 | 
			
		||||
			start += end + 1
 | 
			
		||||
		}
 | 
			
		||||
	} else {
 | 
			
		||||
		res = append(res, rawHTML...)
 | 
			
		||||
	}
 | 
			
		||||
	_, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("<$1"))))
 | 
			
		||||
 | 
			
		||||
	// close the tags
 | 
			
		||||
	res = append(res, "</body></html>"...)
 | 
			
		||||
	_, _ = res.WriteString("</body></html>")
 | 
			
		||||
 | 
			
		||||
	// parse the HTML
 | 
			
		||||
	nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
 | 
			
		||||
	nodes, err := html.ParseFragment(res, nil)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, &postProcessError{"invalid HTML", err}
 | 
			
		||||
	}
 | 
			
		||||
@@ -384,17 +370,17 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 | 
			
		||||
	// Create buffer in which the data will be placed again. We know that the
 | 
			
		||||
	// length will be at least that of res; to spare a few alloc+copy, we
 | 
			
		||||
	// reuse res, resetting its length to 0.
 | 
			
		||||
	buf := bytes.NewBuffer(res[:0])
 | 
			
		||||
	res.Reset()
 | 
			
		||||
	// Render everything to buf.
 | 
			
		||||
	for _, node := range nodes {
 | 
			
		||||
		err = html.Render(buf, node)
 | 
			
		||||
		err = html.Render(res, node)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, &postProcessError{"error rendering processed HTML", err}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Everything done successfully, return parsed data.
 | 
			
		||||
	return buf.Bytes(), nil
 | 
			
		||||
	return res.Bytes(), nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
 | 
			
		||||
 
 | 
			
		||||
@@ -77,6 +77,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
 | 
			
		||||
					header.ID = util.BytesToReadOnlyString(id.([]byte))
 | 
			
		||||
				}
 | 
			
		||||
				toc = append(toc, header)
 | 
			
		||||
			} else {
 | 
			
		||||
				for _, attr := range v.Attributes() {
 | 
			
		||||
					if _, ok := attr.Value.([]byte); !ok {
 | 
			
		||||
						v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		case *ast.Image:
 | 
			
		||||
			// Images need two things:
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,8 @@
 | 
			
		||||
package markdown
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"io"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"sync"
 | 
			
		||||
 | 
			
		||||
@@ -18,7 +19,7 @@ import (
 | 
			
		||||
 | 
			
		||||
	chromahtml "github.com/alecthomas/chroma/formatters/html"
 | 
			
		||||
	"github.com/yuin/goldmark"
 | 
			
		||||
	"github.com/yuin/goldmark-highlighting"
 | 
			
		||||
	highlighting "github.com/yuin/goldmark-highlighting"
 | 
			
		||||
	meta "github.com/yuin/goldmark-meta"
 | 
			
		||||
	"github.com/yuin/goldmark/extension"
 | 
			
		||||
	"github.com/yuin/goldmark/parser"
 | 
			
		||||
@@ -34,6 +35,44 @@ var urlPrefixKey = parser.NewContextKey()
 | 
			
		||||
var isWikiKey = parser.NewContextKey()
 | 
			
		||||
var renderMetasKey = parser.NewContextKey()
 | 
			
		||||
 | 
			
		||||
type closesWithError interface {
 | 
			
		||||
	io.WriteCloser
 | 
			
		||||
	CloseWithError(err error) error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type limitWriter struct {
 | 
			
		||||
	w     closesWithError
 | 
			
		||||
	sum   int64
 | 
			
		||||
	limit int64
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Write implements the standard Write interface:
 | 
			
		||||
func (l *limitWriter) Write(data []byte) (int, error) {
 | 
			
		||||
	leftToWrite := l.limit - l.sum
 | 
			
		||||
	if leftToWrite < int64(len(data)) {
 | 
			
		||||
		n, err := l.w.Write(data[:leftToWrite])
 | 
			
		||||
		l.sum += int64(n)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return n, err
 | 
			
		||||
		}
 | 
			
		||||
		_ = l.w.Close()
 | 
			
		||||
		return n, fmt.Errorf("Rendered content too large - truncating render")
 | 
			
		||||
	}
 | 
			
		||||
	n, err := l.w.Write(data)
 | 
			
		||||
	l.sum += int64(n)
 | 
			
		||||
	return n, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Close closes the writer
 | 
			
		||||
func (l *limitWriter) Close() error {
 | 
			
		||||
	return l.w.Close()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// CloseWithError closes the writer
 | 
			
		||||
func (l *limitWriter) CloseWithError(err error) error {
 | 
			
		||||
	return l.w.CloseWithError(err)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewGiteaParseContext creates a parser.Context with the gitea context set
 | 
			
		||||
func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context {
 | 
			
		||||
	pc := parser.NewContext(parser.WithIDs(newPrefixedIDs()))
 | 
			
		||||
@@ -43,8 +82,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool
 | 
			
		||||
	return pc
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// render renders Markdown to HTML without handling special links.
 | 
			
		||||
func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
 | 
			
		||||
// actualRender renders Markdown to HTML without handling special links.
 | 
			
		||||
func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
 | 
			
		||||
	once.Do(func() {
 | 
			
		||||
		converter = goldmark.New(
 | 
			
		||||
			goldmark.WithExtensions(extension.Table,
 | 
			
		||||
@@ -119,12 +158,57 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown
 | 
			
		||||
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
 | 
			
		||||
	var buf bytes.Buffer
 | 
			
		||||
	if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil {
 | 
			
		||||
		log.Error("Unable to render: %v", err)
 | 
			
		||||
	rd, wr := io.Pipe()
 | 
			
		||||
	defer func() {
 | 
			
		||||
		_ = rd.Close()
 | 
			
		||||
		_ = wr.Close()
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
	lw := &limitWriter{
 | 
			
		||||
		w:     wr,
 | 
			
		||||
		limit: setting.UI.MaxDisplayFileSize * 3,
 | 
			
		||||
	}
 | 
			
		||||
	return markup.SanitizeReader(&buf).Bytes()
 | 
			
		||||
 | 
			
		||||
	// FIXME: should we include a timeout that closes the pipe to abort the parser and sanitizer if it takes too long?
 | 
			
		||||
	go func() {
 | 
			
		||||
		defer func() {
 | 
			
		||||
			err := recover()
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				return
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
 | 
			
		||||
			if log.IsDebug() {
 | 
			
		||||
				log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
 | 
			
		||||
			}
 | 
			
		||||
			_ = lw.CloseWithError(fmt.Errorf("%v", err))
 | 
			
		||||
		}()
 | 
			
		||||
 | 
			
		||||
		pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
 | 
			
		||||
		if err := converter.Convert(giteautil.NormalizeEOL(body), lw, parser.WithContext(pc)); err != nil {
 | 
			
		||||
			log.Error("Unable to render: %v", err)
 | 
			
		||||
			_ = lw.CloseWithError(err)
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
		_ = lw.Close()
 | 
			
		||||
	}()
 | 
			
		||||
	return markup.SanitizeReader(rd).Bytes()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
 | 
			
		||||
	defer func() {
 | 
			
		||||
		err := recover()
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
 | 
			
		||||
		if log.IsDebug() {
 | 
			
		||||
			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
 | 
			
		||||
		}
 | 
			
		||||
		ret = markup.SanitizeBytes(body)
 | 
			
		||||
	}()
 | 
			
		||||
	return actualRender(body, urlPrefix, metas, wikiMarkdown)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var (
 | 
			
		||||
 
 | 
			
		||||
@@ -309,6 +309,25 @@ func TestRender_RenderParagraphs(t *testing.T) {
 | 
			
		||||
	test(t, "A\n\n\nB\nC\n", 2)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestMarkdownRenderRaw(t *testing.T) {
 | 
			
		||||
	testcases := [][]byte{
 | 
			
		||||
		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6267570554535936
 | 
			
		||||
			0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60,
 | 
			
		||||
			0x5b,
 | 
			
		||||
		},
 | 
			
		||||
		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6278827345051648
 | 
			
		||||
			0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60,
 | 
			
		||||
		},
 | 
			
		||||
		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736[] = {
 | 
			
		||||
			0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d,
 | 
			
		||||
		},
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, testcase := range testcases {
 | 
			
		||||
		_ = RenderRaw(testcase, "", false)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func TestRenderSiblingImages_Issue12925(t *testing.T) {
 | 
			
		||||
	testcase := `
 | 
			
		||||

 | 
			
		||||
@@ -318,4 +337,5 @@ func TestRenderSiblingImages_Issue12925(t *testing.T) {
 | 
			
		||||
`
 | 
			
		||||
	res := string(RenderRaw([]byte(testcase), "", false))
 | 
			
		||||
	assert.Equal(t, expected, res)
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user