mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	Run processors on whole of text (#16155)
There is an inefficiency in the design of our processors which means that the emoji and other processors run in O(n^2) time. This PR forces each processor to process the entirety of a text node before passing control back up. The fundamental inefficiency remains, but it should be significantly ameliorated. Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
		@@ -6,6 +6,7 @@
 | 
			
		||||
package emoji
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"io"
 | 
			
		||||
	"sort"
 | 
			
		||||
	"strings"
 | 
			
		||||
	"sync"
 | 
			
		||||
@@ -145,6 +146,8 @@ func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
 | 
			
		||||
	if n.writecount == 2 {
 | 
			
		||||
		n.idx = n.pos
 | 
			
		||||
		n.end = n.pos + len(p)
 | 
			
		||||
		n.pos += len(p)
 | 
			
		||||
		return len(p), io.EOF
 | 
			
		||||
	}
 | 
			
		||||
	n.pos += len(p)
 | 
			
		||||
	return len(p), nil
 | 
			
		||||
@@ -155,6 +158,8 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
 | 
			
		||||
	if n.writecount == 2 {
 | 
			
		||||
		n.idx = n.pos
 | 
			
		||||
		n.end = n.pos + len(s)
 | 
			
		||||
		n.pos += len(s)
 | 
			
		||||
		return len(s), io.EOF
 | 
			
		||||
	}
 | 
			
		||||
	n.pos += len(s)
 | 
			
		||||
	return len(s), nil
 | 
			
		||||
 
 | 
			
		||||
@@ -89,6 +89,7 @@ func isLinkStr(link string) bool {
 | 
			
		||||
	return validLinksPattern.MatchString(link)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// FIXME: This function is not concurrent safe
 | 
			
		||||
func getIssueFullPattern() *regexp.Regexp {
 | 
			
		||||
	if issueFullPattern == nil {
 | 
			
		||||
		issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
 | 
			
		||||
@@ -566,11 +567,16 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func mentionProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	start := 0
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next && start < len(node.Data) {
 | 
			
		||||
		// We replace only the first mention; other mentions will be addressed later
 | 
			
		||||
	found, loc := references.FindFirstMentionBytes([]byte(node.Data))
 | 
			
		||||
		found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:]))
 | 
			
		||||
		if !found {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
		loc.Start += start
 | 
			
		||||
		loc.End += start
 | 
			
		||||
		mention := node.Data[loc.Start:loc.End]
 | 
			
		||||
		var teams string
 | 
			
		||||
		teams, ok := ctx.Metas["teams"]
 | 
			
		||||
@@ -582,10 +588,17 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
			mentionOrgAndTeam := strings.Split(mention, "/")
 | 
			
		||||
			if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
 | 
			
		||||
				replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
 | 
			
		||||
				node = node.NextSibling.NextSibling
 | 
			
		||||
				start = 0
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
		return
 | 
			
		||||
			start = loc.End
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
		start = 0
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
@@ -593,6 +606,8 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next {
 | 
			
		||||
		m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
@@ -672,7 +687,7 @@ func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
 | 
			
		||||
		switch ext := filepath.Ext(link); ext {
 | 
			
		||||
		// fast path: empty string, ignore
 | 
			
		||||
		case "":
 | 
			
		||||
		break
 | 
			
		||||
			// leave image as false
 | 
			
		||||
		case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
 | 
			
		||||
			image = true
 | 
			
		||||
		}
 | 
			
		||||
@@ -748,12 +763,17 @@ func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
 | 
			
		||||
			linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
 | 
			
		||||
		}
 | 
			
		||||
		replaceContent(node, m[0], m[1], linkNode)
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	if ctx.Metas == nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next {
 | 
			
		||||
		m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
@@ -771,23 +791,25 @@ func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
			// TODO if m[4]:m[5] is not nil, then link is to a comment,
 | 
			
		||||
			// and we should indicate that in the text somehow
 | 
			
		||||
			replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
 | 
			
		||||
 | 
			
		||||
		} else {
 | 
			
		||||
			orgRepoID := matchOrg + "/" + matchRepo + id
 | 
			
		||||
			replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
 | 
			
		||||
		}
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	if ctx.Metas == nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	var (
 | 
			
		||||
		found bool
 | 
			
		||||
		ref   *references.RenderizableReference
 | 
			
		||||
	)
 | 
			
		||||
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next {
 | 
			
		||||
		_, exttrack := ctx.Metas["format"]
 | 
			
		||||
		alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
 | 
			
		||||
 | 
			
		||||
@@ -828,7 +850,8 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
 | 
			
		||||
		if ref.Action == references.XRefActionNone {
 | 
			
		||||
			replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
 | 
			
		||||
		return
 | 
			
		||||
			node = node.NextSibling.NextSibling
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Decorate action keywords if actionable
 | 
			
		||||
@@ -846,6 +869,8 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
			Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
 | 
			
		||||
		}
 | 
			
		||||
		replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
 | 
			
		||||
		node = node.NextSibling.NextSibling.NextSibling.NextSibling
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// fullSha1PatternProcessor renders SHA containing URLs
 | 
			
		||||
@@ -853,6 +878,9 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	if ctx.Metas == nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next {
 | 
			
		||||
		m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
@@ -897,14 +925,23 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// emojiShortCodeProcessor for rendering text like :smile: into emoji
 | 
			
		||||
func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data)
 | 
			
		||||
	start := 0
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next && start < len(node.Data) {
 | 
			
		||||
		m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
		m[0] += start
 | 
			
		||||
		m[1] += start
 | 
			
		||||
 | 
			
		||||
		start = m[1]
 | 
			
		||||
 | 
			
		||||
		alias := node.Data[m[0]:m[1]]
 | 
			
		||||
		alias = strings.ReplaceAll(alias, ":", "")
 | 
			
		||||
@@ -914,25 +951,39 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
			s := strings.Join(setting.UI.Reactions, " ") + "gitea"
 | 
			
		||||
			if strings.Contains(s, alias) {
 | 
			
		||||
				replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
 | 
			
		||||
			return
 | 
			
		||||
				node = node.NextSibling.NextSibling
 | 
			
		||||
				start = 0
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
		return
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
		start = 0
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// emoji processor to match emoji and add emoji class
 | 
			
		||||
func emojiProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	m := emoji.FindEmojiSubmatchIndex(node.Data)
 | 
			
		||||
	start := 0
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next && start < len(node.Data) {
 | 
			
		||||
		m := emoji.FindEmojiSubmatchIndex(node.Data[start:])
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
		m[0] += start
 | 
			
		||||
		m[1] += start
 | 
			
		||||
 | 
			
		||||
		codepoint := node.Data[m[0]:m[1]]
 | 
			
		||||
		start = m[1]
 | 
			
		||||
		val := emoji.FromCode(codepoint)
 | 
			
		||||
		if val != nil {
 | 
			
		||||
			replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
 | 
			
		||||
			node = node.NextSibling.NextSibling
 | 
			
		||||
			start = 0
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -942,10 +993,17 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data)
 | 
			
		||||
 | 
			
		||||
	start := 0
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next && start < len(node.Data) {
 | 
			
		||||
		m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
		m[2] += start
 | 
			
		||||
		m[3] += start
 | 
			
		||||
 | 
			
		||||
		hash := node.Data[m[2]:m[3]]
 | 
			
		||||
		// The regex does not lie, it matches the hash pattern.
 | 
			
		||||
		// However, a regex cannot know if a hash actually exists or not.
 | 
			
		||||
@@ -959,32 +1017,46 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
			if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
 | 
			
		||||
				log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
 | 
			
		||||
			}
 | 
			
		||||
		return
 | 
			
		||||
			start = m[3]
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		replaceContent(node, m[2], m[3],
 | 
			
		||||
			createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
 | 
			
		||||
		start = 0
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// emailAddressProcessor replaces raw email addresses with a mailto: link.
 | 
			
		||||
func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next {
 | 
			
		||||
		m := emailRegex.FindStringSubmatchIndex(node.Data)
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		mail := node.Data[m[2]:m[3]]
 | 
			
		||||
		replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// linkProcessor creates links for any HTTP or HTTPS URL not captured by
 | 
			
		||||
// markdown.
 | 
			
		||||
func linkProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next {
 | 
			
		||||
		m := common.LinkRegex.FindStringIndex(node.Data)
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		uri := node.Data[m[0]:m[1]]
 | 
			
		||||
		replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func genDefaultLinkProcessor(defaultLink string) processor {
 | 
			
		||||
@@ -1008,12 +1080,17 @@ func genDefaultLinkProcessor(defaultLink string) processor {
 | 
			
		||||
 | 
			
		||||
// descriptionLinkProcessor creates links for DescriptionHTML
 | 
			
		||||
func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
 | 
			
		||||
	next := node.NextSibling
 | 
			
		||||
	for node != nil && node != next {
 | 
			
		||||
		m := common.LinkRegex.FindStringIndex(node.Data)
 | 
			
		||||
		if m == nil {
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		uri := node.Data[m[0]:m[1]]
 | 
			
		||||
		replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
 | 
			
		||||
		node = node.NextSibling.NextSibling
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func createDescriptionLink(href, content string) *html.Node {
 | 
			
		||||
 
 | 
			
		||||
@@ -464,3 +464,19 @@ func TestIssue16020(t *testing.T) {
 | 
			
		||||
	assert.NoError(t, err)
 | 
			
		||||
	assert.Equal(t, data, res.String())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func BenchmarkEmojiPostprocess(b *testing.B) {
 | 
			
		||||
	data := "🥰 "
 | 
			
		||||
	for len(data) < 1<<16 {
 | 
			
		||||
		data += data
 | 
			
		||||
	}
 | 
			
		||||
	b.ResetTimer()
 | 
			
		||||
	for i := 0; i < b.N; i++ {
 | 
			
		||||
		var res strings.Builder
 | 
			
		||||
		err := PostProcess(&RenderContext{
 | 
			
		||||
			URLPrefix: "https://example.com",
 | 
			
		||||
			Metas:     localMetas,
 | 
			
		||||
		}, strings.NewReader(data), &res)
 | 
			
		||||
		assert.NoError(b, err)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user