mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	Add sanitizer rules per renderer (#16110)
* Added sanitizer rules per renderer. * Updated documentation. Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
		@@ -907,13 +907,17 @@ Gitea supports customizing the sanitization policy for rendered HTML. The exampl
 | 
			
		||||
ELEMENT = span
 | 
			
		||||
ALLOW_ATTR = class
 | 
			
		||||
REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
 | 
			
		||||
ALLOW_DATA_URI_IMAGES = true
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
 - `ELEMENT`: The element this policy applies to. Must be non-empty.
 | 
			
		||||
 - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty.
 | 
			
		||||
 - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute.
 | 
			
		||||
 - `ALLOW_DATA_URI_IMAGES`: **false** Allow data uri images (`<img src="data:image/png;base64,..."/>`).
 | 
			
		||||
 | 
			
		||||
Multiple sanitisation rules can be defined by adding unique subsections, e.g. `[markup.sanitizer.TeX-2]`.
 | 
			
		||||
To apply a sanitisation rules only for a specify external renderer they must use the renderer name, e.g. `[markup.sanitizer.asciidoc.rule-1]`.
 | 
			
		||||
If the rule is defined above the renderer ini section or the name does not match a renderer it is applied to every renderer.
 | 
			
		||||
 | 
			
		||||
## Time (`time`)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -64,8 +64,8 @@ IS_INPUT_FILE = false
 | 
			
		||||
[markup.jupyter]
 | 
			
		||||
ENABLED = true
 | 
			
		||||
FILE_EXTENSIONS = .ipynb
 | 
			
		||||
RENDER_COMMAND = "jupyter nbconvert --stdout --to html --template basic "
 | 
			
		||||
IS_INPUT_FILE = true
 | 
			
		||||
RENDER_COMMAND = "jupyter nbconvert --stdin --stdout --to html --template basic"
 | 
			
		||||
IS_INPUT_FILE = false
 | 
			
		||||
 | 
			
		||||
[markup.restructuredtext]
 | 
			
		||||
ENABLED = true
 | 
			
		||||
@@ -90,15 +90,50 @@ FILE_EXTENSIONS = .md,.markdown
 | 
			
		||||
RENDER_COMMAND  = pandoc -f markdown -t html --katex
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each section.
 | 
			
		||||
You must define `ELEMENT` and `ALLOW_ATTR` in each section.
 | 
			
		||||
 | 
			
		||||
To define multiple entries, add a unique alphanumeric suffix (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.something]`).
 | 
			
		||||
 | 
			
		||||
To apply a sanitisation rules only for a specify external renderer they must use the renderer name, e.g. `[markup.sanitizer.asciidoc.rule-1]`, `[markup.sanitizer.<renderer>.rule-1]`.
 | 
			
		||||
 | 
			
		||||
**Note**: If the rule is defined above the renderer ini section or the name does not match a renderer it is applied to every renderer.
 | 
			
		||||
 | 
			
		||||
Once your configuration changes have been made, restart Gitea to have changes take effect.
 | 
			
		||||
 | 
			
		||||
**Note**: Prior to Gitea 1.12 there was a single `markup.sanitiser` section with keys that were redefined for multiple rules, however,
 | 
			
		||||
there were significant problems with this method of configuration necessitating configuration through multiple sections.
 | 
			
		||||
 | 
			
		||||
### Example: Office DOCX
 | 
			
		||||
 | 
			
		||||
Display Office DOCX files with [`pandoc`](https://pandoc.org/):
 | 
			
		||||
```ini
 | 
			
		||||
[markup.docx]
 | 
			
		||||
ENABLED = true
 | 
			
		||||
FILE_EXTENSIONS = .docx
 | 
			
		||||
RENDER_COMMAND = "pandoc --from docx --to html --self-contained --template /path/to/basic.html"
 | 
			
		||||
 | 
			
		||||
[markup.sanitizer.docx.img]
 | 
			
		||||
ALLOW_DATA_URI_IMAGES = true
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The template file has the following content:
 | 
			
		||||
```
 | 
			
		||||
$body$
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Example: Jupyter Notebook
 | 
			
		||||
 | 
			
		||||
Display Jupyter Notebook files with [`nbconvert`](https://github.com/jupyter/nbconvert):
 | 
			
		||||
```ini
 | 
			
		||||
[markup.jupyter]
 | 
			
		||||
ENABLED = true
 | 
			
		||||
FILE_EXTENSIONS = .ipynb
 | 
			
		||||
RENDER_COMMAND = "jupyter-nbconvert --stdin --stdout --to html --template basic"
 | 
			
		||||
 | 
			
		||||
[markup.sanitizer.jupyter.img]
 | 
			
		||||
ALLOW_DATA_URI_IMAGES = true
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Customizing CSS
 | 
			
		||||
The external renderer is specified in the .ini in the format `[markup.XXXXX]` and the HTML supplied by your external renderer will be wrapped in a `<div>` with classes `markup` and `XXXXX`. The `markup` class provides out of the box styling (as does `markdown` if `XXXXX` is `markdown`). Otherwise you can use these classes to specifically target the contents of your rendered HTML. 
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -10,6 +10,7 @@ import (
 | 
			
		||||
	"html"
 | 
			
		||||
	"io"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"regexp"
 | 
			
		||||
	"strconv"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/csv"
 | 
			
		||||
@@ -38,6 +39,15 @@ func (Renderer) Extensions() []string {
 | 
			
		||||
	return []string{".csv", ".tsv"}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SanitizerRules implements markup.Renderer
 | 
			
		||||
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
 | 
			
		||||
	return []setting.MarkupSanitizerRule{
 | 
			
		||||
		{Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)},
 | 
			
		||||
		{Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
 | 
			
		||||
		{Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func writeField(w io.Writer, element, class, field string) error {
 | 
			
		||||
	if _, err := io.WriteString(w, "<"); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										7
									
								
								modules/markup/external/external.go
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										7
									
								
								modules/markup/external/external.go
									
									
									
									
										vendored
									
									
								
							@@ -30,7 +30,7 @@ func RegisterRenderers() {
 | 
			
		||||
 | 
			
		||||
// Renderer implements markup.Renderer for external tools
 | 
			
		||||
type Renderer struct {
 | 
			
		||||
	setting.MarkupRenderer
 | 
			
		||||
	*setting.MarkupRenderer
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Name returns the external tool name
 | 
			
		||||
@@ -48,6 +48,11 @@ func (p *Renderer) Extensions() []string {
 | 
			
		||||
	return p.FileExtensions
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SanitizerRules implements markup.Renderer
 | 
			
		||||
func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
 | 
			
		||||
	return p.MarkupSanitizerRules
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func envMark(envName string) string {
 | 
			
		||||
	if runtime.GOOS == "windows" {
 | 
			
		||||
		return "%" + envName + "%"
 | 
			
		||||
 
 | 
			
		||||
@@ -112,7 +112,7 @@ func TestRender_links(t *testing.T) {
 | 
			
		||||
 | 
			
		||||
	defaultCustom := setting.Markdown.CustomURLSchemes
 | 
			
		||||
	setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"}
 | 
			
		||||
	ReplaceSanitizer()
 | 
			
		||||
	InitializeSanitizer()
 | 
			
		||||
	CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
 | 
			
		||||
 | 
			
		||||
	test(
 | 
			
		||||
@@ -192,7 +192,7 @@ func TestRender_links(t *testing.T) {
 | 
			
		||||
 | 
			
		||||
	// Restore previous settings
 | 
			
		||||
	setting.Markdown.CustomURLSchemes = defaultCustom
 | 
			
		||||
	ReplaceSanitizer()
 | 
			
		||||
	InitializeSanitizer()
 | 
			
		||||
	CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -199,7 +199,7 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
 | 
			
		||||
		}
 | 
			
		||||
		_ = lw.Close()
 | 
			
		||||
	}()
 | 
			
		||||
	buf := markup.SanitizeReader(rd)
 | 
			
		||||
	buf := markup.SanitizeReader(rd, "")
 | 
			
		||||
	_, err := io.Copy(output, buf)
 | 
			
		||||
	return err
 | 
			
		||||
}
 | 
			
		||||
@@ -215,7 +215,7 @@ func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error
 | 
			
		||||
		if log.IsDebug() {
 | 
			
		||||
			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
 | 
			
		||||
		}
 | 
			
		||||
		ret := markup.SanitizeReader(input)
 | 
			
		||||
		ret := markup.SanitizeReader(input, "")
 | 
			
		||||
		_, err = io.Copy(output, ret)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			log.Error("SanitizeReader failed: %v", err)
 | 
			
		||||
@@ -249,6 +249,11 @@ func (Renderer) Extensions() []string {
 | 
			
		||||
	return setting.Markdown.FileExtensions
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SanitizerRules implements markup.Renderer
 | 
			
		||||
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
 | 
			
		||||
	return []setting.MarkupSanitizerRule{}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Render implements markup.Renderer
 | 
			
		||||
func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
 | 
			
		||||
	return render(ctx, input, output)
 | 
			
		||||
 
 | 
			
		||||
@@ -13,6 +13,7 @@ import (
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/highlight"
 | 
			
		||||
	"code.gitea.io/gitea/modules/markup"
 | 
			
		||||
	"code.gitea.io/gitea/modules/setting"
 | 
			
		||||
	"code.gitea.io/gitea/modules/util"
 | 
			
		||||
 | 
			
		||||
	"github.com/alecthomas/chroma"
 | 
			
		||||
@@ -41,6 +42,11 @@ func (Renderer) Extensions() []string {
 | 
			
		||||
	return []string{".org"}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SanitizerRules implements markup.Renderer
 | 
			
		||||
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
 | 
			
		||||
	return []setting.MarkupSanitizerRule{}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Render renders orgmode rawbytes to HTML
 | 
			
		||||
func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
 | 
			
		||||
	htmlWriter := org.NewHTMLWriter()
 | 
			
		||||
 
 | 
			
		||||
@@ -81,6 +81,7 @@ type Renderer interface {
 | 
			
		||||
	Name() string // markup format name
 | 
			
		||||
	Extensions() []string
 | 
			
		||||
	NeedPostProcess() bool
 | 
			
		||||
	SanitizerRules() []setting.MarkupSanitizerRule
 | 
			
		||||
	Render(ctx *RenderContext, input io.Reader, output io.Writer) error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -136,37 +137,32 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr
 | 
			
		||||
		_ = pw.Close()
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
	if renderer.NeedPostProcess() {
 | 
			
		||||
		pr2, pw2 := io.Pipe()
 | 
			
		||||
		defer func() {
 | 
			
		||||
			_ = pr2.Close()
 | 
			
		||||
			_ = pw2.Close()
 | 
			
		||||
		}()
 | 
			
		||||
	pr2, pw2 := io.Pipe()
 | 
			
		||||
	defer func() {
 | 
			
		||||
		_ = pr2.Close()
 | 
			
		||||
		_ = pw2.Close()
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
		wg.Add(1)
 | 
			
		||||
		go func() {
 | 
			
		||||
			buf := SanitizeReader(pr2)
 | 
			
		||||
			_, err = io.Copy(output, buf)
 | 
			
		||||
			_ = pr2.Close()
 | 
			
		||||
			wg.Done()
 | 
			
		||||
		}()
 | 
			
		||||
	wg.Add(1)
 | 
			
		||||
	go func() {
 | 
			
		||||
		buf := SanitizeReader(pr2, renderer.Name())
 | 
			
		||||
		_, err = io.Copy(output, buf)
 | 
			
		||||
		_ = pr2.Close()
 | 
			
		||||
		wg.Done()
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
		wg.Add(1)
 | 
			
		||||
		go func() {
 | 
			
		||||
	wg.Add(1)
 | 
			
		||||
	go func() {
 | 
			
		||||
		if renderer.NeedPostProcess() {
 | 
			
		||||
			err = PostProcess(ctx, pr, pw2)
 | 
			
		||||
			_ = pr.Close()
 | 
			
		||||
			_ = pw2.Close()
 | 
			
		||||
			wg.Done()
 | 
			
		||||
		}()
 | 
			
		||||
	} else {
 | 
			
		||||
		wg.Add(1)
 | 
			
		||||
		go func() {
 | 
			
		||||
			buf := SanitizeReader(pr)
 | 
			
		||||
			_, err = io.Copy(output, buf)
 | 
			
		||||
			_ = pr.Close()
 | 
			
		||||
			wg.Done()
 | 
			
		||||
		}()
 | 
			
		||||
	}
 | 
			
		||||
		} else {
 | 
			
		||||
			_, err = io.Copy(pw2, pr)
 | 
			
		||||
		}
 | 
			
		||||
		_ = pr.Close()
 | 
			
		||||
		_ = pw2.Close()
 | 
			
		||||
		wg.Done()
 | 
			
		||||
	}()
 | 
			
		||||
 | 
			
		||||
	if err1 := renderer.Render(ctx, input, pw); err1 != nil {
 | 
			
		||||
		return err1
 | 
			
		||||
	}
 | 
			
		||||
 
 | 
			
		||||
@@ -19,8 +19,9 @@ import (
 | 
			
		||||
// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow
 | 
			
		||||
// any modification to the underlying policies once it's been created.
 | 
			
		||||
type Sanitizer struct {
 | 
			
		||||
	policy *bluemonday.Policy
 | 
			
		||||
	init   sync.Once
 | 
			
		||||
	defaultPolicy    *bluemonday.Policy
 | 
			
		||||
	rendererPolicies map[string]*bluemonday.Policy
 | 
			
		||||
	init             sync.Once
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var sanitizer = &Sanitizer{}
 | 
			
		||||
@@ -30,47 +31,57 @@ var sanitizer = &Sanitizer{}
 | 
			
		||||
// entire application lifecycle.
 | 
			
		||||
func NewSanitizer() {
 | 
			
		||||
	sanitizer.init.Do(func() {
 | 
			
		||||
		ReplaceSanitizer()
 | 
			
		||||
		InitializeSanitizer()
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ReplaceSanitizer replaces the current sanitizer to account for changes in settings
 | 
			
		||||
func ReplaceSanitizer() {
 | 
			
		||||
	sanitizer.policy = bluemonday.UGCPolicy()
 | 
			
		||||
// InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings
 | 
			
		||||
func InitializeSanitizer() {
 | 
			
		||||
	sanitizer.rendererPolicies = map[string]*bluemonday.Policy{}
 | 
			
		||||
	sanitizer.defaultPolicy = createDefaultPolicy()
 | 
			
		||||
 | 
			
		||||
	for name, renderer := range renderers {
 | 
			
		||||
		sanitizerRules := renderer.SanitizerRules()
 | 
			
		||||
		if len(sanitizerRules) > 0 {
 | 
			
		||||
			policy := createDefaultPolicy()
 | 
			
		||||
			addSanitizerRules(policy, sanitizerRules)
 | 
			
		||||
			sanitizer.rendererPolicies[name] = policy
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func createDefaultPolicy() *bluemonday.Policy {
 | 
			
		||||
	policy := bluemonday.UGCPolicy()
 | 
			
		||||
	// For Chroma markdown plugin
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre")
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code")
 | 
			
		||||
	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre")
 | 
			
		||||
	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code")
 | 
			
		||||
 | 
			
		||||
	// Checkboxes
 | 
			
		||||
	sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
 | 
			
		||||
	sanitizer.policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
 | 
			
		||||
	policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
 | 
			
		||||
	policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
 | 
			
		||||
 | 
			
		||||
	// Custom URL-Schemes
 | 
			
		||||
	if len(setting.Markdown.CustomURLSchemes) > 0 {
 | 
			
		||||
		sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
 | 
			
		||||
		policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Allow classes for anchors
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a")
 | 
			
		||||
	policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a")
 | 
			
		||||
 | 
			
		||||
	// Allow classes for task lists
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
 | 
			
		||||
	policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
 | 
			
		||||
 | 
			
		||||
	// Allow icons
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
 | 
			
		||||
	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
 | 
			
		||||
 | 
			
		||||
	// Allow unlabelled labels
 | 
			
		||||
	sanitizer.policy.AllowNoAttrs().OnElements("label")
 | 
			
		||||
	policy.AllowNoAttrs().OnElements("label")
 | 
			
		||||
 | 
			
		||||
	// Allow classes for emojis
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
 | 
			
		||||
	policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
 | 
			
		||||
 | 
			
		||||
	// Allow icons, emojis, chroma syntax and keyword markup on span
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
 | 
			
		||||
 | 
			
		||||
	// Allow data tables
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table")
 | 
			
		||||
	sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td")
 | 
			
		||||
	policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
 | 
			
		||||
 | 
			
		||||
	// Allow generally safe attributes
 | 
			
		||||
	generalSafeAttrs := []string{"abbr", "accept", "accept-charset",
 | 
			
		||||
@@ -101,18 +112,29 @@ func ReplaceSanitizer() {
 | 
			
		||||
		"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr",
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	sanitizer.policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
 | 
			
		||||
	policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
 | 
			
		||||
 | 
			
		||||
	sanitizer.policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
 | 
			
		||||
	policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
 | 
			
		||||
 | 
			
		||||
	// FIXME: Need to handle longdesc in img but there is no easy way to do it
 | 
			
		||||
 | 
			
		||||
	// Custom keyword markup
 | 
			
		||||
	for _, rule := range setting.ExternalSanitizerRules {
 | 
			
		||||
		if rule.Regexp != nil {
 | 
			
		||||
			sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
 | 
			
		||||
		} else {
 | 
			
		||||
			sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
 | 
			
		||||
	addSanitizerRules(policy, setting.ExternalSanitizerRules)
 | 
			
		||||
 | 
			
		||||
	return policy
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) {
 | 
			
		||||
	for _, rule := range rules {
 | 
			
		||||
		if rule.AllowDataURIImages {
 | 
			
		||||
			policy.AllowDataURIImages()
 | 
			
		||||
		}
 | 
			
		||||
		if rule.Element != "" {
 | 
			
		||||
			if rule.Regexp != nil {
 | 
			
		||||
				policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
 | 
			
		||||
			} else {
 | 
			
		||||
				policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
@@ -120,11 +142,15 @@ func ReplaceSanitizer() {
 | 
			
		||||
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
 | 
			
		||||
func Sanitize(s string) string {
 | 
			
		||||
	NewSanitizer()
 | 
			
		||||
	return sanitizer.policy.Sanitize(s)
 | 
			
		||||
	return sanitizer.defaultPolicy.Sanitize(s)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SanitizeReader sanitizes a Reader
 | 
			
		||||
func SanitizeReader(r io.Reader) *bytes.Buffer {
 | 
			
		||||
func SanitizeReader(r io.Reader, renderer string) *bytes.Buffer {
 | 
			
		||||
	NewSanitizer()
 | 
			
		||||
	return sanitizer.policy.SanitizeReader(r)
 | 
			
		||||
	policy, exist := sanitizer.rendererPolicies[renderer]
 | 
			
		||||
	if !exist {
 | 
			
		||||
		policy = sanitizer.defaultPolicy
 | 
			
		||||
	}
 | 
			
		||||
	return policy.SanitizeReader(r)
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -15,31 +15,34 @@ import (
 | 
			
		||||
 | 
			
		||||
// ExternalMarkupRenderers represents the external markup renderers
 | 
			
		||||
var (
 | 
			
		||||
	ExternalMarkupRenderers []MarkupRenderer
 | 
			
		||||
	ExternalMarkupRenderers []*MarkupRenderer
 | 
			
		||||
	ExternalSanitizerRules  []MarkupSanitizerRule
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// MarkupRenderer defines the external parser configured in ini
 | 
			
		||||
type MarkupRenderer struct {
 | 
			
		||||
	Enabled         bool
 | 
			
		||||
	MarkupName      string
 | 
			
		||||
	Command         string
 | 
			
		||||
	FileExtensions  []string
 | 
			
		||||
	IsInputFile     bool
 | 
			
		||||
	NeedPostProcess bool
 | 
			
		||||
	Enabled              bool
 | 
			
		||||
	MarkupName           string
 | 
			
		||||
	Command              string
 | 
			
		||||
	FileExtensions       []string
 | 
			
		||||
	IsInputFile          bool
 | 
			
		||||
	NeedPostProcess      bool
 | 
			
		||||
	MarkupSanitizerRules []MarkupSanitizerRule
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// MarkupSanitizerRule defines the policy for whitelisting attributes on
 | 
			
		||||
// certain elements.
 | 
			
		||||
type MarkupSanitizerRule struct {
 | 
			
		||||
	Element   string
 | 
			
		||||
	AllowAttr string
 | 
			
		||||
	Regexp    *regexp.Regexp
 | 
			
		||||
	Element            string
 | 
			
		||||
	AllowAttr          string
 | 
			
		||||
	Regexp             *regexp.Regexp
 | 
			
		||||
	AllowDataURIImages bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newMarkup() {
 | 
			
		||||
	ExternalMarkupRenderers = make([]MarkupRenderer, 0, 10)
 | 
			
		||||
	ExternalMarkupRenderers = make([]*MarkupRenderer, 0, 10)
 | 
			
		||||
	ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, 10)
 | 
			
		||||
 | 
			
		||||
	for _, sec := range Cfg.Section("markup").ChildSections() {
 | 
			
		||||
		name := strings.TrimPrefix(sec.Name(), "markup.")
 | 
			
		||||
		if name == "" {
 | 
			
		||||
@@ -56,50 +59,62 @@ func newMarkup() {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newMarkupSanitizer(name string, sec *ini.Section) {
 | 
			
		||||
	haveElement := sec.HasKey("ELEMENT")
 | 
			
		||||
	haveAttr := sec.HasKey("ALLOW_ATTR")
 | 
			
		||||
	haveRegexp := sec.HasKey("REGEXP")
 | 
			
		||||
	rule, ok := createMarkupSanitizerRule(name, sec)
 | 
			
		||||
	if ok {
 | 
			
		||||
		if strings.HasPrefix(name, "sanitizer.") {
 | 
			
		||||
			names := strings.SplitN(strings.TrimPrefix(name, "sanitizer."), ".", 2)
 | 
			
		||||
			name = names[0]
 | 
			
		||||
		}
 | 
			
		||||
		for _, renderer := range ExternalMarkupRenderers {
 | 
			
		||||
			if name == renderer.MarkupName {
 | 
			
		||||
				renderer.MarkupSanitizerRules = append(renderer.MarkupSanitizerRules, rule)
 | 
			
		||||
				return
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
	if !haveElement && !haveAttr && !haveRegexp {
 | 
			
		||||
		log.Warn("Skipping empty section: markup.%s.", name)
 | 
			
		||||
		return
 | 
			
		||||
func createMarkupSanitizerRule(name string, sec *ini.Section) (MarkupSanitizerRule, bool) {
 | 
			
		||||
	var rule MarkupSanitizerRule
 | 
			
		||||
 | 
			
		||||
	ok := false
 | 
			
		||||
	if sec.HasKey("ALLOW_DATA_URI_IMAGES") {
 | 
			
		||||
		rule.AllowDataURIImages = sec.Key("ALLOW_DATA_URI_IMAGES").MustBool(false)
 | 
			
		||||
		ok = true
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if !haveElement || !haveAttr || !haveRegexp {
 | 
			
		||||
		log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	if sec.HasKey("ELEMENT") || sec.HasKey("ALLOW_ATTR") {
 | 
			
		||||
		rule.Element = sec.Key("ELEMENT").Value()
 | 
			
		||||
		rule.AllowAttr = sec.Key("ALLOW_ATTR").Value()
 | 
			
		||||
 | 
			
		||||
	elements := sec.Key("ELEMENT").Value()
 | 
			
		||||
	allowAttrs := sec.Key("ALLOW_ATTR").Value()
 | 
			
		||||
	regexpStr := sec.Key("REGEXP").Value()
 | 
			
		||||
 | 
			
		||||
	if regexpStr == "" {
 | 
			
		||||
		rule := MarkupSanitizerRule{
 | 
			
		||||
			Element:   elements,
 | 
			
		||||
			AllowAttr: allowAttrs,
 | 
			
		||||
			Regexp:    nil,
 | 
			
		||||
		if rule.Element == "" || rule.AllowAttr == "" {
 | 
			
		||||
			log.Error("Missing required values from markup.%s. Must have ELEMENT and ALLOW_ATTR defined!", name)
 | 
			
		||||
			return rule, false
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
 | 
			
		||||
		return
 | 
			
		||||
		regexpStr := sec.Key("REGEXP").Value()
 | 
			
		||||
		if regexpStr != "" {
 | 
			
		||||
			// Validate when parsing the config that this is a valid regular
 | 
			
		||||
			// expression. Then we can use regexp.MustCompile(...) later.
 | 
			
		||||
			compiled, err := regexp.Compile(regexpStr)
 | 
			
		||||
			if err != nil {
 | 
			
		||||
				log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err)
 | 
			
		||||
				return rule, false
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			rule.Regexp = compiled
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		ok = true
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Validate when parsing the config that this is a valid regular
 | 
			
		||||
	// expression. Then we can use regexp.MustCompile(...) later.
 | 
			
		||||
	compiled, err := regexp.Compile(regexpStr)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		log.Error("In module.%s: REGEXP (%s) at definition %d failed to compile: %v", regexpStr, name, err)
 | 
			
		||||
		return
 | 
			
		||||
	if !ok {
 | 
			
		||||
		log.Error("Missing required keys from markup.%s. Must have ELEMENT and ALLOW_ATTR or ALLOW_DATA_URI_IMAGES defined!", name)
 | 
			
		||||
		return rule, false
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	rule := MarkupSanitizerRule{
 | 
			
		||||
		Element:   elements,
 | 
			
		||||
		AllowAttr: allowAttrs,
 | 
			
		||||
		Regexp:    compiled,
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
 | 
			
		||||
	return rule, true
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newMarkupRenderer(name string, sec *ini.Section) {
 | 
			
		||||
@@ -126,7 +141,7 @@ func newMarkupRenderer(name string, sec *ini.Section) {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ExternalMarkupRenderers = append(ExternalMarkupRenderers, MarkupRenderer{
 | 
			
		||||
	ExternalMarkupRenderers = append(ExternalMarkupRenderers, &MarkupRenderer{
 | 
			
		||||
		Enabled:         sec.Key("ENABLED").MustBool(false),
 | 
			
		||||
		MarkupName:      name,
 | 
			
		||||
		FileExtensions:  exts,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user