mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	Markdown: Sanitizer Configuration (#9075)
* Support custom sanitization policy Allowing the gitea administrator to configure sanitization policy allows them to couple external renders and custom templates to support more markup. In particular, the `pandoc` renderer allows generating KaTeX annotations, wrapping them in `<span>` elements with class `math` and either `inline` or `display` (depending on whether or not inline or block mode was requested). This iteration gives the administrator whitelisting powers; carefully crafted regexes will thus let through only the desired attributes necessary to support their custom markup. Resolves: #9054 Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Document new sanitization configuration - Adds basic documentation to app.ini.sample, - Adds an example to the Configuration Cheat Sheet, and - Adds extended information to External Renderers section. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Drop extraneous length check in newMarkupSanitizer(...) Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Fix plural ELEMENT and ALLOW_ATTR in docs These were left over from their initial names. Make them singular to conform with the current expectations. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
This commit is contained in:
		
				
					committed by
					
						
						techknowlogick
					
				
			
			
				
	
			
			
			
						parent
						
							cecc31951c
						
					
				
				
					commit
					ee7df7ba8c
				
			@@ -877,6 +877,12 @@ SHOW_FOOTER_VERSION = true
 | 
			
		||||
; Show template execution time in the footer
 | 
			
		||||
SHOW_FOOTER_TEMPLATE_LOAD_TIME = true
 | 
			
		||||
 | 
			
		||||
[markup.sanitizer]
 | 
			
		||||
; The following keys can be used multiple times to define sanitization policy rules.
 | 
			
		||||
;ELEMENT = span
 | 
			
		||||
;ALLOW_ATTR = class
 | 
			
		||||
;REGEXP = ^(info|warning|error)$
 | 
			
		||||
 | 
			
		||||
[markup.asciidoc]
 | 
			
		||||
ENABLED = false
 | 
			
		||||
; List of file extensions that should be rendered by an external command
 | 
			
		||||
 
 | 
			
		||||
@@ -578,6 +578,24 @@ Two special environment variables are passed to the render command:
 | 
			
		||||
- `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links.
 | 
			
		||||
- `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc.
 | 
			
		||||
 | 
			
		||||
```ini
 | 
			
		||||
[markup.sanitizer]
 | 
			
		||||
; Pandoc renders TeX segments as <span>s with the "math" class, optionally
 | 
			
		||||
; with "inline" or "display" classes depending on context.
 | 
			
		||||
ELEMENT = span
 | 
			
		||||
ALLOW_ATTR = class
 | 
			
		||||
REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
 - `ELEMENT`: The element this policy applies to. Must be non-empty.
 | 
			
		||||
 - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty.
 | 
			
		||||
 - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute.
 | 
			
		||||
 | 
			
		||||
You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each complete set of all three defines a single policy entry.
 | 
			
		||||
 | 
			
		||||
## Time (`time`)
 | 
			
		||||
 | 
			
		||||
- `FORMAT`: Time format to display on UI, e.g. RFC1123 or 2006-01-02 15:04:05
 | 
			
		||||
 
 | 
			
		||||
@@ -68,4 +68,22 @@ RENDER_COMMAND = rst2html.py
 | 
			
		||||
IS_INPUT_FILE = false
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
If your external markup relies on additional classes and attributes on the generated HTML elements, you might need to enable custom sanitizer policies. Gitea uses the [`bluemonday`](https://godoc.org/github.com/microcosm-cc/bluemonday) package as our HTML sanitizer. The example below will support [KaTeX](https://katex.org/) output from [`pandoc`](https://pandoc.org/).
 | 
			
		||||
 | 
			
		||||
```ini
 | 
			
		||||
[markup.sanitizer]
 | 
			
		||||
; Pandoc renders TeX segments as <span>s with the "math" class, optionally
 | 
			
		||||
; with "inline" or "display" classes depending on context.
 | 
			
		||||
ELEMENT = span
 | 
			
		||||
ALLOW_ATTR = class
 | 
			
		||||
REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
 | 
			
		||||
 | 
			
		||||
[markup.markdown]
 | 
			
		||||
ENABLED         = true
 | 
			
		||||
FILE_EXTENSIONS = .md,.markdown
 | 
			
		||||
RENDER_COMMAND  = pandoc -f markdown -t html --katex
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. All three must be defined, but `REGEXP` may be blank to allow unconditional whitelisting of that attribute.
 | 
			
		||||
 | 
			
		||||
Once your configuration changes have been made, restart Gitea to have changes take effect.
 | 
			
		||||
 
 | 
			
		||||
@@ -50,6 +50,15 @@ func ReplaceSanitizer() {
 | 
			
		||||
 | 
			
		||||
	// Allow <kbd> tags for keyboard shortcut styling
 | 
			
		||||
	sanitizer.policy.AllowElements("kbd")
 | 
			
		||||
 | 
			
		||||
	// Custom keyword markup
 | 
			
		||||
	for _, rule := range setting.ExternalSanitizerRules {
 | 
			
		||||
		if rule.Regexp != nil {
 | 
			
		||||
			sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
 | 
			
		||||
		} else {
 | 
			
		||||
			sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
 | 
			
		||||
 
 | 
			
		||||
@@ -9,11 +9,14 @@ import (
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/log"
 | 
			
		||||
 | 
			
		||||
	"gopkg.in/ini.v1"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// ExternalMarkupParsers and ExternalSanitizerRules hold the external markup
// parsers and the custom sanitizer rules configured in ini
 | 
			
		||||
var (
 | 
			
		||||
	ExternalMarkupParsers  []MarkupParser
 | 
			
		||||
	ExternalSanitizerRules []MarkupSanitizerRule
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// MarkupParser defines the external parser configured in ini
 | 
			
		||||
@@ -25,8 +28,15 @@ type MarkupParser struct {
 | 
			
		||||
	IsInputFile    bool
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// MarkupSanitizerRule defines the policy for whitelisting attributes on
 | 
			
		||||
// certain elements.
 | 
			
		||||
type MarkupSanitizerRule struct {
 | 
			
		||||
	Element   string
 | 
			
		||||
	AllowAttr string
 | 
			
		||||
	Regexp    *regexp.Regexp
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newMarkup() {
 | 
			
		||||
	extensionReg := regexp.MustCompile(`\.\w`)
 | 
			
		||||
	for _, sec := range Cfg.Section("markup").ChildSections() {
 | 
			
		||||
		name := strings.TrimPrefix(sec.Name(), "markup.")
 | 
			
		||||
		if name == "" {
 | 
			
		||||
@@ -34,6 +44,72 @@ func newMarkup() {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if name == "sanitizer" {
 | 
			
		||||
			newMarkupSanitizer(name, sec)
 | 
			
		||||
		} else {
 | 
			
		||||
			newMarkupRenderer(name, sec)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newMarkupSanitizer(name string, sec *ini.Section) {
 | 
			
		||||
	haveElement := sec.HasKey("ELEMENT")
 | 
			
		||||
	haveAttr := sec.HasKey("ALLOW_ATTR")
 | 
			
		||||
	haveRegexp := sec.HasKey("REGEXP")
 | 
			
		||||
 | 
			
		||||
	if !haveElement && !haveAttr && !haveRegexp {
 | 
			
		||||
		log.Warn("Skipping empty section: markup.%s.", name)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if !haveElement || !haveAttr || !haveRegexp {
 | 
			
		||||
		log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name)
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	elements := sec.Key("ELEMENT").ValueWithShadows()
 | 
			
		||||
	allowAttrs := sec.Key("ALLOW_ATTR").ValueWithShadows()
 | 
			
		||||
	regexps := sec.Key("REGEXP").ValueWithShadows()
 | 
			
		||||
 | 
			
		||||
	if len(elements) != len(allowAttrs) ||
 | 
			
		||||
		len(elements) != len(regexps) {
 | 
			
		||||
		log.Error("All three keys in markup.%s (ELEMENT, ALLOW_ATTR, REGEXP) must be defined the same number of times! Got %d, %d, and %d respectively.", name, len(elements), len(allowAttrs), len(regexps))
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, len(elements))
 | 
			
		||||
 | 
			
		||||
	for index, pattern := range regexps {
 | 
			
		||||
		if pattern == "" {
 | 
			
		||||
			rule := MarkupSanitizerRule{
 | 
			
		||||
				Element:   elements[index],
 | 
			
		||||
				AllowAttr: allowAttrs[index],
 | 
			
		||||
				Regexp:    nil,
 | 
			
		||||
			}
 | 
			
		||||
			ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Validate when parsing the config that this is a valid regular
 | 
			
		||||
		// expression. Then we can use regexp.MustCompile(...) later.
 | 
			
		||||
		compiled, err := regexp.Compile(pattern)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			log.Error("In module.%s: REGEXP at definition %d failed to compile: %v", name, index+1, err)
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		rule := MarkupSanitizerRule{
 | 
			
		||||
			Element:   elements[index],
 | 
			
		||||
			AllowAttr: allowAttrs[index],
 | 
			
		||||
			Regexp:    compiled,
 | 
			
		||||
		}
 | 
			
		||||
		ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newMarkupRenderer(name string, sec *ini.Section) {
 | 
			
		||||
	extensionReg := regexp.MustCompile(`\.\w`)
 | 
			
		||||
 | 
			
		||||
	extensions := sec.Key("FILE_EXTENSIONS").Strings(",")
 | 
			
		||||
	var exts = make([]string, 0, len(extensions))
 | 
			
		||||
	for _, extension := range extensions {
 | 
			
		||||
@@ -46,13 +122,13 @@ func newMarkup() {
 | 
			
		||||
 | 
			
		||||
	if len(exts) == 0 {
 | 
			
		||||
		log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored")
 | 
			
		||||
			continue
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	command := sec.Key("RENDER_COMMAND").MustString("")
 | 
			
		||||
	if command == "" {
 | 
			
		||||
		log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored")
 | 
			
		||||
			continue
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{
 | 
			
		||||
@@ -62,5 +138,4 @@ func newMarkup() {
 | 
			
		||||
		Command:        command,
 | 
			
		||||
		IsInputFile:    sec.Key("IS_INPUT_FILE").MustBool(false),
 | 
			
		||||
	})
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user