mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	Markdown: Sanitizer Configuration (#9075)
* Support custom sanitization policy Allowing the gitea administrator to configure sanitization policy allows them to couple external renders and custom templates to support more markup. In particular, the `pandoc` renderer allows generating KaTeX annotations, wrapping them in `<span>` elements with class `math` and either `inline` or `display` (depending on whether or not inline or block mode was requested). This iteration gives the administrator whitelisting powers; carefully crafted regexes will thus let through only the desired attributes necessary to support their custom markup. Resolves: #9054 Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Document new sanitization configuration - Adds basic documentation to app.ini.sample, - Adds an example to the Configuration Cheat Sheet, and - Adds extended information to External Renderers section. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Drop extraneous length check in newMarkupSanitizer(...) Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Fix plural ELEMENT and ALLOW_ATTR in docs These were left over from their initial names. Make them singular to conform with the current expectations. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
This commit is contained in:
		
				
					committed by
					
						
						techknowlogick
					
				
			
			
				
	
			
			
			
						parent
						
							cecc31951c
						
					
				
				
					commit
					ee7df7ba8c
				
			@@ -877,6 +877,12 @@ SHOW_FOOTER_VERSION = true
 | 
				
			|||||||
; Show template execution time in the footer
 | 
					; Show template execution time in the footer
 | 
				
			||||||
SHOW_FOOTER_TEMPLATE_LOAD_TIME = true
 | 
					SHOW_FOOTER_TEMPLATE_LOAD_TIME = true
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[markup.sanitizer]
 | 
				
			||||||
 | 
					; The following keys can be used multiple times to define sanitization policy rules.
 | 
				
			||||||
 | 
					;ELEMENT = span
 | 
				
			||||||
 | 
					;ALLOW_ATTR = class
 | 
				
			||||||
 | 
					;REGEXP = ^(info|warning|error)$
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[markup.asciidoc]
 | 
					[markup.asciidoc]
 | 
				
			||||||
ENABLED = false
 | 
					ENABLED = false
 | 
				
			||||||
; List of file extensions that should be rendered by an external command
 | 
					; List of file extensions that should be rendered by an external command
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -578,6 +578,24 @@ Two special environment variables are passed to the render command:
 | 
				
			|||||||
- `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links.
 | 
					- `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links.
 | 
				
			||||||
- `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths.
 | 
					- `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```ini
 | 
				
			||||||
 | 
					[markup.sanitizer]
 | 
				
			||||||
 | 
					; Pandoc renders TeX segments as <span>s with the "math" class, optionally
 | 
				
			||||||
 | 
					; with "inline" or "display" classes depending on context.
 | 
				
			||||||
 | 
					ELEMENT = span
 | 
				
			||||||
 | 
					ALLOW_ATTR = class
 | 
				
			||||||
 | 
					REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 - `ELEMENT`: The element this policy applies to. Must be non-empty.
 | 
				
			||||||
 | 
					 - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty.
 | 
				
			||||||
 | 
					 - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
## Time (`time`)
 | 
					## Time (`time`)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- `FORMAT`: Time format to display on UI, e.g. RFC1123 or 2006-01-02 15:04:05
 | 
					- `FORMAT`: Time format to display on UI, e.g. RFC1123 or 2006-01-02 15:04:05
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -68,4 +68,22 @@ RENDER_COMMAND = rst2html.py
 | 
				
			|||||||
IS_INPUT_FILE = false
 | 
					IS_INPUT_FILE = false
 | 
				
			||||||
```
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If your external markup relies on additional classes and attributes on the generated HTML elements, you might need to enable custom sanitizer policies. Gitea uses the [`bluemonday`](https://godoc.org/github.com/microcosm-cc/bluemonday) package as our HTML sanitizer. The example below will support [KaTeX](https://katex.org/) output from [`pandoc`](https://pandoc.org/).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					```ini
 | 
				
			||||||
 | 
					[markup.sanitizer]
 | 
				
			||||||
 | 
					; Pandoc renders TeX segments as <span>s with the "math" class, optionally
 | 
				
			||||||
 | 
					; with "inline" or "display" classes depending on context.
 | 
				
			||||||
 | 
					ELEMENT = span
 | 
				
			||||||
 | 
					ALLOW_ATTR = class
 | 
				
			||||||
 | 
					REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[markup.markdown]
 | 
				
			||||||
 | 
					ENABLED         = true
 | 
				
			||||||
 | 
					FILE_EXTENSIONS = .md,.markdown
 | 
				
			||||||
 | 
					RENDER_COMMAND  = pandoc -f markdown -t html --katex
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You may redefine `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each time all three are defined is a single policy entry. All three must be defined, but `REGEXP` may be blank to allow unconditional whitelisting of that attribute.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Once your configuration changes have been made, restart Gitea to have changes take effect.
 | 
					Once your configuration changes have been made, restart Gitea to have changes take effect.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -50,6 +50,15 @@ func ReplaceSanitizer() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	// Allow <kbd> tags for keyboard shortcut styling
 | 
						// Allow <kbd> tags for keyboard shortcut styling
 | 
				
			||||||
	sanitizer.policy.AllowElements("kbd")
 | 
						sanitizer.policy.AllowElements("kbd")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// Custom keyword markup
 | 
				
			||||||
 | 
						for _, rule := range setting.ExternalSanitizerRules {
 | 
				
			||||||
 | 
							if rule.Regexp != nil {
 | 
				
			||||||
 | 
								sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
 | 
					// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -9,11 +9,14 @@ import (
 | 
				
			|||||||
	"strings"
 | 
						"strings"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	"code.gitea.io/gitea/modules/log"
 | 
						"code.gitea.io/gitea/modules/log"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						"gopkg.in/ini.v1"
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ExternalMarkupParsers represents the external markup parsers
 | 
					// ExternalMarkupParsers represents the external markup parsers
 | 
				
			||||||
var (
 | 
					var (
 | 
				
			||||||
	ExternalMarkupParsers []MarkupParser
 | 
						ExternalMarkupParsers  []MarkupParser
 | 
				
			||||||
 | 
						ExternalSanitizerRules []MarkupSanitizerRule
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// MarkupParser defines the external parser configured in ini
 | 
					// MarkupParser defines the external parser configured in ini
 | 
				
			||||||
@@ -25,8 +28,15 @@ type MarkupParser struct {
 | 
				
			|||||||
	IsInputFile    bool
 | 
						IsInputFile    bool
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// MarkupSanitizerRule defines the policy for whitelisting attributes on
 | 
				
			||||||
 | 
					// certain elements.
 | 
				
			||||||
 | 
					type MarkupSanitizerRule struct {
 | 
				
			||||||
 | 
						Element   string
 | 
				
			||||||
 | 
						AllowAttr string
 | 
				
			||||||
 | 
						Regexp    *regexp.Regexp
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func newMarkup() {
 | 
					func newMarkup() {
 | 
				
			||||||
	extensionReg := regexp.MustCompile(`\.\w`)
 | 
					 | 
				
			||||||
	for _, sec := range Cfg.Section("markup").ChildSections() {
 | 
						for _, sec := range Cfg.Section("markup").ChildSections() {
 | 
				
			||||||
		name := strings.TrimPrefix(sec.Name(), "markup.")
 | 
							name := strings.TrimPrefix(sec.Name(), "markup.")
 | 
				
			||||||
		if name == "" {
 | 
							if name == "" {
 | 
				
			||||||
@@ -34,33 +44,98 @@ func newMarkup() {
 | 
				
			|||||||
			continue
 | 
								continue
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		extensions := sec.Key("FILE_EXTENSIONS").Strings(",")
 | 
							if name == "sanitizer" {
 | 
				
			||||||
		var exts = make([]string, 0, len(extensions))
 | 
								newMarkupSanitizer(name, sec)
 | 
				
			||||||
		for _, extension := range extensions {
 | 
							} else {
 | 
				
			||||||
			if !extensionReg.MatchString(extension) {
 | 
								newMarkupRenderer(name, sec)
 | 
				
			||||||
				log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored")
 | 
					 | 
				
			||||||
			} else {
 | 
					 | 
				
			||||||
				exts = append(exts, extension)
 | 
					 | 
				
			||||||
			}
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					 | 
				
			||||||
		if len(exts) == 0 {
 | 
					 | 
				
			||||||
			log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored")
 | 
					 | 
				
			||||||
			continue
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		command := sec.Key("RENDER_COMMAND").MustString("")
 | 
					 | 
				
			||||||
		if command == "" {
 | 
					 | 
				
			||||||
			log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored")
 | 
					 | 
				
			||||||
			continue
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{
 | 
					 | 
				
			||||||
			Enabled:        sec.Key("ENABLED").MustBool(false),
 | 
					 | 
				
			||||||
			MarkupName:     name,
 | 
					 | 
				
			||||||
			FileExtensions: exts,
 | 
					 | 
				
			||||||
			Command:        command,
 | 
					 | 
				
			||||||
			IsInputFile:    sec.Key("IS_INPUT_FILE").MustBool(false),
 | 
					 | 
				
			||||||
		})
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func newMarkupSanitizer(name string, sec *ini.Section) {
 | 
				
			||||||
 | 
						haveElement := sec.HasKey("ELEMENT")
 | 
				
			||||||
 | 
						haveAttr := sec.HasKey("ALLOW_ATTR")
 | 
				
			||||||
 | 
						haveRegexp := sec.HasKey("REGEXP")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if !haveElement && !haveAttr && !haveRegexp {
 | 
				
			||||||
 | 
							log.Warn("Skipping empty section: markup.%s.", name)
 | 
				
			||||||
 | 
							return
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if !haveElement || !haveAttr || !haveRegexp {
 | 
				
			||||||
 | 
							log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name)
 | 
				
			||||||
 | 
							return
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						elements := sec.Key("ELEMENT").ValueWithShadows()
 | 
				
			||||||
 | 
						allowAttrs := sec.Key("ALLOW_ATTR").ValueWithShadows()
 | 
				
			||||||
 | 
						regexps := sec.Key("REGEXP").ValueWithShadows()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if len(elements) != len(allowAttrs) ||
 | 
				
			||||||
 | 
							len(elements) != len(regexps) {
 | 
				
			||||||
 | 
							log.Error("All three keys in markup.%s (ELEMENT, ALLOW_ATTR, REGEXP) must be defined the same number of times! Got %d, %d, and %d respectively.", name, len(elements), len(allowAttrs), len(regexps))
 | 
				
			||||||
 | 
							return
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, len(elements))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						for index, pattern := range regexps {
 | 
				
			||||||
 | 
							if pattern == "" {
 | 
				
			||||||
 | 
								rule := MarkupSanitizerRule{
 | 
				
			||||||
 | 
									Element:   elements[index],
 | 
				
			||||||
 | 
									AllowAttr: allowAttrs[index],
 | 
				
			||||||
 | 
									Regexp:    nil,
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
 | 
				
			||||||
 | 
								continue
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							// Validate when parsing the config that this is a valid regular
 | 
				
			||||||
 | 
							// expression, and keep the compiled result so it can be reused later.
 | 
				
			||||||
 | 
							compiled, err := regexp.Compile(pattern)
 | 
				
			||||||
 | 
							if err != nil {
 | 
				
			||||||
 | 
								log.Error("In module.%s: REGEXP at definition %d failed to compile: %v", name, index+1, err)
 | 
				
			||||||
 | 
								continue
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							rule := MarkupSanitizerRule{
 | 
				
			||||||
 | 
								Element:   elements[index],
 | 
				
			||||||
 | 
								AllowAttr: allowAttrs[index],
 | 
				
			||||||
 | 
								Regexp:    compiled,
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func newMarkupRenderer(name string, sec *ini.Section) {
 | 
				
			||||||
 | 
						extensionReg := regexp.MustCompile(`\.\w`)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						extensions := sec.Key("FILE_EXTENSIONS").Strings(",")
 | 
				
			||||||
 | 
						var exts = make([]string, 0, len(extensions))
 | 
				
			||||||
 | 
						for _, extension := range extensions {
 | 
				
			||||||
 | 
							if !extensionReg.MatchString(extension) {
 | 
				
			||||||
 | 
								log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored")
 | 
				
			||||||
 | 
							} else {
 | 
				
			||||||
 | 
								exts = append(exts, extension)
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if len(exts) == 0 {
 | 
				
			||||||
 | 
							log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored")
 | 
				
			||||||
 | 
							return
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						command := sec.Key("RENDER_COMMAND").MustString("")
 | 
				
			||||||
 | 
						if command == "" {
 | 
				
			||||||
 | 
							log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored")
 | 
				
			||||||
 | 
							return
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{
 | 
				
			||||||
 | 
							Enabled:        sec.Key("ENABLED").MustBool(false),
 | 
				
			||||||
 | 
							MarkupName:     name,
 | 
				
			||||||
 | 
							FileExtensions: exts,
 | 
				
			||||||
 | 
							Command:        command,
 | 
				
			||||||
 | 
							IsInputFile:    sec.Key("IS_INPUT_FILE").MustBool(false),
 | 
				
			||||||
 | 
						})
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user