mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	Don't treat BOM escape sequence as hidden character. (#18909)
* Don't treat BOM escape sequence as hidden character. - The BOM sequence is a common, harmless escape sequence; it shouldn't be shown as a hidden character. - Follows GitHub's behavior. - Resolves #18837 Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
		@@ -63,6 +63,7 @@ func EscapeControlBytes(text []byte) (EscapeStatus, []byte) {
 | 
				
			|||||||
func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) {
 | 
					func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) {
 | 
				
			||||||
	buf := make([]byte, 4096)
 | 
						buf := make([]byte, 4096)
 | 
				
			||||||
	readStart := 0
 | 
						readStart := 0
 | 
				
			||||||
 | 
						runeCount := 0
 | 
				
			||||||
	var n int
 | 
						var n int
 | 
				
			||||||
	var writePos int
 | 
						var writePos int
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -79,6 +80,8 @@ readingloop:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
		for i < len(bs) {
 | 
							for i < len(bs) {
 | 
				
			||||||
			r, size := utf8.DecodeRune(bs[i:])
 | 
								r, size := utf8.DecodeRune(bs[i:])
 | 
				
			||||||
 | 
								runeCount++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			// Now handle the codepoints
 | 
								// Now handle the codepoints
 | 
				
			||||||
			switch {
 | 
								switch {
 | 
				
			||||||
			case r == utf8.RuneError:
 | 
								case r == utf8.RuneError:
 | 
				
			||||||
@@ -113,6 +116,8 @@ readingloop:
 | 
				
			|||||||
				lineHasRTLScript = false
 | 
									lineHasRTLScript = false
 | 
				
			||||||
				lineHasLTRScript = false
 | 
									lineHasLTRScript = false
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								case runeCount == 1 && r == 0xFEFF: // UTF BOM
 | 
				
			||||||
 | 
									// the first BOM is safe
 | 
				
			||||||
			case r == '\r' || r == '\t' || r == ' ':
 | 
								case r == '\r' || r == '\t' || r == ' ':
 | 
				
			||||||
				// These are acceptable control characters and space characters
 | 
									// These are acceptable control characters and space characters
 | 
				
			||||||
			case unicode.IsSpace(r):
 | 
								case unicode.IsSpace(r):
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -129,6 +129,14 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
 | 
				
			|||||||
			"\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n",
 | 
								"\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n",
 | 
				
			||||||
		status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true},
 | 
							status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true},
 | 
				
			||||||
	},
 | 
						},
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							// UTF-8/16/32 all use the same codepoint for BOM
 | 
				
			||||||
 | 
							// Gitea could read UTF-16/32 content and convert into UTF-8 internally then render it, so we only process UTF-8 internally
 | 
				
			||||||
 | 
							name:   "UTF BOM",
 | 
				
			||||||
 | 
							text:   "\xef\xbb\xbftest",
 | 
				
			||||||
 | 
							result: "\xef\xbb\xbftest",
 | 
				
			||||||
 | 
							status: EscapeStatus{HasLTRScript: true},
 | 
				
			||||||
 | 
						},
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func TestEscapeControlString(t *testing.T) {
 | 
					func TestEscapeControlString(t *testing.T) {
 | 
				
			||||||
@@ -163,10 +171,18 @@ func TestEscapeControlReader(t *testing.T) {
 | 
				
			|||||||
	// lets add some control characters to the tests
 | 
						// lets add some control characters to the tests
 | 
				
			||||||
	tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
 | 
						tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
 | 
				
			||||||
	copy(tests, escapeControlTests)
 | 
						copy(tests, escapeControlTests)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// if there is a BOM, we should keep the BOM
 | 
				
			||||||
 | 
						addPrefix := func(prefix, s string) string {
 | 
				
			||||||
 | 
							if strings.HasPrefix(s, "\xef\xbb\xbf") {
 | 
				
			||||||
 | 
								return s[:3] + prefix + s[3:]
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							return prefix + s
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	for _, test := range escapeControlTests {
 | 
						for _, test := range escapeControlTests {
 | 
				
			||||||
		test.name += " (+Control)"
 | 
							test.name += " (+Control)"
 | 
				
			||||||
		test.text = "\u001E" + test.text
 | 
							test.text = addPrefix("\u001E", test.text)
 | 
				
			||||||
		test.result = `<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">` + "\u001e" + `</span></span>` + test.result
 | 
							test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">`+"\u001e"+`</span></span>`, test.result)
 | 
				
			||||||
		test.status.Escaped = true
 | 
							test.status.Escaped = true
 | 
				
			||||||
		test.status.HasControls = true
 | 
							test.status.HasControls = true
 | 
				
			||||||
		tests = append(tests, test)
 | 
							tests = append(tests, test)
 | 
				
			||||||
@@ -174,8 +190,8 @@ func TestEscapeControlReader(t *testing.T) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	for _, test := range escapeControlTests {
 | 
						for _, test := range escapeControlTests {
 | 
				
			||||||
		test.name += " (+Mark)"
 | 
							test.name += " (+Mark)"
 | 
				
			||||||
		test.text = "\u0300" + test.text
 | 
							test.text = addPrefix("\u0300", test.text)
 | 
				
			||||||
		test.result = `<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">` + "\u0300" + `</span></span>` + test.result
 | 
							test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">`+"\u0300"+`</span></span>`, test.result)
 | 
				
			||||||
		test.status.Escaped = true
 | 
							test.status.Escaped = true
 | 
				
			||||||
		test.status.HasMarks = true
 | 
							test.status.HasMarks = true
 | 
				
			||||||
		tests = append(tests, test)
 | 
							tests = append(tests, test)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user