mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	Do not recognize text files as audio (#23355)
Close #17108 This PR uses a trick (removing the ID3 tag) to detect the content again to see whether the content is a text type. --------- Co-authored-by: delvh <dev.lh@web.de> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
		@@ -106,6 +106,16 @@ func DetectContentType(data []byte) SniffedType {
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
 | 
				
			||||||
 | 
							// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
 | 
				
			||||||
 | 
							// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
 | 
				
			||||||
 | 
							// This works especially because audio files contain many unprintable/invalid characters like `0x00`
 | 
				
			||||||
 | 
							ct2 := http.DetectContentType(data[3:])
 | 
				
			||||||
 | 
							if strings.HasPrefix(ct2, "text/") {
 | 
				
			||||||
 | 
								ct = ct2
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return SniffedType{ct}
 | 
						return SniffedType{ct}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -109,6 +109,10 @@ func TestIsAudio(t *testing.T) {
 | 
				
			|||||||
	mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
 | 
						mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
 | 
				
			||||||
	assert.True(t, DetectContentType(mp3).IsAudio())
 | 
						assert.True(t, DetectContentType(mp3).IsAudio())
 | 
				
			||||||
	assert.False(t, DetectContentType([]byte("plain text")).IsAudio())
 | 
						assert.False(t, DetectContentType([]byte("plain text")).IsAudio())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
 | 
				
			||||||
 | 
						assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText())          // test ID3 tag for plain text
 | 
				
			||||||
 | 
						assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func TestDetectContentTypeFromReader(t *testing.T) {
 | 
					func TestDetectContentTypeFromReader(t *testing.T) {
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user