Do not recognize text files as audio (#23355) (#23368)

Backport #23355

Close #17108

This PR uses a trick (removing the ID3 tag) to detect the content again
to to see whether the content is text type.

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
Co-authored-by: delvh <dev.lh@web.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
Giteabot
2023-03-08 03:03:05 -05:00
committed by GitHub
parent ed25e094ab
commit cf80f829b4
2 changed files with 14 additions and 0 deletions

View File

@@ -106,6 +106,16 @@ func DetectContentType(data []byte) SniffedType {
}
}
if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
ct2 := http.DetectContentType(data[3:])
if strings.HasPrefix(ct2, "text/") {
ct = ct2
}
}
return SniffedType{ct}
}