mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	"ogg" is just a "container" format for audio and video. Golang's `DetectContentType` only reports "application/ogg" for potential ogg files. Actually, it could do more guessing to determine whether the content is an audio file or a video file.
		
			
				
	
	
		
			144 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			144 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// Copyright 2021 The Gitea Authors. All rights reserved.
 | 
						|
// SPDX-License-Identifier: MIT
 | 
						|
 | 
						|
package typesniffer
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"fmt"
 | 
						|
	"io"
 | 
						|
	"net/http"
 | 
						|
	"regexp"
 | 
						|
	"strings"
 | 
						|
 | 
						|
	"code.gitea.io/gitea/modules/util"
 | 
						|
)
 | 
						|
 | 
						|
// sniffLen is the maximum number of leading bytes (1 KiB) that are examined
// when determining a content type.
const sniffLen = 1 << 10
 | 
						|
 | 
						|
// Well-known MIME types referenced by the sniffer and its callers.
const (
	// ApplicationOctetStream is the MIME type of binary files.
	ApplicationOctetStream = "application/octet-stream"
	// SvgMimeType is the MIME type of SVG images.
	SvgMimeType = "image/svg+xml"
)
 | 
						|
 | 
						|
// Regular expressions used to recognise SVG documents.
var (
	// svgComment matches an XML/HTML comment, including comments that span
	// several lines (non-greedy, so adjacent comments are matched separately).
	svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)

	// svgTagRegex matches content that starts (after optional whitespace and
	// any number of `<!DOCTYPE svg ...>` declarations) with an `<svg` tag.
	// Matching is case-insensitive.
	svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)

	// svgTagInXMLRegex is like svgTagRegex, but the content must begin with
	// an `<?xml ... ?>` declaration before the optional DOCTYPE and `<svg` tag.
	svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
)
 | 
						|
 | 
						|
// SniffedType describes the detected content type of a blob.
type SniffedType struct {
	// contentType is the detected content type string. GetMimeType treats
	// anything after the first ';' as parameters and strips it.
	contentType string
}
 | 
						|
 | 
						|
// IsText etects if content format is plain text.
 | 
						|
func (ct SniffedType) IsText() bool {
 | 
						|
	return strings.Contains(ct.contentType, "text/")
 | 
						|
}
 | 
						|
 | 
						|
// IsImage detects if data is an image format
 | 
						|
func (ct SniffedType) IsImage() bool {
 | 
						|
	return strings.Contains(ct.contentType, "image/")
 | 
						|
}
 | 
						|
 | 
						|
// IsSvgImage detects if data is an SVG image format
 | 
						|
func (ct SniffedType) IsSvgImage() bool {
 | 
						|
	return strings.Contains(ct.contentType, SvgMimeType)
 | 
						|
}
 | 
						|
 | 
						|
// IsPDF detects if data is a PDF format
 | 
						|
func (ct SniffedType) IsPDF() bool {
 | 
						|
	return strings.Contains(ct.contentType, "application/pdf")
 | 
						|
}
 | 
						|
 | 
						|
// IsVideo detects if data is an video format
 | 
						|
func (ct SniffedType) IsVideo() bool {
 | 
						|
	return strings.Contains(ct.contentType, "video/")
 | 
						|
}
 | 
						|
 | 
						|
// IsAudio detects if data is an video format
 | 
						|
func (ct SniffedType) IsAudio() bool {
 | 
						|
	return strings.Contains(ct.contentType, "audio/")
 | 
						|
}
 | 
						|
 | 
						|
// IsRepresentableAsText returns true if file content can be represented as
 | 
						|
// plain text or is empty.
 | 
						|
func (ct SniffedType) IsRepresentableAsText() bool {
 | 
						|
	return ct.IsText() || ct.IsSvgImage()
 | 
						|
}
 | 
						|
 | 
						|
// IsBrowsableBinaryType returns whether a non-text type can be displayed in a browser
 | 
						|
func (ct SniffedType) IsBrowsableBinaryType() bool {
 | 
						|
	return ct.IsImage() || ct.IsSvgImage() || ct.IsPDF() || ct.IsVideo() || ct.IsAudio()
 | 
						|
}
 | 
						|
 | 
						|
// GetMimeType returns the mime type
 | 
						|
func (ct SniffedType) GetMimeType() string {
 | 
						|
	return strings.SplitN(ct.contentType, ";", 2)[0]
 | 
						|
}
 | 
						|
 | 
						|
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
 | 
						|
func DetectContentType(data []byte) SniffedType {
 | 
						|
	if len(data) == 0 {
 | 
						|
		return SniffedType{"text/unknown"}
 | 
						|
	}
 | 
						|
 | 
						|
	ct := http.DetectContentType(data)
 | 
						|
 | 
						|
	if len(data) > sniffLen {
 | 
						|
		data = data[:sniffLen]
 | 
						|
	}
 | 
						|
 | 
						|
	// SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
 | 
						|
 | 
						|
	detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")
 | 
						|
	detectByXML := strings.Contains(ct, "text/xml")
 | 
						|
	if detectByHTML || detectByXML {
 | 
						|
		dataProcessed := svgComment.ReplaceAll(data, nil)
 | 
						|
		dataProcessed = bytes.TrimSpace(dataProcessed)
 | 
						|
		if detectByHTML && svgTagRegex.Match(dataProcessed) ||
 | 
						|
			detectByXML && svgTagInXMLRegex.Match(dataProcessed) {
 | 
						|
			ct = SvgMimeType
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
 | 
						|
		// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
 | 
						|
		// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
 | 
						|
		// This works especially because audio files contain many unprintable/invalid characters like `0x00`
 | 
						|
		ct2 := http.DetectContentType(data[3:])
 | 
						|
		if strings.HasPrefix(ct2, "text/") {
 | 
						|
			ct = ct2
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if ct == "application/ogg" {
 | 
						|
		dataHead := data
 | 
						|
		if len(dataHead) > 256 {
 | 
						|
			dataHead = dataHead[:256] // only need to do a quick check for the file header
 | 
						|
		}
 | 
						|
		if bytes.Contains(dataHead, []byte("theora")) || bytes.Contains(dataHead, []byte("dirac")) {
 | 
						|
			ct = "video/ogg" // ogg is only used for some video formats, and it's not popular
 | 
						|
		} else {
 | 
						|
			ct = "audio/ogg" // for most cases, it is used as an audio container
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return SniffedType{ct}
 | 
						|
}
 | 
						|
 | 
						|
// DetectContentTypeFromReader guesses the content type contained in the reader.
 | 
						|
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
 | 
						|
	buf := make([]byte, sniffLen)
 | 
						|
	n, err := util.ReadAtMost(r, buf)
 | 
						|
	if err != nil {
 | 
						|
		return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err)
 | 
						|
	}
 | 
						|
	buf = buf[:n]
 | 
						|
 | 
						|
	return DetectContentType(buf), nil
 | 
						|
}
 |