mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			84 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			84 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
package chardet
 | 
						|
 | 
						|
type recognizer interface {
 | 
						|
	Match(*recognizerInput) recognizerOutput
 | 
						|
}
 | 
						|
 | 
						|
// recognizerOutput is the value returned by recognizer.Match. It is a distinct
// defined type whose underlying type is Result (declared elsewhere in this
// package).
type recognizerOutput Result
 | 
						|
 | 
						|
// recognizerInput bundles the data handed to recognizer.Match. Values are
// normally built with newRecognizerInput, which derives every field from the
// caller's raw bytes.
type recognizerInput struct {
	// raw is the byte slice exactly as supplied by the caller.
	raw []byte
	// input is the buffer produced by mayStripInput: either raw with markup
	// removed or a plain copy of raw's prefix, capped at 8192 bytes.
	input []byte
	// tagStripped reports whether input is the markup-stripped form.
	tagStripped bool
	// byteStats is a 256-entry histogram of the byte values found in input.
	byteStats []int
	// hasC1Bytes reports whether input contains any byte in 0x80-0x9F.
	hasC1Bytes bool
}
 | 
						|
 | 
						|
func newRecognizerInput(raw []byte, stripTag bool) *recognizerInput {
 | 
						|
	input, stripped := mayStripInput(raw, stripTag)
 | 
						|
	byteStats := computeByteStats(input)
 | 
						|
	return &recognizerInput{
 | 
						|
		raw:         raw,
 | 
						|
		input:       input,
 | 
						|
		tagStripped: stripped,
 | 
						|
		byteStats:   byteStats,
 | 
						|
		hasC1Bytes:  computeHasC1Bytes(byteStats),
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// mayStripInput returns the bytes that should be analysed for raw, limited to
// 8192 bytes, and reports whether markup was removed from them.
//
// When stripTag is true, everything from a '<' up to and including the next
// '>' is dropped. The stripped result is discarded in favour of a plain copy
// of raw's first 8192 bytes (with stripped == false) when the input does not
// look like real markup:
//   - fewer than five '<' were seen,
//   - more than one in five of them opened while a tag was already open, or
//   - stripping left fewer than 100 bytes out of more than 600.
//
// When stripTag is false the plain copy is always returned. The returned
// slice never aliases raw.
func mayStripInput(raw []byte, stripTag bool) (out []byte, stripped bool) {
	const inputBufferSize = 8192

	// openTags counts every '<'; badTags counts those seen while a previous
	// tag was still open.
	var badTags, openTags int

	if stripTag {
		out = make([]byte, 0, inputBufferSize)
		inMarkup := false
		for _, c := range raw {
			if c == '<' {
				if inMarkup {
					badTags++
				}
				inMarkup = true
				openTags++
			}
			if !inMarkup {
				out = append(out, c)
				if len(out) >= inputBufferSize {
					break
				}
			}
			// Checked after the append so the closing '>' itself is dropped.
			if c == '>' {
				inMarkup = false
			}
		}
		stripped = true
	}

	// With stripTag == false openTags is zero, so this branch always runs and
	// produces the plain copy.
	if openTags < 5 || openTags/5 < badTags || (len(out) < 100 && len(raw) > 600) {
		limit := len(raw)
		if limit > inputBufferSize {
			limit = inputBufferSize
		}
		out = make([]byte, limit)
		copy(out, raw[:limit])
		stripped = false
	}
	return out, stripped
}
 | 
						|
 | 
						|
// computeByteStats returns a 256-entry histogram of input: element i holds the
// number of times byte value i occurs. A nil or empty input yields an all-zero
// histogram.
func computeByteStats(input []byte) []int {
	stats := make([]int, 256)
	for _, b := range input {
		stats[b]++
	}
	return stats
}
 | 
						|
 | 
						|
// computeHasC1Bytes reports whether the byte histogram byteStats records at
// least one occurrence of a byte value in the range 0x80-0x9F inclusive.
//
// byteStats is indexed by byte value, as produced by computeByteStats. A
// histogram too short to cover the whole range is scanned only as far as it
// goes, so a nil or truncated slice yields false instead of panicking.
func computeHasC1Bytes(byteStats []int) bool {
	const (
		c1First = 0x80
		c1Last  = 0x9F
	)
	end := c1Last + 1
	if len(byteStats) < end {
		end = len(byteStats)
	}
	for b := c1First; b < end; b++ {
		if byteStats[b] > 0 {
			return true
		}
	}
	return false
}
 |