Improve issue search (#2387)

* Improve issue indexer
* Fix new issue sqlite bug
* Different test indexer paths for each db
* Add integration indexer paths to make clean
vendor/golang.org/x/text/unicode/norm/composition.go (new file, 508 lines, generated, vendored)
@@ -0,0 +1,508 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import "unicode/utf8"

const (
	maxNonStarters = 30
	// The maximum number of characters needed for a buffer is
	// maxNonStarters + 1 for the starter + 1 for the GCJ
	maxBufferSize    = maxNonStarters + 2
	maxNFCExpansion  = 3  // NFC(0x1D160)
	maxNFKCExpansion = 18 // NFKC(0xFDFA)

	maxByteBufferSize = utf8.UTFMax * maxBufferSize // 128
)

// ssState is used for reporting the segment state after inserting a rune.
// It is returned by streamSafe.next.
type ssState int

const (
	// Indicates a rune was successfully added to the segment.
	ssSuccess ssState = iota
	// Indicates a rune starts a new segment and should not be added.
	ssStarter
	// Indicates a rune caused a segment overflow and a CGJ should be inserted.
	ssOverflow
)

// streamSafe implements the policy of when a CGJ should be inserted.
type streamSafe uint8

// first inserts the first rune of a segment. It is a faster version of next if
// it is known p represents the first rune in a segment.
func (ss *streamSafe) first(p Properties) {
	*ss = streamSafe(p.nTrailingNonStarters())
}

// insert returns a ssState value to indicate whether a rune represented by p
// can be inserted.
func (ss *streamSafe) next(p Properties) ssState {
	if *ss > maxNonStarters {
		panic("streamSafe was not reset")
	}
	n := p.nLeadingNonStarters()
	if *ss += streamSafe(n); *ss > maxNonStarters {
		*ss = 0
		return ssOverflow
	}
	// The Stream-Safe Text Processing prescribes that the counting can stop
	// as soon as a starter is encountered. However, there are some starters,
	// like Jamo V and T, that can combine with other runes, leaving their
	// successive non-starters appended to the previous, possibly causing an
	// overflow. We will therefore consider any rune with a non-zero nLead to
	// be a non-starter. Note that it always hold that if nLead > 0 then
	// nLead == nTrail.
	if n == 0 {
		*ss = streamSafe(p.nTrailingNonStarters())
		return ssStarter
	}
	return ssSuccess
}

// backwards is used for checking for overflow and segment starts
// when traversing a string backwards. Users do not need to call first
// for the first rune. The state of the streamSafe retains the count of
// the non-starters loaded.
func (ss *streamSafe) backwards(p Properties) ssState {
	if *ss > maxNonStarters {
		panic("streamSafe was not reset")
	}
	c := *ss + streamSafe(p.nTrailingNonStarters())
	if c > maxNonStarters {
		return ssOverflow
	}
	*ss = c
	if p.nLeadingNonStarters() == 0 {
		return ssStarter
	}
	return ssSuccess
}

func (ss streamSafe) isMax() bool {
	return ss == maxNonStarters
}

// GraphemeJoiner is inserted after maxNonStarters non-starter runes.
const GraphemeJoiner = "\u034F"

// reorderBuffer is used to normalize a single segment.  Characters inserted with
// insert are decomposed and reordered based on CCC. The compose method can
// be used to recombine characters.  Note that the byte buffer does not hold
// the UTF-8 characters in order.  Only the rune array is maintained in sorted
// order. flush writes the resulting segment to a byte array.
type reorderBuffer struct {
	rune  [maxBufferSize]Properties // Per character info.
	byte  [maxByteBufferSize]byte   // UTF-8 buffer. Referenced by runeInfo.pos.
	nbyte uint8                     // Number or bytes.
	ss    streamSafe                // For limiting length of non-starter sequence.
	nrune int                       // Number of runeInfos.
	f     formInfo

	src      input
	nsrc     int
	tmpBytes input

	out    []byte
	flushF func(*reorderBuffer) bool
}

func (rb *reorderBuffer) init(f Form, src []byte) {
	rb.f = *formTable[f]
	rb.src.setBytes(src)
	rb.nsrc = len(src)
	rb.ss = 0
}

func (rb *reorderBuffer) initString(f Form, src string) {
	rb.f = *formTable[f]
	rb.src.setString(src)
	rb.nsrc = len(src)
	rb.ss = 0
}

func (rb *reorderBuffer) setFlusher(out []byte, f func(*reorderBuffer) bool) {
	rb.out = out
	rb.flushF = f
}

// reset discards all characters from the buffer.
func (rb *reorderBuffer) reset() {
	rb.nrune = 0
	rb.nbyte = 0
}

func (rb *reorderBuffer) doFlush() bool {
	if rb.f.composing {
		rb.compose()
	}
	res := rb.flushF(rb)
	rb.reset()
	return res
}

// appendFlush appends the normalized segment to rb.out.
func appendFlush(rb *reorderBuffer) bool {
	for i := 0; i < rb.nrune; i++ {
		start := rb.rune[i].pos
		end := start + rb.rune[i].size
		rb.out = append(rb.out, rb.byte[start:end]...)
	}
	return true
}

// flush appends the normalized segment to out and resets rb.
func (rb *reorderBuffer) flush(out []byte) []byte {
	for i := 0; i < rb.nrune; i++ {
		start := rb.rune[i].pos
		end := start + rb.rune[i].size
		out = append(out, rb.byte[start:end]...)
	}
	rb.reset()
	return out
}

// flushCopy copies the normalized segment to buf and resets rb.
// It returns the number of bytes written to buf.
func (rb *reorderBuffer) flushCopy(buf []byte) int {
	p := 0
	for i := 0; i < rb.nrune; i++ {
		runep := rb.rune[i]
		p += copy(buf[p:], rb.byte[runep.pos:runep.pos+runep.size])
	}
	rb.reset()
	return p
}

// insertOrdered inserts a rune in the buffer, ordered by Canonical Combining Class.
// It returns false if the buffer is not large enough to hold the rune.
// It is used internally by insert and insertString only.
func (rb *reorderBuffer) insertOrdered(info Properties) {
	n := rb.nrune
	b := rb.rune[:]
	cc := info.ccc
	if cc > 0 {
		// Find insertion position + move elements to make room.
		for ; n > 0; n-- {
			if b[n-1].ccc <= cc {
				break
			}
			b[n] = b[n-1]
		}
	}
	rb.nrune += 1
	pos := uint8(rb.nbyte)
	rb.nbyte += utf8.UTFMax
	info.pos = pos
	b[n] = info
}

// insertErr is an error code returned by insert. Using this type instead
// of error improves performance up to 20% for many of the benchmarks.
type insertErr int

const (
	iSuccess insertErr = -iota
	iShortDst
	iShortSrc
)

// insertFlush inserts the given rune in the buffer ordered by CCC.
// If a decomposition with multiple segments are encountered, they leading
// ones are flushed.
// It returns a non-zero error code if the rune was not inserted.
func (rb *reorderBuffer) insertFlush(src input, i int, info Properties) insertErr {
	if rune := src.hangul(i); rune != 0 {
		rb.decomposeHangul(rune)
		return iSuccess
	}
	if info.hasDecomposition() {
		return rb.insertDecomposed(info.Decomposition())
	}
	rb.insertSingle(src, i, info)
	return iSuccess
}

// insertUnsafe inserts the given rune in the buffer ordered by CCC.
// It is assumed there is sufficient space to hold the runes. It is the
// responsibility of the caller to ensure this. This can be done by checking
// the state returned by the streamSafe type.
func (rb *reorderBuffer) insertUnsafe(src input, i int, info Properties) {
	if rune := src.hangul(i); rune != 0 {
		rb.decomposeHangul(rune)
	}
	if info.hasDecomposition() {
		// TODO: inline.
		rb.insertDecomposed(info.Decomposition())
	} else {
		rb.insertSingle(src, i, info)
	}
}

// insertDecomposed inserts an entry in to the reorderBuffer for each rune
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
// It flushes the buffer on each new segment start.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) insertErr {
	rb.tmpBytes.setBytes(dcomp)
	// As the streamSafe accounting already handles the counting for modifiers,
	// we don't have to call next. However, we do need to keep the accounting
	// intact when flushing the buffer.
	for i := 0; i < len(dcomp); {
		info := rb.f.info(rb.tmpBytes, i)
		if info.BoundaryBefore() && rb.nrune > 0 && !rb.doFlush() {
			return iShortDst
		}
		i += copy(rb.byte[rb.nbyte:], dcomp[i:i+int(info.size)])
		rb.insertOrdered(info)
	}
	return iSuccess
}

// insertSingle inserts an entry in the reorderBuffer for the rune at
// position i. info is the runeInfo for the rune at position i.
func (rb *reorderBuffer) insertSingle(src input, i int, info Properties) {
	src.copySlice(rb.byte[rb.nbyte:], i, i+int(info.size))
	rb.insertOrdered(info)
}

// insertCGJ inserts a Combining Grapheme Joiner (0x034f) into rb.
func (rb *reorderBuffer) insertCGJ() {
	rb.insertSingle(input{str: GraphemeJoiner}, 0, Properties{size: uint8(len(GraphemeJoiner))})
}

// appendRune inserts a rune at the end of the buffer. It is used for Hangul.
func (rb *reorderBuffer) appendRune(r rune) {
	bn := rb.nbyte
	sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
	rb.nbyte += utf8.UTFMax
	rb.rune[rb.nrune] = Properties{pos: bn, size: uint8(sz)}
	rb.nrune++
}

// assignRune sets a rune at position pos. It is used for Hangul and recomposition.
func (rb *reorderBuffer) assignRune(pos int, r rune) {
	bn := rb.rune[pos].pos
	sz := utf8.EncodeRune(rb.byte[bn:], rune(r))
	rb.rune[pos] = Properties{pos: bn, size: uint8(sz)}
}

// runeAt returns the rune at position n. It is used for Hangul and recomposition.
func (rb *reorderBuffer) runeAt(n int) rune {
	inf := rb.rune[n]
	r, _ := utf8.DecodeRune(rb.byte[inf.pos : inf.pos+inf.size])
	return r
}

// bytesAt returns the UTF-8 encoding of the rune at position n.
// It is used for Hangul and recomposition.
func (rb *reorderBuffer) bytesAt(n int) []byte {
	inf := rb.rune[n]
	return rb.byte[inf.pos : int(inf.pos)+int(inf.size)]
}

// For Hangul we combine algorithmically, instead of using tables.
const (
	hangulBase  = 0xAC00 // UTF-8(hangulBase) -> EA B0 80
	hangulBase0 = 0xEA
	hangulBase1 = 0xB0
	hangulBase2 = 0x80

	hangulEnd  = hangulBase + jamoLVTCount // UTF-8(0xD7A4) -> ED 9E A4
	hangulEnd0 = 0xED
	hangulEnd1 = 0x9E
	hangulEnd2 = 0xA4

	jamoLBase  = 0x1100 // UTF-8(jamoLBase) -> E1 84 00
	jamoLBase0 = 0xE1
	jamoLBase1 = 0x84
	jamoLEnd   = 0x1113
	jamoVBase  = 0x1161
	jamoVEnd   = 0x1176
	jamoTBase  = 0x11A7
	jamoTEnd   = 0x11C3

	jamoTCount   = 28
	jamoVCount   = 21
	jamoVTCount  = 21 * 28
	jamoLVTCount = 19 * 21 * 28
)

const hangulUTF8Size = 3

func isHangul(b []byte) bool {
	if len(b) < hangulUTF8Size {
		return false
	}
	b0 := b[0]
	if b0 < hangulBase0 {
		return false
	}
	b1 := b[1]
	switch {
	case b0 == hangulBase0:
		return b1 >= hangulBase1
	case b0 < hangulEnd0:
		return true
	case b0 > hangulEnd0:
		return false
	case b1 < hangulEnd1:
		return true
	}
	return b1 == hangulEnd1 && b[2] < hangulEnd2
}

func isHangulString(b string) bool {
	if len(b) < hangulUTF8Size {
		return false
	}
	b0 := b[0]
	if b0 < hangulBase0 {
		return false
	}
	b1 := b[1]
	switch {
	case b0 == hangulBase0:
		return b1 >= hangulBase1
	case b0 < hangulEnd0:
		return true
	case b0 > hangulEnd0:
		return false
	case b1 < hangulEnd1:
		return true
	}
	return b1 == hangulEnd1 && b[2] < hangulEnd2
}

// Caller must ensure len(b) >= 2.
func isJamoVT(b []byte) bool {
	// True if (rune & 0xff00) == jamoLBase
	return b[0] == jamoLBase0 && (b[1]&0xFC) == jamoLBase1
}

func isHangulWithoutJamoT(b []byte) bool {
	c, _ := utf8.DecodeRune(b)
	c -= hangulBase
	return c < jamoLVTCount && c%jamoTCount == 0
}

// decomposeHangul writes the decomposed Hangul to buf and returns the number
// of bytes written.  len(buf) should be at least 9.
func decomposeHangul(buf []byte, r rune) int {
	const JamoUTF8Len = 3
	r -= hangulBase
	x := r % jamoTCount
	r /= jamoTCount
	utf8.EncodeRune(buf, jamoLBase+r/jamoVCount)
	utf8.EncodeRune(buf[JamoUTF8Len:], jamoVBase+r%jamoVCount)
	if x != 0 {
		utf8.EncodeRune(buf[2*JamoUTF8Len:], jamoTBase+x)
		return 3 * JamoUTF8Len
	}
	return 2 * JamoUTF8Len
}

// decomposeHangul algorithmically decomposes a Hangul rune into
// its Jamo components.
// See http://unicode.org/reports/tr15/#Hangul for details on decomposing Hangul.
func (rb *reorderBuffer) decomposeHangul(r rune) {
	r -= hangulBase
	x := r % jamoTCount
	r /= jamoTCount
	rb.appendRune(jamoLBase + r/jamoVCount)
	rb.appendRune(jamoVBase + r%jamoVCount)
	if x != 0 {
		rb.appendRune(jamoTBase + x)
	}
}

// combineHangul algorithmically combines Jamo character components into Hangul.
// See http://unicode.org/reports/tr15/#Hangul for details on combining Hangul.
func (rb *reorderBuffer) combineHangul(s, i, k int) {
	b := rb.rune[:]
	bn := rb.nrune
	for ; i < bn; i++ {
		cccB := b[k-1].ccc
		cccC := b[i].ccc
		if cccB == 0 {
			s = k - 1
		}
		if s != k-1 && cccB >= cccC {
			// b[i] is blocked by greater-equal cccX below it
			b[k] = b[i]
			k++
		} else {
			l := rb.runeAt(s) // also used to compare to hangulBase
			v := rb.runeAt(i) // also used to compare to jamoT
			switch {
			case jamoLBase <= l && l < jamoLEnd &&
				jamoVBase <= v && v < jamoVEnd:
				// 11xx plus 116x to LV
				rb.assignRune(s, hangulBase+
					(l-jamoLBase)*jamoVTCount+(v-jamoVBase)*jamoTCount)
			case hangulBase <= l && l < hangulEnd &&
				jamoTBase < v && v < jamoTEnd &&
				((l-hangulBase)%jamoTCount) == 0:
				// ACxx plus 11Ax to LVT
				rb.assignRune(s, l+v-jamoTBase)
			default:
				b[k] = b[i]
				k++
			}
		}
	}
	rb.nrune = k
}

// compose recombines the runes in the buffer.
// It should only be used to recompose a single segment, as it will not
// handle alternations between Hangul and non-Hangul characters correctly.
func (rb *reorderBuffer) compose() {
	// UAX #15, section X5 , including Corrigendum #5
	// "In any character sequence beginning with starter S, a character C is
	//  blocked from S if and only if there is some character B between S
	//  and C, and either B is a starter or it has the same or higher
	//  combining class as C."
	bn := rb.nrune
	if bn == 0 {
		return
	}
	k := 1
	b := rb.rune[:]
	for s, i := 0, 1; i < bn; i++ {
		if isJamoVT(rb.bytesAt(i)) {
			// Redo from start in Hangul mode. Necessary to support
			// U+320E..U+321E in NFKC mode.
			rb.combineHangul(s, i, k)
			return
		}
		ii := b[i]
		// We can only use combineForward as a filter if we later
		// get the info for the combined character. This is more
		// expensive than using the filter. Using combinesBackward()
		// is safe.
		if ii.combinesBackward() {
			cccB := b[k-1].ccc
			cccC := ii.ccc
			blocked := false // b[i] blocked by starter or greater or equal CCC?
			if cccB == 0 {
				s = k - 1
			} else {
				blocked = s != k-1 && cccB >= cccC
			}
			if !blocked {
				combined := combine(rb.runeAt(s), rb.runeAt(i))
				if combined != 0 {
					rb.assignRune(s, combined)
					continue
				}
			}
		}
		b[k] = b[i]
		k++
	}
	rb.nrune = k
}
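Editor's note: the stream-safe logic above caps a run of non-starters at maxNonStarters (30) and inserts a Combining Grapheme Joiner when that cap is exceeded. A minimal sketch of how this surfaces through the package's public API; it assumes the NFC form constant and the Form.String convenience method, which live elsewhere in the norm package rather than in this diff.

package main

import (
	"fmt"
	"strings"

	"golang.org/x/text/unicode/norm"
)

func main() {
	// A starter followed by far more than 30 combining acute accents;
	// the stream-safe rules should break the run up with a CGJ (U+034F).
	s := "e" + strings.Repeat("\u0301", 40)
	out := norm.NFC.String(s)
	fmt.Println(strings.Contains(out, norm.GraphemeJoiner)) // expected: true
}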
vendor/golang.org/x/text/unicode/norm/forminfo.go (new file, 259 lines, generated, vendored)
@@ -0,0 +1,259 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

// This file contains Form-specific logic and wrappers for data in tables.go.

// Rune info is stored in a separate trie per composing form. A composing form
// and its corresponding decomposing form share the same trie.  Each trie maps
// a rune to a uint16. The values take two forms.  For v >= 0x8000:
//   bits
//   15:    1 (inverse of NFD_QC bit of qcInfo)
//   13..7: qcInfo (see below). isYesD is always true (no decompostion).
//    6..0: ccc (compressed CCC value).
// For v < 0x8000, the respective rune has a decomposition and v is an index
// into a byte array of UTF-8 decomposition sequences and additional info and
// has the form:
//    <header> <decomp_byte>* [<tccc> [<lccc>]]
// The header contains the number of bytes in the decomposition (excluding this
// length byte). The two most significant bits of this length byte correspond
// to bit 5 and 4 of qcInfo (see below).  The byte sequence itself starts at v+1.
// The byte sequence is followed by a trailing and leading CCC if the values
// for these are not zero.  The value of v determines which ccc are appended
// to the sequences.  For v < firstCCC, there are none, for v >= firstCCC,
// the sequence is followed by a trailing ccc, and for v >= firstLeadingCC
// there is an additional leading ccc. The value of tccc itself is the
// trailing CCC shifted left 2 bits. The two least-significant bits of tccc
// are the number of trailing non-starters.

const (
	qcInfoMask      = 0x3F // to clear all but the relevant bits in a qcInfo
	headerLenMask   = 0x3F // extract the length value from the header byte
	headerFlagsMask = 0xC0 // extract the qcInfo bits from the header byte
)

// Properties provides access to normalization properties of a rune.
type Properties struct {
	pos   uint8  // start position in reorderBuffer; used in composition.go
	size  uint8  // length of UTF-8 encoding of this rune
	ccc   uint8  // leading canonical combining class (ccc if not decomposition)
	tccc  uint8  // trailing canonical combining class (ccc if not decomposition)
	nLead uint8  // number of leading non-starters.
	flags qcInfo // quick check flags
	index uint16
}

// functions dispatchable per form
type lookupFunc func(b input, i int) Properties

// formInfo holds Form-specific functions and tables.
type formInfo struct {
	form                     Form
	composing, compatibility bool // form type
	info                     lookupFunc
	nextMain                 iterFunc
}

var formTable = []*formInfo{{
	form:          NFC,
	composing:     true,
	compatibility: false,
	info:          lookupInfoNFC,
	nextMain:      nextComposed,
}, {
	form:          NFD,
	composing:     false,
	compatibility: false,
	info:          lookupInfoNFC,
	nextMain:      nextDecomposed,
}, {
	form:          NFKC,
	composing:     true,
	compatibility: true,
	info:          lookupInfoNFKC,
	nextMain:      nextComposed,
}, {
	form:          NFKD,
	composing:     false,
	compatibility: true,
	info:          lookupInfoNFKC,
	nextMain:      nextDecomposed,
}}

// We do not distinguish between boundaries for NFC, NFD, etc. to avoid
// unexpected behavior for the user.  For example, in NFD, there is a boundary
// after 'a'.  However, 'a' might combine with modifiers, so from the application's
// perspective it is not a good boundary. We will therefore always use the
// boundaries for the combining variants.

// BoundaryBefore returns true if this rune starts a new segment and
// cannot combine with any rune on the left.
func (p Properties) BoundaryBefore() bool {
	if p.ccc == 0 && !p.combinesBackward() {
		return true
	}
	// We assume that the CCC of the first character in a decomposition
	// is always non-zero if different from info.ccc and that we can return
	// false at this point. This is verified by maketables.
	return false
}

// BoundaryAfter returns true if runes cannot combine with or otherwise
// interact with this or previous runes.
func (p Properties) BoundaryAfter() bool {
	// TODO: loosen these conditions.
	return p.isInert()
}

// We pack quick check data in 4 bits:
//   5:    Combines forward  (0 == false, 1 == true)
//   4..3: NFC_QC Yes(00), No (10), or Maybe (11)
//   2:    NFD_QC Yes (0) or No (1). No also means there is a decomposition.
//   1..0: Number of trailing non-starters.
//
// When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization.
type qcInfo uint8

func (p Properties) isYesC() bool { return p.flags&0x10 == 0 }
func (p Properties) isYesD() bool { return p.flags&0x4 == 0 }

func (p Properties) combinesForward() bool  { return p.flags&0x20 != 0 }
func (p Properties) combinesBackward() bool { return p.flags&0x8 != 0 } // == isMaybe
func (p Properties) hasDecomposition() bool { return p.flags&0x4 != 0 } // == isNoD

func (p Properties) isInert() bool {
	return p.flags&qcInfoMask == 0 && p.ccc == 0
}

func (p Properties) multiSegment() bool {
	return p.index >= firstMulti && p.index < endMulti
}

func (p Properties) nLeadingNonStarters() uint8 {
	return p.nLead
}

func (p Properties) nTrailingNonStarters() uint8 {
	return uint8(p.flags & 0x03)
}

// Decomposition returns the decomposition for the underlying rune
// or nil if there is none.
func (p Properties) Decomposition() []byte {
	// TODO: create the decomposition for Hangul?
	if p.index == 0 {
		return nil
	}
	i := p.index
	n := decomps[i] & headerLenMask
	i++
	return decomps[i : i+uint16(n)]
}

// Size returns the length of UTF-8 encoding of the rune.
func (p Properties) Size() int {
	return int(p.size)
}

// CCC returns the canonical combining class of the underlying rune.
func (p Properties) CCC() uint8 {
	if p.index >= firstCCCZeroExcept {
		return 0
	}
	return ccc[p.ccc]
}

// LeadCCC returns the CCC of the first rune in the decomposition.
// If there is no decomposition, LeadCCC equals CCC.
func (p Properties) LeadCCC() uint8 {
	return ccc[p.ccc]
}

// TrailCCC returns the CCC of the last rune in the decomposition.
// If there is no decomposition, TrailCCC equals CCC.
func (p Properties) TrailCCC() uint8 {
	return ccc[p.tccc]
}

// Recomposition
// We use 32-bit keys instead of 64-bit for the two codepoint keys.
// This clips off the bits of three entries, but we know this will not
// result in a collision. In the unlikely event that changes to
// UnicodeData.txt introduce collisions, the compiler will catch it.
// Note that the recomposition map for NFC and NFKC are identical.

// combine returns the combined rune or 0 if it doesn't exist.
func combine(a, b rune) rune {
	key := uint32(uint16(a))<<16 + uint32(uint16(b))
	return recompMap[key]
}

func lookupInfoNFC(b input, i int) Properties {
	v, sz := b.charinfoNFC(i)
	return compInfo(v, sz)
}

func lookupInfoNFKC(b input, i int) Properties {
	v, sz := b.charinfoNFKC(i)
	return compInfo(v, sz)
}

// Properties returns properties for the first rune in s.
func (f Form) Properties(s []byte) Properties {
	if f == NFC || f == NFD {
		return compInfo(nfcData.lookup(s))
	}
	return compInfo(nfkcData.lookup(s))
}

// PropertiesString returns properties for the first rune in s.
func (f Form) PropertiesString(s string) Properties {
	if f == NFC || f == NFD {
		return compInfo(nfcData.lookupString(s))
	}
	return compInfo(nfkcData.lookupString(s))
}

// compInfo converts the information contained in v and sz
// to a Properties.  See the comment at the top of the file
// for more information on the format.
func compInfo(v uint16, sz int) Properties {
	if v == 0 {
		return Properties{size: uint8(sz)}
	} else if v >= 0x8000 {
		p := Properties{
			size:  uint8(sz),
			ccc:   uint8(v),
			tccc:  uint8(v),
			flags: qcInfo(v >> 8),
		}
		if p.ccc > 0 || p.combinesBackward() {
			p.nLead = uint8(p.flags & 0x3)
		}
		return p
	}
	// has decomposition
	h := decomps[v]
	f := (qcInfo(h&headerFlagsMask) >> 2) | 0x4
	p := Properties{size: uint8(sz), flags: f, index: v}
	if v >= firstCCC {
		v += uint16(h&headerLenMask) + 1
		c := decomps[v]
		p.tccc = c >> 2
		p.flags |= qcInfo(c & 0x3)
		if v >= firstLeadingCCC {
			p.nLead = c & 0x3
			if v >= firstStarterWithNLead {
				// We were tricked. Remove the decomposition.
				p.flags &= 0x03
				p.index = 0
				return p
			}
			p.ccc = decomps[v+1]
		}
	}
	return p
}
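Editor's note: the exported Properties accessors and Form.Properties above can be queried directly. A small usage sketch; the NFD form constant comes from normalize.go, not from this diff, and the values in the comments are what the documented layout suggests for these runes.

package main

import (
	"fmt"

	"golang.org/x/text/unicode/norm"
)

func main() {
	// U+00E9 (é) decomposes canonically to 'e' + U+0301 and is itself a starter.
	p := norm.NFD.Properties([]byte("\u00e9"))
	fmt.Println(p.Size(), p.CCC(), p.Decomposition()) // expected: 2 0 [101 204 129]

	// The combining acute accent is a non-starter with combining class 230.
	q := norm.NFD.Properties([]byte("\u0301"))
	fmt.Println(q.CCC(), q.BoundaryBefore()) // expected: 230 false
}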
vendor/golang.org/x/text/unicode/norm/input.go (new file, 109 lines, generated, vendored)
@@ -0,0 +1,109 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import "unicode/utf8"

type input struct {
	str   string
	bytes []byte
}

func inputBytes(str []byte) input {
	return input{bytes: str}
}

func inputString(str string) input {
	return input{str: str}
}

func (in *input) setBytes(str []byte) {
	in.str = ""
	in.bytes = str
}

func (in *input) setString(str string) {
	in.str = str
	in.bytes = nil
}

func (in *input) _byte(p int) byte {
	if in.bytes == nil {
		return in.str[p]
	}
	return in.bytes[p]
}

func (in *input) skipASCII(p, max int) int {
	if in.bytes == nil {
		for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
		}
	} else {
		for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
		}
	}
	return p
}

func (in *input) skipContinuationBytes(p int) int {
	if in.bytes == nil {
		for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
		}
	} else {
		for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
		}
	}
	return p
}

func (in *input) appendSlice(buf []byte, b, e int) []byte {
	if in.bytes != nil {
		return append(buf, in.bytes[b:e]...)
	}
	for i := b; i < e; i++ {
		buf = append(buf, in.str[i])
	}
	return buf
}

func (in *input) copySlice(buf []byte, b, e int) int {
	if in.bytes == nil {
		return copy(buf, in.str[b:e])
	}
	return copy(buf, in.bytes[b:e])
}

func (in *input) charinfoNFC(p int) (uint16, int) {
	if in.bytes == nil {
		return nfcData.lookupString(in.str[p:])
	}
	return nfcData.lookup(in.bytes[p:])
}

func (in *input) charinfoNFKC(p int) (uint16, int) {
	if in.bytes == nil {
		return nfkcData.lookupString(in.str[p:])
	}
	return nfkcData.lookup(in.bytes[p:])
}

func (in *input) hangul(p int) (r rune) {
	var size int
	if in.bytes == nil {
		if !isHangulString(in.str[p:]) {
			return 0
		}
		r, size = utf8.DecodeRuneInString(in.str[p:])
	} else {
		if !isHangul(in.bytes[p:]) {
			return 0
		}
		r, size = utf8.DecodeRune(in.bytes[p:])
	}
	if size != hangulUTF8Size {
		return 0
	}
	return r
}
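Editor's note: the input wrapper above lets the same core code serve both string and []byte sources without conversion. A brief sketch of the parallel public entry points that ride on it; the Bytes, String, IsNormal, and IsNormalString convenience methods are defined elsewhere in the norm package, not in this diff.

package main

import (
	"bytes"
	"fmt"

	"golang.org/x/text/unicode/norm"
)

func main() {
	s := "e\u0301" // 'e' followed by a combining acute accent
	b := []byte(s)

	// The string- and byte-based entry points share the same core logic via
	// the input wrapper, so their results should agree.
	fmt.Println(norm.NFC.String(s) == string(norm.NFC.Bytes(b)))            // expected: true
	fmt.Println(norm.NFC.IsNormalString(s), norm.NFC.IsNormal(b))           // expected: false false
	fmt.Println(bytes.Equal(norm.NFD.Bytes(b), []byte(norm.NFD.String(s)))) // expected: true
}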
vendor/golang.org/x/text/unicode/norm/iter.go (new file, 457 lines, generated, vendored)
@@ -0,0 +1,457 @@
 | 
			
		||||
// Copyright 2011 The Go Authors. All rights reserved.
 | 
			
		||||
// Use of this source code is governed by a BSD-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
package norm
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"unicode/utf8"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// MaxSegmentSize is the maximum size of a byte buffer needed to consider any
 | 
			
		||||
// sequence of starter and non-starter runes for the purpose of normalization.
 | 
			
		||||
const MaxSegmentSize = maxByteBufferSize
 | 
			
		||||
 | 
			
		||||
// An Iter iterates over a string or byte slice, while normalizing it
 | 
			
		||||
// to a given Form.
 | 
			
		||||
type Iter struct {
 | 
			
		||||
	rb     reorderBuffer
 | 
			
		||||
	buf    [maxByteBufferSize]byte
 | 
			
		||||
	info   Properties // first character saved from previous iteration
 | 
			
		||||
	next   iterFunc   // implementation of next depends on form
 | 
			
		||||
	asciiF iterFunc
 | 
			
		||||
 | 
			
		||||
	p        int    // current position in input source
 | 
			
		||||
	multiSeg []byte // remainder of multi-segment decomposition
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type iterFunc func(*Iter) []byte
 | 
			
		||||
 | 
			
		||||
// Init initializes i to iterate over src after normalizing it to Form f.
 | 
			
		||||
func (i *Iter) Init(f Form, src []byte) {
 | 
			
		||||
	i.p = 0
 | 
			
		||||
	if len(src) == 0 {
 | 
			
		||||
		i.setDone()
 | 
			
		||||
		i.rb.nsrc = 0
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	i.multiSeg = nil
 | 
			
		||||
	i.rb.init(f, src)
 | 
			
		||||
	i.next = i.rb.f.nextMain
 | 
			
		||||
	i.asciiF = nextASCIIBytes
 | 
			
		||||
	i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
	i.rb.ss.first(i.info)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// InitString initializes i to iterate over src after normalizing it to Form f.
 | 
			
		||||
func (i *Iter) InitString(f Form, src string) {
 | 
			
		||||
	i.p = 0
 | 
			
		||||
	if len(src) == 0 {
 | 
			
		||||
		i.setDone()
 | 
			
		||||
		i.rb.nsrc = 0
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	i.multiSeg = nil
 | 
			
		||||
	i.rb.initString(f, src)
 | 
			
		||||
	i.next = i.rb.f.nextMain
 | 
			
		||||
	i.asciiF = nextASCIIString
 | 
			
		||||
	i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
	i.rb.ss.first(i.info)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Seek sets the segment to be returned by the next call to Next to start
 | 
			
		||||
// at position p.  It is the responsibility of the caller to set p to the
 | 
			
		||||
// start of a segment.
 | 
			
		||||
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
 | 
			
		||||
	var abs int64
 | 
			
		||||
	switch whence {
 | 
			
		||||
	case 0:
 | 
			
		||||
		abs = offset
 | 
			
		||||
	case 1:
 | 
			
		||||
		abs = int64(i.p) + offset
 | 
			
		||||
	case 2:
 | 
			
		||||
		abs = int64(i.rb.nsrc) + offset
 | 
			
		||||
	default:
 | 
			
		||||
		return 0, fmt.Errorf("norm: invalid whence")
 | 
			
		||||
	}
 | 
			
		||||
	if abs < 0 {
 | 
			
		||||
		return 0, fmt.Errorf("norm: negative position")
 | 
			
		||||
	}
 | 
			
		||||
	if int(abs) >= i.rb.nsrc {
 | 
			
		||||
		i.setDone()
 | 
			
		||||
		return int64(i.p), nil
 | 
			
		||||
	}
 | 
			
		||||
	i.p = int(abs)
 | 
			
		||||
	i.multiSeg = nil
 | 
			
		||||
	i.next = i.rb.f.nextMain
 | 
			
		||||
	i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
	i.rb.ss.first(i.info)
 | 
			
		||||
	return abs, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// returnSlice returns a slice of the underlying input type as a byte slice.
 | 
			
		||||
// If the underlying is of type []byte, it will simply return a slice.
 | 
			
		||||
// If the underlying is of type string, it will copy the slice to the buffer
 | 
			
		||||
// and return that.
 | 
			
		||||
func (i *Iter) returnSlice(a, b int) []byte {
 | 
			
		||||
	if i.rb.src.bytes == nil {
 | 
			
		||||
		return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
 | 
			
		||||
	}
 | 
			
		||||
	return i.rb.src.bytes[a:b]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Pos returns the byte position at which the next call to Next will commence processing.
 | 
			
		||||
func (i *Iter) Pos() int {
 | 
			
		||||
	return i.p
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (i *Iter) setDone() {
 | 
			
		||||
	i.next = nextDone
 | 
			
		||||
	i.p = i.rb.nsrc
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Done returns true if there is no more input to process.
 | 
			
		||||
func (i *Iter) Done() bool {
 | 
			
		||||
	return i.p >= i.rb.nsrc
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
 | 
			
		||||
// For any input a and b for which f(a) == f(b), subsequent calls
 | 
			
		||||
// to Next will return the same segments.
 | 
			
		||||
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
 | 
			
		||||
// Although not guaranteed, n will typically be the smallest possible n.
 | 
			
		||||
func (i *Iter) Next() []byte {
 | 
			
		||||
	return i.next(i)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func nextASCIIBytes(i *Iter) []byte {
 | 
			
		||||
	p := i.p + 1
 | 
			
		||||
	if p >= i.rb.nsrc {
 | 
			
		||||
		i.setDone()
 | 
			
		||||
		return i.rb.src.bytes[i.p:p]
 | 
			
		||||
	}
 | 
			
		||||
	if i.rb.src.bytes[p] < utf8.RuneSelf {
 | 
			
		||||
		p0 := i.p
 | 
			
		||||
		i.p = p
 | 
			
		||||
		return i.rb.src.bytes[p0:p]
 | 
			
		||||
	}
 | 
			
		||||
	i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
	i.next = i.rb.f.nextMain
 | 
			
		||||
	return i.next(i)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func nextASCIIString(i *Iter) []byte {
 | 
			
		||||
	p := i.p + 1
 | 
			
		||||
	if p >= i.rb.nsrc {
 | 
			
		||||
		i.buf[0] = i.rb.src.str[i.p]
 | 
			
		||||
		i.setDone()
 | 
			
		||||
		return i.buf[:1]
 | 
			
		||||
	}
 | 
			
		||||
	if i.rb.src.str[p] < utf8.RuneSelf {
 | 
			
		||||
		i.buf[0] = i.rb.src.str[i.p]
 | 
			
		||||
		i.p = p
 | 
			
		||||
		return i.buf[:1]
 | 
			
		||||
	}
 | 
			
		||||
	i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
	i.next = i.rb.f.nextMain
 | 
			
		||||
	return i.next(i)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func nextHangul(i *Iter) []byte {
 | 
			
		||||
	p := i.p
 | 
			
		||||
	next := p + hangulUTF8Size
 | 
			
		||||
	if next >= i.rb.nsrc {
 | 
			
		||||
		i.setDone()
 | 
			
		||||
	} else if i.rb.src.hangul(next) == 0 {
 | 
			
		||||
		i.rb.ss.next(i.info)
 | 
			
		||||
		i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
		i.next = i.rb.f.nextMain
 | 
			
		||||
		return i.next(i)
 | 
			
		||||
	}
 | 
			
		||||
	i.p = next
 | 
			
		||||
	return i.buf[:decomposeHangul(i.buf[:], i.rb.src.hangul(p))]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func nextDone(i *Iter) []byte {
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// nextMulti is used for iterating over multi-segment decompositions
 | 
			
		||||
// for decomposing normal forms.
 | 
			
		||||
func nextMulti(i *Iter) []byte {
 | 
			
		||||
	j := 0
 | 
			
		||||
	d := i.multiSeg
 | 
			
		||||
	// skip first rune
 | 
			
		||||
	for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
 | 
			
		||||
	}
 | 
			
		||||
	for j < len(d) {
 | 
			
		||||
		info := i.rb.f.info(input{bytes: d}, j)
 | 
			
		||||
		if info.BoundaryBefore() {
 | 
			
		||||
			i.multiSeg = d[j:]
 | 
			
		||||
			return d[:j]
 | 
			
		||||
		}
 | 
			
		||||
		j += int(info.size)
 | 
			
		||||
	}
 | 
			
		||||
	// treat last segment as normal decomposition
 | 
			
		||||
	i.next = i.rb.f.nextMain
 | 
			
		||||
	return i.next(i)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// nextMultiNorm is used for iterating over multi-segment decompositions
 | 
			
		||||
// for composing normal forms.
 | 
			
		||||
func nextMultiNorm(i *Iter) []byte {
 | 
			
		||||
	j := 0
 | 
			
		||||
	d := i.multiSeg
 | 
			
		||||
	for j < len(d) {
 | 
			
		||||
		info := i.rb.f.info(input{bytes: d}, j)
 | 
			
		||||
		if info.BoundaryBefore() {
 | 
			
		||||
			i.rb.compose()
 | 
			
		||||
			seg := i.buf[:i.rb.flushCopy(i.buf[:])]
 | 
			
		||||
			i.rb.insertUnsafe(input{bytes: d}, j, info)
 | 
			
		||||
			i.multiSeg = d[j+int(info.size):]
 | 
			
		||||
			return seg
 | 
			
		||||
		}
 | 
			
		||||
		i.rb.insertUnsafe(input{bytes: d}, j, info)
 | 
			
		||||
		j += int(info.size)
 | 
			
		||||
	}
 | 
			
		||||
	i.multiSeg = nil
 | 
			
		||||
	i.next = nextComposed
 | 
			
		||||
	return doNormComposed(i)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
 | 
			
		||||
func nextDecomposed(i *Iter) (next []byte) {
 | 
			
		||||
	outp := 0
 | 
			
		||||
	inCopyStart, outCopyStart := i.p, 0
 | 
			
		||||
	for {
 | 
			
		||||
		if sz := int(i.info.size); sz <= 1 {
 | 
			
		||||
			i.rb.ss = 0
 | 
			
		||||
			p := i.p
 | 
			
		||||
			i.p++ // ASCII or illegal byte.  Either way, advance by 1.
 | 
			
		||||
			if i.p >= i.rb.nsrc {
 | 
			
		||||
				i.setDone()
 | 
			
		||||
				return i.returnSlice(p, i.p)
 | 
			
		||||
			} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
 | 
			
		||||
				i.next = i.asciiF
 | 
			
		||||
				return i.returnSlice(p, i.p)
 | 
			
		||||
			}
 | 
			
		||||
			outp++
 | 
			
		||||
		} else if d := i.info.Decomposition(); d != nil {
 | 
			
		||||
			// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
 | 
			
		||||
			// Case 1: there is a leftover to copy.  In this case the decomposition
 | 
			
		||||
			// must begin with a modifier and should always be appended.
 | 
			
		||||
			// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
 | 
			
		||||
			p := outp + len(d)
 | 
			
		||||
			if outp > 0 {
 | 
			
		||||
				i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
 | 
			
		||||
				// TODO: this condition should not be possible, but we leave it
 | 
			
		||||
				// in for defensive purposes.
 | 
			
		||||
				if p > len(i.buf) {
 | 
			
		||||
					return i.buf[:outp]
 | 
			
		||||
				}
 | 
			
		||||
			} else if i.info.multiSegment() {
 | 
			
		||||
				// outp must be 0 as multi-segment decompositions always
 | 
			
		||||
				// start a new segment.
 | 
			
		||||
				if i.multiSeg == nil {
 | 
			
		||||
					i.multiSeg = d
 | 
			
		||||
					i.next = nextMulti
 | 
			
		||||
					return nextMulti(i)
 | 
			
		||||
				}
 | 
			
		||||
				// We are in the last segment.  Treat as normal decomposition.
 | 
			
		||||
				d = i.multiSeg
 | 
			
		||||
				i.multiSeg = nil
 | 
			
		||||
				p = len(d)
 | 
			
		||||
			}
 | 
			
		||||
			prevCC := i.info.tccc
 | 
			
		||||
			if i.p += sz; i.p >= i.rb.nsrc {
 | 
			
		||||
				i.setDone()
 | 
			
		||||
				i.info = Properties{} // Force BoundaryBefore to succeed.
 | 
			
		||||
			} else {
 | 
			
		||||
				i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
			}
 | 
			
		||||
			switch i.rb.ss.next(i.info) {
 | 
			
		||||
			case ssOverflow:
 | 
			
		||||
				i.next = nextCGJDecompose
 | 
			
		||||
				fallthrough
 | 
			
		||||
			case ssStarter:
 | 
			
		||||
				if outp > 0 {
 | 
			
		||||
					copy(i.buf[outp:], d)
 | 
			
		||||
					return i.buf[:p]
 | 
			
		||||
				}
 | 
			
		||||
				return d
 | 
			
		||||
			}
 | 
			
		||||
			copy(i.buf[outp:], d)
 | 
			
		||||
			outp = p
 | 
			
		||||
			inCopyStart, outCopyStart = i.p, outp
 | 
			
		||||
			if i.info.ccc < prevCC {
 | 
			
		||||
				goto doNorm
 | 
			
		||||
			}
 | 
			
		||||
			continue
 | 
			
		||||
		} else if r := i.rb.src.hangul(i.p); r != 0 {
 | 
			
		||||
			outp = decomposeHangul(i.buf[:], r)
 | 
			
		||||
			i.p += hangulUTF8Size
 | 
			
		||||
			inCopyStart, outCopyStart = i.p, outp
 | 
			
		||||
			if i.p >= i.rb.nsrc {
 | 
			
		||||
				i.setDone()
 | 
			
		||||
				break
 | 
			
		||||
			} else if i.rb.src.hangul(i.p) != 0 {
 | 
			
		||||
				i.next = nextHangul
 | 
			
		||||
				return i.buf[:outp]
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			p := outp + sz
 | 
			
		||||
			if p > len(i.buf) {
 | 
			
		||||
				break
 | 
			
		||||
			}
 | 
			
		||||
			outp = p
 | 
			
		||||
			i.p += sz
 | 
			
		||||
		}
 | 
			
		||||
		if i.p >= i.rb.nsrc {
 | 
			
		||||
			i.setDone()
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
		prevCC := i.info.tccc
 | 
			
		||||
		i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
		if v := i.rb.ss.next(i.info); v == ssStarter {
 | 
			
		||||
			break
 | 
			
		||||
		} else if v == ssOverflow {
 | 
			
		||||
			i.next = nextCGJDecompose
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
		if i.info.ccc < prevCC {
 | 
			
		||||
			goto doNorm
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if outCopyStart == 0 {
 | 
			
		||||
		return i.returnSlice(inCopyStart, i.p)
 | 
			
		||||
	} else if inCopyStart < i.p {
 | 
			
		||||
		i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
 | 
			
		||||
	}
 | 
			
		||||
	return i.buf[:outp]
 | 
			
		||||
doNorm:
 | 
			
		||||
	// Insert what we have decomposed so far in the reorderBuffer.
 | 
			
		||||
	// As we will only reorder, there will always be enough room.
 | 
			
		||||
	i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
 | 
			
		||||
	i.rb.insertDecomposed(i.buf[0:outp])
 | 
			
		||||
	return doNormDecomposed(i)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func doNormDecomposed(i *Iter) []byte {
 | 
			
		||||
	for {
 | 
			
		||||
		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
 | 
			
		||||
		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
 | 
			
		||||
			i.setDone()
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
		i.info = i.rb.f.info(i.rb.src, i.p)
 | 
			
		||||
		if i.info.ccc == 0 {
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
		if s := i.rb.ss.next(i.info); s == ssOverflow {
 | 
			
		||||
			i.next = nextCGJDecompose
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	// new segment or too many combining characters: exit normalization
 | 
			
		||||
	return i.buf[:i.rb.flushCopy(i.buf[:])]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func nextCGJDecompose(i *Iter) []byte {
 | 
			
		||||
	i.rb.ss = 0
 | 
			
		||||
	i.rb.insertCGJ()
 | 
			
		||||
	i.next = nextDecomposed
 | 
			
		||||
	i.rb.ss.first(i.info)
 | 
			
		||||
	buf := doNormDecomposed(i)
 | 
			
		||||
	return buf
 | 
			
		||||
}

// nextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter) []byte {
	outp, startp := 0, i.p
	var prevCC uint8
	for {
		if !i.info.isYesC() {
			goto doNorm
		}
		prevCC = i.info.tccc
		sz := int(i.info.size)
		if sz == 0 {
			sz = 1 // illegal rune: copy byte-by-byte
		}
		p := outp + sz
		if p > len(i.buf) {
			break
		}
		outp = p
		i.p += sz
		if i.p >= i.rb.nsrc {
			i.setDone()
			break
		} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
			i.rb.ss = 0
			i.next = i.asciiF
			break
		}
		i.info = i.rb.f.info(i.rb.src, i.p)
		if v := i.rb.ss.next(i.info); v == ssStarter {
			break
		} else if v == ssOverflow {
			i.next = nextCGJCompose
			break
		}
		if i.info.ccc < prevCC {
			goto doNorm
		}
	}
	return i.returnSlice(startp, i.p)
doNorm:
	// reset to start position
	i.p = startp
	i.info = i.rb.f.info(i.rb.src, i.p)
	i.rb.ss.first(i.info)
	if i.info.multiSegment() {
		d := i.info.Decomposition()
		info := i.rb.f.info(input{bytes: d}, 0)
		i.rb.insertUnsafe(input{bytes: d}, 0, info)
		i.multiSeg = d[int(info.size):]
		i.next = nextMultiNorm
		return nextMultiNorm(i)
	}
	i.rb.ss.first(i.info)
	i.rb.insertUnsafe(i.rb.src, i.p, i.info)
	return doNormComposed(i)
}

func doNormComposed(i *Iter) []byte {
	// First rune should already be inserted.
	for {
		if i.p += int(i.info.size); i.p >= i.rb.nsrc {
			i.setDone()
			break
		}
		i.info = i.rb.f.info(i.rb.src, i.p)
		if s := i.rb.ss.next(i.info); s == ssStarter {
			break
		} else if s == ssOverflow {
			i.next = nextCGJCompose
			break
		}
		i.rb.insertUnsafe(i.rb.src, i.p, i.info)
	}
	i.rb.compose()
	seg := i.buf[:i.rb.flushCopy(i.buf[:])]
	return seg
}

func nextCGJCompose(i *Iter) []byte {
	i.rb.ss = 0 // instead of first
	i.rb.insertCGJ()
	i.next = nextComposed
	// Note that we treat any rune with nLeadingNonStarters > 0 as a non-starter,
	// even if they are not. This is particularly dubious for U+FF9E and U+FF9A.
	// If we ever change that, insert a check here.
	i.rb.ss.first(i.info)
	i.rb.insertUnsafe(i.rb.src, i.p, i.info)
	return doNormComposed(i)
}
976 vendor/golang.org/x/text/unicode/norm/maketables.go generated vendored Normal file
@@ -0,0 +1,976 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// Normalization table generator.
// Data read from the web.
// See forminfo.go for a description of the trie values associated with each rune.

package main

import (
	"bytes"
	"flag"
	"fmt"
	"io"
	"log"
	"sort"
	"strconv"
	"strings"

	"golang.org/x/text/internal/gen"
	"golang.org/x/text/internal/triegen"
	"golang.org/x/text/internal/ucd"
)

func main() {
	gen.Init()
	loadUnicodeData()
	compactCCC()
	loadCompositionExclusions()
	completeCharFields(FCanonical)
	completeCharFields(FCompatibility)
	computeNonStarterCounts()
	verifyComputed()
	printChars()
	testDerived()
	printTestdata()
	makeTables()
}

var (
	tablelist = flag.String("tables",
		"all",
		"comma-separated list of which tables to generate; "+
			"can be 'decomp', 'recomp', 'info' and 'all'")
	test = flag.Bool("test",
		false,
		"test existing tables against DerivedNormalizationProps and generate test data for regression testing")
	verbose = flag.Bool("verbose",
		false,
		"write data to stdout as it is parsed")
)

const MaxChar = 0x10FFFF // anything above this shouldn't exist

// Quick Check properties of runes allow us to quickly
// determine whether a rune may occur in a normal form.
// For a given normal form, a rune may be guaranteed to occur
// verbatim (QC=Yes), may or may not combine with another
// rune (QC=Maybe), or may not occur (QC=No).
type QCResult int

const (
	QCUnknown QCResult = iota
	QCYes
	QCNo
	QCMaybe
)

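// For intuition, two examples of how these values play out in Unicode's
// DerivedNormalizationProps (quoted from that data file, not computed here):
// U+00E9 LATIN SMALL LETTER E WITH ACUTE is QC=Yes for NFC but QC=No for NFD,
// because NFD requires it to be decomposed, while U+0301 COMBINING ACUTE
// ACCENT is QC=Maybe for NFC, since whether it may appear depends on the
// preceding rune.
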
func (r QCResult) String() string {
 | 
			
		||||
	switch r {
 | 
			
		||||
	case QCYes:
 | 
			
		||||
		return "Yes"
 | 
			
		||||
	case QCNo:
 | 
			
		||||
		return "No"
 | 
			
		||||
	case QCMaybe:
 | 
			
		||||
		return "Maybe"
 | 
			
		||||
	}
 | 
			
		||||
	return "***UNKNOWN***"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	FCanonical     = iota // NFC or NFD
 | 
			
		||||
	FCompatibility        // NFKC or NFKD
 | 
			
		||||
	FNumberOfFormTypes
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	MComposed   = iota // NFC or NFKC
 | 
			
		||||
	MDecomposed        // NFD or NFKD
 | 
			
		||||
	MNumberOfModes
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// This contains only the properties we're interested in.
 | 
			
		||||
type Char struct {
 | 
			
		||||
	name          string
 | 
			
		||||
	codePoint     rune  // if zero, this index is not a valid code point.
 | 
			
		||||
	ccc           uint8 // canonical combining class
 | 
			
		||||
	origCCC       uint8
 | 
			
		||||
	excludeInComp bool // from CompositionExclusions.txt
 | 
			
		||||
	compatDecomp  bool // it has a compatibility expansion
 | 
			
		||||
 | 
			
		||||
	nTrailingNonStarters uint8
 | 
			
		||||
	nLeadingNonStarters  uint8 // must be equal to trailing if non-zero
 | 
			
		||||
 | 
			
		||||
	forms [FNumberOfFormTypes]FormInfo // For FCanonical and FCompatibility
 | 
			
		||||
 | 
			
		||||
	state State
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var chars = make([]Char, MaxChar+1)
 | 
			
		||||
var cccMap = make(map[uint8]uint8)
 | 
			
		||||
 | 
			
		||||
func (c Char) String() string {
 | 
			
		||||
	buf := new(bytes.Buffer)
 | 
			
		||||
 | 
			
		||||
	fmt.Fprintf(buf, "%U [%s]:\n", c.codePoint, c.name)
 | 
			
		||||
	fmt.Fprintf(buf, "  ccc: %v\n", c.ccc)
 | 
			
		||||
	fmt.Fprintf(buf, "  excludeInComp: %v\n", c.excludeInComp)
 | 
			
		||||
	fmt.Fprintf(buf, "  compatDecomp: %v\n", c.compatDecomp)
 | 
			
		||||
	fmt.Fprintf(buf, "  state: %v\n", c.state)
 | 
			
		||||
	fmt.Fprintf(buf, "  NFC:\n")
 | 
			
		||||
	fmt.Fprint(buf, c.forms[FCanonical])
 | 
			
		||||
	fmt.Fprintf(buf, "  NFKC:\n")
 | 
			
		||||
	fmt.Fprint(buf, c.forms[FCompatibility])
 | 
			
		||||
 | 
			
		||||
	return buf.String()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// In UnicodeData.txt, some ranges are marked like this:
 | 
			
		||||
//	3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
 | 
			
		||||
//	4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
 | 
			
		||||
// parseCharacter keeps a state variable indicating the weirdness.
 | 
			
		||||
type State int
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	SNormal State = iota // known to be zero for the type
 | 
			
		||||
	SFirst
 | 
			
		||||
	SLast
 | 
			
		||||
	SMissing
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
var lastChar = rune('\u0000')
 | 
			
		||||
 | 
			
		||||
func (c Char) isValid() bool {
 | 
			
		||||
	return c.codePoint != 0 && c.state != SMissing
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type FormInfo struct {
 | 
			
		||||
	quickCheck [MNumberOfModes]QCResult // index: MComposed or MDecomposed
 | 
			
		||||
	verified   [MNumberOfModes]bool     // index: MComposed or MDecomposed
 | 
			
		||||
 | 
			
		||||
	combinesForward  bool // May combine with rune on the right
 | 
			
		||||
	combinesBackward bool // May combine with rune on the left
 | 
			
		||||
	isOneWay         bool // Never appears in result
 | 
			
		||||
	inDecomp         bool // Some decompositions result in this char.
 | 
			
		||||
	decomp           Decomposition
 | 
			
		||||
	expandedDecomp   Decomposition
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f FormInfo) String() string {
 | 
			
		||||
	buf := bytes.NewBuffer(make([]byte, 0))
 | 
			
		||||
 | 
			
		||||
	fmt.Fprintf(buf, "    quickCheck[C]: %v\n", f.quickCheck[MComposed])
 | 
			
		||||
	fmt.Fprintf(buf, "    quickCheck[D]: %v\n", f.quickCheck[MDecomposed])
 | 
			
		||||
	fmt.Fprintf(buf, "    cmbForward: %v\n", f.combinesForward)
 | 
			
		||||
	fmt.Fprintf(buf, "    cmbBackward: %v\n", f.combinesBackward)
 | 
			
		||||
	fmt.Fprintf(buf, "    isOneWay: %v\n", f.isOneWay)
 | 
			
		||||
	fmt.Fprintf(buf, "    inDecomp: %v\n", f.inDecomp)
 | 
			
		||||
	fmt.Fprintf(buf, "    decomposition: %X\n", f.decomp)
 | 
			
		||||
	fmt.Fprintf(buf, "    expandedDecomp: %X\n", f.expandedDecomp)
 | 
			
		||||
 | 
			
		||||
	return buf.String()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type Decomposition []rune
 | 
			
		||||
 | 
			
		||||
func parseDecomposition(s string, skipfirst bool) (a []rune, err error) {
 | 
			
		||||
	decomp := strings.Split(s, " ")
 | 
			
		||||
	if len(decomp) > 0 && skipfirst {
 | 
			
		||||
		decomp = decomp[1:]
 | 
			
		||||
	}
 | 
			
		||||
	for _, d := range decomp {
 | 
			
		||||
		point, err := strconv.ParseUint(d, 16, 64)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return a, err
 | 
			
		||||
		}
 | 
			
		||||
		a = append(a, rune(point))
 | 
			
		||||
	}
 | 
			
		||||
	return a, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func loadUnicodeData() {
 | 
			
		||||
	f := gen.OpenUCDFile("UnicodeData.txt")
 | 
			
		||||
	defer f.Close()
 | 
			
		||||
	p := ucd.New(f)
 | 
			
		||||
	for p.Next() {
 | 
			
		||||
		r := p.Rune(ucd.CodePoint)
 | 
			
		||||
		char := &chars[r]
 | 
			
		||||
 | 
			
		||||
		char.ccc = uint8(p.Uint(ucd.CanonicalCombiningClass))
 | 
			
		||||
		decmap := p.String(ucd.DecompMapping)
 | 
			
		||||
 | 
			
		||||
		exp, err := parseDecomposition(decmap, false)
 | 
			
		||||
		isCompat := false
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			if len(decmap) > 0 {
 | 
			
		||||
				exp, err = parseDecomposition(decmap, true)
 | 
			
		||||
				if err != nil {
 | 
			
		||||
					log.Fatalf(`%U: bad decomp |%v|: "%s"`, r, decmap, err)
 | 
			
		||||
				}
 | 
			
		||||
				isCompat = true
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		char.name = p.String(ucd.Name)
 | 
			
		||||
		char.codePoint = r
 | 
			
		||||
		char.forms[FCompatibility].decomp = exp
 | 
			
		||||
		if !isCompat {
 | 
			
		||||
			char.forms[FCanonical].decomp = exp
 | 
			
		||||
		} else {
 | 
			
		||||
			char.compatDecomp = true
 | 
			
		||||
		}
 | 
			
		||||
		if len(decmap) > 0 {
 | 
			
		||||
			char.forms[FCompatibility].decomp = exp
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if err := p.Err(); err != nil {
 | 
			
		||||
		log.Fatal(err)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// compactCCC converts the sparse set of CCC values to a contiguous one,
// reducing the number of bits needed from 8 to 6.
func compactCCC() {
	m := make(map[uint8]uint8)
	for i := range chars {
		c := &chars[i]
		m[c.ccc] = 0
	}
	cccs := []int{}
	for v := range m {
		cccs = append(cccs, int(v))
	}
	sort.Ints(cccs)
	for i, c := range cccs {
		cccMap[uint8(i)] = uint8(c)
		m[uint8(c)] = uint8(i)
	}
	for i := range chars {
		c := &chars[i]
		c.origCCC = c.ccc
		c.ccc = m[c.ccc]
	}
	if len(m) >= 1<<6 {
		log.Fatalf("too many different CCC values: %d >= 64", len(m))
	}
}
 | 
			
		||||
 | 
			
		||||
// CompositionExclusions.txt has form:
 | 
			
		||||
// 0958    # ...
 | 
			
		||||
// See http://unicode.org/reports/tr44/ for full explanation
 | 
			
		||||
func loadCompositionExclusions() {
 | 
			
		||||
	f := gen.OpenUCDFile("CompositionExclusions.txt")
 | 
			
		||||
	defer f.Close()
 | 
			
		||||
	p := ucd.New(f)
 | 
			
		||||
	for p.Next() {
 | 
			
		||||
		c := &chars[p.Rune(0)]
 | 
			
		||||
		if c.excludeInComp {
 | 
			
		||||
			log.Fatalf("%U: Duplicate entry in exclusions.", c.codePoint)
 | 
			
		||||
		}
 | 
			
		||||
		c.excludeInComp = true
 | 
			
		||||
	}
 | 
			
		||||
	if e := p.Err(); e != nil {
 | 
			
		||||
		log.Fatal(e)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// hasCompatDecomp returns true if any of the recursive
 | 
			
		||||
// decompositions contains a compatibility expansion.
 | 
			
		||||
// In this case, the character may not occur in NFK*.
 | 
			
		||||
func hasCompatDecomp(r rune) bool {
 | 
			
		||||
	c := &chars[r]
 | 
			
		||||
	if c.compatDecomp {
 | 
			
		||||
		return true
 | 
			
		||||
	}
 | 
			
		||||
	for _, d := range c.forms[FCompatibility].decomp {
 | 
			
		||||
		if hasCompatDecomp(d) {
 | 
			
		||||
			return true
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return false
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Hangul related constants.
const (
	HangulBase = 0xAC00
	HangulEnd  = 0xD7A4 // hangulBase + Jamo combinations (19 * 21 * 28)

	JamoLBase = 0x1100
	JamoLEnd  = 0x1113
	JamoVBase = 0x1161
	JamoVEnd  = 0x1176
	JamoTBase = 0x11A8
	JamoTEnd  = 0x11C3

	JamoLVTCount = 19 * 21 * 28
	JamoTCount   = 28
)

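// The constants above follow the standard Hangul syllable arithmetic from the
// Unicode core specification. As a sketch (the Jamo V count of 21 is only
// implicit in JamoLVTCount = 19 * 21 * 28 and is not declared in this file),
// a precomposed syllable S splits into its Jamo as:
//
//	SIndex := S - HangulBase
//	L := JamoLBase + SIndex/(21*JamoTCount)
//	V := JamoVBase + (SIndex%(21*JamoTCount))/JamoTCount
//	T := JamoTBase + SIndex%JamoTCount // only present when SIndex%JamoTCount != 0
//
// isHangulWithoutJamoT below relies on the same arithmetic: syllables without
// a trailing consonant are exactly those with SIndex%JamoTCount == 0.
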
func isHangul(r rune) bool {
 | 
			
		||||
	return HangulBase <= r && r < HangulEnd
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func isHangulWithoutJamoT(r rune) bool {
 | 
			
		||||
	if !isHangul(r) {
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	r -= HangulBase
 | 
			
		||||
	return r < JamoLVTCount && r%JamoTCount == 0
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func ccc(r rune) uint8 {
 | 
			
		||||
	return chars[r].ccc
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Insert a rune in a buffer, ordered by Canonical Combining Class.
func insertOrdered(b Decomposition, r rune) Decomposition {
	n := len(b)
	b = append(b, 0)
	cc := ccc(r)
	if cc > 0 {
		// Use bubble sort.
		for ; n > 0; n-- {
			if ccc(b[n-1]) <= cc {
				break
			}
			b[n] = b[n-1]
		}
	}
	b[n] = r
	return b
}

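// Worked example (ccc values as listed in UnicodeData.txt; it assumes chars
// has already been filled in by loadUnicodeData): U+0301 COMBINING ACUTE
// ACCENT has ccc 230 and U+0316 COMBINING GRAVE ACCENT BELOW has ccc 220, so
// inserting them in either order yields the same canonically ordered buffer:
//
//	b := insertOrdered(nil, 0x0301) // [U+0301]
//	b = insertOrdered(b, 0x0316)    // [U+0316, U+0301]: the lower ccc sorts first
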
// Recursively decompose.
 | 
			
		||||
func decomposeRecursive(form int, r rune, d Decomposition) Decomposition {
 | 
			
		||||
	dcomp := chars[r].forms[form].decomp
 | 
			
		||||
	if len(dcomp) == 0 {
 | 
			
		||||
		return insertOrdered(d, r)
 | 
			
		||||
	}
 | 
			
		||||
	for _, c := range dcomp {
 | 
			
		||||
		d = decomposeRecursive(form, c, d)
 | 
			
		||||
	}
 | 
			
		||||
	return d
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func completeCharFields(form int) {
 | 
			
		||||
	// Phase 0: pre-expand decomposition.
 | 
			
		||||
	for i := range chars {
 | 
			
		||||
		f := &chars[i].forms[form]
 | 
			
		||||
		if len(f.decomp) == 0 {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		exp := make(Decomposition, 0)
 | 
			
		||||
		for _, c := range f.decomp {
 | 
			
		||||
			exp = decomposeRecursive(form, c, exp)
 | 
			
		||||
		}
 | 
			
		||||
		f.expandedDecomp = exp
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Phase 1: composition exclusion, mark decomposition.
 | 
			
		||||
	for i := range chars {
 | 
			
		||||
		c := &chars[i]
 | 
			
		||||
		f := &c.forms[form]
 | 
			
		||||
 | 
			
		||||
		// Marks script-specific exclusions and version restricted.
 | 
			
		||||
		f.isOneWay = c.excludeInComp
 | 
			
		||||
 | 
			
		||||
		// Singletons
 | 
			
		||||
		f.isOneWay = f.isOneWay || len(f.decomp) == 1
 | 
			
		||||
 | 
			
		||||
		// Non-starter decompositions
 | 
			
		||||
		if len(f.decomp) > 1 {
 | 
			
		||||
			chk := c.ccc != 0 || chars[f.decomp[0]].ccc != 0
 | 
			
		||||
			f.isOneWay = f.isOneWay || chk
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Runes that decompose into more than two runes.
 | 
			
		||||
		f.isOneWay = f.isOneWay || len(f.decomp) > 2
 | 
			
		||||
 | 
			
		||||
		if form == FCompatibility {
 | 
			
		||||
			f.isOneWay = f.isOneWay || hasCompatDecomp(c.codePoint)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		for _, r := range f.decomp {
 | 
			
		||||
			chars[r].forms[form].inDecomp = true
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Phase 2: forward and backward combining.
 | 
			
		||||
	for i := range chars {
 | 
			
		||||
		c := &chars[i]
 | 
			
		||||
		f := &c.forms[form]
 | 
			
		||||
 | 
			
		||||
		if !f.isOneWay && len(f.decomp) == 2 {
 | 
			
		||||
			f0 := &chars[f.decomp[0]].forms[form]
 | 
			
		||||
			f1 := &chars[f.decomp[1]].forms[form]
 | 
			
		||||
			if !f0.isOneWay {
 | 
			
		||||
				f0.combinesForward = true
 | 
			
		||||
			}
 | 
			
		||||
			if !f1.isOneWay {
 | 
			
		||||
				f1.combinesBackward = true
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if isHangulWithoutJamoT(rune(i)) {
 | 
			
		||||
			f.combinesForward = true
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Phase 3: quick check values.
 | 
			
		||||
	for i := range chars {
 | 
			
		||||
		c := &chars[i]
 | 
			
		||||
		f := &c.forms[form]
 | 
			
		||||
 | 
			
		||||
		switch {
 | 
			
		||||
		case len(f.decomp) > 0:
 | 
			
		||||
			f.quickCheck[MDecomposed] = QCNo
 | 
			
		||||
		case isHangul(rune(i)):
 | 
			
		||||
			f.quickCheck[MDecomposed] = QCNo
 | 
			
		||||
		default:
 | 
			
		||||
			f.quickCheck[MDecomposed] = QCYes
 | 
			
		||||
		}
 | 
			
		||||
		switch {
 | 
			
		||||
		case f.isOneWay:
 | 
			
		||||
			f.quickCheck[MComposed] = QCNo
 | 
			
		||||
		case (i & 0xffff00) == JamoLBase:
 | 
			
		||||
			f.quickCheck[MComposed] = QCYes
 | 
			
		||||
			if JamoLBase <= i && i < JamoLEnd {
 | 
			
		||||
				f.combinesForward = true
 | 
			
		||||
			}
 | 
			
		||||
			if JamoVBase <= i && i < JamoVEnd {
 | 
			
		||||
				f.quickCheck[MComposed] = QCMaybe
 | 
			
		||||
				f.combinesBackward = true
 | 
			
		||||
				f.combinesForward = true
 | 
			
		||||
			}
 | 
			
		||||
			if JamoTBase <= i && i < JamoTEnd {
 | 
			
		||||
				f.quickCheck[MComposed] = QCMaybe
 | 
			
		||||
				f.combinesBackward = true
 | 
			
		||||
			}
 | 
			
		||||
		case !f.combinesBackward:
 | 
			
		||||
			f.quickCheck[MComposed] = QCYes
 | 
			
		||||
		default:
 | 
			
		||||
			f.quickCheck[MComposed] = QCMaybe
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func computeNonStarterCounts() {
	// Phase 4: leading and trailing non-starter count
	for i := range chars {
		c := &chars[i]

		runes := []rune{rune(i)}
		// We always use FCompatibility so that the CGJ insertion points do not
		// change for repeated normalizations with different forms.
		if exp := c.forms[FCompatibility].expandedDecomp; len(exp) > 0 {
			runes = exp
		}
		// We consider runes that combine backwards to be non-starters for the
		// purpose of Stream-Safe Text Processing.
		for _, r := range runes {
			if cr := &chars[r]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
				break
			}
			c.nLeadingNonStarters++
		}
		for i := len(runes) - 1; i >= 0; i-- {
			if cr := &chars[runes[i]]; cr.ccc == 0 && !cr.forms[FCompatibility].combinesBackward {
				break
			}
			c.nTrailingNonStarters++
		}
		if c.nTrailingNonStarters > 3 {
			log.Fatalf("%U: Decomposition with more than 3 (%d) trailing modifiers (%U)", i, c.nTrailingNonStarters, runes)
		}

		if isHangul(rune(i)) {
			c.nTrailingNonStarters = 2
			if isHangulWithoutJamoT(rune(i)) {
				c.nTrailingNonStarters = 1
			}
		}

		if l, t := c.nLeadingNonStarters, c.nTrailingNonStarters; l > 0 && l != t {
			log.Fatalf("%U: number of leading and trailing non-starters should be equal (%d vs %d)", i, l, t)
		}
		if t := c.nTrailingNonStarters; t > 3 {
			log.Fatalf("%U: number of trailing non-starters is %d > 3", i, t)
		}
	}
}
 | 
			
		||||
 | 
			
		||||
func printBytes(w io.Writer, b []byte, name string) {
 | 
			
		||||
	fmt.Fprintf(w, "// %s: %d bytes\n", name, len(b))
 | 
			
		||||
	fmt.Fprintf(w, "var %s = [...]byte {", name)
 | 
			
		||||
	for i, c := range b {
 | 
			
		||||
		switch {
 | 
			
		||||
		case i%64 == 0:
 | 
			
		||||
			fmt.Fprintf(w, "\n// Bytes %x - %x\n", i, i+63)
 | 
			
		||||
		case i%8 == 0:
 | 
			
		||||
			fmt.Fprintf(w, "\n")
 | 
			
		||||
		}
 | 
			
		||||
		fmt.Fprintf(w, "0x%.2X, ", c)
 | 
			
		||||
	}
 | 
			
		||||
	fmt.Fprint(w, "\n}\n\n")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// See forminfo.go for format.
func makeEntry(f *FormInfo, c *Char) uint16 {
	e := uint16(0)
	if r := c.codePoint; HangulBase <= r && r < HangulEnd {
		e |= 0x40
	}
	if f.combinesForward {
		e |= 0x20
	}
	if f.quickCheck[MDecomposed] == QCNo {
		e |= 0x4
	}
	switch f.quickCheck[MComposed] {
	case QCYes:
	case QCNo:
		e |= 0x10
	case QCMaybe:
		e |= 0x18
	default:
		log.Fatalf("Illegal quickcheck value %v.", f.quickCheck[MComposed])
	}
	e |= uint16(c.nTrailingNonStarters)
	return e
}

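// Recap of the bit layout produced by makeEntry above (the decoding side
// lives in forminfo.go, which is not part of this diff):
//
//	0x40  Hangul syllable
//	0x20  combines forward
//	0x10  NF*C quick check No (0x18 means Maybe)
//	0x04  NF*D quick check No
//	0x03  trailing non-starter count (0..3)
//
// printCharInfoTables later stores makeEntry(...)<<8 | ccc for runes without
// a stored decomposition, with 0x8000 marking such an inline entry.
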
// decompSet keeps track of unique decompositions, grouped by whether
 | 
			
		||||
// the decomposition is followed by a trailing and/or leading CCC.
 | 
			
		||||
type decompSet [7]map[string]bool
 | 
			
		||||
 | 
			
		||||
const (
 | 
			
		||||
	normalDecomp = iota
 | 
			
		||||
	firstMulti
 | 
			
		||||
	firstCCC
 | 
			
		||||
	endMulti
 | 
			
		||||
	firstLeadingCCC
 | 
			
		||||
	firstCCCZeroExcept
 | 
			
		||||
	firstStarterWithNLead
 | 
			
		||||
	lastDecomp
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "firstStarterWithNLead", "lastDecomp"}
 | 
			
		||||
 | 
			
		||||
func makeDecompSet() decompSet {
 | 
			
		||||
	m := decompSet{}
 | 
			
		||||
	for i := range m {
 | 
			
		||||
		m[i] = make(map[string]bool)
 | 
			
		||||
	}
 | 
			
		||||
	return m
 | 
			
		||||
}
 | 
			
		||||
func (m *decompSet) insert(key int, s string) {
 | 
			
		||||
	m[key][s] = true
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func printCharInfoTables(w io.Writer) int {
 | 
			
		||||
	mkstr := func(r rune, f *FormInfo) (int, string) {
 | 
			
		||||
		d := f.expandedDecomp
 | 
			
		||||
		s := string([]rune(d))
 | 
			
		||||
		if max := 1 << 6; len(s) >= max {
 | 
			
		||||
			const msg = "%U: too many bytes in decomposition: %d >= %d"
 | 
			
		||||
			log.Fatalf(msg, r, len(s), max)
 | 
			
		||||
		}
 | 
			
		||||
		head := uint8(len(s))
 | 
			
		||||
		if f.quickCheck[MComposed] != QCYes {
 | 
			
		||||
			head |= 0x40
 | 
			
		||||
		}
 | 
			
		||||
		if f.combinesForward {
 | 
			
		||||
			head |= 0x80
 | 
			
		||||
		}
 | 
			
		||||
		s = string([]byte{head}) + s
 | 
			
		||||
 | 
			
		||||
		lccc := ccc(d[0])
 | 
			
		||||
		tccc := ccc(d[len(d)-1])
 | 
			
		||||
		cc := ccc(r)
 | 
			
		||||
		if cc != 0 && lccc == 0 && tccc == 0 {
 | 
			
		||||
			log.Fatalf("%U: trailing and leading ccc are 0 for non-zero ccc %d", r, cc)
 | 
			
		||||
		}
 | 
			
		||||
		if tccc < lccc && lccc != 0 {
 | 
			
		||||
			const msg = "%U: lccc (%d) must be <= tccc (%d)"
 | 
			
		||||
			log.Fatalf(msg, r, lccc, tccc)
 | 
			
		||||
		}
 | 
			
		||||
		index := normalDecomp
 | 
			
		||||
		nTrail := chars[r].nTrailingNonStarters
 | 
			
		||||
		nLead := chars[r].nLeadingNonStarters
 | 
			
		||||
		if tccc > 0 || lccc > 0 || nTrail > 0 {
 | 
			
		||||
			tccc <<= 2
 | 
			
		||||
			tccc |= nTrail
 | 
			
		||||
			s += string([]byte{tccc})
 | 
			
		||||
			index = endMulti
 | 
			
		||||
			for _, r := range d[1:] {
 | 
			
		||||
				if ccc(r) == 0 {
 | 
			
		||||
					index = firstCCC
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
			if lccc > 0 || nLead > 0 {
 | 
			
		||||
				s += string([]byte{lccc})
 | 
			
		||||
				if index == firstCCC {
 | 
			
		||||
					log.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
 | 
			
		||||
				}
 | 
			
		||||
				index = firstLeadingCCC
 | 
			
		||||
			}
 | 
			
		||||
			if cc != lccc {
 | 
			
		||||
				if cc != 0 {
 | 
			
		||||
					log.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
 | 
			
		||||
				}
 | 
			
		||||
				index = firstCCCZeroExcept
 | 
			
		||||
			}
 | 
			
		||||
		} else if len(d) > 1 {
 | 
			
		||||
			index = firstMulti
 | 
			
		||||
		}
 | 
			
		||||
		return index, s
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	decompSet := makeDecompSet()
 | 
			
		||||
	const nLeadStr = "\x00\x01" // 0-byte length and tccc with nTrail.
 | 
			
		||||
	decompSet.insert(firstStarterWithNLead, nLeadStr)
 | 
			
		||||
 | 
			
		||||
	// Store the uniqued decompositions in a byte buffer,
 | 
			
		||||
	// preceded by their byte length.
 | 
			
		||||
	for _, c := range chars {
 | 
			
		||||
		for _, f := range c.forms {
 | 
			
		||||
			if len(f.expandedDecomp) == 0 {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			if f.combinesBackward {
 | 
			
		||||
				log.Fatalf("%U: combinesBackward and decompose", c.codePoint)
 | 
			
		||||
			}
 | 
			
		||||
			index, s := mkstr(c.codePoint, &f)
 | 
			
		||||
			decompSet.insert(index, s)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	decompositions := bytes.NewBuffer(make([]byte, 0, 10000))
 | 
			
		||||
	size := 0
 | 
			
		||||
	positionMap := make(map[string]uint16)
 | 
			
		||||
	decompositions.WriteString("\000")
 | 
			
		||||
	fmt.Fprintln(w, "const (")
 | 
			
		||||
	for i, m := range decompSet {
 | 
			
		||||
		sa := []string{}
 | 
			
		||||
		for s := range m {
 | 
			
		||||
			sa = append(sa, s)
 | 
			
		||||
		}
 | 
			
		||||
		sort.Strings(sa)
 | 
			
		||||
		for _, s := range sa {
 | 
			
		||||
			p := decompositions.Len()
 | 
			
		||||
			decompositions.WriteString(s)
 | 
			
		||||
			positionMap[s] = uint16(p)
 | 
			
		||||
		}
 | 
			
		||||
		if cname[i] != "" {
 | 
			
		||||
			fmt.Fprintf(w, "%s = 0x%X\n", cname[i], decompositions.Len())
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	fmt.Fprintln(w, "maxDecomp = 0x8000")
 | 
			
		||||
	fmt.Fprintln(w, ")")
 | 
			
		||||
	b := decompositions.Bytes()
 | 
			
		||||
	printBytes(w, b, "decomps")
 | 
			
		||||
	size += len(b)
 | 
			
		||||
 | 
			
		||||
	varnames := []string{"nfc", "nfkc"}
 | 
			
		||||
	for i := 0; i < FNumberOfFormTypes; i++ {
 | 
			
		||||
		trie := triegen.NewTrie(varnames[i])
 | 
			
		||||
 | 
			
		||||
		for r, c := range chars {
 | 
			
		||||
			f := c.forms[i]
 | 
			
		||||
			d := f.expandedDecomp
 | 
			
		||||
			if len(d) != 0 {
 | 
			
		||||
				_, key := mkstr(c.codePoint, &f)
 | 
			
		||||
				trie.Insert(rune(r), uint64(positionMap[key]))
 | 
			
		||||
				if c.ccc != ccc(d[0]) {
 | 
			
		||||
					// We assume the lead ccc of a decomposition !=0 in this case.
 | 
			
		||||
					if ccc(d[0]) == 0 {
 | 
			
		||||
						log.Fatalf("Expected leading CCC to be non-zero; ccc is %d", c.ccc)
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			} else if c.nLeadingNonStarters > 0 && len(f.expandedDecomp) == 0 && c.ccc == 0 && !f.combinesBackward {
 | 
			
		||||
				// Handle cases where it can't be detected that the nLead should be equal
 | 
			
		||||
				// to nTrail.
 | 
			
		||||
				trie.Insert(c.codePoint, uint64(positionMap[nLeadStr]))
 | 
			
		||||
			} else if v := makeEntry(&f, &c)<<8 | uint16(c.ccc); v != 0 {
 | 
			
		||||
				trie.Insert(c.codePoint, uint64(0x8000|v))
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		sz, err := trie.Gen(w, triegen.Compact(&normCompacter{name: varnames[i]}))
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			log.Fatal(err)
 | 
			
		||||
		}
 | 
			
		||||
		size += sz
 | 
			
		||||
	}
 | 
			
		||||
	return size
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func contains(sa []string, s string) bool {
 | 
			
		||||
	for _, a := range sa {
 | 
			
		||||
		if a == s {
 | 
			
		||||
			return true
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return false
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func makeTables() {
 | 
			
		||||
	w := &bytes.Buffer{}
 | 
			
		||||
 | 
			
		||||
	size := 0
 | 
			
		||||
	if *tablelist == "" {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	list := strings.Split(*tablelist, ",")
 | 
			
		||||
	if *tablelist == "all" {
 | 
			
		||||
		list = []string{"recomp", "info"}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Compute maximum decomposition size.
 | 
			
		||||
	max := 0
 | 
			
		||||
	for _, c := range chars {
 | 
			
		||||
		if n := len(string(c.forms[FCompatibility].expandedDecomp)); n > max {
 | 
			
		||||
			max = n
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fmt.Fprintln(w, "const (")
 | 
			
		||||
	fmt.Fprintln(w, "\t// Version is the Unicode edition from which the tables are derived.")
 | 
			
		||||
	fmt.Fprintf(w, "\tVersion = %q\n", gen.UnicodeVersion())
 | 
			
		||||
	fmt.Fprintln(w)
 | 
			
		||||
	fmt.Fprintln(w, "\t// MaxTransformChunkSize indicates the maximum number of bytes that Transform")
 | 
			
		||||
	fmt.Fprintln(w, "\t// may need to write atomically for any Form. Making a destination buffer at")
 | 
			
		||||
	fmt.Fprintln(w, "\t// least this size ensures that Transform can always make progress and that")
 | 
			
		||||
	fmt.Fprintln(w, "\t// the user does not need to grow the buffer on an ErrShortDst.")
 | 
			
		||||
	fmt.Fprintf(w, "\tMaxTransformChunkSize = %d+maxNonStarters*4\n", len(string(0x034F))+max)
 | 
			
		||||
	fmt.Fprintln(w, ")\n")
 | 
			
		||||
 | 
			
		||||
	// Print the CCC remap table.
 | 
			
		||||
	size += len(cccMap)
 | 
			
		||||
	fmt.Fprintf(w, "var ccc = [%d]uint8{", len(cccMap))
 | 
			
		||||
	for i := 0; i < len(cccMap); i++ {
 | 
			
		||||
		if i%8 == 0 {
 | 
			
		||||
			fmt.Fprintln(w)
 | 
			
		||||
		}
 | 
			
		||||
		fmt.Fprintf(w, "%3d, ", cccMap[uint8(i)])
 | 
			
		||||
	}
 | 
			
		||||
	fmt.Fprintln(w, "\n}\n")
 | 
			
		||||
 | 
			
		||||
	if contains(list, "info") {
 | 
			
		||||
		size += printCharInfoTables(w)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if contains(list, "recomp") {
 | 
			
		||||
		// Note that we use 32 bit keys, instead of 64 bit.
 | 
			
		||||
		// This clips the bits of three entries, but we know
 | 
			
		||||
		// this won't cause a collision. The compiler will catch
 | 
			
		||||
		// any changes made to UnicodeData.txt that introduces
 | 
			
		||||
		// a collision.
 | 
			
		||||
		// Note that the recomposition map for NFC and NFKC
 | 
			
		||||
		// are identical.
 | 
			
		||||
 | 
			
		||||
		// Recomposition map
 | 
			
		||||
		nrentries := 0
 | 
			
		||||
		for _, c := range chars {
 | 
			
		||||
			f := c.forms[FCanonical]
 | 
			
		||||
			if !f.isOneWay && len(f.decomp) > 0 {
 | 
			
		||||
				nrentries++
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		sz := nrentries * 8
 | 
			
		||||
		size += sz
 | 
			
		||||
		fmt.Fprintf(w, "// recompMap: %d bytes (entries only)\n", sz)
 | 
			
		||||
		fmt.Fprintln(w, "var recompMap = map[uint32]rune{")
 | 
			
		||||
		for i, c := range chars {
 | 
			
		||||
			f := c.forms[FCanonical]
 | 
			
		||||
			d := f.decomp
 | 
			
		||||
			if !f.isOneWay && len(d) > 0 {
 | 
			
		||||
				key := uint32(uint16(d[0]))<<16 + uint32(uint16(d[1]))
 | 
			
		||||
				fmt.Fprintf(w, "0x%.8X: 0x%.4X,\n", key, i)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		fmt.Fprintf(w, "}\n\n")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
 | 
			
		||||
	gen.WriteGoFile("tables.go", "norm", w.Bytes())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func printChars() {
 | 
			
		||||
	if *verbose {
 | 
			
		||||
		for _, c := range chars {
 | 
			
		||||
			if !c.isValid() || c.state == SMissing {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			fmt.Println(c)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// verifyComputed does various consistency tests.
 | 
			
		||||
func verifyComputed() {
 | 
			
		||||
	for i, c := range chars {
 | 
			
		||||
		for _, f := range c.forms {
 | 
			
		||||
			isNo := (f.quickCheck[MDecomposed] == QCNo)
 | 
			
		||||
			if (len(f.decomp) > 0) != isNo && !isHangul(rune(i)) {
 | 
			
		||||
				log.Fatalf("%U: NF*D QC must be No if rune decomposes", i)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			isMaybe := f.quickCheck[MComposed] == QCMaybe
 | 
			
		||||
			if f.combinesBackward != isMaybe {
 | 
			
		||||
				log.Fatalf("%U: NF*C QC must be Maybe if combinesBackward", i)
 | 
			
		||||
			}
 | 
			
		||||
			if len(f.decomp) > 0 && f.combinesForward && isMaybe {
 | 
			
		||||
				log.Fatalf("%U: NF*C QC must be Yes or No if combinesForward and decomposes", i)
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			if len(f.expandedDecomp) != 0 {
 | 
			
		||||
				continue
 | 
			
		||||
			}
 | 
			
		||||
			if a, b := c.nLeadingNonStarters > 0, (c.ccc > 0 || f.combinesBackward); a != b {
 | 
			
		||||
				// We accept these runes to be treated differently (it only affects
 | 
			
		||||
				// segment breaking in iteration, most likely on improper use), but
 | 
			
		||||
				// reconsider if more characters are added.
 | 
			
		||||
				// U+FF9E HALFWIDTH KATAKANA VOICED SOUND MARK;Lm;0;L;<narrow> 3099;;;;N;;;;;
 | 
			
		||||
				// U+FF9F HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK;Lm;0;L;<narrow> 309A;;;;N;;;;;
 | 
			
		||||
				// U+3133 HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<compat> 11AA;;;;N;HANGUL LETTER GIYEOG SIOS;;;;
 | 
			
		||||
				// U+318E HANGUL LETTER ARAEAE;Lo;0;L;<compat> 11A1;;;;N;HANGUL LETTER ALAE AE;;;;
 | 
			
		||||
				// U+FFA3 HALFWIDTH HANGUL LETTER KIYEOK-SIOS;Lo;0;L;<narrow> 3133;;;;N;HALFWIDTH HANGUL LETTER GIYEOG SIOS;;;;
 | 
			
		||||
				// U+FFDC HALFWIDTH HANGUL LETTER I;Lo;0;L;<narrow> 3163;;;;N;;;;;
 | 
			
		||||
				if i != 0xFF9E && i != 0xFF9F && !(0x3133 <= i && i <= 0x318E) && !(0xFFA3 <= i && i <= 0xFFDC) {
 | 
			
		||||
					log.Fatalf("%U: nLead was %v; want %v", i, a, b)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		nfc := c.forms[FCanonical]
 | 
			
		||||
		nfkc := c.forms[FCompatibility]
 | 
			
		||||
		if nfc.combinesBackward != nfkc.combinesBackward {
 | 
			
		||||
			log.Fatalf("%U: Cannot combine combinesBackward\n", c.codePoint)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Use values in DerivedNormalizationProps.txt to compare against the
 | 
			
		||||
// values we computed.
 | 
			
		||||
// DerivedNormalizationProps.txt has form:
 | 
			
		||||
// 00C0..00C5    ; NFD_QC; N # ...
 | 
			
		||||
// 0374          ; NFD_QC; N # ...
 | 
			
		||||
// See http://unicode.org/reports/tr44/ for full explanation
 | 
			
		||||
func testDerived() {
 | 
			
		||||
	f := gen.OpenUCDFile("DerivedNormalizationProps.txt")
 | 
			
		||||
	defer f.Close()
 | 
			
		||||
	p := ucd.New(f)
 | 
			
		||||
	for p.Next() {
 | 
			
		||||
		r := p.Rune(0)
 | 
			
		||||
		c := &chars[r]
 | 
			
		||||
 | 
			
		||||
		var ftype, mode int
 | 
			
		||||
		qt := p.String(1)
 | 
			
		||||
		switch qt {
 | 
			
		||||
		case "NFC_QC":
 | 
			
		||||
			ftype, mode = FCanonical, MComposed
 | 
			
		||||
		case "NFD_QC":
 | 
			
		||||
			ftype, mode = FCanonical, MDecomposed
 | 
			
		||||
		case "NFKC_QC":
 | 
			
		||||
			ftype, mode = FCompatibility, MComposed
 | 
			
		||||
		case "NFKD_QC":
 | 
			
		||||
			ftype, mode = FCompatibility, MDecomposed
 | 
			
		||||
		default:
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		var qr QCResult
 | 
			
		||||
		switch p.String(2) {
 | 
			
		||||
		case "Y":
 | 
			
		||||
			qr = QCYes
 | 
			
		||||
		case "N":
 | 
			
		||||
			qr = QCNo
 | 
			
		||||
		case "M":
 | 
			
		||||
			qr = QCMaybe
 | 
			
		||||
		default:
 | 
			
		||||
			log.Fatalf(`Unexpected quick check value "%s"`, p.String(2))
 | 
			
		||||
		}
 | 
			
		||||
		if got := c.forms[ftype].quickCheck[mode]; got != qr {
 | 
			
		||||
			log.Printf("%U: FAILED %s (was %v need %v)\n", r, qt, got, qr)
 | 
			
		||||
		}
 | 
			
		||||
		c.forms[ftype].verified[mode] = true
 | 
			
		||||
	}
 | 
			
		||||
	if err := p.Err(); err != nil {
 | 
			
		||||
		log.Fatal(err)
 | 
			
		||||
	}
 | 
			
		||||
	// Any unspecified value must be QCYes. Verify this.
 | 
			
		||||
	for i, c := range chars {
 | 
			
		||||
		for j, fd := range c.forms {
 | 
			
		||||
			for k, qr := range fd.quickCheck {
 | 
			
		||||
				if !fd.verified[k] && qr != QCYes {
 | 
			
		||||
					m := "%U: FAIL F:%d M:%d (was %v need Yes) %s\n"
 | 
			
		||||
					log.Printf(m, i, j, k, qr, c.name)
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var testHeader = `const (
 | 
			
		||||
	Yes = iota
 | 
			
		||||
	No
 | 
			
		||||
	Maybe
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
type formData struct {
 | 
			
		||||
	qc              uint8
 | 
			
		||||
	combinesForward bool
 | 
			
		||||
	decomposition   string
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type runeData struct {
 | 
			
		||||
	r      rune
 | 
			
		||||
	ccc    uint8
 | 
			
		||||
	nLead  uint8
 | 
			
		||||
	nTrail uint8
 | 
			
		||||
	f      [2]formData // 0: canonical; 1: compatibility
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func f(qc uint8, cf bool, dec string) [2]formData {
 | 
			
		||||
	return [2]formData{{qc, cf, dec}, {qc, cf, dec}}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func g(qc, qck uint8, cf, cfk bool, d, dk string) [2]formData {
 | 
			
		||||
	return [2]formData{{qc, cf, d}, {qck, cfk, dk}}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var testData = []runeData{
 | 
			
		||||
`
 | 
			
		||||
 | 
			
		||||
func printTestdata() {
 | 
			
		||||
	type lastInfo struct {
 | 
			
		||||
		ccc    uint8
 | 
			
		||||
		nLead  uint8
 | 
			
		||||
		nTrail uint8
 | 
			
		||||
		f      string
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	last := lastInfo{}
 | 
			
		||||
	w := &bytes.Buffer{}
 | 
			
		||||
	fmt.Fprintf(w, testHeader)
 | 
			
		||||
	for r, c := range chars {
 | 
			
		||||
		f := c.forms[FCanonical]
 | 
			
		||||
		qc, cf, d := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
 | 
			
		||||
		f = c.forms[FCompatibility]
 | 
			
		||||
		qck, cfk, dk := f.quickCheck[MComposed], f.combinesForward, string(f.expandedDecomp)
 | 
			
		||||
		s := ""
 | 
			
		||||
		if d == dk && qc == qck && cf == cfk {
 | 
			
		||||
			s = fmt.Sprintf("f(%s, %v, %q)", qc, cf, d)
 | 
			
		||||
		} else {
 | 
			
		||||
			s = fmt.Sprintf("g(%s, %s, %v, %v, %q, %q)", qc, qck, cf, cfk, d, dk)
 | 
			
		||||
		}
 | 
			
		||||
		current := lastInfo{c.ccc, c.nLeadingNonStarters, c.nTrailingNonStarters, s}
 | 
			
		||||
		if last != current {
 | 
			
		||||
			fmt.Fprintf(w, "\t{0x%x, %d, %d, %d, %s},\n", r, c.origCCC, c.nLeadingNonStarters, c.nTrailingNonStarters, s)
 | 
			
		||||
			last = current
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	fmt.Fprintln(w, "}")
 | 
			
		||||
	gen.WriteGoFile("data_test.go", "norm", w.Bytes())
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
609 vendor/golang.org/x/text/unicode/norm/normalize.go generated vendored Normal file
@@ -0,0 +1,609 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Note: the file data_test.go that is generated should not be checked in.
//go:generate go run maketables.go triegen.go
//go:generate go test -tags test

// Package norm contains types and functions for normalizing Unicode strings.
package norm // import "golang.org/x/text/unicode/norm"

import (
	"unicode/utf8"

	"golang.org/x/text/transform"
)

// A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are:
//
//   NFC   Unicode Normalization Form C
//   NFD   Unicode Normalization Form D
//   NFKC  Unicode Normalization Form KC
//   NFKD  Unicode Normalization Form KD
//
// For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides:
//   f(x) == append(f(x[0:n]), f(x[n:])...)
//
// References: http://unicode.org/reports/tr15/ and
// http://unicode.org/notes/tn5/.
type Form int

const (
	NFC Form = iota
	NFD
	NFKC
	NFKD
)

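// A short usage example (illustrative; "\u00e9" is precomposed é and
// "e\u0301" is the decomposed letter-plus-accent pair):
//
//	norm.NFC.String("e\u0301")         // "\u00e9": composes
//	norm.NFD.String("\u00e9")          // "e\u0301": decomposes
//	norm.NFC.IsNormalString("e\u0301") // false
//	norm.NFC.IsNormalString("\u00e9")  // true
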
// Bytes returns f(b). May return b if f(b) = b.
 | 
			
		||||
func (f Form) Bytes(b []byte) []byte {
 | 
			
		||||
	src := inputBytes(b)
 | 
			
		||||
	ft := formTable[f]
 | 
			
		||||
	n, ok := ft.quickSpan(src, 0, len(b), true)
 | 
			
		||||
	if ok {
 | 
			
		||||
		return b
 | 
			
		||||
	}
 | 
			
		||||
	out := make([]byte, n, len(b))
 | 
			
		||||
	copy(out, b[0:n])
 | 
			
		||||
	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b), out: out, flushF: appendFlush}
 | 
			
		||||
	return doAppendInner(&rb, n)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// String returns f(s).
 | 
			
		||||
func (f Form) String(s string) string {
 | 
			
		||||
	src := inputString(s)
 | 
			
		||||
	ft := formTable[f]
 | 
			
		||||
	n, ok := ft.quickSpan(src, 0, len(s), true)
 | 
			
		||||
	if ok {
 | 
			
		||||
		return s
 | 
			
		||||
	}
 | 
			
		||||
	out := make([]byte, n, len(s))
 | 
			
		||||
	copy(out, s[0:n])
 | 
			
		||||
	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s), out: out, flushF: appendFlush}
 | 
			
		||||
	return string(doAppendInner(&rb, n))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IsNormal returns true if b == f(b).
 | 
			
		||||
func (f Form) IsNormal(b []byte) bool {
 | 
			
		||||
	src := inputBytes(b)
 | 
			
		||||
	ft := formTable[f]
 | 
			
		||||
	bp, ok := ft.quickSpan(src, 0, len(b), true)
 | 
			
		||||
	if ok {
 | 
			
		||||
		return true
 | 
			
		||||
	}
 | 
			
		||||
	rb := reorderBuffer{f: *ft, src: src, nsrc: len(b)}
 | 
			
		||||
	rb.setFlusher(nil, cmpNormalBytes)
 | 
			
		||||
	for bp < len(b) {
 | 
			
		||||
		rb.out = b[bp:]
 | 
			
		||||
		if bp = decomposeSegment(&rb, bp, true); bp < 0 {
 | 
			
		||||
			return false
 | 
			
		||||
		}
 | 
			
		||||
		bp, _ = rb.f.quickSpan(rb.src, bp, len(b), true)
 | 
			
		||||
	}
 | 
			
		||||
	return true
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func cmpNormalBytes(rb *reorderBuffer) bool {
 | 
			
		||||
	b := rb.out
 | 
			
		||||
	for i := 0; i < rb.nrune; i++ {
 | 
			
		||||
		info := rb.rune[i]
 | 
			
		||||
		if int(info.size) > len(b) {
 | 
			
		||||
			return false
 | 
			
		||||
		}
 | 
			
		||||
		p := info.pos
 | 
			
		||||
		pe := p + info.size
 | 
			
		||||
		for ; p < pe; p++ {
 | 
			
		||||
			if b[0] != rb.byte[p] {
 | 
			
		||||
				return false
 | 
			
		||||
			}
 | 
			
		||||
			b = b[1:]
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return true
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IsNormalString returns true if s == f(s).
 | 
			
		||||
func (f Form) IsNormalString(s string) bool {
 | 
			
		||||
	src := inputString(s)
 | 
			
		||||
	ft := formTable[f]
 | 
			
		||||
	bp, ok := ft.quickSpan(src, 0, len(s), true)
 | 
			
		||||
	if ok {
 | 
			
		||||
		return true
 | 
			
		||||
	}
 | 
			
		||||
	rb := reorderBuffer{f: *ft, src: src, nsrc: len(s)}
 | 
			
		||||
	rb.setFlusher(nil, func(rb *reorderBuffer) bool {
 | 
			
		||||
		for i := 0; i < rb.nrune; i++ {
 | 
			
		||||
			info := rb.rune[i]
 | 
			
		||||
			if bp+int(info.size) > len(s) {
 | 
			
		||||
				return false
 | 
			
		||||
			}
 | 
			
		||||
			p := info.pos
 | 
			
		||||
			pe := p + info.size
 | 
			
		||||
			for ; p < pe; p++ {
 | 
			
		||||
				if s[bp] != rb.byte[p] {
 | 
			
		||||
					return false
 | 
			
		||||
				}
 | 
			
		||||
				bp++
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		return true
 | 
			
		||||
	})
 | 
			
		||||
	for bp < len(s) {
 | 
			
		||||
		if bp = decomposeSegment(&rb, bp, true); bp < 0 {
 | 
			
		||||
			return false
 | 
			
		||||
		}
 | 
			
		||||
		bp, _ = rb.f.quickSpan(rb.src, bp, len(s), true)
 | 
			
		||||
	}
 | 
			
		||||
	return true
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// patchTail fixes a case where a rune may be incorrectly normalized
 | 
			
		||||
// if it is followed by illegal continuation bytes. It returns the
 | 
			
		||||
// patched buffer and whether the decomposition is still in progress.
 | 
			
		||||
func patchTail(rb *reorderBuffer) bool {
 | 
			
		||||
	info, p := lastRuneStart(&rb.f, rb.out)
 | 
			
		||||
	if p == -1 || info.size == 0 {
 | 
			
		||||
		return true
 | 
			
		||||
	}
 | 
			
		||||
	end := p + int(info.size)
 | 
			
		||||
	extra := len(rb.out) - end
 | 
			
		||||
	if extra > 0 {
 | 
			
		||||
		// Potentially allocating memory. However, this only
 | 
			
		||||
		// happens with ill-formed UTF-8.
 | 
			
		||||
		x := make([]byte, 0)
 | 
			
		||||
		x = append(x, rb.out[len(rb.out)-extra:]...)
 | 
			
		||||
		rb.out = rb.out[:end]
 | 
			
		||||
		decomposeToLastBoundary(rb)
 | 
			
		||||
		rb.doFlush()
 | 
			
		||||
		rb.out = append(rb.out, x...)
 | 
			
		||||
		return false
 | 
			
		||||
	}
 | 
			
		||||
	buf := rb.out[p:]
 | 
			
		||||
	rb.out = rb.out[:p]
 | 
			
		||||
	decomposeToLastBoundary(rb)
 | 
			
		||||
	if s := rb.ss.next(info); s == ssStarter {
 | 
			
		||||
		rb.doFlush()
 | 
			
		||||
		rb.ss.first(info)
 | 
			
		||||
	} else if s == ssOverflow {
 | 
			
		||||
		rb.doFlush()
 | 
			
		||||
		rb.insertCGJ()
 | 
			
		||||
		rb.ss = 0
 | 
			
		||||
	}
 | 
			
		||||
	rb.insertUnsafe(inputBytes(buf), 0, info)
 | 
			
		||||
	return true
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func appendQuick(rb *reorderBuffer, i int) int {
 | 
			
		||||
	if rb.nsrc == i {
 | 
			
		||||
		return i
 | 
			
		||||
	}
 | 
			
		||||
	end, _ := rb.f.quickSpan(rb.src, i, rb.nsrc, true)
 | 
			
		||||
	rb.out = rb.src.appendSlice(rb.out, i, end)
 | 
			
		||||
	return end
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Append returns f(append(out, b...)).
// The buffer out must be nil, empty, or equal to f(out).
func (f Form) Append(out []byte, src ...byte) []byte {
	return f.doAppend(out, inputBytes(src), len(src))
}

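// Minimal usage sketch (out must already be in form f, per the contract above):
//
//	b := []byte("e")                            // "e" is already NFC
//	b = norm.NFC.Append(b, []byte("\u0301")...) // b should now hold "\u00e9"
//
// Append recomposes across the join, which is why doAppend below may
// decompose back to the last boundary before merging the new input.
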
func (f Form) doAppend(out []byte, src input, n int) []byte {
 | 
			
		||||
	if n == 0 {
 | 
			
		||||
		return out
 | 
			
		||||
	}
 | 
			
		||||
	ft := formTable[f]
 | 
			
		||||
	// Attempt to do a quickSpan first so we can avoid initializing the reorderBuffer.
 | 
			
		||||
	if len(out) == 0 {
 | 
			
		||||
		p, _ := ft.quickSpan(src, 0, n, true)
 | 
			
		||||
		out = src.appendSlice(out, 0, p)
 | 
			
		||||
		if p == n {
 | 
			
		||||
			return out
 | 
			
		||||
		}
 | 
			
		||||
		rb := reorderBuffer{f: *ft, src: src, nsrc: n, out: out, flushF: appendFlush}
 | 
			
		||||
		return doAppendInner(&rb, p)
 | 
			
		||||
	}
 | 
			
		||||
	rb := reorderBuffer{f: *ft, src: src, nsrc: n}
 | 
			
		||||
	return doAppend(&rb, out, 0)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func doAppend(rb *reorderBuffer, out []byte, p int) []byte {
 | 
			
		||||
	rb.setFlusher(out, appendFlush)
 | 
			
		||||
	src, n := rb.src, rb.nsrc
 | 
			
		||||
	doMerge := len(out) > 0
 | 
			
		||||
	if q := src.skipContinuationBytes(p); q > p {
 | 
			
		||||
		// Move leading non-starters to destination.
 | 
			
		||||
		rb.out = src.appendSlice(rb.out, p, q)
 | 
			
		||||
		p = q
 | 
			
		||||
		doMerge = patchTail(rb)
 | 
			
		||||
	}
 | 
			
		||||
	fd := &rb.f
 | 
			
		||||
	if doMerge {
 | 
			
		||||
		var info Properties
 | 
			
		||||
		if p < n {
 | 
			
		||||
			info = fd.info(src, p)
 | 
			
		||||
			if !info.BoundaryBefore() || info.nLeadingNonStarters() > 0 {
 | 
			
		||||
				if p == 0 {
 | 
			
		||||
					decomposeToLastBoundary(rb)
 | 
			
		||||
				}
 | 
			
		||||
				p = decomposeSegment(rb, p, true)
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if info.size == 0 {
 | 
			
		||||
			rb.doFlush()
 | 
			
		||||
			// Append incomplete UTF-8 encoding.
 | 
			
		||||
			return src.appendSlice(rb.out, p, n)
 | 
			
		||||
		}
 | 
			
		||||
		if rb.nrune > 0 {
 | 
			
		||||
			return doAppendInner(rb, p)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	p = appendQuick(rb, p)
 | 
			
		||||
	return doAppendInner(rb, p)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func doAppendInner(rb *reorderBuffer, p int) []byte {
 | 
			
		||||
	for n := rb.nsrc; p < n; {
 | 
			
		||||
		p = decomposeSegment(rb, p, true)
 | 
			
		||||
		p = appendQuick(rb, p)
 | 
			
		||||
	}
 | 
			
		||||
	return rb.out
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// AppendString returns f(append(out, []byte(s))).
 | 
			
		||||
// The buffer out must be nil, empty, or equal to f(out).
 | 
			
		||||
func (f Form) AppendString(out []byte, src string) []byte {
 | 
			
		||||
	return f.doAppend(out, inputString(src), len(src))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// QuickSpan returns a boundary n such that b[0:n] == f(b[0:n]).
 | 
			
		||||
// It is not guaranteed to return the largest such n.
 | 
			
		||||
func (f Form) QuickSpan(b []byte) int {
 | 
			
		||||
	n, _ := formTable[f].quickSpan(inputBytes(b), 0, len(b), true)
 | 
			
		||||
	return n
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Span implements transform.SpanningTransformer. It returns a boundary n such
 | 
			
		||||
// that b[0:n] == f(b[0:n]). It is not guaranteed to return the largest such n.
 | 
			
		||||
func (f Form) Span(b []byte, atEOF bool) (n int, err error) {
 | 
			
		||||
	n, ok := formTable[f].quickSpan(inputBytes(b), 0, len(b), atEOF)
 | 
			
		||||
	if n < len(b) {
 | 
			
		||||
		if !ok {
 | 
			
		||||
			err = transform.ErrEndOfSpan
 | 
			
		||||
		} else {
 | 
			
		||||
			err = transform.ErrShortSrc
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return n, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SpanString returns a boundary n such that s[0:n] == f(s[0:n]).
 | 
			
		||||
// It is not guaranteed to return the largest such n.
 | 
			
		||||
func (f Form) SpanString(s string, atEOF bool) (n int, err error) {
 | 
			
		||||
	n, ok := formTable[f].quickSpan(inputString(s), 0, len(s), atEOF)
 | 
			
		||||
	if n < len(s) {
 | 
			
		||||
		if !ok {
 | 
			
		||||
			err = transform.ErrEndOfSpan
 | 
			
		||||
		} else {
 | 
			
		||||
			err = transform.ErrShortSrc
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return n, err
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// quickSpan returns a boundary n such that src[0:n] == f(src[0:n]) and
// whether any non-normalized parts were found. If atEOF is false, n will
// not point past the last segment if this segment might become
// non-normalized by appending other runes.
 | 
			
		||||
func (f *formInfo) quickSpan(src input, i, end int, atEOF bool) (n int, ok bool) {
 | 
			
		||||
	var lastCC uint8
 | 
			
		||||
	ss := streamSafe(0)
 | 
			
		||||
	lastSegStart := i
 | 
			
		||||
	for n = end; i < n; {
 | 
			
		||||
		if j := src.skipASCII(i, n); i != j {
 | 
			
		||||
			i = j
 | 
			
		||||
			lastSegStart = i - 1
 | 
			
		||||
			lastCC = 0
 | 
			
		||||
			ss = 0
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		info := f.info(src, i)
 | 
			
		||||
		if info.size == 0 {
 | 
			
		||||
			if atEOF {
 | 
			
		||||
				// include incomplete runes
 | 
			
		||||
				return n, true
 | 
			
		||||
			}
 | 
			
		||||
			return lastSegStart, true
 | 
			
		||||
		}
 | 
			
		||||
		// This block needs to be before the next, because it is possible to
 | 
			
		||||
		// have an overflow for runes that are starters (e.g. with U+FF9E).
 | 
			
		||||
		switch ss.next(info) {
 | 
			
		||||
		case ssStarter:
 | 
			
		||||
			lastSegStart = i
 | 
			
		||||
		case ssOverflow:
 | 
			
		||||
			return lastSegStart, false
 | 
			
		||||
		case ssSuccess:
 | 
			
		||||
			if lastCC > info.ccc {
 | 
			
		||||
				return lastSegStart, false
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		if f.composing {
 | 
			
		||||
			if !info.isYesC() {
 | 
			
		||||
				break
 | 
			
		||||
			}
 | 
			
		||||
		} else {
 | 
			
		||||
			if !info.isYesD() {
 | 
			
		||||
				break
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
		lastCC = info.ccc
 | 
			
		||||
		i += int(info.size)
 | 
			
		||||
	}
 | 
			
		||||
	if i == n {
 | 
			
		||||
		if !atEOF {
 | 
			
		||||
			n = lastSegStart
 | 
			
		||||
		}
 | 
			
		||||
		return n, true
 | 
			
		||||
	}
 | 
			
		||||
	return lastSegStart, false
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// QuickSpanString returns a boundary n such that s[0:n] == f(s[0:n]).
 | 
			
		||||
// It is not guaranteed to return the largest such n.
 | 
			
		||||
func (f Form) QuickSpanString(s string) int {
 | 
			
		||||
	n, _ := formTable[f].quickSpan(inputString(s), 0, len(s), true)
 | 
			
		||||
	return n
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// FirstBoundary returns the position i of the first boundary in b
// or -1 if b contains no boundary.
func (f Form) FirstBoundary(b []byte) int {
	return f.firstBoundary(inputBytes(b), len(b))
}

func (f Form) firstBoundary(src input, nsrc int) int {
	i := src.skipContinuationBytes(0)
	if i >= nsrc {
		return -1
	}
	fd := formTable[f]
	ss := streamSafe(0)
	// We should call ss.first here, but we can't as the first rune is
	// skipped already. This means FirstBoundary can't really determine
	// CGJ insertion points correctly. Luckily it doesn't have to.
	for {
		info := fd.info(src, i)
		if info.size == 0 {
			return -1
		}
		if s := ss.next(info); s != ssSuccess {
			return i
		}
		i += int(info.size)
		if i >= nsrc {
			if !info.BoundaryAfter() && !ss.isMax() {
				return -1
			}
			return nsrc
		}
	}
}

// FirstBoundaryInString returns the position i of the first boundary in s
// or -1 if s contains no boundary.
func (f Form) FirstBoundaryInString(s string) int {
	return f.firstBoundary(inputString(s), len(s))
}

// NextBoundary reports the index of the boundary between the first and next
// segment in b or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundary(b []byte, atEOF bool) int {
	return f.nextBoundary(inputBytes(b), len(b), atEOF)
}

// NextBoundaryInString reports the index of the boundary between the first and
// next segment in s or -1 if atEOF is false and there are not enough bytes to
// determine this boundary.
func (f Form) NextBoundaryInString(s string, atEOF bool) int {
	return f.nextBoundary(inputString(s), len(s), atEOF)
}

func (f Form) nextBoundary(src input, nsrc int, atEOF bool) int {
	if nsrc == 0 {
		if atEOF {
			return 0
		}
		return -1
	}
	fd := formTable[f]
	info := fd.info(src, 0)
	if info.size == 0 {
		if atEOF {
			return 1
		}
		return -1
	}
	ss := streamSafe(0)
	ss.first(info)

	for i := int(info.size); i < nsrc; i += int(info.size) {
		info = fd.info(src, i)
		if info.size == 0 {
			if atEOF {
				return i
			}
			return -1
		}
		// TODO: Using streamSafe to determine the boundary isn't the same as
		// using BoundaryBefore. Determine which should be used.
		if s := ss.next(info); s != ssSuccess {
			return i
		}
	}
	if !atEOF && !info.BoundaryAfter() && !ss.isMax() {
		return -1
	}
	return nsrc
}

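// segments is an editor's illustrative sketch (not part of the upstream
// file): it splits b into normalization segments by repeatedly calling
// NextBoundary. With atEOF set to true the final, possibly open, segment is
// returned as well, so the loop always makes progress.
func segments(f Form, b []byte) [][]byte {
	var segs [][]byte
	for len(b) > 0 {
		n := f.NextBoundary(b, true)
		segs = append(segs, b[:n])
		b = b[n:]
	}
	return segs
}
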
// LastBoundary returns the position i of the last boundary in b
// or -1 if b contains no boundary.
func (f Form) LastBoundary(b []byte) int {
	return lastBoundary(formTable[f], b)
}

func lastBoundary(fd *formInfo, b []byte) int {
	i := len(b)
	info, p := lastRuneStart(fd, b)
	if p == -1 {
		return -1
	}
	if info.size == 0 { // ends with incomplete rune
		if p == 0 { // starts with incomplete rune
			return -1
		}
		i = p
		info, p = lastRuneStart(fd, b[:i])
		if p == -1 { // incomplete UTF-8 encoding or non-starter bytes without a starter
			return i
		}
	}
	if p+int(info.size) != i { // trailing non-starter bytes: illegal UTF-8
		return i
	}
	if info.BoundaryAfter() {
		return i
	}
	ss := streamSafe(0)
	v := ss.backwards(info)
	for i = p; i >= 0 && v != ssStarter; i = p {
		info, p = lastRuneStart(fd, b[:i])
		if v = ss.backwards(info); v == ssOverflow {
			break
		}
		if p+int(info.size) != i {
			if p == -1 { // no boundary found
				return -1
			}
			return i // boundary after an illegal UTF-8 encoding
		}
	}
	return i
}

// decomposeSegment scans the first segment in src into rb. It inserts 0x034f
// (Combining Grapheme Joiner) when it encounters a sequence of more than 30
// non-starters and returns the number of bytes consumed from src or iShortDst
// or iShortSrc.
func decomposeSegment(rb *reorderBuffer, sp int, atEOF bool) int {
	// Force one character to be consumed.
	info := rb.f.info(rb.src, sp)
	if info.size == 0 {
		return 0
	}
	if s := rb.ss.next(info); s == ssStarter {
		// TODO: this could be removed if we don't support merging.
		if rb.nrune > 0 {
			goto end
		}
	} else if s == ssOverflow {
		rb.insertCGJ()
		goto end
	}
	if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
		return int(err)
	}
	for {
		sp += int(info.size)
		if sp >= rb.nsrc {
			if !atEOF && !info.BoundaryAfter() {
				return int(iShortSrc)
			}
			break
		}
		info = rb.f.info(rb.src, sp)
		if info.size == 0 {
			if !atEOF {
				return int(iShortSrc)
			}
			break
		}
		if s := rb.ss.next(info); s == ssStarter {
			break
		} else if s == ssOverflow {
			rb.insertCGJ()
			break
		}
		if err := rb.insertFlush(rb.src, sp, info); err != iSuccess {
			return int(err)
		}
	}
end:
	if !rb.doFlush() {
		return int(iShortDst)
	}
	return sp
}

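// cgjInserted is an editor's illustrative sketch (not part of the upstream
// file): following the Stream-Safe Text Process, a run of more than
// maxNonStarters (30) combining marks should cause decomposeSegment to emit a
// CGJ (U+034F), which this check looks for in the NFC output.
func cgjInserted() bool {
	b := []byte("a")
	for i := 0; i < maxNonStarters+1; i++ {
		b = append(b, "\u0301"...) // COMBINING ACUTE ACCENT
	}
	out := NFC.Bytes(b)
	for i := 0; i < len(out); {
		r, size := utf8.DecodeRune(out[i:])
		if r == 0x034f { // COMBINING GRAPHEME JOINER
			return true
		}
		i += size
	}
	return false
}
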
// lastRuneStart returns the runeInfo and position of the last
// rune in buf or the zero runeInfo and -1 if no rune was found.
func lastRuneStart(fd *formInfo, buf []byte) (Properties, int) {
	p := len(buf) - 1
	for ; p >= 0 && !utf8.RuneStart(buf[p]); p-- {
	}
	if p < 0 {
		return Properties{}, -1
	}
	return fd.info(inputBytes(buf), p), p
}

// decomposeToLastBoundary finds an open segment at the end of the buffer
// and scans it into rb, leaving rb.out holding the buffer minus the last
// segment.
func decomposeToLastBoundary(rb *reorderBuffer) {
	fd := &rb.f
	info, i := lastRuneStart(fd, rb.out)
	if int(info.size) != len(rb.out)-i {
		// illegal trailing continuation bytes
		return
	}
	if info.BoundaryAfter() {
		return
	}
	var add [maxNonStarters + 1]Properties // stores runeInfo in reverse order
	padd := 0
	ss := streamSafe(0)
	p := len(rb.out)
	for {
		add[padd] = info
		v := ss.backwards(info)
		if v == ssOverflow {
			// Note that if we have an overflow, the string we are appending to
			// is not correctly normalized. In this case the behavior is undefined.
			break
		}
		padd++
		p -= int(info.size)
		if v == ssStarter || p < 0 {
			break
		}
		info, i = lastRuneStart(fd, rb.out[:p])
		if int(info.size) != p-i {
			break
		}
	}
	rb.ss = ss
	// Copy bytes for insertion as we may need to overwrite rb.out.
	var buf [maxBufferSize * utf8.UTFMax]byte
	cp := buf[:copy(buf[:], rb.out[p:])]
	rb.out = rb.out[:p]
	for padd--; padd >= 0; padd-- {
		info = add[padd]
		rb.insertUnsafe(inputBytes(cp), 0, info)
		cp = cp[info.size:]
	}
}
125	vendor/golang.org/x/text/unicode/norm/readwriter.go	(generated, vendored, Normal file)
@@ -0,0 +1,125 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import "io"

type normWriter struct {
	rb  reorderBuffer
	w   io.Writer
	buf []byte
}

// Write implements the standard write interface.  If the last characters are
// not at a normalization boundary, the bytes will be buffered for the next
// write. The remaining bytes will be written on close.
func (w *normWriter) Write(data []byte) (n int, err error) {
	// Process data in pieces to keep w.buf size bounded.
	const chunk = 4000

	for len(data) > 0 {
		// Normalize into w.buf.
		m := len(data)
		if m > chunk {
			m = chunk
		}
		w.rb.src = inputBytes(data[:m])
		w.rb.nsrc = m
		w.buf = doAppend(&w.rb, w.buf, 0)
		data = data[m:]
		n += m

		// Write out complete prefix, save remainder.
		// Note that lastBoundary looks back at most 31 runes.
		i := lastBoundary(&w.rb.f, w.buf)
		if i == -1 {
			i = 0
		}
		if i > 0 {
			if _, err = w.w.Write(w.buf[:i]); err != nil {
				break
			}
			bn := copy(w.buf, w.buf[i:])
			w.buf = w.buf[:bn]
		}
	}
	return n, err
}

// Close forces data that remains in the buffer to be written.
func (w *normWriter) Close() error {
	if len(w.buf) > 0 {
		_, err := w.w.Write(w.buf)
		if err != nil {
			return err
		}
	}
	return nil
}

// Writer returns a new writer that implements Write(b)
// by writing f(b) to w.  The returned writer may use an
// internal buffer to maintain state across Write calls.
// Calling its Close method writes any buffered data to w.
func (f Form) Writer(w io.Writer) io.WriteCloser {
	wr := &normWriter{rb: reorderBuffer{}, w: w}
	wr.rb.init(f, nil)
	return wr
}

type normReader struct {
	rb           reorderBuffer
	r            io.Reader
	inbuf        []byte
	outbuf       []byte
	bufStart     int
	lastBoundary int
	err          error
}

// Read implements the standard read interface.
func (r *normReader) Read(p []byte) (int, error) {
	for {
		if r.lastBoundary-r.bufStart > 0 {
			n := copy(p, r.outbuf[r.bufStart:r.lastBoundary])
			r.bufStart += n
			if r.lastBoundary-r.bufStart > 0 {
				return n, nil
			}
			return n, r.err
		}
		if r.err != nil {
			return 0, r.err
		}
		outn := copy(r.outbuf, r.outbuf[r.lastBoundary:])
		r.outbuf = r.outbuf[0:outn]
		r.bufStart = 0

		n, err := r.r.Read(r.inbuf)
		r.rb.src = inputBytes(r.inbuf[0:n])
		r.rb.nsrc, r.err = n, err
		if n > 0 {
			r.outbuf = doAppend(&r.rb, r.outbuf, 0)
		}
		if err == io.EOF {
			r.lastBoundary = len(r.outbuf)
		} else {
			r.lastBoundary = lastBoundary(&r.rb.f, r.outbuf)
			if r.lastBoundary == -1 {
				r.lastBoundary = 0
			}
		}
	}
}

// Reader returns a new reader that implements Read
// by reading data from r and returning f(data).
func (f Form) Reader(r io.Reader) io.Reader {
	const chunk = 4000
	buf := make([]byte, chunk)
	rr := &normReader{rb: reorderBuffer{}, r: r, inbuf: buf}
	rr.rb.init(f, buf)
	return rr
}
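
// normalizeCopy is an editor's illustrative sketch (not part of the upstream
// file): it streams r through an NFC-normalizing writer into w. Close flushes
// any bytes still held back because they did not end at a boundary.
func normalizeCopy(w io.Writer, r io.Reader) error {
	nw := NFC.Writer(w)
	if _, err := io.Copy(nw, r); err != nil {
		nw.Close()
		return err
	}
	return nw.Close()
}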
7631	vendor/golang.org/x/text/unicode/norm/tables.go	(generated, vendored, Normal file)
	File diff suppressed because it is too large
88	vendor/golang.org/x/text/unicode/norm/transform.go	(generated, vendored, Normal file)
@@ -0,0 +1,88 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

import (
	"unicode/utf8"

	"golang.org/x/text/transform"
)

// Reset implements the Reset method of the transform.Transformer interface.
func (Form) Reset() {}

// Transform implements the Transform method of the transform.Transformer
// interface. It may need to write segments of up to MaxSegmentSize at once.
// Users should either catch ErrShortDst and allow dst to grow or have dst be at
// least of size MaxTransformChunkSize to be guaranteed of progress.
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	n := 0
	// Cap the maximum number of src bytes to check.
	b := src
	eof := atEOF
	if ns := len(dst); ns < len(b) {
		err = transform.ErrShortDst
		eof = false
		b = b[:ns]
	}
	i, ok := formTable[f].quickSpan(inputBytes(b), n, len(b), eof)
	n += copy(dst[n:], b[n:i])
	if !ok {
		nDst, nSrc, err = f.transform(dst[n:], src[n:], atEOF)
		return nDst + n, nSrc + n, err
	}
	if n < len(src) && !atEOF {
		err = transform.ErrShortSrc
	}
	return n, n, err
}

func flushTransform(rb *reorderBuffer) bool {
	// Write out (must fully fit in dst, or else it is an ErrShortDst).
	if len(rb.out) < rb.nrune*utf8.UTFMax {
		return false
	}
	rb.out = rb.out[rb.flushCopy(rb.out):]
	return true
}

var errs = []error{nil, transform.ErrShortDst, transform.ErrShortSrc}

// transform implements the transform.Transformer interface. It is only called
// when quickSpan does not pass for a given string.
func (f Form) transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	// TODO: get rid of reorderBuffer. See CL 23460044.
	rb := reorderBuffer{}
	rb.init(f, src)
	for {
		// Load segment into reorder buffer.
		rb.setFlusher(dst[nDst:], flushTransform)
		end := decomposeSegment(&rb, nSrc, atEOF)
		if end < 0 {
			return nDst, nSrc, errs[-end]
		}
		nDst = len(dst) - len(rb.out)
		nSrc = end

		// Next quickSpan.
		end = rb.nsrc
		eof := atEOF
		if n := nSrc + len(dst) - nDst; n < end {
			err = transform.ErrShortDst
			end = n
			eof = false
		}
		end, ok := rb.f.quickSpan(rb.src, nSrc, end, eof)
		n := copy(dst[nDst:], rb.src.bytes[nSrc:end])
		nSrc += n
		nDst += n
		if ok {
			if n < rb.nsrc && !atEOF {
				err = transform.ErrShortSrc
			}
			return nDst, nSrc, err
		}
	}
}
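
// nfcTransform is an editor's illustrative sketch (not part of the upstream
// file): since Form implements transform.Transformer, it composes with the
// helpers in golang.org/x/text/transform, such as transform.String.
func nfcTransform(s string) (string, error) {
	out, _, err := transform.String(NFC, s)
	return out, err
}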
54	vendor/golang.org/x/text/unicode/norm/trie.go	(generated, vendored, Normal file)
@@ -0,0 +1,54 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package norm

type valueRange struct {
	value  uint16 // header: value:stride
	lo, hi byte   // header: lo:n
}

type sparseBlocks struct {
	values []valueRange
	offset []uint16
}

var nfcSparse = sparseBlocks{
	values: nfcSparseValues[:],
	offset: nfcSparseOffset[:],
}

var nfkcSparse = sparseBlocks{
	values: nfkcSparseValues[:],
	offset: nfkcSparseOffset[:],
}

var (
	nfcData  = newNfcTrie(0)
	nfkcData = newNfkcTrie(0)
)

// lookup determines the type of block n and looks up the value for b.
// For n < t.cutoff, the block is a simple lookup table. Otherwise, the block
// is a list of ranges with an accompanying value. Given a matching range r,
// the value for b is given by r.value + (b - r.lo) * stride.
func (t *sparseBlocks) lookup(n uint32, b byte) uint16 {
	offset := t.offset[n]
	header := t.values[offset]
	lo := offset + 1
	hi := lo + uint16(header.lo)
	for lo < hi {
		m := lo + (hi-lo)/2
		r := t.values[m]
		if r.lo <= b && b <= r.hi {
			return r.value + uint16(b-r.lo)*header.value
		}
		if b < r.lo {
			hi = m
		} else {
			lo = m + 1
		}
	}
	return 0
}
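
// sparseValue is an editor's illustrative sketch (not part of the upstream
// file): it restates the arithmetic used by lookup for a single matching
// range r, where stride is the value field of the block's header entry.
func sparseValue(r valueRange, stride uint16, b byte) (uint16, bool) {
	if b < r.lo || r.hi < b {
		return 0, false // b falls outside the range
	}
	return r.value + uint16(b-r.lo)*stride, true
}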
117	vendor/golang.org/x/text/unicode/norm/triegen.go	(generated, vendored, Normal file)
@@ -0,0 +1,117 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

// Trie table generator.
// Used by make*tables tools to generate a Go file with trie data structures
// for mapping UTF-8 to a 16-bit value. All but the last byte in a UTF-8 byte
// sequence are used to look up offsets in the index table to be used for the
// next byte. The last byte is used to index into a table with 16-bit values.

package main

import (
	"fmt"
	"io"
)

const maxSparseEntries = 16

type normCompacter struct {
	sparseBlocks [][]uint64
	sparseOffset []uint16
	sparseCount  int
	name         string
}

func mostFrequentStride(a []uint64) int {
	counts := make(map[int]int)
	var v int
	for _, x := range a {
		if stride := int(x) - v; v != 0 && stride >= 0 {
			counts[stride]++
		}
		v = int(x)
	}
	var maxs, maxc int
	for stride, cnt := range counts {
		if cnt > maxc || (cnt == maxc && stride < maxs) {
			maxs, maxc = stride, cnt
		}
	}
	return maxs
}

func countSparseEntries(a []uint64) int {
	stride := mostFrequentStride(a)
	var v, count int
	for _, tv := range a {
		if int(tv)-v != stride {
			if tv != 0 {
				count++
			}
		}
		v = int(tv)
	}
	return count
}

func (c *normCompacter) Size(v []uint64) (sz int, ok bool) {
	if n := countSparseEntries(v); n <= maxSparseEntries {
		return (n+1)*4 + 2, true
	}
	return 0, false
}

func (c *normCompacter) Store(v []uint64) uint32 {
	h := uint32(len(c.sparseOffset))
	c.sparseBlocks = append(c.sparseBlocks, v)
	c.sparseOffset = append(c.sparseOffset, uint16(c.sparseCount))
	c.sparseCount += countSparseEntries(v) + 1
	return h
}

func (c *normCompacter) Handler() string {
	return c.name + "Sparse.lookup"
}

func (c *normCompacter) Print(w io.Writer) (retErr error) {
	p := func(f string, x ...interface{}) {
		if _, err := fmt.Fprintf(w, f, x...); retErr == nil && err != nil {
			retErr = err
		}
	}

	ls := len(c.sparseBlocks)
	p("// %sSparseOffset: %d entries, %d bytes\n", c.name, ls, ls*2)
	p("var %sSparseOffset = %#v\n\n", c.name, c.sparseOffset)

	ns := c.sparseCount
	p("// %sSparseValues: %d entries, %d bytes\n", c.name, ns, ns*4)
	p("var %sSparseValues = [%d]valueRange {", c.name, ns)
	for i, b := range c.sparseBlocks {
		p("\n// Block %#x, offset %#x", i, c.sparseOffset[i])
		var v int
		stride := mostFrequentStride(b)
		n := countSparseEntries(b)
		p("\n{value:%#04x,lo:%#02x},", stride, uint8(n))
		for i, nv := range b {
			if int(nv)-v != stride {
				if v != 0 {
					p(",hi:%#02x},", 0x80+i-1)
				}
				if nv != 0 {
					p("\n{value:%#04x,lo:%#02x", nv, 0x80+i)
				}
			}
			v = int(nv)
		}
		if v != 0 {
			p(",hi:%#02x},", 0x80+len(b)-1)
		}
	}
	p("\n}\n\n")
	return
}
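
// strideExample is an editor's illustrative sketch (not part of the upstream
// file): for the block {0, 10, 12, 14, 16} the most common difference between
// consecutive non-zero entries is 2, so mostFrequentStride reports 2, and
// countSparseEntries counts the entries that break that stride.
func strideExample() (stride, sparse int) {
	block := []uint64{0, 10, 12, 14, 16}
	return mostFrequentStride(block), countSparseEntries(block)
}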