mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	* Update makefile to use dep * Migrate to dep * Fix some deps * Try to find a better version for golang.org/x/net * Try to find a better version for golang.org/x/oauth2
		
			
				
	
	
		
			377 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			377 lines
		
	
	
		
			8.6 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
// Copyright 2014 The Go Authors. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style
 | 
						|
// license that can be found in the LICENSE file.
 | 
						|
 | 
						|
// Package ucd provides a parser for Unicode Character Database files, the
 | 
						|
// format of which is defined in http://www.unicode.org/reports/tr44/. See
 | 
						|
// http://www.unicode.org/Public/UCD/latest/ucd/ for example files.
 | 
						|
//
 | 
						|
// It currently does not support substitutions of missing fields.
 | 
						|
package ucd // import "golang.org/x/text/internal/ucd"
 | 
						|
 | 
						|
import (
 | 
						|
	"bufio"
 | 
						|
	"bytes"
 | 
						|
	"errors"
 | 
						|
	"io"
 | 
						|
	"log"
 | 
						|
	"regexp"
 | 
						|
	"strconv"
 | 
						|
	"strings"
 | 
						|
)
 | 
						|
 | 
						|
// Indices of the semicolon-separated fields of UnicodeData.txt, in file order.
// They are meant to be passed as the field index to the Parser accessors.
const (
	// Identity and classification.
	CodePoint = iota
	Name
	GeneralCategory
	CanonicalCombiningClass
	BidiClass
	DecompMapping

	// Numeric values.
	DecimalValue
	DigitValue
	NumericValue

	BidiMirrored
	Unicode1Name
	ISOComment

	// Simple case mappings.
	SimpleUppercaseMapping
	SimpleLowercaseMapping
	SimpleTitlecaseMapping
)
 | 
						|
 | 
						|
// Parse calls f for each entry in the given reader of a UCD file. It will close
 | 
						|
// the reader upon return. It will call log.Fatal if any error occurred.
 | 
						|
//
 | 
						|
// This implements the most common usage pattern of using Parser.
 | 
						|
func Parse(r io.ReadCloser, f func(p *Parser)) {
 | 
						|
	defer r.Close()
 | 
						|
 | 
						|
	p := New(r)
 | 
						|
	for p.Next() {
 | 
						|
		f(p)
 | 
						|
	}
 | 
						|
	if err := p.Err(); err != nil {
 | 
						|
		r.Close() // os.Exit will cause defers not to be called.
 | 
						|
		log.Fatal(err)
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// An Option is used to configure a Parser.
 | 
						|
type Option func(p *Parser)
 | 
						|
 | 
						|
func keepRanges(p *Parser) {
 | 
						|
	p.keepRanges = true
 | 
						|
}
 | 
						|
 | 
						|
var (
 | 
						|
	// KeepRanges prevents the expansion of ranges. The raw ranges can be
 | 
						|
	// obtained by calling Range(0) on the parser.
 | 
						|
	KeepRanges Option = keepRanges
 | 
						|
)
 | 
						|
 | 
						|
// The Part option register a handler for lines starting with a '@'. The text
 | 
						|
// after a '@' is available as the first field. Comments are handled as usual.
 | 
						|
func Part(f func(p *Parser)) Option {
 | 
						|
	return func(p *Parser) {
 | 
						|
		p.partHandler = f
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// The CommentHandler option passes comments that are on a line by itself to
 | 
						|
// a given handler.
 | 
						|
func CommentHandler(f func(s string)) Option {
 | 
						|
	return func(p *Parser) {
 | 
						|
		p.commentHandler = f
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// A Parser reads Unicode Character Database (UCD) files line by line and
// exposes the fields of the current entry through typed accessor methods.
type Parser struct {
	scanner *bufio.Scanner

	// keepRanges, when true, leaves rune ranges in field 0 unexpanded.
	keepRanges bool

	// Handlers installed through the Part and CommentHandler options.
	partHandler    func(p *Parser)
	commentHandler func(s string)

	// err is the first error recorded through setError.
	err error

	// comment and field hold the trailing comment and the split fields of
	// the current line.
	comment []byte
	field   [][]byte

	// parsedRange is needed in case Range(0) is called more than once for
	// one field, since resolving a range can require scanning ahead.
	parsedRange bool
	rangeStart  rune
	rangeEnd    rune
}
 | 
						|
 | 
						|
func (p *Parser) setError(err error) {
 | 
						|
	if p.err == nil {
 | 
						|
		p.err = err
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (p *Parser) getField(i int) []byte {
 | 
						|
	if i >= len(p.field) {
 | 
						|
		return nil
 | 
						|
	}
 | 
						|
	return p.field[i]
 | 
						|
}
 | 
						|
 | 
						|
// Err returns a non-nil error if any error occurred during parsing.
 | 
						|
func (p *Parser) Err() error {
 | 
						|
	return p.err
 | 
						|
}
 | 
						|
 | 
						|
// New returns a Parser for the given Reader.
 | 
						|
func New(r io.Reader, o ...Option) *Parser {
 | 
						|
	p := &Parser{
 | 
						|
		scanner: bufio.NewScanner(r),
 | 
						|
	}
 | 
						|
	for _, f := range o {
 | 
						|
		f(p)
 | 
						|
	}
 | 
						|
	return p
 | 
						|
}
 | 
						|
 | 
						|
// Next parses the next line in the file. It returns true if a line was parsed
 | 
						|
// and false if it reached the end of the file.
 | 
						|
func (p *Parser) Next() bool {
 | 
						|
	if !p.keepRanges && p.rangeStart < p.rangeEnd {
 | 
						|
		p.rangeStart++
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	p.comment = nil
 | 
						|
	p.field = p.field[:0]
 | 
						|
	p.parsedRange = false
 | 
						|
 | 
						|
	for p.scanner.Scan() {
 | 
						|
		b := p.scanner.Bytes()
 | 
						|
		if len(b) == 0 {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		if b[0] == '#' {
 | 
						|
			if p.commentHandler != nil {
 | 
						|
				p.commentHandler(strings.TrimSpace(string(b[1:])))
 | 
						|
			}
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		// Parse line
 | 
						|
		if i := bytes.IndexByte(b, '#'); i != -1 {
 | 
						|
			p.comment = bytes.TrimSpace(b[i+1:])
 | 
						|
			b = b[:i]
 | 
						|
		}
 | 
						|
		if b[0] == '@' {
 | 
						|
			if p.partHandler != nil {
 | 
						|
				p.field = append(p.field, bytes.TrimSpace(b[1:]))
 | 
						|
				p.partHandler(p)
 | 
						|
				p.field = p.field[:0]
 | 
						|
			}
 | 
						|
			p.comment = nil
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		for {
 | 
						|
			i := bytes.IndexByte(b, ';')
 | 
						|
			if i == -1 {
 | 
						|
				p.field = append(p.field, bytes.TrimSpace(b))
 | 
						|
				break
 | 
						|
			}
 | 
						|
			p.field = append(p.field, bytes.TrimSpace(b[:i]))
 | 
						|
			b = b[i+1:]
 | 
						|
		}
 | 
						|
		if !p.keepRanges {
 | 
						|
			p.rangeStart, p.rangeEnd = p.getRange(0)
 | 
						|
		}
 | 
						|
		return true
 | 
						|
	}
 | 
						|
	p.setError(p.scanner.Err())
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
// parseRune converts b, a hexadecimal code point with an optional "U+"
// prefix, into a rune. On failure it returns 0 and the strconv error.
//
// The value is parsed with a 31-bit limit: rune is a signed 32-bit type, so
// accepting a full 32 bits would let inputs such as "FFFFFFFF" wrap around to
// a negative rune without any error being reported.
func parseRune(b []byte) (rune, error) {
	if len(b) > 2 && b[0] == 'U' && b[1] == '+' {
		b = b[2:]
	}
	x, err := strconv.ParseUint(string(b), 16, 31)
	if err != nil {
		return 0, err
	}
	return rune(x), nil
}
 | 
						|
 | 
						|
func (p *Parser) parseRune(b []byte) rune {
 | 
						|
	x, err := parseRune(b)
 | 
						|
	p.setError(err)
 | 
						|
	return x
 | 
						|
}
 | 
						|
 | 
						|
// Rune parses and returns field i as a rune.
 | 
						|
func (p *Parser) Rune(i int) rune {
 | 
						|
	if i > 0 || p.keepRanges {
 | 
						|
		return p.parseRune(p.getField(i))
 | 
						|
	}
 | 
						|
	return p.rangeStart
 | 
						|
}
 | 
						|
 | 
						|
// Runes interprets and returns field i as a sequence of runes.
 | 
						|
func (p *Parser) Runes(i int) (runes []rune) {
 | 
						|
	add := func(b []byte) {
 | 
						|
		if b = bytes.TrimSpace(b); len(b) > 0 {
 | 
						|
			runes = append(runes, p.parseRune(b))
 | 
						|
		}
 | 
						|
	}
 | 
						|
	for b := p.getField(i); ; {
 | 
						|
		i := bytes.IndexByte(b, ' ')
 | 
						|
		if i == -1 {
 | 
						|
			add(b)
 | 
						|
			break
 | 
						|
		}
 | 
						|
		add(b[:i])
 | 
						|
		b = b[i+1:]
 | 
						|
	}
 | 
						|
	return
 | 
						|
}
 | 
						|
 | 
						|
// errIncorrectLegacyRange is recorded when a legacy "<..., First>" line is
// not followed by a matching "<..., Last>" line.
var errIncorrectLegacyRange = errors.New("ucd: unmatched <* First>")

// reRange matches one line of a legacy rune range. Its groups capture the
// hexadecimal code point, the range name, the First/Last marker, and the
// remainder of the line.
var reRange = regexp.MustCompile(`^([0-9A-F]*);<([^,]*), ([^>]*)>(.*)$`)
 | 
						|
 | 
						|
// Range parses and returns field i as a rune range. A range is inclusive at
 | 
						|
// both ends. If the field only has one rune, first and last will be identical.
 | 
						|
// It supports the legacy format for ranges used in UnicodeData.txt.
 | 
						|
func (p *Parser) Range(i int) (first, last rune) {
 | 
						|
	if !p.keepRanges {
 | 
						|
		return p.rangeStart, p.rangeStart
 | 
						|
	}
 | 
						|
	return p.getRange(i)
 | 
						|
}
 | 
						|
 | 
						|
func (p *Parser) getRange(i int) (first, last rune) {
 | 
						|
	b := p.getField(i)
 | 
						|
	if k := bytes.Index(b, []byte("..")); k != -1 {
 | 
						|
		return p.parseRune(b[:k]), p.parseRune(b[k+2:])
 | 
						|
	}
 | 
						|
	// The first field may not be a rune, in which case we may ignore any error
 | 
						|
	// and set the range as 0..0.
 | 
						|
	x, err := parseRune(b)
 | 
						|
	if err != nil {
 | 
						|
		// Disable range parsing henceforth. This ensures that an error will be
 | 
						|
		// returned if the user subsequently will try to parse this field as
 | 
						|
		// a Rune.
 | 
						|
		p.keepRanges = true
 | 
						|
	}
 | 
						|
	// Special case for UnicodeData that was retained for backwards compatibility.
 | 
						|
	if i == 0 && len(p.field) > 1 && bytes.HasSuffix(p.field[1], []byte("First>")) {
 | 
						|
		if p.parsedRange {
 | 
						|
			return p.rangeStart, p.rangeEnd
 | 
						|
		}
 | 
						|
		mf := reRange.FindStringSubmatch(p.scanner.Text())
 | 
						|
		if mf == nil || !p.scanner.Scan() {
 | 
						|
			p.setError(errIncorrectLegacyRange)
 | 
						|
			return x, x
 | 
						|
		}
 | 
						|
		// Using Bytes would be more efficient here, but Text is a lot easier
 | 
						|
		// and this is not a frequent case.
 | 
						|
		ml := reRange.FindStringSubmatch(p.scanner.Text())
 | 
						|
		if ml == nil || mf[2] != ml[2] || ml[3] != "Last" || mf[4] != ml[4] {
 | 
						|
			p.setError(errIncorrectLegacyRange)
 | 
						|
			return x, x
 | 
						|
		}
 | 
						|
		p.rangeStart, p.rangeEnd = x, p.parseRune(p.scanner.Bytes()[:len(ml[1])])
 | 
						|
		p.parsedRange = true
 | 
						|
		return p.rangeStart, p.rangeEnd
 | 
						|
	}
 | 
						|
	return x, x
 | 
						|
}
 | 
						|
 | 
						|
// bools maps every spelling accepted as a UCD boolean to its value. The empty
// string counts as false.
var bools = map[string]bool{
	"Y":    true,
	"Yes":  true,
	"T":    true,
	"True": true,

	"":      false,
	"N":     false,
	"No":    false,
	"F":     false,
	"False": false,
}
 | 
						|
 | 
						|
// Bool parses and returns field i as a boolean value.
 | 
						|
func (p *Parser) Bool(i int) bool {
 | 
						|
	b := p.getField(i)
 | 
						|
	for s, v := range bools {
 | 
						|
		if bstrEq(b, s) {
 | 
						|
			return v
 | 
						|
		}
 | 
						|
	}
 | 
						|
	p.setError(strconv.ErrSyntax)
 | 
						|
	return false
 | 
						|
}
 | 
						|
 | 
						|
// Int parses and returns field i as an integer value.
 | 
						|
func (p *Parser) Int(i int) int {
 | 
						|
	x, err := strconv.ParseInt(string(p.getField(i)), 10, 64)
 | 
						|
	p.setError(err)
 | 
						|
	return int(x)
 | 
						|
}
 | 
						|
 | 
						|
// Uint parses and returns field i as an unsigned integer value.
 | 
						|
func (p *Parser) Uint(i int) uint {
 | 
						|
	x, err := strconv.ParseUint(string(p.getField(i)), 10, 64)
 | 
						|
	p.setError(err)
 | 
						|
	return uint(x)
 | 
						|
}
 | 
						|
 | 
						|
// Float parses and returns field i as a decimal value.
 | 
						|
func (p *Parser) Float(i int) float64 {
 | 
						|
	x, err := strconv.ParseFloat(string(p.getField(i)), 64)
 | 
						|
	p.setError(err)
 | 
						|
	return x
 | 
						|
}
 | 
						|
 | 
						|
// String parses and returns field i as a string value.
 | 
						|
func (p *Parser) String(i int) string {
 | 
						|
	return string(p.getField(i))
 | 
						|
}
 | 
						|
 | 
						|
// Strings parses and returns field i as a space-separated list of strings.
 | 
						|
func (p *Parser) Strings(i int) []string {
 | 
						|
	ss := strings.Split(string(p.getField(i)), " ")
 | 
						|
	for i, s := range ss {
 | 
						|
		ss[i] = strings.TrimSpace(s)
 | 
						|
	}
 | 
						|
	return ss
 | 
						|
}
 | 
						|
 | 
						|
// Comment returns the comments for the current line.
 | 
						|
func (p *Parser) Comment() string {
 | 
						|
	return string(p.comment)
 | 
						|
}
 | 
						|
 | 
						|
// errUndefinedEnum is the error recorded by Enum when a field matches none of
// the permitted values.
var errUndefinedEnum = errors.New("ucd: undefined enum value")
 | 
						|
 | 
						|
// Enum interprets and returns field i as a value that must be one of the values
 | 
						|
// in enum.
 | 
						|
func (p *Parser) Enum(i int, enum ...string) string {
 | 
						|
	b := p.getField(i)
 | 
						|
	for _, s := range enum {
 | 
						|
		if bstrEq(b, s) {
 | 
						|
			return s
 | 
						|
		}
 | 
						|
	}
 | 
						|
	p.setError(errUndefinedEnum)
 | 
						|
	return ""
 | 
						|
}
 | 
						|
 | 
						|
// bstrEq reports whether the bytes of b are exactly the bytes of s.
func bstrEq(b []byte, s string) bool {
	// Comparing through a string conversion gives the same result as the
	// byte-by-byte loop it replaces.
	return string(b) == s
}
 |