mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	Use Go1.11 module (#5743)
* Migrate to go modules * make vendor * Update mvdan.cc/xurls * make vendor * Update code.gitea.io/git * make fmt-check * Update github.com/go-sql-driver/mysql * make vendor
This commit is contained in:
		
							
								
								
									
										16
									
								
								vendor/github.com/golang/snappy/.gitignore
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								vendor/github.com/golang/snappy/.gitignore
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,16 @@
 | 
			
		||||
cmd/snappytool/snappytool
 | 
			
		||||
testdata/bench
 | 
			
		||||
 | 
			
		||||
# These explicitly listed benchmark data files are for an obsolete version of
 | 
			
		||||
# snappy_test.go.
 | 
			
		||||
testdata/alice29.txt
 | 
			
		||||
testdata/asyoulik.txt
 | 
			
		||||
testdata/fireworks.jpeg
 | 
			
		||||
testdata/geo.protodata
 | 
			
		||||
testdata/html
 | 
			
		||||
testdata/html_x_4
 | 
			
		||||
testdata/kppkn.gtb
 | 
			
		||||
testdata/lcet10.txt
 | 
			
		||||
testdata/paper-100k.pdf
 | 
			
		||||
testdata/plrabn12.txt
 | 
			
		||||
testdata/urls.10K
 | 
			
		||||
							
								
								
									
										107
									
								
								vendor/github.com/golang/snappy/README
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								vendor/github.com/golang/snappy/README
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,107 @@
 | 
			
		||||
The Snappy compression format in the Go programming language.
 | 
			
		||||
 | 
			
		||||
To download and install from source:
 | 
			
		||||
$ go get github.com/golang/snappy
 | 
			
		||||
 | 
			
		||||
Unless otherwise noted, the Snappy-Go source files are distributed
 | 
			
		||||
under the BSD-style license found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Benchmarks.
 | 
			
		||||
 | 
			
		||||
The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
 | 
			
		||||
or so files, the same set used by the C++ Snappy code (github.com/google/snappy
 | 
			
		||||
and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
 | 
			
		||||
3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
 | 
			
		||||
 | 
			
		||||
"go test -test.bench=."
 | 
			
		||||
 | 
			
		||||
_UFlat0-8         2.19GB/s ± 0%  html
 | 
			
		||||
_UFlat1-8         1.41GB/s ± 0%  urls
 | 
			
		||||
_UFlat2-8         23.5GB/s ± 2%  jpg
 | 
			
		||||
_UFlat3-8         1.91GB/s ± 0%  jpg_200
 | 
			
		||||
_UFlat4-8         14.0GB/s ± 1%  pdf
 | 
			
		||||
_UFlat5-8         1.97GB/s ± 0%  html4
 | 
			
		||||
_UFlat6-8          814MB/s ± 0%  txt1
 | 
			
		||||
_UFlat7-8          785MB/s ± 0%  txt2
 | 
			
		||||
_UFlat8-8          857MB/s ± 0%  txt3
 | 
			
		||||
_UFlat9-8          719MB/s ± 1%  txt4
 | 
			
		||||
_UFlat10-8        2.84GB/s ± 0%  pb
 | 
			
		||||
_UFlat11-8        1.05GB/s ± 0%  gaviota
 | 
			
		||||
 | 
			
		||||
_ZFlat0-8         1.04GB/s ± 0%  html
 | 
			
		||||
_ZFlat1-8          534MB/s ± 0%  urls
 | 
			
		||||
_ZFlat2-8         15.7GB/s ± 1%  jpg
 | 
			
		||||
_ZFlat3-8          740MB/s ± 3%  jpg_200
 | 
			
		||||
_ZFlat4-8         9.20GB/s ± 1%  pdf
 | 
			
		||||
_ZFlat5-8          991MB/s ± 0%  html4
 | 
			
		||||
_ZFlat6-8          379MB/s ± 0%  txt1
 | 
			
		||||
_ZFlat7-8          352MB/s ± 0%  txt2
 | 
			
		||||
_ZFlat8-8          396MB/s ± 1%  txt3
 | 
			
		||||
_ZFlat9-8          327MB/s ± 1%  txt4
 | 
			
		||||
_ZFlat10-8        1.33GB/s ± 1%  pb
 | 
			
		||||
_ZFlat11-8         605MB/s ± 1%  gaviota
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
"go test -test.bench=. -tags=noasm"
 | 
			
		||||
 | 
			
		||||
_UFlat0-8          621MB/s ± 2%  html
 | 
			
		||||
_UFlat1-8          494MB/s ± 1%  urls
 | 
			
		||||
_UFlat2-8         23.2GB/s ± 1%  jpg
 | 
			
		||||
_UFlat3-8         1.12GB/s ± 1%  jpg_200
 | 
			
		||||
_UFlat4-8         4.35GB/s ± 1%  pdf
 | 
			
		||||
_UFlat5-8          609MB/s ± 0%  html4
 | 
			
		||||
_UFlat6-8          296MB/s ± 0%  txt1
 | 
			
		||||
_UFlat7-8          288MB/s ± 0%  txt2
 | 
			
		||||
_UFlat8-8          309MB/s ± 1%  txt3
 | 
			
		||||
_UFlat9-8          280MB/s ± 1%  txt4
 | 
			
		||||
_UFlat10-8         753MB/s ± 0%  pb
 | 
			
		||||
_UFlat11-8         400MB/s ± 0%  gaviota
 | 
			
		||||
 | 
			
		||||
_ZFlat0-8          409MB/s ± 1%  html
 | 
			
		||||
_ZFlat1-8          250MB/s ± 1%  urls
 | 
			
		||||
_ZFlat2-8         12.3GB/s ± 1%  jpg
 | 
			
		||||
_ZFlat3-8          132MB/s ± 0%  jpg_200
 | 
			
		||||
_ZFlat4-8         2.92GB/s ± 0%  pdf
 | 
			
		||||
_ZFlat5-8          405MB/s ± 1%  html4
 | 
			
		||||
_ZFlat6-8          179MB/s ± 1%  txt1
 | 
			
		||||
_ZFlat7-8          170MB/s ± 1%  txt2
 | 
			
		||||
_ZFlat8-8          189MB/s ± 1%  txt3
 | 
			
		||||
_ZFlat9-8          164MB/s ± 1%  txt4
 | 
			
		||||
_ZFlat10-8         479MB/s ± 1%  pb
 | 
			
		||||
_ZFlat11-8         270MB/s ± 1%  gaviota
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
 | 
			
		||||
are the numbers from C++ Snappy's
 | 
			
		||||
 | 
			
		||||
make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
 | 
			
		||||
 | 
			
		||||
BM_UFlat/0     2.4GB/s  html
 | 
			
		||||
BM_UFlat/1     1.4GB/s  urls
 | 
			
		||||
BM_UFlat/2    21.8GB/s  jpg
 | 
			
		||||
BM_UFlat/3     1.5GB/s  jpg_200
 | 
			
		||||
BM_UFlat/4    13.3GB/s  pdf
 | 
			
		||||
BM_UFlat/5     2.1GB/s  html4
 | 
			
		||||
BM_UFlat/6     1.0GB/s  txt1
 | 
			
		||||
BM_UFlat/7   959.4MB/s  txt2
 | 
			
		||||
BM_UFlat/8     1.0GB/s  txt3
 | 
			
		||||
BM_UFlat/9   864.5MB/s  txt4
 | 
			
		||||
BM_UFlat/10    2.9GB/s  pb
 | 
			
		||||
BM_UFlat/11    1.2GB/s  gaviota
 | 
			
		||||
 | 
			
		||||
BM_ZFlat/0   944.3MB/s  html (22.31 %)
 | 
			
		||||
BM_ZFlat/1   501.6MB/s  urls (47.78 %)
 | 
			
		||||
BM_ZFlat/2    14.3GB/s  jpg (99.95 %)
 | 
			
		||||
BM_ZFlat/3   538.3MB/s  jpg_200 (73.00 %)
 | 
			
		||||
BM_ZFlat/4     8.3GB/s  pdf (83.30 %)
 | 
			
		||||
BM_ZFlat/5   903.5MB/s  html4 (22.52 %)
 | 
			
		||||
BM_ZFlat/6   336.0MB/s  txt1 (57.88 %)
 | 
			
		||||
BM_ZFlat/7   312.3MB/s  txt2 (61.91 %)
 | 
			
		||||
BM_ZFlat/8   353.1MB/s  txt3 (54.99 %)
 | 
			
		||||
BM_ZFlat/9   289.9MB/s  txt4 (66.26 %)
 | 
			
		||||
BM_ZFlat/10    1.2GB/s  pb (19.68 %)
 | 
			
		||||
BM_ZFlat/11  527.4MB/s  gaviota (37.72 %)
 | 
			
		||||
							
								
								
									
										24
									
								
								vendor/github.com/golang/snappy/decode.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										24
									
								
								vendor/github.com/golang/snappy/decode.go
									
									
									
										generated
									
									
										vendored
									
									
								
							@@ -18,7 +18,6 @@ var (
 | 
			
		||||
	// ErrUnsupported reports that the input isn't supported.
 | 
			
		||||
	ErrUnsupported = errors.New("snappy: unsupported input")
 | 
			
		||||
 | 
			
		||||
	errUnsupportedCopy4Tag      = errors.New("snappy: unsupported COPY_4 tag")
 | 
			
		||||
	errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
@@ -46,7 +45,6 @@ func decodedLen(src []byte) (blockLen, headerLen int, err error) {
 | 
			
		||||
const (
 | 
			
		||||
	decodeErrCodeCorrupt                  = 1
 | 
			
		||||
	decodeErrCodeUnsupportedLiteralLength = 2
 | 
			
		||||
	decodeErrCodeUnsupportedCopy4Tag      = 3
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Decode returns the decoded form of src. The returned slice may be a sub-
 | 
			
		||||
@@ -69,8 +67,6 @@ func Decode(dst, src []byte) ([]byte, error) {
 | 
			
		||||
		return dst, nil
 | 
			
		||||
	case decodeErrCodeUnsupportedLiteralLength:
 | 
			
		||||
		return nil, errUnsupportedLiteralLength
 | 
			
		||||
	case decodeErrCodeUnsupportedCopy4Tag:
 | 
			
		||||
		return nil, errUnsupportedCopy4Tag
 | 
			
		||||
	}
 | 
			
		||||
	return nil, ErrCorrupt
 | 
			
		||||
}
 | 
			
		||||
@@ -108,9 +104,9 @@ func (r *Reader) Reset(reader io.Reader) {
 | 
			
		||||
	r.readHeader = false
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (r *Reader) readFull(p []byte) (ok bool) {
 | 
			
		||||
func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
 | 
			
		||||
	if _, r.err = io.ReadFull(r.r, p); r.err != nil {
 | 
			
		||||
		if r.err == io.ErrUnexpectedEOF {
 | 
			
		||||
		if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
 | 
			
		||||
			r.err = ErrCorrupt
 | 
			
		||||
		}
 | 
			
		||||
		return false
 | 
			
		||||
@@ -129,7 +125,7 @@ func (r *Reader) Read(p []byte) (int, error) {
 | 
			
		||||
			r.i += n
 | 
			
		||||
			return n, nil
 | 
			
		||||
		}
 | 
			
		||||
		if !r.readFull(r.buf[:4]) {
 | 
			
		||||
		if !r.readFull(r.buf[:4], true) {
 | 
			
		||||
			return 0, r.err
 | 
			
		||||
		}
 | 
			
		||||
		chunkType := r.buf[0]
 | 
			
		||||
@@ -156,7 +152,7 @@ func (r *Reader) Read(p []byte) (int, error) {
 | 
			
		||||
				return 0, r.err
 | 
			
		||||
			}
 | 
			
		||||
			buf := r.buf[:chunkLen]
 | 
			
		||||
			if !r.readFull(buf) {
 | 
			
		||||
			if !r.readFull(buf, false) {
 | 
			
		||||
				return 0, r.err
 | 
			
		||||
			}
 | 
			
		||||
			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
 | 
			
		||||
@@ -189,13 +185,17 @@ func (r *Reader) Read(p []byte) (int, error) {
 | 
			
		||||
				return 0, r.err
 | 
			
		||||
			}
 | 
			
		||||
			buf := r.buf[:checksumSize]
 | 
			
		||||
			if !r.readFull(buf) {
 | 
			
		||||
			if !r.readFull(buf, false) {
 | 
			
		||||
				return 0, r.err
 | 
			
		||||
			}
 | 
			
		||||
			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
 | 
			
		||||
			// Read directly into r.decoded instead of via r.buf.
 | 
			
		||||
			n := chunkLen - checksumSize
 | 
			
		||||
			if !r.readFull(r.decoded[:n]) {
 | 
			
		||||
			if n > len(r.decoded) {
 | 
			
		||||
				r.err = ErrCorrupt
 | 
			
		||||
				return 0, r.err
 | 
			
		||||
			}
 | 
			
		||||
			if !r.readFull(r.decoded[:n], false) {
 | 
			
		||||
				return 0, r.err
 | 
			
		||||
			}
 | 
			
		||||
			if crc(r.decoded[:n]) != checksum {
 | 
			
		||||
@@ -211,7 +211,7 @@ func (r *Reader) Read(p []byte) (int, error) {
 | 
			
		||||
				r.err = ErrCorrupt
 | 
			
		||||
				return 0, r.err
 | 
			
		||||
			}
 | 
			
		||||
			if !r.readFull(r.buf[:len(magicBody)]) {
 | 
			
		||||
			if !r.readFull(r.buf[:len(magicBody)], false) {
 | 
			
		||||
				return 0, r.err
 | 
			
		||||
			}
 | 
			
		||||
			for i := 0; i < len(magicBody); i++ {
 | 
			
		||||
@@ -230,7 +230,7 @@ func (r *Reader) Read(p []byte) (int, error) {
 | 
			
		||||
		}
 | 
			
		||||
		// Section 4.4 Padding (chunk type 0xfe).
 | 
			
		||||
		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
 | 
			
		||||
		if !r.readFull(r.buf[:chunkLen]) {
 | 
			
		||||
		if !r.readFull(r.buf[:chunkLen], false) {
 | 
			
		||||
			return 0, r.err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										4
									
								
								vendor/github.com/golang/snappy/decode_amd64.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								vendor/github.com/golang/snappy/decode_amd64.go
									
									
									
										generated
									
									
										vendored
									
									
								
							@@ -2,6 +2,10 @@
 | 
			
		||||
// Use of this source code is governed by a BSD-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
// +build !appengine
 | 
			
		||||
// +build gc
 | 
			
		||||
// +build !noasm
 | 
			
		||||
 | 
			
		||||
package snappy
 | 
			
		||||
 | 
			
		||||
// decode has the same semantics as in decode_other.go.
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										38
									
								
								vendor/github.com/golang/snappy/decode_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										38
									
								
								vendor/github.com/golang/snappy/decode_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
								
							@@ -2,12 +2,16 @@
 | 
			
		||||
// Use of this source code is governed by a BSD-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
// +build !appengine
 | 
			
		||||
// +build gc
 | 
			
		||||
// +build !noasm
 | 
			
		||||
 | 
			
		||||
#include "textflag.h"
 | 
			
		||||
 | 
			
		||||
// func decode(dst, src []byte) int
 | 
			
		||||
//
 | 
			
		||||
// The asm code generally follows the pure Go code in decode_other.go, except
 | 
			
		||||
// where marked with a "!!!".
 | 
			
		||||
 | 
			
		||||
// func decode(dst, src []byte) int
 | 
			
		||||
//
 | 
			
		||||
// All local variables fit into registers. The non-zero stack size is only to
 | 
			
		||||
// spill registers and push args when issuing a CALL. The register allocation:
 | 
			
		||||
@@ -222,6 +226,25 @@ tagLit63:
 | 
			
		||||
// ----------------------------------------
 | 
			
		||||
// The code below handles copy tags.
 | 
			
		||||
 | 
			
		||||
tagCopy4:
 | 
			
		||||
	// case tagCopy4:
 | 
			
		||||
	// s += 5
 | 
			
		||||
	ADDQ $5, SI
 | 
			
		||||
 | 
			
		||||
	// if uint(s) > uint(len(src)) { etc }
 | 
			
		||||
	MOVQ SI, BX
 | 
			
		||||
	SUBQ R11, BX
 | 
			
		||||
	CMPQ BX, R12
 | 
			
		||||
	JA   errCorrupt
 | 
			
		||||
 | 
			
		||||
	// length = 1 + int(src[s-5])>>2
 | 
			
		||||
	SHRQ $2, CX
 | 
			
		||||
	INCQ CX
 | 
			
		||||
 | 
			
		||||
	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
 | 
			
		||||
	MOVLQZX -4(SI), DX
 | 
			
		||||
	JMP     doCopy
 | 
			
		||||
 | 
			
		||||
tagCopy2:
 | 
			
		||||
	// case tagCopy2:
 | 
			
		||||
	// s += 3
 | 
			
		||||
@@ -237,7 +260,7 @@ tagCopy2:
 | 
			
		||||
	SHRQ $2, CX
 | 
			
		||||
	INCQ CX
 | 
			
		||||
 | 
			
		||||
	// offset = int(src[s-2]) | int(src[s-1])<<8
 | 
			
		||||
	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
 | 
			
		||||
	MOVWQZX -2(SI), DX
 | 
			
		||||
	JMP     doCopy
 | 
			
		||||
 | 
			
		||||
@@ -247,7 +270,7 @@ tagCopy:
 | 
			
		||||
	//	- CX == src[s]
 | 
			
		||||
	CMPQ BX, $2
 | 
			
		||||
	JEQ  tagCopy2
 | 
			
		||||
	JA   errUC4T
 | 
			
		||||
	JA   tagCopy4
 | 
			
		||||
 | 
			
		||||
	// case tagCopy1:
 | 
			
		||||
	// s += 2
 | 
			
		||||
@@ -259,7 +282,7 @@ tagCopy:
 | 
			
		||||
	CMPQ BX, R12
 | 
			
		||||
	JA   errCorrupt
 | 
			
		||||
 | 
			
		||||
	// offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
 | 
			
		||||
	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
 | 
			
		||||
	MOVQ    CX, DX
 | 
			
		||||
	ANDQ    $0xe0, DX
 | 
			
		||||
	SHLQ    $3, DX
 | 
			
		||||
@@ -465,8 +488,3 @@ errCorrupt:
 | 
			
		||||
	// return decodeErrCodeCorrupt
 | 
			
		||||
	MOVQ $1, ret+48(FP)
 | 
			
		||||
	RET
 | 
			
		||||
 | 
			
		||||
errUC4T:
 | 
			
		||||
	// return decodeErrCodeUnsupportedCopy4Tag
 | 
			
		||||
	MOVQ $3, ret+48(FP)
 | 
			
		||||
	RET
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										13
									
								
								vendor/github.com/golang/snappy/decode_other.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										13
									
								
								vendor/github.com/golang/snappy/decode_other.go
									
									
									
										generated
									
									
										vendored
									
									
								
							@@ -2,7 +2,7 @@
 | 
			
		||||
// Use of this source code is governed by a BSD-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
// +build !amd64
 | 
			
		||||
// +build !amd64 appengine !gc noasm
 | 
			
		||||
 | 
			
		||||
package snappy
 | 
			
		||||
 | 
			
		||||
@@ -63,7 +63,7 @@ func decode(dst, src []byte) int {
 | 
			
		||||
				return decodeErrCodeCorrupt
 | 
			
		||||
			}
 | 
			
		||||
			length = 4 + int(src[s-2])>>2&0x7
 | 
			
		||||
			offset = int(src[s-2])&0xe0<<3 | int(src[s-1])
 | 
			
		||||
			offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
 | 
			
		||||
 | 
			
		||||
		case tagCopy2:
 | 
			
		||||
			s += 3
 | 
			
		||||
@@ -71,10 +71,15 @@ func decode(dst, src []byte) int {
 | 
			
		||||
				return decodeErrCodeCorrupt
 | 
			
		||||
			}
 | 
			
		||||
			length = 1 + int(src[s-3])>>2
 | 
			
		||||
			offset = int(src[s-2]) | int(src[s-1])<<8
 | 
			
		||||
			offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
 | 
			
		||||
 | 
			
		||||
		case tagCopy4:
 | 
			
		||||
			return decodeErrCodeUnsupportedCopy4Tag
 | 
			
		||||
			s += 5
 | 
			
		||||
			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
 | 
			
		||||
				return decodeErrCodeCorrupt
 | 
			
		||||
			}
 | 
			
		||||
			length = 1 + int(src[s-5])>>2
 | 
			
		||||
			offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		if offset <= 0 || d < offset || length > len(dst)-d {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										186
									
								
								vendor/github.com/golang/snappy/encode.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										186
									
								
								vendor/github.com/golang/snappy/encode.go
									
									
									
										generated
									
									
										vendored
									
									
								
							@@ -10,78 +10,11 @@ import (
 | 
			
		||||
	"io"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// maxOffset limits how far copy back-references can go, the same as the C++
 | 
			
		||||
// code.
 | 
			
		||||
const maxOffset = 1 << 15
 | 
			
		||||
 | 
			
		||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
 | 
			
		||||
func emitLiteral(dst, lit []byte) int {
 | 
			
		||||
	i, n := 0, uint(len(lit)-1)
 | 
			
		||||
	switch {
 | 
			
		||||
	case n < 60:
 | 
			
		||||
		dst[0] = uint8(n)<<2 | tagLiteral
 | 
			
		||||
		i = 1
 | 
			
		||||
	case n < 1<<8:
 | 
			
		||||
		dst[0] = 60<<2 | tagLiteral
 | 
			
		||||
		dst[1] = uint8(n)
 | 
			
		||||
		i = 2
 | 
			
		||||
	case n < 1<<16:
 | 
			
		||||
		dst[0] = 61<<2 | tagLiteral
 | 
			
		||||
		dst[1] = uint8(n)
 | 
			
		||||
		dst[2] = uint8(n >> 8)
 | 
			
		||||
		i = 3
 | 
			
		||||
	case n < 1<<24:
 | 
			
		||||
		dst[0] = 62<<2 | tagLiteral
 | 
			
		||||
		dst[1] = uint8(n)
 | 
			
		||||
		dst[2] = uint8(n >> 8)
 | 
			
		||||
		dst[3] = uint8(n >> 16)
 | 
			
		||||
		i = 4
 | 
			
		||||
	case int64(n) < 1<<32:
 | 
			
		||||
		dst[0] = 63<<2 | tagLiteral
 | 
			
		||||
		dst[1] = uint8(n)
 | 
			
		||||
		dst[2] = uint8(n >> 8)
 | 
			
		||||
		dst[3] = uint8(n >> 16)
 | 
			
		||||
		dst[4] = uint8(n >> 24)
 | 
			
		||||
		i = 5
 | 
			
		||||
	default:
 | 
			
		||||
		panic("snappy: source buffer is too long")
 | 
			
		||||
	}
 | 
			
		||||
	if copy(dst[i:], lit) != len(lit) {
 | 
			
		||||
		panic("snappy: destination buffer is too short")
 | 
			
		||||
	}
 | 
			
		||||
	return i + len(lit)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// emitCopy writes a copy chunk and returns the number of bytes written.
 | 
			
		||||
func emitCopy(dst []byte, offset, length int32) int {
 | 
			
		||||
	i := 0
 | 
			
		||||
	for length > 0 {
 | 
			
		||||
		x := length - 4
 | 
			
		||||
		if 0 <= x && x < 1<<3 && offset < 1<<11 {
 | 
			
		||||
			dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1
 | 
			
		||||
			dst[i+1] = uint8(offset)
 | 
			
		||||
			i += 2
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		x = length
 | 
			
		||||
		if x > 1<<6 {
 | 
			
		||||
			x = 1 << 6
 | 
			
		||||
		}
 | 
			
		||||
		dst[i+0] = uint8(x-1)<<2 | tagCopy2
 | 
			
		||||
		dst[i+1] = uint8(offset)
 | 
			
		||||
		dst[i+2] = uint8(offset >> 8)
 | 
			
		||||
		i += 3
 | 
			
		||||
		length -= x
 | 
			
		||||
	}
 | 
			
		||||
	return i
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Encode returns the encoded form of src. The returned slice may be a sub-
 | 
			
		||||
// slice of dst if dst was large enough to hold the entire encoded block.
 | 
			
		||||
// Otherwise, a newly allocated slice will be returned.
 | 
			
		||||
//
 | 
			
		||||
// It is valid to pass a nil dst.
 | 
			
		||||
// The dst and src must not overlap. It is valid to pass a nil dst.
 | 
			
		||||
func Encode(dst, src []byte) []byte {
 | 
			
		||||
	if n := MaxEncodedLen(len(src)); n < 0 {
 | 
			
		||||
		panic(ErrTooLarge)
 | 
			
		||||
@@ -98,94 +31,43 @@ func Encode(dst, src []byte) []byte {
 | 
			
		||||
		if len(p) > maxBlockSize {
 | 
			
		||||
			p, src = p[:maxBlockSize], p[maxBlockSize:]
 | 
			
		||||
		}
 | 
			
		||||
		d += encodeBlock(dst[d:], p)
 | 
			
		||||
		if len(p) < minNonLiteralBlockSize {
 | 
			
		||||
			d += emitLiteral(dst[d:], p)
 | 
			
		||||
		} else {
 | 
			
		||||
			d += encodeBlock(dst[d:], p)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return dst[:d]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
 | 
			
		||||
// assumes that the varint-encoded length of the decompressed bytes has already
 | 
			
		||||
// been written.
 | 
			
		||||
// inputMargin is the minimum number of extra input bytes to keep, inside
 | 
			
		||||
// encodeBlock's inner loop. On some architectures, this margin lets us
 | 
			
		||||
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
 | 
			
		||||
// literals can be implemented as a single load to and store from a 16-byte
 | 
			
		||||
// register. That literal's actual length can be as short as 1 byte, so this
 | 
			
		||||
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
 | 
			
		||||
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
 | 
			
		||||
// that we don't overrun the dst and src buffers.
 | 
			
		||||
const inputMargin = 16 - 1
 | 
			
		||||
 | 
			
		||||
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
 | 
			
		||||
// could be encoded with a copy tag. This is the minimum with respect to the
 | 
			
		||||
// algorithm used by encodeBlock, not a minimum enforced by the file format.
 | 
			
		||||
//
 | 
			
		||||
// It also assumes that:
 | 
			
		||||
//	len(dst) >= MaxEncodedLen(len(src)) &&
 | 
			
		||||
// 	0 < len(src) && len(src) <= maxBlockSize
 | 
			
		||||
func encodeBlock(dst, src []byte) (d int) {
 | 
			
		||||
	// Return early if src is short.
 | 
			
		||||
	if len(src) <= 4 {
 | 
			
		||||
		return emitLiteral(dst, src)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
 | 
			
		||||
	const maxTableSize = 1 << 14
 | 
			
		||||
	shift, tableSize := uint(32-8), 1<<8
 | 
			
		||||
	for tableSize < maxTableSize && tableSize < len(src) {
 | 
			
		||||
		shift--
 | 
			
		||||
		tableSize *= 2
 | 
			
		||||
	}
 | 
			
		||||
	var table [maxTableSize]int32
 | 
			
		||||
 | 
			
		||||
	// Iterate over the source bytes.
 | 
			
		||||
	var (
 | 
			
		||||
		s   int32 // The iterator position.
 | 
			
		||||
		t   int32 // The last position with the same hash as s.
 | 
			
		||||
		lit int32 // The start position of any pending literal bytes.
 | 
			
		||||
 | 
			
		||||
		// Copied from the C++ snappy implementation:
 | 
			
		||||
		//
 | 
			
		||||
		// Heuristic match skipping: If 32 bytes are scanned with no matches
 | 
			
		||||
		// found, start looking only at every other byte. If 32 more bytes are
 | 
			
		||||
		// scanned, look at every third byte, etc.. When a match is found,
 | 
			
		||||
		// immediately go back to looking at every byte. This is a small loss
 | 
			
		||||
		// (~5% performance, ~0.1% density) for compressible data due to more
 | 
			
		||||
		// bookkeeping, but for non-compressible data (such as JPEG) it's a
 | 
			
		||||
		// huge win since the compressor quickly "realizes" the data is
 | 
			
		||||
		// incompressible and doesn't bother looking for matches everywhere.
 | 
			
		||||
		//
 | 
			
		||||
		// The "skip" variable keeps track of how many bytes there are since
 | 
			
		||||
		// the last match; dividing it by 32 (ie. right-shifting by five) gives
 | 
			
		||||
		// the number of bytes to move ahead for each iteration.
 | 
			
		||||
		skip uint32 = 32
 | 
			
		||||
	)
 | 
			
		||||
	for uint32(s+3) < uint32(len(src)) { // The uint32 conversions catch overflow from the +3.
 | 
			
		||||
		// Update the hash table.
 | 
			
		||||
		b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
 | 
			
		||||
		h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
 | 
			
		||||
		p := &table[(h*0x1e35a7bd)>>shift]
 | 
			
		||||
		// We need to to store values in [-1, inf) in table. To save
 | 
			
		||||
		// some initialization time, (re)use the table's zero value
 | 
			
		||||
		// and shift the values against this zero: add 1 on writes,
 | 
			
		||||
		// subtract 1 on reads.
 | 
			
		||||
		t, *p = *p-1, s+1
 | 
			
		||||
		// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
 | 
			
		||||
		if t < 0 || s-t >= maxOffset || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
 | 
			
		||||
			s += int32(skip >> 5)
 | 
			
		||||
			skip++
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		skip = 32
 | 
			
		||||
		// Otherwise, we have a match. First, emit any pending literal bytes.
 | 
			
		||||
		if lit != s {
 | 
			
		||||
			d += emitLiteral(dst[d:], src[lit:s])
 | 
			
		||||
		}
 | 
			
		||||
		// Extend the match to be as long as possible.
 | 
			
		||||
		s0 := s
 | 
			
		||||
		s, t = s+4, t+4
 | 
			
		||||
		for int(s) < len(src) && src[s] == src[t] {
 | 
			
		||||
			s++
 | 
			
		||||
			t++
 | 
			
		||||
		}
 | 
			
		||||
		// Emit the copied bytes.
 | 
			
		||||
		d += emitCopy(dst[d:], s-t, s-s0)
 | 
			
		||||
		lit = s
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Emit any final pending literal bytes and return.
 | 
			
		||||
	if int(lit) != len(src) {
 | 
			
		||||
		d += emitLiteral(dst[d:], src[lit:])
 | 
			
		||||
	}
 | 
			
		||||
	return d
 | 
			
		||||
}
 | 
			
		||||
// The encoded output must start with at least a 1 byte literal, as there are
 | 
			
		||||
// no previous bytes to copy. A minimal (1 byte) copy after that, generated
 | 
			
		||||
// from an emitCopy call in encodeBlock's main loop, would require at least
 | 
			
		||||
// another inputMargin bytes, for the reason above: we want any emitLiteral
 | 
			
		||||
// calls inside encodeBlock's main loop to use the fast path if possible, which
 | 
			
		||||
// requires being able to overrun by inputMargin bytes. Thus,
 | 
			
		||||
// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
 | 
			
		||||
//
 | 
			
		||||
// The C++ code doesn't use this exact threshold, but it could, as discussed at
 | 
			
		||||
// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
 | 
			
		||||
// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
 | 
			
		||||
// optimization. It should not affect the encoded form. This is tested by
 | 
			
		||||
// TestSameEncodingAsCppShortCopies.
 | 
			
		||||
const minNonLiteralBlockSize = 1 + 1 + inputMargin
 | 
			
		||||
 | 
			
		||||
// MaxEncodedLen returns the maximum length of a snappy block, given its
 | 
			
		||||
// uncompressed length.
 | 
			
		||||
@@ -256,7 +138,7 @@ func NewBufferedWriter(w io.Writer) *Writer {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Writer is an io.Writer than can write Snappy-compressed bytes.
 | 
			
		||||
// Writer is an io.Writer that can write Snappy-compressed bytes.
 | 
			
		||||
type Writer struct {
 | 
			
		||||
	w   io.Writer
 | 
			
		||||
	err error
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										29
									
								
								vendor/github.com/golang/snappy/encode_amd64.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								vendor/github.com/golang/snappy/encode_amd64.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,29 @@
 | 
			
		||||
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
 | 
			
		||||
// Use of this source code is governed by a BSD-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
// +build !appengine
 | 
			
		||||
// +build gc
 | 
			
		||||
// +build !noasm
 | 
			
		||||
 | 
			
		||||
package snappy
 | 
			
		||||
 | 
			
		||||
// emitLiteral has the same semantics as in encode_other.go.
 | 
			
		||||
//
 | 
			
		||||
//go:noescape
 | 
			
		||||
func emitLiteral(dst, lit []byte) int
 | 
			
		||||
 | 
			
		||||
// emitCopy has the same semantics as in encode_other.go.
 | 
			
		||||
//
 | 
			
		||||
//go:noescape
 | 
			
		||||
func emitCopy(dst []byte, offset, length int) int
 | 
			
		||||
 | 
			
		||||
// extendMatch has the same semantics as in encode_other.go.
 | 
			
		||||
//
 | 
			
		||||
//go:noescape
 | 
			
		||||
func extendMatch(src []byte, i, j int) int
 | 
			
		||||
 | 
			
		||||
// encodeBlock has the same semantics as in encode_other.go.
 | 
			
		||||
//
 | 
			
		||||
//go:noescape
 | 
			
		||||
func encodeBlock(dst, src []byte) (d int)
 | 
			
		||||
							
								
								
									
										730
									
								
								vendor/github.com/golang/snappy/encode_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										730
									
								
								vendor/github.com/golang/snappy/encode_amd64.s
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,730 @@
 | 
			
		||||
// Copyright 2016 The Go Authors. All rights reserved.
 | 
			
		||||
// Use of this source code is governed by a BSD-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
// +build !appengine
 | 
			
		||||
// +build gc
 | 
			
		||||
// +build !noasm
 | 
			
		||||
 | 
			
		||||
#include "textflag.h"
 | 
			
		||||
 | 
			
		||||
// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
 | 
			
		||||
// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
 | 
			
		||||
// https://github.com/golang/snappy/issues/29
 | 
			
		||||
//
 | 
			
		||||
// As a workaround, the package was built with a known good assembler, and
 | 
			
		||||
// those instructions were disassembled by "objdump -d" to yield the
 | 
			
		||||
//	4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
 | 
			
		||||
// style comments, in AT&T asm syntax. Note that rsp here is a physical
 | 
			
		||||
// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
 | 
			
		||||
// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
 | 
			
		||||
// fine on Go 1.6.
 | 
			
		||||
 | 
			
		||||
// The asm code generally follows the pure Go code in encode_other.go, except
 | 
			
		||||
// where marked with a "!!!".
 | 
			
		||||
 | 
			
		||||
// ----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
// func emitLiteral(dst, lit []byte) int
 | 
			
		||||
//
 | 
			
		||||
// All local variables fit into registers. The register allocation:
 | 
			
		||||
//	- AX	len(lit)
 | 
			
		||||
//	- BX	n
 | 
			
		||||
//	- DX	return value
 | 
			
		||||
//	- DI	&dst[i]
 | 
			
		||||
//	- R10	&lit[0]
 | 
			
		||||
//
 | 
			
		||||
// The 24 bytes of stack space is to call runtime·memmove.
 | 
			
		||||
//
 | 
			
		||||
// The unusual register allocation of local variables, such as R10 for the
 | 
			
		||||
// source pointer, matches the allocation used at the call site in encodeBlock,
 | 
			
		||||
// which makes it easier to manually inline this function.
 | 
			
		||||
TEXT ·emitLiteral(SB), NOSPLIT, $24-56
 | 
			
		||||
	MOVQ dst_base+0(FP), DI
 | 
			
		||||
	MOVQ lit_base+24(FP), R10
 | 
			
		||||
	MOVQ lit_len+32(FP), AX
 | 
			
		||||
	MOVQ AX, DX
 | 
			
		||||
	MOVL AX, BX
 | 
			
		||||
	SUBL $1, BX
 | 
			
		||||
 | 
			
		||||
	CMPL BX, $60
 | 
			
		||||
	JLT  oneByte
 | 
			
		||||
	CMPL BX, $256
 | 
			
		||||
	JLT  twoBytes
 | 
			
		||||
 | 
			
		||||
threeBytes:
 | 
			
		||||
	MOVB $0xf4, 0(DI)
 | 
			
		||||
	MOVW BX, 1(DI)
 | 
			
		||||
	ADDQ $3, DI
 | 
			
		||||
	ADDQ $3, DX
 | 
			
		||||
	JMP  memmove
 | 
			
		||||
 | 
			
		||||
twoBytes:
 | 
			
		||||
	MOVB $0xf0, 0(DI)
 | 
			
		||||
	MOVB BX, 1(DI)
 | 
			
		||||
	ADDQ $2, DI
 | 
			
		||||
	ADDQ $2, DX
 | 
			
		||||
	JMP  memmove
 | 
			
		||||
 | 
			
		||||
oneByte:
 | 
			
		||||
	SHLB $2, BX
 | 
			
		||||
	MOVB BX, 0(DI)
 | 
			
		||||
	ADDQ $1, DI
 | 
			
		||||
	ADDQ $1, DX
 | 
			
		||||
 | 
			
		||||
memmove:
 | 
			
		||||
	MOVQ DX, ret+48(FP)
 | 
			
		||||
 | 
			
		||||
	// copy(dst[i:], lit)
 | 
			
		||||
	//
 | 
			
		||||
	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
 | 
			
		||||
	// DI, R10 and AX as arguments.
 | 
			
		||||
	MOVQ DI, 0(SP)
 | 
			
		||||
	MOVQ R10, 8(SP)
 | 
			
		||||
	MOVQ AX, 16(SP)
 | 
			
		||||
	CALL runtime·memmove(SB)
 | 
			
		||||
	RET
 | 
			
		||||
 | 
			
		||||
// ----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
// func emitCopy(dst []byte, offset, length int) int
 | 
			
		||||
//
 | 
			
		||||
// All local variables fit into registers. The register allocation:
 | 
			
		||||
//	- AX	length
 | 
			
		||||
//	- SI	&dst[0]
 | 
			
		||||
//	- DI	&dst[i]
 | 
			
		||||
//	- R11	offset
 | 
			
		||||
//
 | 
			
		||||
// The unusual register allocation of local variables, such as R11 for the
 | 
			
		||||
// offset, matches the allocation used at the call site in encodeBlock, which
 | 
			
		||||
// makes it easier to manually inline this function.
 | 
			
		||||
TEXT ·emitCopy(SB), NOSPLIT, $0-48
 | 
			
		||||
	MOVQ dst_base+0(FP), DI
 | 
			
		||||
	MOVQ DI, SI
 | 
			
		||||
	MOVQ offset+24(FP), R11
 | 
			
		||||
	MOVQ length+32(FP), AX
 | 
			
		||||
 | 
			
		||||
loop0:
 | 
			
		||||
	// for length >= 68 { etc }
 | 
			
		||||
	CMPL AX, $68
 | 
			
		||||
	JLT  step1
 | 
			
		||||
 | 
			
		||||
	// Emit a length 64 copy, encoded as 3 bytes.
 | 
			
		||||
	MOVB $0xfe, 0(DI)
 | 
			
		||||
	MOVW R11, 1(DI)
 | 
			
		||||
	ADDQ $3, DI
 | 
			
		||||
	SUBL $64, AX
 | 
			
		||||
	JMP  loop0
 | 
			
		||||
 | 
			
		||||
step1:
 | 
			
		||||
	// if length > 64 { etc }
 | 
			
		||||
	CMPL AX, $64
 | 
			
		||||
	JLE  step2
 | 
			
		||||
 | 
			
		||||
	// Emit a length 60 copy, encoded as 3 bytes.
 | 
			
		||||
	MOVB $0xee, 0(DI)
 | 
			
		||||
	MOVW R11, 1(DI)
 | 
			
		||||
	ADDQ $3, DI
 | 
			
		||||
	SUBL $60, AX
 | 
			
		||||
 | 
			
		||||
step2:
 | 
			
		||||
	// if length >= 12 || offset >= 2048 { goto step3 }
 | 
			
		||||
	CMPL AX, $12
 | 
			
		||||
	JGE  step3
 | 
			
		||||
	CMPL R11, $2048
 | 
			
		||||
	JGE  step3
 | 
			
		||||
 | 
			
		||||
	// Emit the remaining copy, encoded as 2 bytes.
 | 
			
		||||
	MOVB R11, 1(DI)
 | 
			
		||||
	SHRL $8, R11
 | 
			
		||||
	SHLB $5, R11
 | 
			
		||||
	SUBB $4, AX
 | 
			
		||||
	SHLB $2, AX
 | 
			
		||||
	ORB  AX, R11
 | 
			
		||||
	ORB  $1, R11
 | 
			
		||||
	MOVB R11, 0(DI)
 | 
			
		||||
	ADDQ $2, DI
 | 
			
		||||
 | 
			
		||||
	// Return the number of bytes written.
 | 
			
		||||
	SUBQ SI, DI
 | 
			
		||||
	MOVQ DI, ret+40(FP)
 | 
			
		||||
	RET
 | 
			
		||||
 | 
			
		||||
step3:
 | 
			
		||||
	// Emit the remaining copy, encoded as 3 bytes.
 | 
			
		||||
	SUBL $1, AX
 | 
			
		||||
	SHLB $2, AX
 | 
			
		||||
	ORB  $2, AX
 | 
			
		||||
	MOVB AX, 0(DI)
 | 
			
		||||
	MOVW R11, 1(DI)
 | 
			
		||||
	ADDQ $3, DI
 | 
			
		||||
 | 
			
		||||
	// Return the number of bytes written.
 | 
			
		||||
	SUBQ SI, DI
 | 
			
		||||
	MOVQ DI, ret+40(FP)
 | 
			
		||||
	RET
 | 
			
		||||
 | 
			
		||||
// ----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
// func extendMatch(src []byte, i, j int) int
 | 
			
		||||
//
 | 
			
		||||
// All local variables fit into registers. The register allocation:
 | 
			
		||||
//	- DX	&src[0]
 | 
			
		||||
//	- SI	&src[j]
 | 
			
		||||
//	- R13	&src[len(src) - 8]
 | 
			
		||||
//	- R14	&src[len(src)]
 | 
			
		||||
//	- R15	&src[i]
 | 
			
		||||
//
 | 
			
		||||
// The unusual register allocation of local variables, such as R15 for a source
 | 
			
		||||
// pointer, matches the allocation used at the call site in encodeBlock, which
 | 
			
		||||
// makes it easier to manually inline this function.
 | 
			
		||||
TEXT ·extendMatch(SB), NOSPLIT, $0-48
 | 
			
		||||
	MOVQ src_base+0(FP), DX
 | 
			
		||||
	MOVQ src_len+8(FP), R14
 | 
			
		||||
	MOVQ i+24(FP), R15
 | 
			
		||||
	MOVQ j+32(FP), SI
 | 
			
		||||
	ADDQ DX, R14
 | 
			
		||||
	ADDQ DX, R15
 | 
			
		||||
	ADDQ DX, SI
 | 
			
		||||
	MOVQ R14, R13
 | 
			
		||||
	SUBQ $8, R13
 | 
			
		||||
 | 
			
		||||
cmp8:
 | 
			
		||||
	// As long as we are 8 or more bytes before the end of src, we can load and
 | 
			
		||||
	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
 | 
			
		||||
	CMPQ SI, R13
 | 
			
		||||
	JA   cmp1
 | 
			
		||||
	MOVQ (R15), AX
 | 
			
		||||
	MOVQ (SI), BX
 | 
			
		||||
	CMPQ AX, BX
 | 
			
		||||
	JNE  bsf
 | 
			
		||||
	ADDQ $8, R15
 | 
			
		||||
	ADDQ $8, SI
 | 
			
		||||
	JMP  cmp8
 | 
			
		||||
 | 
			
		||||
bsf:
 | 
			
		||||
	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
 | 
			
		||||
	// the index of the first byte that differs. The BSF instruction finds the
 | 
			
		||||
	// least significant 1 bit, the amd64 architecture is little-endian, and
 | 
			
		||||
	// the shift by 3 converts a bit index to a byte index.
 | 
			
		||||
	XORQ AX, BX
 | 
			
		||||
	BSFQ BX, BX
 | 
			
		||||
	SHRQ $3, BX
 | 
			
		||||
	ADDQ BX, SI
 | 
			
		||||
 | 
			
		||||
	// Convert from &src[ret] to ret.
 | 
			
		||||
	SUBQ DX, SI
 | 
			
		||||
	MOVQ SI, ret+40(FP)
 | 
			
		||||
	RET
 | 
			
		||||
 | 
			
		||||
cmp1:
 | 
			
		||||
	// In src's tail, compare 1 byte at a time.
 | 
			
		||||
	CMPQ SI, R14
 | 
			
		||||
	JAE  extendMatchEnd
 | 
			
		||||
	MOVB (R15), AX
 | 
			
		||||
	MOVB (SI), BX
 | 
			
		||||
	CMPB AX, BX
 | 
			
		||||
	JNE  extendMatchEnd
 | 
			
		||||
	ADDQ $1, R15
 | 
			
		||||
	ADDQ $1, SI
 | 
			
		||||
	JMP  cmp1
 | 
			
		||||
 | 
			
		||||
extendMatchEnd:
 | 
			
		||||
	// Convert from &src[ret] to ret.
 | 
			
		||||
	SUBQ DX, SI
 | 
			
		||||
	MOVQ SI, ret+40(FP)
 | 
			
		||||
	RET
 | 
			
		||||
 | 
			
		||||
// ----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
// func encodeBlock(dst, src []byte) (d int)
 | 
			
		||||
//
 | 
			
		||||
// All local variables fit into registers, other than "var table". The register
 | 
			
		||||
// allocation:
 | 
			
		||||
//	- AX	.	.
 | 
			
		||||
//	- BX	.	.
 | 
			
		||||
//	- CX	56	shift (note that amd64 shifts by non-immediates must use CX).
 | 
			
		||||
//	- DX	64	&src[0], tableSize
 | 
			
		||||
//	- SI	72	&src[s]
 | 
			
		||||
//	- DI	80	&dst[d]
 | 
			
		||||
//	- R9	88	sLimit
 | 
			
		||||
//	- R10	.	&src[nextEmit]
 | 
			
		||||
//	- R11	96	prevHash, currHash, nextHash, offset
 | 
			
		||||
//	- R12	104	&src[base], skip
 | 
			
		||||
//	- R13	.	&src[nextS], &src[len(src) - 8]
 | 
			
		||||
//	- R14	.	len(src), bytesBetweenHashLookups, &src[len(src)], x
 | 
			
		||||
//	- R15	112	candidate
 | 
			
		||||
//
 | 
			
		||||
// The second column (56, 64, etc) is the stack offset to spill the registers
 | 
			
		||||
// when calling other functions. We could pack this slightly tighter, but it's
 | 
			
		||||
// simpler to have a dedicated spill map independent of the function called.
 | 
			
		||||
//
 | 
			
		||||
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
 | 
			
		||||
// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
 | 
			
		||||
// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
 | 
			
		||||
TEXT ·encodeBlock(SB), 0, $32888-56
 | 
			
		||||
	MOVQ dst_base+0(FP), DI
 | 
			
		||||
	MOVQ src_base+24(FP), SI
 | 
			
		||||
	MOVQ src_len+32(FP), R14
 | 
			
		||||
 | 
			
		||||
	// shift, tableSize := uint32(32-8), 1<<8
 | 
			
		||||
	MOVQ $24, CX
 | 
			
		||||
	MOVQ $256, DX
 | 
			
		||||
 | 
			
		||||
calcShift:
 | 
			
		||||
	// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
 | 
			
		||||
	//	shift--
 | 
			
		||||
	// }
 | 
			
		||||
	CMPQ DX, $16384
 | 
			
		||||
	JGE  varTable
 | 
			
		||||
	CMPQ DX, R14
 | 
			
		||||
	JGE  varTable
 | 
			
		||||
	SUBQ $1, CX
 | 
			
		||||
	SHLQ $1, DX
 | 
			
		||||
	JMP  calcShift
 | 
			
		||||
 | 
			
		||||
varTable:
 | 
			
		||||
	// var table [maxTableSize]uint16
 | 
			
		||||
	//
 | 
			
		||||
	// In the asm code, unlike the Go code, we can zero-initialize only the
 | 
			
		||||
	// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
 | 
			
		||||
	// writes 16 bytes, so we can do only tableSize/8 writes instead of the
 | 
			
		||||
	// 2048 writes that would zero-initialize all of table's 32768 bytes.
 | 
			
		||||
	SHRQ $3, DX
 | 
			
		||||
	LEAQ table-32768(SP), BX
 | 
			
		||||
	PXOR X0, X0
 | 
			
		||||
 | 
			
		||||
memclr:
 | 
			
		||||
	MOVOU X0, 0(BX)
 | 
			
		||||
	ADDQ  $16, BX
 | 
			
		||||
	SUBQ  $1, DX
 | 
			
		||||
	JNZ   memclr
 | 
			
		||||
 | 
			
		||||
	// !!! DX = &src[0]
 | 
			
		||||
	MOVQ SI, DX
 | 
			
		||||
 | 
			
		||||
	// sLimit := len(src) - inputMargin
 | 
			
		||||
	MOVQ R14, R9
 | 
			
		||||
	SUBQ $15, R9
 | 
			
		||||
 | 
			
		||||
	// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
 | 
			
		||||
	// change for the rest of the function.
 | 
			
		||||
	MOVQ CX, 56(SP)
 | 
			
		||||
	MOVQ DX, 64(SP)
 | 
			
		||||
	MOVQ R9, 88(SP)
 | 
			
		||||
 | 
			
		||||
	// nextEmit := 0
 | 
			
		||||
	MOVQ DX, R10
 | 
			
		||||
 | 
			
		||||
	// s := 1
 | 
			
		||||
	ADDQ $1, SI
 | 
			
		||||
 | 
			
		||||
	// nextHash := hash(load32(src, s), shift)
 | 
			
		||||
	MOVL  0(SI), R11
 | 
			
		||||
	IMULL $0x1e35a7bd, R11
 | 
			
		||||
	SHRL  CX, R11
 | 
			
		||||
 | 
			
		||||
outer:
 | 
			
		||||
	// for { etc }
 | 
			
		||||
 | 
			
		||||
	// skip := 32
 | 
			
		||||
	MOVQ $32, R12
 | 
			
		||||
 | 
			
		||||
	// nextS := s
 | 
			
		||||
	MOVQ SI, R13
 | 
			
		||||
 | 
			
		||||
	// candidate := 0
 | 
			
		||||
	MOVQ $0, R15
 | 
			
		||||
 | 
			
		||||
inner0:
 | 
			
		||||
	// for { etc }
 | 
			
		||||
 | 
			
		||||
	// s := nextS
 | 
			
		||||
	MOVQ R13, SI
 | 
			
		||||
 | 
			
		||||
	// bytesBetweenHashLookups := skip >> 5
 | 
			
		||||
	MOVQ R12, R14
 | 
			
		||||
	SHRQ $5, R14
 | 
			
		||||
 | 
			
		||||
	// nextS = s + bytesBetweenHashLookups
 | 
			
		||||
	ADDQ R14, R13
 | 
			
		||||
 | 
			
		||||
	// skip += bytesBetweenHashLookups
 | 
			
		||||
	ADDQ R14, R12
 | 
			
		||||
 | 
			
		||||
	// if nextS > sLimit { goto emitRemainder }
 | 
			
		||||
	MOVQ R13, AX
 | 
			
		||||
	SUBQ DX, AX
 | 
			
		||||
	CMPQ AX, R9
 | 
			
		||||
	JA   emitRemainder
 | 
			
		||||
 | 
			
		||||
	// candidate = int(table[nextHash])
 | 
			
		||||
	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
 | 
			
		||||
	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
 | 
			
		||||
	BYTE $0x4e
 | 
			
		||||
	BYTE $0x0f
 | 
			
		||||
	BYTE $0xb7
 | 
			
		||||
	BYTE $0x7c
 | 
			
		||||
	BYTE $0x5c
 | 
			
		||||
	BYTE $0x78
 | 
			
		||||
 | 
			
		||||
	// table[nextHash] = uint16(s)
 | 
			
		||||
	MOVQ SI, AX
 | 
			
		||||
	SUBQ DX, AX
 | 
			
		||||
 | 
			
		||||
	// XXX: MOVW AX, table-32768(SP)(R11*2)
 | 
			
		||||
	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
 | 
			
		||||
	BYTE $0x66
 | 
			
		||||
	BYTE $0x42
 | 
			
		||||
	BYTE $0x89
 | 
			
		||||
	BYTE $0x44
 | 
			
		||||
	BYTE $0x5c
 | 
			
		||||
	BYTE $0x78
 | 
			
		||||
 | 
			
		||||
	// nextHash = hash(load32(src, nextS), shift)
 | 
			
		||||
	MOVL  0(R13), R11
 | 
			
		||||
	IMULL $0x1e35a7bd, R11
 | 
			
		||||
	SHRL  CX, R11
 | 
			
		||||
 | 
			
		||||
	// if load32(src, s) != load32(src, candidate) { continue } break
 | 
			
		||||
	MOVL 0(SI), AX
 | 
			
		||||
	MOVL (DX)(R15*1), BX
 | 
			
		||||
	CMPL AX, BX
 | 
			
		||||
	JNE  inner0
 | 
			
		||||
 | 
			
		||||
fourByteMatch:
 | 
			
		||||
	// As per the encode_other.go code:
 | 
			
		||||
	//
 | 
			
		||||
	// A 4-byte match has been found. We'll later see etc.
 | 
			
		||||
 | 
			
		||||
	// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
 | 
			
		||||
	// on inputMargin in encode.go.
 | 
			
		||||
	MOVQ SI, AX
 | 
			
		||||
	SUBQ R10, AX
 | 
			
		||||
	CMPQ AX, $16
 | 
			
		||||
	JLE  emitLiteralFastPath
 | 
			
		||||
 | 
			
		||||
	// ----------------------------------------
 | 
			
		||||
	// Begin inline of the emitLiteral call.
 | 
			
		||||
	//
 | 
			
		||||
	// d += emitLiteral(dst[d:], src[nextEmit:s])
 | 
			
		||||
 | 
			
		||||
	MOVL AX, BX
 | 
			
		||||
	SUBL $1, BX
 | 
			
		||||
 | 
			
		||||
	CMPL BX, $60
 | 
			
		||||
	JLT  inlineEmitLiteralOneByte
 | 
			
		||||
	CMPL BX, $256
 | 
			
		||||
	JLT  inlineEmitLiteralTwoBytes
 | 
			
		||||
 | 
			
		||||
inlineEmitLiteralThreeBytes:
 | 
			
		||||
	MOVB $0xf4, 0(DI)
 | 
			
		||||
	MOVW BX, 1(DI)
 | 
			
		||||
	ADDQ $3, DI
 | 
			
		||||
	JMP  inlineEmitLiteralMemmove
 | 
			
		||||
 | 
			
		||||
inlineEmitLiteralTwoBytes:
 | 
			
		||||
	MOVB $0xf0, 0(DI)
 | 
			
		||||
	MOVB BX, 1(DI)
 | 
			
		||||
	ADDQ $2, DI
 | 
			
		||||
	JMP  inlineEmitLiteralMemmove
 | 
			
		||||
 | 
			
		||||
inlineEmitLiteralOneByte:
 | 
			
		||||
	SHLB $2, BX
 | 
			
		||||
	MOVB BX, 0(DI)
 | 
			
		||||
	ADDQ $1, DI
 | 
			
		||||
 | 
			
		||||
inlineEmitLiteralMemmove:
 | 
			
		||||
	// Spill local variables (registers) onto the stack; call; unspill.
 | 
			
		||||
	//
 | 
			
		||||
	// copy(dst[i:], lit)
 | 
			
		||||
	//
 | 
			
		||||
	// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
 | 
			
		||||
	// DI, R10 and AX as arguments.
 | 
			
		||||
	MOVQ DI, 0(SP)
 | 
			
		||||
	MOVQ R10, 8(SP)
 | 
			
		||||
	MOVQ AX, 16(SP)
 | 
			
		||||
	ADDQ AX, DI              // Finish the "d +=" part of "d += emitLiteral(etc)".
 | 
			
		||||
	MOVQ SI, 72(SP)
 | 
			
		||||
	MOVQ DI, 80(SP)
 | 
			
		||||
	MOVQ R15, 112(SP)
 | 
			
		||||
	CALL runtime·memmove(SB)
 | 
			
		||||
	MOVQ 56(SP), CX
 | 
			
		||||
	MOVQ 64(SP), DX
 | 
			
		||||
	MOVQ 72(SP), SI
 | 
			
		||||
	MOVQ 80(SP), DI
 | 
			
		||||
	MOVQ 88(SP), R9
 | 
			
		||||
	MOVQ 112(SP), R15
 | 
			
		||||
	JMP  inner1
 | 
			
		||||
 | 
			
		||||
inlineEmitLiteralEnd:
 | 
			
		||||
	// End inline of the emitLiteral call.
 | 
			
		||||
	// ----------------------------------------
 | 
			
		||||
 | 
			
		||||
emitLiteralFastPath:
 | 
			
		||||
	// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
 | 
			
		||||
	MOVB AX, BX
 | 
			
		||||
	SUBB $1, BX
 | 
			
		||||
	SHLB $2, BX
 | 
			
		||||
	MOVB BX, (DI)
 | 
			
		||||
	ADDQ $1, DI
 | 
			
		||||
 | 
			
		||||
	// !!! Implement the copy from lit to dst as a 16-byte load and store.
 | 
			
		||||
	// (Encode's documentation says that dst and src must not overlap.)
 | 
			
		||||
	//
 | 
			
		||||
	// This always copies 16 bytes, instead of only len(lit) bytes, but that's
 | 
			
		||||
	// OK. Subsequent iterations will fix up the overrun.
 | 
			
		||||
	//
 | 
			
		||||
	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
 | 
			
		||||
	// 16-byte loads and stores. This technique probably wouldn't be as
 | 
			
		||||
	// effective on architectures that are fussier about alignment.
 | 
			
		||||
	MOVOU 0(R10), X0
 | 
			
		||||
	MOVOU X0, 0(DI)
 | 
			
		||||
	ADDQ  AX, DI
 | 
			
		||||
 | 
			
		||||
inner1:
 | 
			
		||||
	// for { etc }
 | 
			
		||||
 | 
			
		||||
	// base := s
 | 
			
		||||
	MOVQ SI, R12
 | 
			
		||||
 | 
			
		||||
	// !!! offset := base - candidate
 | 
			
		||||
	MOVQ R12, R11
 | 
			
		||||
	SUBQ R15, R11
 | 
			
		||||
	SUBQ DX, R11
 | 
			
		||||
 | 
			
		||||
	// ----------------------------------------
 | 
			
		||||
	// Begin inline of the extendMatch call.
 | 
			
		||||
	//
 | 
			
		||||
	// s = extendMatch(src, candidate+4, s+4)
 | 
			
		||||
 | 
			
		||||
	// !!! R14 = &src[len(src)]
 | 
			
		||||
	MOVQ src_len+32(FP), R14
 | 
			
		||||
	ADDQ DX, R14
 | 
			
		||||
 | 
			
		||||
	// !!! R13 = &src[len(src) - 8]
 | 
			
		||||
	MOVQ R14, R13
 | 
			
		||||
	SUBQ $8, R13
 | 
			
		||||
 | 
			
		||||
	// !!! R15 = &src[candidate + 4]
 | 
			
		||||
	ADDQ $4, R15
 | 
			
		||||
	ADDQ DX, R15
 | 
			
		||||
 | 
			
		||||
	// !!! s += 4
 | 
			
		||||
	ADDQ $4, SI
 | 
			
		||||
 | 
			
		||||
inlineExtendMatchCmp8:
 | 
			
		||||
	// As long as we are 8 or more bytes before the end of src, we can load and
 | 
			
		||||
	// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
 | 
			
		||||
	CMPQ SI, R13
 | 
			
		||||
	JA   inlineExtendMatchCmp1
 | 
			
		||||
	MOVQ (R15), AX
 | 
			
		||||
	MOVQ (SI), BX
 | 
			
		||||
	CMPQ AX, BX
 | 
			
		||||
	JNE  inlineExtendMatchBSF
 | 
			
		||||
	ADDQ $8, R15
 | 
			
		||||
	ADDQ $8, SI
 | 
			
		||||
	JMP  inlineExtendMatchCmp8
 | 
			
		||||
 | 
			
		||||
inlineExtendMatchBSF:
 | 
			
		||||
	// If those 8 bytes were not equal, XOR the two 8 byte values, and return
 | 
			
		||||
	// the index of the first byte that differs. The BSF instruction finds the
 | 
			
		||||
	// least significant 1 bit, the amd64 architecture is little-endian, and
 | 
			
		||||
	// the shift by 3 converts a bit index to a byte index.
 | 
			
		||||
	XORQ AX, BX
 | 
			
		||||
	BSFQ BX, BX
 | 
			
		||||
	SHRQ $3, BX
 | 
			
		||||
	ADDQ BX, SI
 | 
			
		||||
	JMP  inlineExtendMatchEnd
 | 
			
		||||
 | 
			
		||||
inlineExtendMatchCmp1:
 | 
			
		||||
	// In src's tail, compare 1 byte at a time.
 | 
			
		||||
	CMPQ SI, R14
 | 
			
		||||
	JAE  inlineExtendMatchEnd
 | 
			
		||||
	MOVB (R15), AX
 | 
			
		||||
	MOVB (SI), BX
 | 
			
		||||
	CMPB AX, BX
 | 
			
		||||
	JNE  inlineExtendMatchEnd
 | 
			
		||||
	ADDQ $1, R15
 | 
			
		||||
	ADDQ $1, SI
 | 
			
		||||
	JMP  inlineExtendMatchCmp1
 | 
			
		||||
 | 
			
		||||
inlineExtendMatchEnd:
 | 
			
		||||
	// End inline of the extendMatch call.
 | 
			
		||||
	// ----------------------------------------
 | 
			
		||||
 | 
			
		||||
	// ----------------------------------------
 | 
			
		||||
	// Begin inline of the emitCopy call.
 | 
			
		||||
	//
 | 
			
		||||
	// d += emitCopy(dst[d:], base-candidate, s-base)
 | 
			
		||||
 | 
			
		||||
	// !!! length := s - base
 | 
			
		||||
	MOVQ SI, AX
 | 
			
		||||
	SUBQ R12, AX
 | 
			
		||||
 | 
			
		||||
inlineEmitCopyLoop0:
 | 
			
		||||
	// for length >= 68 { etc }
 | 
			
		||||
	CMPL AX, $68
 | 
			
		||||
	JLT  inlineEmitCopyStep1
 | 
			
		||||
 | 
			
		||||
	// Emit a length 64 copy, encoded as 3 bytes.
 | 
			
		||||
	MOVB $0xfe, 0(DI)
 | 
			
		||||
	MOVW R11, 1(DI)
 | 
			
		||||
	ADDQ $3, DI
 | 
			
		||||
	SUBL $64, AX
 | 
			
		||||
	JMP  inlineEmitCopyLoop0
 | 
			
		||||
 | 
			
		||||
inlineEmitCopyStep1:
 | 
			
		||||
	// if length > 64 { etc }
 | 
			
		||||
	CMPL AX, $64
 | 
			
		||||
	JLE  inlineEmitCopyStep2
 | 
			
		||||
 | 
			
		||||
	// Emit a length 60 copy, encoded as 3 bytes.
 | 
			
		||||
	MOVB $0xee, 0(DI)
 | 
			
		||||
	MOVW R11, 1(DI)
 | 
			
		||||
	ADDQ $3, DI
 | 
			
		||||
	SUBL $60, AX
 | 
			
		||||
 | 
			
		||||
inlineEmitCopyStep2:
 | 
			
		||||
	// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
 | 
			
		||||
	CMPL AX, $12
 | 
			
		||||
	JGE  inlineEmitCopyStep3
 | 
			
		||||
	CMPL R11, $2048
 | 
			
		||||
	JGE  inlineEmitCopyStep3
 | 
			
		||||
 | 
			
		||||
	// Emit the remaining copy, encoded as 2 bytes.
 | 
			
		||||
	MOVB R11, 1(DI)
 | 
			
		||||
	SHRL $8, R11
 | 
			
		||||
	SHLB $5, R11
 | 
			
		||||
	SUBB $4, AX
 | 
			
		||||
	SHLB $2, AX
 | 
			
		||||
	ORB  AX, R11
 | 
			
		||||
	ORB  $1, R11
 | 
			
		||||
	MOVB R11, 0(DI)
 | 
			
		||||
	ADDQ $2, DI
 | 
			
		||||
	JMP  inlineEmitCopyEnd
 | 
			
		||||
 | 
			
		||||
inlineEmitCopyStep3:
 | 
			
		||||
	// Emit the remaining copy, encoded as 3 bytes.
 | 
			
		||||
	SUBL $1, AX
 | 
			
		||||
	SHLB $2, AX
 | 
			
		||||
	ORB  $2, AX
 | 
			
		||||
	MOVB AX, 0(DI)
 | 
			
		||||
	MOVW R11, 1(DI)
 | 
			
		||||
	ADDQ $3, DI
 | 
			
		||||
 | 
			
		||||
inlineEmitCopyEnd:
 | 
			
		||||
	// End inline of the emitCopy call.
 | 
			
		||||
	// ----------------------------------------
 | 
			
		||||
 | 
			
		||||
	// nextEmit = s
 | 
			
		||||
	MOVQ SI, R10
 | 
			
		||||
 | 
			
		||||
	// if s >= sLimit { goto emitRemainder }
 | 
			
		||||
	MOVQ SI, AX
 | 
			
		||||
	SUBQ DX, AX
 | 
			
		||||
	CMPQ AX, R9
 | 
			
		||||
	JAE  emitRemainder
 | 
			
		||||
 | 
			
		||||
	// As per the encode_other.go code:
 | 
			
		||||
	//
 | 
			
		||||
	// We could immediately etc.
 | 
			
		||||
 | 
			
		||||
	// x := load64(src, s-1)
 | 
			
		||||
	MOVQ -1(SI), R14
 | 
			
		||||
 | 
			
		||||
	// prevHash := hash(uint32(x>>0), shift)
 | 
			
		||||
	MOVL  R14, R11
 | 
			
		||||
	IMULL $0x1e35a7bd, R11
 | 
			
		||||
	SHRL  CX, R11
 | 
			
		||||
 | 
			
		||||
	// table[prevHash] = uint16(s-1)
 | 
			
		||||
	MOVQ SI, AX
 | 
			
		||||
	SUBQ DX, AX
 | 
			
		||||
	SUBQ $1, AX
 | 
			
		||||
 | 
			
		||||
	// XXX: MOVW AX, table-32768(SP)(R11*2)
 | 
			
		||||
	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
 | 
			
		||||
	BYTE $0x66
 | 
			
		||||
	BYTE $0x42
 | 
			
		||||
	BYTE $0x89
 | 
			
		||||
	BYTE $0x44
 | 
			
		||||
	BYTE $0x5c
 | 
			
		||||
	BYTE $0x78
 | 
			
		||||
 | 
			
		||||
	// currHash := hash(uint32(x>>8), shift)
 | 
			
		||||
	SHRQ  $8, R14
 | 
			
		||||
	MOVL  R14, R11
 | 
			
		||||
	IMULL $0x1e35a7bd, R11
 | 
			
		||||
	SHRL  CX, R11
 | 
			
		||||
 | 
			
		||||
	// candidate = int(table[currHash])
 | 
			
		||||
	// XXX: MOVWQZX table-32768(SP)(R11*2), R15
 | 
			
		||||
	// XXX: 4e 0f b7 7c 5c 78       movzwq 0x78(%rsp,%r11,2),%r15
 | 
			
		||||
	BYTE $0x4e
 | 
			
		||||
	BYTE $0x0f
 | 
			
		||||
	BYTE $0xb7
 | 
			
		||||
	BYTE $0x7c
 | 
			
		||||
	BYTE $0x5c
 | 
			
		||||
	BYTE $0x78
 | 
			
		||||
 | 
			
		||||
	// table[currHash] = uint16(s)
 | 
			
		||||
	ADDQ $1, AX
 | 
			
		||||
 | 
			
		||||
	// XXX: MOVW AX, table-32768(SP)(R11*2)
 | 
			
		||||
	// XXX: 66 42 89 44 5c 78       mov    %ax,0x78(%rsp,%r11,2)
 | 
			
		||||
	BYTE $0x66
 | 
			
		||||
	BYTE $0x42
 | 
			
		||||
	BYTE $0x89
 | 
			
		||||
	BYTE $0x44
 | 
			
		||||
	BYTE $0x5c
 | 
			
		||||
	BYTE $0x78
 | 
			
		||||
 | 
			
		||||
	// if uint32(x>>8) == load32(src, candidate) { continue }
 | 
			
		||||
	MOVL (DX)(R15*1), BX
 | 
			
		||||
	CMPL R14, BX
 | 
			
		||||
	JEQ  inner1
 | 
			
		||||
 | 
			
		||||
	// nextHash = hash(uint32(x>>16), shift)
 | 
			
		||||
	SHRQ  $8, R14
 | 
			
		||||
	MOVL  R14, R11
 | 
			
		||||
	IMULL $0x1e35a7bd, R11
 | 
			
		||||
	SHRL  CX, R11
 | 
			
		||||
 | 
			
		||||
	// s++
 | 
			
		||||
	ADDQ $1, SI
 | 
			
		||||
 | 
			
		||||
	// break out of the inner1 for loop, i.e. continue the outer loop.
 | 
			
		||||
	JMP outer
 | 
			
		||||
 | 
			
		||||
emitRemainder:
 | 
			
		||||
	// if nextEmit < len(src) { etc }
 | 
			
		||||
	MOVQ src_len+32(FP), AX
 | 
			
		||||
	ADDQ DX, AX
 | 
			
		||||
	CMPQ R10, AX
 | 
			
		||||
	JEQ  encodeBlockEnd
 | 
			
		||||
 | 
			
		||||
	// d += emitLiteral(dst[d:], src[nextEmit:])
 | 
			
		||||
	//
 | 
			
		||||
	// Push args.
 | 
			
		||||
	MOVQ DI, 0(SP)
 | 
			
		||||
	MOVQ $0, 8(SP)   // Unnecessary, as the callee ignores it, but conservative.
 | 
			
		||||
	MOVQ $0, 16(SP)  // Unnecessary, as the callee ignores it, but conservative.
 | 
			
		||||
	MOVQ R10, 24(SP)
 | 
			
		||||
	SUBQ R10, AX
 | 
			
		||||
	MOVQ AX, 32(SP)
 | 
			
		||||
	MOVQ AX, 40(SP)  // Unnecessary, as the callee ignores it, but conservative.
 | 
			
		||||
 | 
			
		||||
	// Spill local variables (registers) onto the stack; call; unspill.
 | 
			
		||||
	MOVQ DI, 80(SP)
 | 
			
		||||
	CALL ·emitLiteral(SB)
 | 
			
		||||
	MOVQ 80(SP), DI
 | 
			
		||||
 | 
			
		||||
	// Finish the "d +=" part of "d += emitLiteral(etc)".
 | 
			
		||||
	ADDQ 48(SP), DI
 | 
			
		||||
 | 
			
		||||
encodeBlockEnd:
 | 
			
		||||
	MOVQ dst_base+0(FP), AX
 | 
			
		||||
	SUBQ AX, DI
 | 
			
		||||
	MOVQ DI, d+48(FP)
 | 
			
		||||
	RET
 | 
			
		||||
							
								
								
									
										238
									
								
								vendor/github.com/golang/snappy/encode_other.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										238
									
								
								vendor/github.com/golang/snappy/encode_other.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,238 @@
 | 
			
		||||
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
 | 
			
		||||
// Use of this source code is governed by a BSD-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
// +build !amd64 appengine !gc noasm
 | 
			
		||||
 | 
			
		||||
package snappy
 | 
			
		||||
 | 
			
		||||
func load32(b []byte, i int) uint32 {
 | 
			
		||||
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
 | 
			
		||||
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func load64(b []byte, i int) uint64 {
 | 
			
		||||
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
 | 
			
		||||
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
 | 
			
		||||
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
 | 
			
		||||
//
 | 
			
		||||
// It assumes that:
 | 
			
		||||
//	dst is long enough to hold the encoded bytes
 | 
			
		||||
//	1 <= len(lit) && len(lit) <= 65536
 | 
			
		||||
func emitLiteral(dst, lit []byte) int {
 | 
			
		||||
	i, n := 0, uint(len(lit)-1)
 | 
			
		||||
	switch {
 | 
			
		||||
	case n < 60:
 | 
			
		||||
		dst[0] = uint8(n)<<2 | tagLiteral
 | 
			
		||||
		i = 1
 | 
			
		||||
	case n < 1<<8:
 | 
			
		||||
		dst[0] = 60<<2 | tagLiteral
 | 
			
		||||
		dst[1] = uint8(n)
 | 
			
		||||
		i = 2
 | 
			
		||||
	default:
 | 
			
		||||
		dst[0] = 61<<2 | tagLiteral
 | 
			
		||||
		dst[1] = uint8(n)
 | 
			
		||||
		dst[2] = uint8(n >> 8)
 | 
			
		||||
		i = 3
 | 
			
		||||
	}
 | 
			
		||||
	return i + copy(dst[i:], lit)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// emitCopy writes a copy chunk and returns the number of bytes written.
 | 
			
		||||
//
 | 
			
		||||
// It assumes that:
 | 
			
		||||
//	dst is long enough to hold the encoded bytes
 | 
			
		||||
//	1 <= offset && offset <= 65535
 | 
			
		||||
//	4 <= length && length <= 65535
 | 
			
		||||
func emitCopy(dst []byte, offset, length int) int {
 | 
			
		||||
	i := 0
 | 
			
		||||
	// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
 | 
			
		||||
	// threshold for this loop is a little higher (at 68 = 64 + 4), and the
 | 
			
		||||
	// length emitted down below is is a little lower (at 60 = 64 - 4), because
 | 
			
		||||
	// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
 | 
			
		||||
	// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
 | 
			
		||||
	// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
 | 
			
		||||
	// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
 | 
			
		||||
	// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
 | 
			
		||||
	// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
 | 
			
		||||
	for length >= 68 {
 | 
			
		||||
		// Emit a length 64 copy, encoded as 3 bytes.
 | 
			
		||||
		dst[i+0] = 63<<2 | tagCopy2
 | 
			
		||||
		dst[i+1] = uint8(offset)
 | 
			
		||||
		dst[i+2] = uint8(offset >> 8)
 | 
			
		||||
		i += 3
 | 
			
		||||
		length -= 64
 | 
			
		||||
	}
 | 
			
		||||
	if length > 64 {
 | 
			
		||||
		// Emit a length 60 copy, encoded as 3 bytes.
 | 
			
		||||
		dst[i+0] = 59<<2 | tagCopy2
 | 
			
		||||
		dst[i+1] = uint8(offset)
 | 
			
		||||
		dst[i+2] = uint8(offset >> 8)
 | 
			
		||||
		i += 3
 | 
			
		||||
		length -= 60
 | 
			
		||||
	}
 | 
			
		||||
	if length >= 12 || offset >= 2048 {
 | 
			
		||||
		// Emit the remaining copy, encoded as 3 bytes.
 | 
			
		||||
		dst[i+0] = uint8(length-1)<<2 | tagCopy2
 | 
			
		||||
		dst[i+1] = uint8(offset)
 | 
			
		||||
		dst[i+2] = uint8(offset >> 8)
 | 
			
		||||
		return i + 3
 | 
			
		||||
	}
 | 
			
		||||
	// Emit the remaining copy, encoded as 2 bytes.
 | 
			
		||||
	dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
 | 
			
		||||
	dst[i+1] = uint8(offset)
 | 
			
		||||
	return i + 2
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
//	0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
	// Walk both cursors forward while the bytes agree; j stops at the
	// first mismatch or at the end of src, and is the answer.
	for j < len(src) && src[i] == src[j] {
		i++
		j++
	}
	return j
}
 | 
			
		||||
 | 
			
		||||
// hash maps the 4-byte sequence u (read as a little-endian uint32 by the
// callers) to a hash-table slot via a multiply-shift scheme; shift selects
// how many of the high bits survive, i.e. the table size.
func hash(u, shift uint32) uint32 {
	const multiplier = 0x1e35a7bd
	return (u * multiplier) >> shift
}
 | 
			
		||||
 | 
			
		||||
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
// 	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
//
// The return value d is the number of bytes written to dst.
func encodeBlock(dst, src []byte) (d int) {
	// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
	// The table element type is uint16, as s < sLimit and sLimit < len(src)
	// and len(src) <= maxBlockSize and maxBlockSize == 65536.
	const (
		maxTableSize = 1 << 14
		// tableMask is redundant, but helps the compiler eliminate bounds
		// checks.
		tableMask = maxTableSize - 1
	)
	// shift is chosen so that hash values index a table of the computed
	// tableSize: each doubling of tableSize keeps one more high bit.
	shift := uint32(32 - 8)
	for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
		shift--
	}
	// In Go, all array elements are zero-initialized, so there is no advantage
	// to a smaller tableSize per se. However, it matches the C++ algorithm,
	// and in the asm versions of this code, we can get away with zeroing only
	// the first tableSize elements.
	var table [maxTableSize]uint16

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	nextHash := hash(load32(src, s), shift)

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc.. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (ie. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
		skip := 32

		nextS := s
		candidate := 0
		for {
			// Advance s by skip>>5 bytes per probe; skip grows with every
			// failed probe, so the stride widens on incompressible data.
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			// Look up the previous position that hashed to the same slot,
			// then record the current position in its place.
			candidate = int(table[nextHash&tableMask])
			table[nextHash&tableMask] = uint16(s)
			nextHash = hash(load32(src, nextS), shift)
			if load32(src, s) == load32(src, candidate) {
				break
			}
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		d += emitLiteral(dst[d:], src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s

			// Extend the 4-byte match as long as possible.
			//
			// This is an inlined version of:
			//	s = extendMatch(src, candidate+4, s+4)
			s += 4
			for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
			}

			d += emitCopy(dst[d:], base-candidate, s-base)
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load64(src, s-1)
			prevHash := hash(uint32(x>>0), shift)
			table[prevHash&tableMask] = uint16(s - 1)
			currHash := hash(uint32(x>>8), shift)
			candidate = int(table[currHash&tableMask])
			table[currHash&tableMask] = uint16(s)
			if uint32(x>>8) != load32(src, candidate) {
				// No immediate follow-on match: precompute the hash for
				// s+1 and fall back to the literal-scanning outer loop.
				nextHash = hash(uint32(x>>16), shift)
				s++
				break
			}
		}
	}

emitRemainder:
	// Flush whatever trailing bytes were never covered by a copy as one
	// final literal run.
	if nextEmit < len(src) {
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
 | 
			
		||||
							
								
								
									
										22
									
								
								vendor/github.com/golang/snappy/snappy.go
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						
									
										22
									
								
								vendor/github.com/golang/snappy/snappy.go
									
									
									
										generated
									
									
										vendored
									
									
								
							@@ -2,10 +2,21 @@
 | 
			
		||||
// Use of this source code is governed by a BSD-style
 | 
			
		||||
// license that can be found in the LICENSE file.
 | 
			
		||||
 | 
			
		||||
// Package snappy implements the snappy block-based compression format.
 | 
			
		||||
// It aims for very high speeds and reasonable compression.
 | 
			
		||||
// Package snappy implements the Snappy compression format. It aims for very
 | 
			
		||||
// high speeds and reasonable compression.
 | 
			
		||||
//
 | 
			
		||||
// The C++ snappy implementation is at https://github.com/google/snappy
 | 
			
		||||
// There are actually two Snappy formats: block and stream. They are related,
 | 
			
		||||
// but different: trying to decompress block-compressed data as a Snappy stream
 | 
			
		||||
// will fail, and vice versa. The block format is the Decode and Encode
 | 
			
		||||
// functions and the stream format is the Reader and Writer types.
 | 
			
		||||
//
 | 
			
		||||
// The block format, the more common case, is used when the complete size (the
 | 
			
		||||
// number of bytes) of the original data is known upfront, at the time
 | 
			
		||||
// compression starts. The stream format, also known as the framing format, is
 | 
			
		||||
// for when that isn't always true.
 | 
			
		||||
//
 | 
			
		||||
// The canonical, C++ implementation is at https://github.com/google/snappy and
 | 
			
		||||
// it only implements the block format.
 | 
			
		||||
package snappy // import "github.com/golang/snappy"
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
@@ -32,7 +43,10 @@ Lempel-Ziv compression algorithms. In particular:
 | 
			
		||||
  - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
 | 
			
		||||
    The length is 1 + m. The offset is the little-endian unsigned integer
 | 
			
		||||
    denoted by the next 2 bytes.
 | 
			
		||||
  - For l == 3, this tag is a legacy format that is no longer supported.
 | 
			
		||||
  - For l == 3, this tag is a legacy format that is no longer issued by most
 | 
			
		||||
    encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
 | 
			
		||||
    [1, 65). The length is 1 + m. The offset is the little-endian unsigned
 | 
			
		||||
    integer denoted by the next 4 bytes.
 | 
			
		||||
*/
 | 
			
		||||
const (
 | 
			
		||||
	tagLiteral = 0x00
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user