mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	Reduce repo indexer disk usage (#3452)
This commit is contained in:
		
							
								
								
									
										53
									
								
								vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								vendor/github.com/blevesearch/bleve/analysis/token/unique/unique.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,53 @@
 | 
			
		||||
//  Copyright (c) 2018 Couchbase, Inc.
 | 
			
		||||
//
 | 
			
		||||
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
// you may not use this file except in compliance with the License.
 | 
			
		||||
// You may obtain a copy of the License at
 | 
			
		||||
//
 | 
			
		||||
// 		http://www.apache.org/licenses/LICENSE-2.0
 | 
			
		||||
//
 | 
			
		||||
// Unless required by applicable law or agreed to in writing, software
 | 
			
		||||
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
			
		||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
			
		||||
// See the License for the specific language governing permissions and
 | 
			
		||||
// limitations under the License.
 | 
			
		||||
 | 
			
		||||
package unique
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"github.com/blevesearch/bleve/analysis"
 | 
			
		||||
	"github.com/blevesearch/bleve/registry"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const Name = "unique"
 | 
			
		||||
 | 
			
		||||
// UniqueTermFilter retains only the tokens which mark the first occurrence of
 | 
			
		||||
// a term. Tokens whose term appears in a preceding token are dropped.
 | 
			
		||||
type UniqueTermFilter struct{}
 | 
			
		||||
 | 
			
		||||
func NewUniqueTermFilter() *UniqueTermFilter {
 | 
			
		||||
	return &UniqueTermFilter{}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (f *UniqueTermFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
 | 
			
		||||
	encounteredTerms := make(map[string]struct{}, len(input)/4)
 | 
			
		||||
	j := 0
 | 
			
		||||
	for _, token := range input {
 | 
			
		||||
		term := string(token.Term)
 | 
			
		||||
		if _, ok := encounteredTerms[term]; ok {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		encounteredTerms[term] = struct{}{}
 | 
			
		||||
		input[j] = token
 | 
			
		||||
		j++
 | 
			
		||||
	}
 | 
			
		||||
	return input[:j]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func UniqueTermFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
 | 
			
		||||
	return NewUniqueTermFilter(), nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func init() {
 | 
			
		||||
	registry.RegisterTokenFilter(Name, UniqueTermFilterConstructor)
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										173
									
								
								vendor/github.com/ethantkoenig/rupture/Gopkg.lock
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										173
									
								
								vendor/github.com/ethantkoenig/rupture/Gopkg.lock
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,173 @@
 | 
			
		||||
# This file is autogenerated, do not edit; changes may be undone by the next 'dep ensure'.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "github.com/RoaringBitmap/roaring"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "84551f0e309d6f9bafa428ef39b31ab7f16ff7b8"
 | 
			
		||||
  version = "v0.4.1"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/Smerity/govarint"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "7265e41f48f15fd61751e16da866af3c704bb3ab"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "github.com/blevesearch/bleve"
 | 
			
		||||
  packages = [
 | 
			
		||||
    ".",
 | 
			
		||||
    "analysis",
 | 
			
		||||
    "analysis/analyzer/standard",
 | 
			
		||||
    "analysis/datetime/flexible",
 | 
			
		||||
    "analysis/datetime/optional",
 | 
			
		||||
    "analysis/lang/en",
 | 
			
		||||
    "analysis/token/lowercase",
 | 
			
		||||
    "analysis/token/porter",
 | 
			
		||||
    "analysis/token/stop",
 | 
			
		||||
    "analysis/tokenizer/unicode",
 | 
			
		||||
    "document",
 | 
			
		||||
    "geo",
 | 
			
		||||
    "index",
 | 
			
		||||
    "index/scorch",
 | 
			
		||||
    "index/scorch/mergeplan",
 | 
			
		||||
    "index/scorch/segment",
 | 
			
		||||
    "index/scorch/segment/mem",
 | 
			
		||||
    "index/scorch/segment/zap",
 | 
			
		||||
    "index/store",
 | 
			
		||||
    "index/store/boltdb",
 | 
			
		||||
    "index/store/gtreap",
 | 
			
		||||
    "index/upsidedown",
 | 
			
		||||
    "mapping",
 | 
			
		||||
    "numeric",
 | 
			
		||||
    "registry",
 | 
			
		||||
    "search",
 | 
			
		||||
    "search/collector",
 | 
			
		||||
    "search/facet",
 | 
			
		||||
    "search/highlight",
 | 
			
		||||
    "search/highlight/format/html",
 | 
			
		||||
    "search/highlight/fragmenter/simple",
 | 
			
		||||
    "search/highlight/highlighter/html",
 | 
			
		||||
    "search/highlight/highlighter/simple",
 | 
			
		||||
    "search/query",
 | 
			
		||||
    "search/scorer",
 | 
			
		||||
    "search/searcher"
 | 
			
		||||
  ]
 | 
			
		||||
  revision = "a3b125508b4443344b596888ca58467b6c9310b9"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/blevesearch/go-porterstemmer"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "23a2c8e5cf1f380f27722c6d2ae8896431dc7d0e"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/blevesearch/segment"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "762005e7a34fd909a84586299f1dd457371d36ee"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/boltdb/bolt"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "9da31745363232bc1e27dbab3569e77383a51585"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/couchbase/vellum"
 | 
			
		||||
  packages = [
 | 
			
		||||
    ".",
 | 
			
		||||
    "regexp",
 | 
			
		||||
    "utf8"
 | 
			
		||||
  ]
 | 
			
		||||
  revision = "ed84a675e24ed0a0bf6859b1ddec7e7c858354bd"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "github.com/davecgh/go-spew"
 | 
			
		||||
  packages = ["spew"]
 | 
			
		||||
  revision = "346938d642f2ec3594ed81d874461961cd0faa76"
 | 
			
		||||
  version = "v1.1.0"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/edsrzf/mmap-go"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "0bce6a6887123b67a60366d2c9fe2dfb74289d2e"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/glycerine/go-unsnap-stream"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "62a9a9eb44fd8932157b1a8ace2149eff5971af6"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "github.com/golang/protobuf"
 | 
			
		||||
  packages = ["proto"]
 | 
			
		||||
  revision = "925541529c1fa6821df4e44ce2723319eb2be768"
 | 
			
		||||
  version = "v1.0.0"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/golang/snappy"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "553a641470496b2327abcac10b36396bd98e45c9"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/mschoch/smat"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "90eadee771aeab36e8bf796039b8c261bebebe4f"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "github.com/philhofer/fwd"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "bb6d471dc95d4fe11e432687f8b70ff496cf3136"
 | 
			
		||||
  version = "v1.0.0"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "github.com/pmezard/go-difflib"
 | 
			
		||||
  packages = ["difflib"]
 | 
			
		||||
  revision = "792786c7400a136282c1664665ae0a8db921c6c2"
 | 
			
		||||
  version = "v1.0.0"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/steveyen/gtreap"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "0abe01ef9be25c4aedc174758ec2d917314d6d70"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "github.com/stretchr/testify"
 | 
			
		||||
  packages = ["assert"]
 | 
			
		||||
  revision = "12b6f73e6084dad08a7c6e575284b177ecafbc71"
 | 
			
		||||
  version = "v1.2.1"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/tinylib/msgp"
 | 
			
		||||
  packages = ["msgp"]
 | 
			
		||||
  revision = "03a79185462ad029a6e7e05b2f3f3e0498d0a6c0"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  branch = "master"
 | 
			
		||||
  name = "github.com/willf/bitset"
 | 
			
		||||
  packages = ["."]
 | 
			
		||||
  revision = "1a37ad96e8c1a11b20900a232874843b5174221f"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "golang.org/x/net"
 | 
			
		||||
  packages = ["context"]
 | 
			
		||||
  revision = "309822c5b9b9f80db67f016069a12628d94fad34"
 | 
			
		||||
 | 
			
		||||
[[projects]]
 | 
			
		||||
  name = "golang.org/x/sys"
 | 
			
		||||
  packages = ["unix"]
 | 
			
		||||
  revision = "3dbebcf8efb6a5011a60c2b4591c1022a759af8a"
 | 
			
		||||
 | 
			
		||||
[solve-meta]
 | 
			
		||||
  analyzer-name = "dep"
 | 
			
		||||
  analyzer-version = 1
 | 
			
		||||
  inputs-digest = "61c759f0c1136cadf86ae8a30bb78edf33fc844cdcb2316469b4ae14a8d051b0"
 | 
			
		||||
  solver-name = "gps-cdcl"
 | 
			
		||||
  solver-version = 1
 | 
			
		||||
							
								
								
									
										34
									
								
								vendor/github.com/ethantkoenig/rupture/Gopkg.toml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								vendor/github.com/ethantkoenig/rupture/Gopkg.toml
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,34 @@
 | 
			
		||||
# Gopkg.toml example
 | 
			
		||||
#
 | 
			
		||||
# Refer to https://github.com/golang/dep/blob/master/docs/Gopkg.toml.md
 | 
			
		||||
# for detailed Gopkg.toml documentation.
 | 
			
		||||
#
 | 
			
		||||
# required = ["github.com/user/thing/cmd/thing"]
 | 
			
		||||
# ignored = ["github.com/user/project/pkgX", "bitbucket.org/user/project/pkgA/pkgY"]
 | 
			
		||||
#
 | 
			
		||||
# [[constraint]]
 | 
			
		||||
#   name = "github.com/user/project"
 | 
			
		||||
#   version = "1.0.0"
 | 
			
		||||
#
 | 
			
		||||
# [[constraint]]
 | 
			
		||||
#   name = "github.com/user/project2"
 | 
			
		||||
#   branch = "dev"
 | 
			
		||||
#   source = "github.com/myfork/project2"
 | 
			
		||||
#
 | 
			
		||||
# [[override]]
 | 
			
		||||
#   name = "github.com/x/y"
 | 
			
		||||
#   version = "2.4.0"
 | 
			
		||||
#
 | 
			
		||||
# [prune]
 | 
			
		||||
#   non-go = false
 | 
			
		||||
#   go-tests = true
 | 
			
		||||
#   unused-packages = true
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
[[constraint]]
 | 
			
		||||
  name = "github.com/stretchr/testify"
 | 
			
		||||
  version = "1.2.1"
 | 
			
		||||
 | 
			
		||||
[prune]
 | 
			
		||||
  go-tests = true
 | 
			
		||||
  unused-packages = true
 | 
			
		||||
							
								
								
									
										21
									
								
								vendor/github.com/ethantkoenig/rupture/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								vendor/github.com/ethantkoenig/rupture/LICENSE
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,21 @@
 | 
			
		||||
MIT License
 | 
			
		||||
 | 
			
		||||
Copyright (c) 2018 Ethan Koenig
 | 
			
		||||
 | 
			
		||||
Permission is hereby granted, free of charge, to any person obtaining a copy
 | 
			
		||||
of this software and associated documentation files (the "Software"), to deal
 | 
			
		||||
in the Software without restriction, including without limitation the rights
 | 
			
		||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 | 
			
		||||
copies of the Software, and to permit persons to whom the Software is
 | 
			
		||||
furnished to do so, subject to the following conditions:
 | 
			
		||||
 | 
			
		||||
The above copyright notice and this permission notice shall be included in all
 | 
			
		||||
copies or substantial portions of the Software.
 | 
			
		||||
 | 
			
		||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 | 
			
		||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 | 
			
		||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 | 
			
		||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 | 
			
		||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 | 
			
		||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 | 
			
		||||
SOFTWARE.
 | 
			
		||||
							
								
								
									
										13
									
								
								vendor/github.com/ethantkoenig/rupture/README.md
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								vendor/github.com/ethantkoenig/rupture/README.md
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,13 @@
 | 
			
		||||
# rupture
 | 
			
		||||
 | 
			
		||||
[Build Status](https://travis-ci.org/ethantkoenig/rupture) [GoDoc](https://godoc.org/github.com/ethantkoenig/rupture) [Go Report Card](https://goreportcard.com/report/blevesearch/bleve)
 | 
			
		||||
 | 
			
		||||
An explosive companion to the [bleve indexing library](https://www.github.com/blevesearch/bleve)
 | 
			
		||||
 | 
			
		||||
## Features
 | 
			
		||||
 | 
			
		||||
`rupture` includes the following additions to `bleve`:
 | 
			
		||||
 | 
			
		||||
- __Flushing batches__: Batches of operations that automatically flush to the underlying bleve index.
 | 
			
		||||
- __Sharded indices__: An index-like abstraction built on top of several underlying indices. Sharded indices provide lower write latencies for indices with large amounts of data.
 | 
			
		||||
- __Index metadata__: Track index version for easily managing migrations and schema changes.
 | 
			
		||||
							
								
								
									
										67
									
								
								vendor/github.com/ethantkoenig/rupture/flushing_batch.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								vendor/github.com/ethantkoenig/rupture/flushing_batch.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,67 @@
 | 
			
		||||
package rupture
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"github.com/blevesearch/bleve"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// FlushingBatch is a batch of operations that automatically flushes to the
 | 
			
		||||
// underlying index once it reaches a certain size.
 | 
			
		||||
type FlushingBatch interface {
 | 
			
		||||
	// Index adds the specified index operation to the batch, possibly triggering a
 | 
			
		||||
	// flush.
 | 
			
		||||
	Index(id string, data interface{}) error
 | 
			
		||||
	// Delete adds the specified delete operation to the batch, possibly
 | 
			
		||||
	// triggering a flush.
 | 
			
		||||
	Delete(id string) error
 | 
			
		||||
	// Flush flushes the batch's contents.
 | 
			
		||||
	Flush() error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type singleIndexFlushingBatch struct {
 | 
			
		||||
	maxBatchSize int
 | 
			
		||||
	batch        *bleve.Batch
 | 
			
		||||
	index        bleve.Index
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func newFlushingBatch(index bleve.Index, maxBatchSize int) *singleIndexFlushingBatch {
 | 
			
		||||
	return &singleIndexFlushingBatch{
 | 
			
		||||
		maxBatchSize: maxBatchSize,
 | 
			
		||||
		batch:        index.NewBatch(),
 | 
			
		||||
		index:        index,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewFlushingBatch creates a new flushing batch for the specified index. Once
 | 
			
		||||
// the number of operations in the batch reaches the specified limit, the batch
 | 
			
		||||
// automatically flushes its operations to the index.
 | 
			
		||||
func NewFlushingBatch(index bleve.Index, maxBatchSize int) FlushingBatch {
 | 
			
		||||
	return newFlushingBatch(index, maxBatchSize)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (b *singleIndexFlushingBatch) Index(id string, data interface{}) error {
 | 
			
		||||
	if err := b.batch.Index(id, data); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	return b.flushIfFull()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (b *singleIndexFlushingBatch) Delete(id string) error {
 | 
			
		||||
	b.batch.Delete(id)
 | 
			
		||||
	return b.flushIfFull()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (b *singleIndexFlushingBatch) flushIfFull() error {
 | 
			
		||||
	if b.batch.Size() < b.maxBatchSize {
 | 
			
		||||
		return nil
 | 
			
		||||
	}
 | 
			
		||||
	return b.Flush()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (b *singleIndexFlushingBatch) Flush() error {
 | 
			
		||||
	err := b.index.Batch(b.batch)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	b.batch.Reset()
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										68
									
								
								vendor/github.com/ethantkoenig/rupture/metadata.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								vendor/github.com/ethantkoenig/rupture/metadata.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,68 @@
 | 
			
		||||
package rupture
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"encoding/json"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"os"
 | 
			
		||||
	"path/filepath"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
const metaFilename = "rupture_meta.json"
 | 
			
		||||
 | 
			
		||||
func indexMetadataPath(dir string) string {
 | 
			
		||||
	return filepath.Join(dir, metaFilename)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// IndexMetadata contains metadata about a bleve index.
 | 
			
		||||
type IndexMetadata struct {
 | 
			
		||||
	// The version of the data in the index. This can be useful for tracking
 | 
			
		||||
	// schema changes or data migrations.
 | 
			
		||||
	Version int `json:"version"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// in addition to the user-exposed metadata, we keep additional, internal-only
 | 
			
		||||
// metadata for sharded indices.
 | 
			
		||||
const shardedMetadataFilename = "rupture_sharded_meta.json"
 | 
			
		||||
 | 
			
		||||
func shardedIndexMetadataPath(dir string) string {
 | 
			
		||||
	return filepath.Join(dir, shardedMetadataFilename)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type shardedIndexMetadata struct {
 | 
			
		||||
	NumShards int `json:"num_shards"`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// readJSON reads the file at path and decodes its JSON contents into meta,
// which must be a pointer. The error from reading the file or from decoding is
// returned as-is.
func readJSON(path string, meta interface{}) error {
	raw, err := ioutil.ReadFile(path)
	if err == nil {
		err = json.Unmarshal(raw, meta)
	}
	return err
}
 | 
			
		||||
 | 
			
		||||
// writeJSON encodes meta as JSON and writes it to the file at path, creating
// the file with mode 0666 (before umask) or truncating it if it exists.
// Nothing is written if encoding fails.
func writeJSON(path string, meta interface{}) error {
	encoded, err := json.Marshal(meta)
	if err != nil {
		return err
	}
	return ioutil.WriteFile(path, encoded, 0666)
}
 | 
			
		||||
 | 
			
		||||
// ReadIndexMetadata returns the metadata for the index at the specified path.
 | 
			
		||||
// If no such index metadata exists, an empty metadata and a nil error are
 | 
			
		||||
// returned.
 | 
			
		||||
func ReadIndexMetadata(path string) (*IndexMetadata, error) {
 | 
			
		||||
	meta := &IndexMetadata{}
 | 
			
		||||
	metaPath := indexMetadataPath(path)
 | 
			
		||||
	if _, err := os.Stat(metaPath); os.IsNotExist(err) {
 | 
			
		||||
		return meta, nil
 | 
			
		||||
	} else if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
	return meta, readJSON(metaPath, meta)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// WriteIndexMetadata writes metadata for the index at the specified path.
 | 
			
		||||
func WriteIndexMetadata(path string, meta *IndexMetadata) error {
 | 
			
		||||
	return writeJSON(indexMetadataPath(path), meta)
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										146
									
								
								vendor/github.com/ethantkoenig/rupture/sharded_index.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										146
									
								
								vendor/github.com/ethantkoenig/rupture/sharded_index.go
									
									
									
										generated
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,146 @@
 | 
			
		||||
package rupture
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"hash/fnv"
 | 
			
		||||
	"path/filepath"
 | 
			
		||||
	"strconv"
 | 
			
		||||
 | 
			
		||||
	"github.com/blevesearch/bleve"
 | 
			
		||||
	"github.com/blevesearch/bleve/document"
 | 
			
		||||
	"github.com/blevesearch/bleve/mapping"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// ShardedIndex is an index that is built on top of multiple underlying bleve
 | 
			
		||||
// indices (i.e. shards). Similar to bleve's index aliases, some methods may
 | 
			
		||||
// not be supported.
 | 
			
		||||
type ShardedIndex interface {
 | 
			
		||||
	bleve.Index
 | 
			
		||||
	shards() []bleve.Index
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// bleveIndex is a named type for bleve.Index, so that the anonymous field of
 | 
			
		||||
// shardedIndex does not conflict with the Index(..) method.
 | 
			
		||||
type bleveIndex bleve.Index
 | 
			
		||||
 | 
			
		||||
type shardedIndex struct {
 | 
			
		||||
	bleveIndex
 | 
			
		||||
	indices []bleve.Index
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// hash maps id to a shard number by reducing the 64-bit FNV-1 hash of id
// modulo n. n must be positive; n == 0 panics with a division by zero.
func hash(id string, n int) uint64 {
	h := fnv.New64()
	// hash.Hash documents that Write never returns an error.
	_, _ = h.Write([]byte(id))
	sum := h.Sum64()
	return sum % uint64(n)
}
 | 
			
		||||
 | 
			
		||||
// childIndexerPath returns the directory of shard i of the sharded index
// rooted at rootPath: the decimal shard number joined onto rootPath.
func childIndexerPath(rootPath string, i int) string {
	shardDir := strconv.FormatInt(int64(i), 10)
	return filepath.Join(rootPath, shardDir)
}
 | 
			
		||||
 | 
			
		||||
// NewShardedIndex creates a sharded index at the specified path, with the
 | 
			
		||||
// specified mapping and number of shards.
 | 
			
		||||
func NewShardedIndex(path string, mapping mapping.IndexMapping, numShards int) (ShardedIndex, error) {
 | 
			
		||||
	if numShards <= 0 {
 | 
			
		||||
		return nil, fmt.Errorf("Invalid number of shards: %d", numShards)
 | 
			
		||||
	}
 | 
			
		||||
	err := writeJSON(shardedIndexMetadataPath(path), &shardedIndexMetadata{NumShards: numShards})
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	s := &shardedIndex{
 | 
			
		||||
		indices: make([]bleve.Index, numShards),
 | 
			
		||||
	}
 | 
			
		||||
	for i := 0; i < numShards; i++ {
 | 
			
		||||
		s.indices[i], err = bleve.New(childIndexerPath(path, i), mapping)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	s.bleveIndex = bleve.NewIndexAlias(s.indices...)
 | 
			
		||||
	return s, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// OpenShardedIndex opens a sharded index at the specified path.
 | 
			
		||||
func OpenShardedIndex(path string) (ShardedIndex, error) {
 | 
			
		||||
	var meta shardedIndexMetadata
 | 
			
		||||
	var err error
 | 
			
		||||
	if err = readJSON(shardedIndexMetadataPath(path), &meta); err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	s := &shardedIndex{
 | 
			
		||||
		indices: make([]bleve.Index, meta.NumShards),
 | 
			
		||||
	}
 | 
			
		||||
	for i := 0; i < meta.NumShards; i++ {
 | 
			
		||||
		s.indices[i], err = bleve.Open(childIndexerPath(path, i))
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			return nil, err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	s.bleveIndex = bleve.NewIndexAlias(s.indices...)
 | 
			
		||||
	return s, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *shardedIndex) Index(id string, data interface{}) error {
 | 
			
		||||
	return s.indices[hash(id, len(s.indices))].Index(id, data)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *shardedIndex) Delete(id string) error {
 | 
			
		||||
	return s.indices[hash(id, len(s.indices))].Delete(id)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *shardedIndex) Document(id string) (*document.Document, error) {
 | 
			
		||||
	return s.indices[hash(id, len(s.indices))].Document(id)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *shardedIndex) Close() error {
 | 
			
		||||
	if err := s.bleveIndex.Close(); err != nil {
 | 
			
		||||
		return err
 | 
			
		||||
	}
 | 
			
		||||
	for _, index := range s.indices {
 | 
			
		||||
		if err := index.Close(); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (s *shardedIndex) shards() []bleve.Index {
 | 
			
		||||
	return s.indices
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type shardedIndexFlushingBatch struct {
 | 
			
		||||
	batches []*singleIndexFlushingBatch
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewShardedFlushingBatch creates a flushing batch with the specified batch
 | 
			
		||||
// size for the specified sharded index.
 | 
			
		||||
func NewShardedFlushingBatch(index ShardedIndex, maxBatchSize int) FlushingBatch {
 | 
			
		||||
	indices := index.shards()
 | 
			
		||||
	b := &shardedIndexFlushingBatch{
 | 
			
		||||
		batches: make([]*singleIndexFlushingBatch, len(indices)),
 | 
			
		||||
	}
 | 
			
		||||
	for i, index := range indices {
 | 
			
		||||
		b.batches[i] = newFlushingBatch(index, maxBatchSize)
 | 
			
		||||
	}
 | 
			
		||||
	return b
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (b *shardedIndexFlushingBatch) Index(id string, data interface{}) error {
 | 
			
		||||
	return b.batches[hash(id, len(b.batches))].Index(id, data)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (b *shardedIndexFlushingBatch) Delete(id string) error {
 | 
			
		||||
	return b.batches[hash(id, len(b.batches))].Delete(id)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (b *shardedIndexFlushingBatch) Flush() error {
 | 
			
		||||
	for _, batch := range b.batches {
 | 
			
		||||
		if err := batch.Flush(); err != nil {
 | 
			
		||||
			return err
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return nil
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user