mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 08:30:25 +08:00 
			
		
		
		
	* Fix bug on migration 111 * Upgrade bleve to 1.0.10 Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io>
		
			
				
	
	
		
			377 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			377 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
//  Copyright (c) 2014 Couchbase, Inc.
 | 
						|
//
 | 
						|
// Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
// you may not use this file except in compliance with the License.
 | 
						|
// You may obtain a copy of the License at
 | 
						|
//
 | 
						|
// 		http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
//
 | 
						|
// Unless required by applicable law or agreed to in writing, software
 | 
						|
// distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
// See the License for the specific language governing permissions and
 | 
						|
// limitations under the License.
 | 
						|
 | 
						|
package index
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"encoding/json"
 | 
						|
	"fmt"
 | 
						|
	"reflect"
 | 
						|
 | 
						|
	"github.com/blevesearch/bleve/document"
 | 
						|
	"github.com/blevesearch/bleve/index/store"
 | 
						|
	"github.com/blevesearch/bleve/size"
 | 
						|
)
 | 
						|
 | 
						|
// reflectStaticSizeTermFieldDoc holds the size of a TermFieldDoc value as
// reported by reflect; it is assigned once in init and read by
// TermFieldDoc.Size.
var reflectStaticSizeTermFieldDoc int
 | 
						|
// reflectStaticSizeTermFieldVector holds the size of a TermFieldVector value
// as reported by reflect; it is assigned once in init and read by
// TermFieldVector.Size.
var reflectStaticSizeTermFieldVector int
 | 
						|
 | 
						|
func init() {
 | 
						|
	var tfd TermFieldDoc
 | 
						|
	reflectStaticSizeTermFieldDoc = int(reflect.TypeOf(tfd).Size())
 | 
						|
	var tfv TermFieldVector
 | 
						|
	reflectStaticSizeTermFieldVector = int(reflect.TypeOf(tfv).Size())
 | 
						|
}
 | 
						|
 | 
						|
// ErrorUnknownStorageType is a package-level error value with the message
// "unknown storage type".
// NOTE(review): presumably returned when a requested storage backend is not
// recognised — confirm against callers.
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
 | 
						|
 | 
						|
// Index declares the operations an index implementation exposes: opening and
// closing, document and internal key/value mutation, batch application,
// reader creation, statistics, analysis, and access to a store.KVStore.
type Index interface {
 | 
						|
	// Open and Close each report failure through the returned error.
	// NOTE(review): presumably they bracket the usable lifetime of the
	// index — confirm against implementations.
	Open() error
 | 
						|
	Close() error
 | 
						|
 | 
						|
	// Update takes a single document, Delete takes a document id, and Batch
	// takes a *Batch that can hold several such operations.
	Update(doc *document.Document) error
 | 
						|
	Delete(id string) error
 | 
						|
	Batch(batch *Batch) error
 | 
						|
 | 
						|
	// SetInternal and DeleteInternal operate on raw key/value byte slices.
	// NOTE(review): presumably these entries are kept apart from indexed
	// documents — confirm.
	SetInternal(key, val []byte) error
 | 
						|
	DeleteInternal(key []byte) error
 | 
						|
 | 
						|
	// Reader returns a low-level accessor on the index data. Close it to
 | 
						|
	// release associated resources.
 | 
						|
	Reader() (IndexReader, error)
 | 
						|
 | 
						|
	// Stats and StatsMap expose statistics as a json.Marshaler and as a
	// generic map, respectively.
	Stats() json.Marshaler
 | 
						|
	StatsMap() map[string]interface{}
 | 
						|
 | 
						|
	// Analyze takes a document and returns an *AnalysisResult (a type
	// declared outside this view).
	Analyze(d *document.Document) *AnalysisResult
 | 
						|
 | 
						|
	// Advanced returns a store.KVStore or an error.
	// NOTE(review): presumably the store backing this index — confirm.
	Advanced() (store.KVStore, error)
 | 
						|
}
 | 
						|
 | 
						|
// DocumentFieldTermVisitor is a callback receiving a field name and a term.
// It is the visitor type accepted by IndexReader.DocumentVisitFieldTerms and
// DocValueReader.VisitDocValues.
type DocumentFieldTermVisitor func(field string, term []byte)
 | 
						|
 | 
						|
// IndexReader is the accessor type returned by Index.Reader. It offers term,
// doc id, field dictionary, document and internal-value lookups, and must be
// closed via Close.
type IndexReader interface {
 | 
						|
	// TermFieldReader returns a TermFieldReader for the given term and field.
	// NOTE(review): the three bool flags presumably select which of
	// frequency, norm and term vectors get populated — confirm.
	TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (TermFieldReader, error)
 | 
						|
 | 
						|
	// DocIDReaderAll returns an iterator over all doc ids.
 | 
						|
	// The caller must close returned instance to release associated resources.
 | 
						|
	DocIDReaderAll() (DocIDReader, error)
 | 
						|
 | 
						|
	// DocIDReaderOnly returns a DocIDReader built from the given external ids.
	// NOTE(review): presumably restricted to exactly those ids — confirm.
	DocIDReaderOnly(ids []string) (DocIDReader, error)
 | 
						|
 | 
						|
	// FieldDict returns a FieldDict for the named field.
	FieldDict(field string) (FieldDict, error)
 | 
						|
 | 
						|
	// FieldDictRange is currently defined to include the start and end terms
 | 
						|
	FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error)
 | 
						|
	FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
 | 
						|
 | 
						|
	// Document looks up a document by its external (string) id, while
	// DocumentVisitFieldTerms takes an internal id, a list of fields and a
	// visitor callback.
	Document(id string) (*document.Document, error)
 | 
						|
	DocumentVisitFieldTerms(id IndexInternalID, fields []string, visitor DocumentFieldTermVisitor) error
 | 
						|
 | 
						|
	// DocValueReader returns a DocValueReader for the given fields.
	DocValueReader(fields []string) (DocValueReader, error)
 | 
						|
 | 
						|
	// Fields returns a list of field names.
	Fields() ([]string, error)
 | 
						|
 | 
						|
	// GetInternal returns the value bytes for a raw key.
	// NOTE(review): presumably the counterpart of Index.SetInternal — confirm.
	GetInternal(key []byte) ([]byte, error)
 | 
						|
 | 
						|
	// DocCount returns a document count as a uint64.
	DocCount() (uint64, error)
 | 
						|
 | 
						|
	// ExternalID and InternalID convert between the string document id and
	// the IndexInternalID form.
	ExternalID(id IndexInternalID) (string, error)
 | 
						|
	InternalID(id string) (IndexInternalID, error)
 | 
						|
 | 
						|
	// DumpAll, DumpDoc and DumpFields each return a channel of untyped values.
	// NOTE(review): presumably debugging aids; element types and channel
	// closing behaviour are not visible here — confirm.
	DumpAll() chan interface{}
 | 
						|
	DumpDoc(id string) chan interface{}
 | 
						|
	DumpFields() chan interface{}
 | 
						|
 | 
						|
	// Close releases the reader; see the note on Index.Reader.
	Close() error
 | 
						|
}
 | 
						|
 | 
						|
// The Regexp interface defines the subset of the regexp.Regexp API
 | 
						|
// methods that are used by bleve indexes, allowing callers to pass in
 | 
						|
// alternate implementations.
 | 
						|
type Regexp interface {
 | 
						|
	// FindStringIndex has the same signature as regexp.Regexp.FindStringIndex.
	FindStringIndex(s string) (loc []int)
 | 
						|
 | 
						|
	// LiteralPrefix has the same signature as regexp.Regexp.LiteralPrefix.
	LiteralPrefix() (prefix string, complete bool)
 | 
						|
 | 
						|
	// String has the same signature as regexp.Regexp.String.
	String() string
 | 
						|
}
 | 
						|
 | 
						|
// IndexReaderRegexp is a single-method interface whose FieldDictRegexp takes
// a field name and a regular expression string and returns a FieldDict.
// NOTE(review): presumably an optional capability that some IndexReader
// implementations also satisfy — confirm.
type IndexReaderRegexp interface {
 | 
						|
	FieldDictRegexp(field string, regex string) (FieldDict, error)
 | 
						|
}
 | 
						|
 | 
						|
// IndexReaderFuzzy is a single-method interface whose FieldDictFuzzy takes a
// field name, a term, an integer fuzziness and a prefix, and returns a
// FieldDict.
// NOTE(review): the unit of fuzziness (presumably an edit distance) and the
// role of prefix are not visible here — confirm.
type IndexReaderFuzzy interface {
 | 
						|
	FieldDictFuzzy(field string, term string, fuzziness int, prefix string) (FieldDict, error)
 | 
						|
}
 | 
						|
 | 
						|
// IndexReaderOnly is a single-method interface whose FieldDictOnly takes a
// field name, a list of terms and an includeCount flag, and returns a
// FieldDict.
// NOTE(review): presumably the dictionary is restricted to onlyTerms and
// includeCount controls whether DictEntry.Count is populated — confirm.
type IndexReaderOnly interface {
 | 
						|
	FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
 | 
						|
}
 | 
						|
 | 
						|
// IndexReaderContains is a single-method interface whose FieldDictContains
// takes a field name and returns a FieldDictContains, the membership-test
// interface declared in this file.
type IndexReaderContains interface {
 | 
						|
	FieldDictContains(field string) (FieldDictContains, error)
 | 
						|
}
 | 
						|
 | 
						|
// FieldTerms maps a field name to the list of terms a document uses in that
// field.
type FieldTerms map[string][]string

// FieldsNotYetCached filters fields down to the names that have no entry in
// the receiver, preserving their input order. The result is always a
// non-nil slice, even when it is empty.
func (f FieldTerms) FieldsNotYetCached(fields []string) []string {
	missing := make([]string, 0, len(fields))
	for i := range fields {
		if _, cached := f[fields[i]]; cached {
			continue
		}
		missing = append(missing, fields[i])
	}
	return missing
}

// Merge copies every entry of other into the receiver. Term lists are taken
// to be complete, so they are never combined: when both sides have the same
// field, the list from other overwrites the receiver's.
func (f FieldTerms) Merge(other FieldTerms) {
	for name := range other {
		f[name] = other[name]
	}
}
 | 
						|
 | 
						|
// TermFieldVector is the element type of TermFieldDoc.Vectors.
// NOTE(review): presumably it records one occurrence of a term, with Pos as
// the term position and Start/End as offsets into the field value — confirm
// units and meaning against the producers of this struct.
type TermFieldVector struct {
 | 
						|
	Field          string
 | 
						|
	ArrayPositions []uint64
 | 
						|
	Pos            uint64
 | 
						|
	Start          uint64
 | 
						|
	End            uint64
 | 
						|
}
 | 
						|
 | 
						|
func (tfv *TermFieldVector) Size() int {
 | 
						|
	return reflectStaticSizeTermFieldVector + size.SizeOfPtr +
 | 
						|
		len(tfv.Field) + len(tfv.ArrayPositions)*size.SizeOfUint64
 | 
						|
}
 | 
						|
 | 
						|
// IndexInternalID is an opaque document identifier internal to the index
// implementation.
type IndexInternalID []byte

// Equals reports whether id and other hold the same bytes. A nil id and an
// empty id are considered equal.
func (id IndexInternalID) Equals(other IndexInternalID) bool {
	return bytes.Equal(id, other)
}

// Compare orders id against other lexicographically by byte value, returning
// -1, 0 or +1 in the manner of bytes.Compare.
func (id IndexInternalID) Compare(other IndexInternalID) int {
	return bytes.Compare([]byte(id), []byte(other))
}
 | 
						|
 | 
						|
// TermFieldDoc is the value produced by TermFieldReader.Next and
// TermFieldReader.Advance: one document containing a term in a field.
// ID is the document's internal identifier and Vectors holds pointers to
// TermFieldVector values.
// NOTE(review): Freq and Norm are presumably the term frequency and field
// norm for this document — confirm against the readers that fill them in.
type TermFieldDoc struct {
 | 
						|
	Term    string
 | 
						|
	ID      IndexInternalID
 | 
						|
	Freq    uint64
 | 
						|
	Norm    float64
 | 
						|
	Vectors []*TermFieldVector
 | 
						|
}
 | 
						|
 | 
						|
func (tfd *TermFieldDoc) Size() int {
 | 
						|
	sizeInBytes := reflectStaticSizeTermFieldDoc + size.SizeOfPtr +
 | 
						|
		len(tfd.Term) + len(tfd.ID)
 | 
						|
 | 
						|
	for _, entry := range tfd.Vectors {
 | 
						|
		sizeInBytes += entry.Size()
 | 
						|
	}
 | 
						|
 | 
						|
	return sizeInBytes
 | 
						|
}
 | 
						|
 | 
						|
// Reset allows an already allocated TermFieldDoc to be reused
 | 
						|
func (tfd *TermFieldDoc) Reset() *TermFieldDoc {
 | 
						|
	// remember the []byte used for the ID
 | 
						|
	id := tfd.ID
 | 
						|
	vectors := tfd.Vectors
 | 
						|
	// idiom to copy over from empty TermFieldDoc (0 allocations)
 | 
						|
	*tfd = TermFieldDoc{}
 | 
						|
	// reuse the []byte already allocated (and reset len to 0)
 | 
						|
	tfd.ID = id[:0]
 | 
						|
	tfd.Vectors = vectors[:0]
 | 
						|
	return tfd
 | 
						|
}
 | 
						|
 | 
						|
// TermFieldReader is the interface exposing the enumeration of documents
 | 
						|
// containing a given term in a given field. Documents are returned in byte
 | 
						|
// lexicographic order over their identifiers.
 | 
						|
type TermFieldReader interface {
 | 
						|
	// Next returns the next document containing the term in this field, or nil
 | 
						|
	// when it reaches the end of the enumeration.  The preAlloced TermFieldDoc
 | 
						|
	// is optional, and when non-nil, will be used instead of allocating memory.
 | 
						|
	Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)
 | 
						|
 | 
						|
	// Advance resets the enumeration at specified document or its immediate
 | 
						|
	// follower.
 | 
						|
	Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)
 | 
						|
 | 
						|
	// Count returns the number of documents that contain the term in this field.
 | 
						|
	Count() uint64
 | 
						|
	// Close reports failure through the returned error.
	// NOTE(review): presumably releases resources held by the reader — confirm.
	Close() error
 | 
						|
 | 
						|
	// Size returns an int.
	// NOTE(review): presumably a memory-usage figure in bytes, like the Size
	// methods on TermFieldDoc and TermFieldVector — confirm.
	Size() int
 | 
						|
}
 | 
						|
 | 
						|
// DictEntry is the element type yielded by FieldDict.Next: a term paired
// with a count.
// NOTE(review): Count is presumably the number of documents containing
// Term — confirm against FieldDict implementations.
type DictEntry struct {
 | 
						|
	Term  string
 | 
						|
	Count uint64
 | 
						|
}
 | 
						|
 | 
						|
// FieldDict is an iterator over DictEntry values, as returned by the
// FieldDict* methods of IndexReader.
// NOTE(review): how end-of-iteration is signalled by Next (presumably a nil
// entry) is not visible here — confirm.
type FieldDict interface {
 | 
						|
	Next() (*DictEntry, error)
 | 
						|
	Close() error
 | 
						|
}
 | 
						|
 | 
						|
// FieldDictContains is a single-method interface whose Contains takes a key
// and returns a bool and an error. It is the type returned by
// IndexReaderContains.FieldDictContains.
// NOTE(review): presumably the bool reports whether key is a term of the
// field's dictionary — confirm.
type FieldDictContains interface {
 | 
						|
	Contains(key []byte) (bool, error)
 | 
						|
}
 | 
						|
 | 
						|
// DocIDReader is the interface exposing enumeration of document identifiers.
 | 
						|
// Close the reader to release associated resources.
 | 
						|
type DocIDReader interface {
 | 
						|
	// Next returns the next document internal identifier in the natural
 | 
						|
	// index order, nil when the end of the sequence is reached.
 | 
						|
	Next() (IndexInternalID, error)
 | 
						|
 | 
						|
	// Advance resets the iteration to the first internal identifier greater than
 | 
						|
	// or equal to ID. If ID is smaller than the start of the range, the iteration
 | 
						|
	// will start there instead. If ID is greater than or equal to the end of
 | 
						|
	// the range, Next() call will return io.EOF.
 | 
						|
	Advance(ID IndexInternalID) (IndexInternalID, error)
 | 
						|
 | 
						|
	// Size returns an int.
	// NOTE(review): presumably a memory-usage figure in bytes — confirm.
	Size() int
 | 
						|
 | 
						|
	// Close releases the resources associated with the reader.
	Close() error
 | 
						|
}
 | 
						|
 | 
						|
// BatchCallback is the function type stored on a Batch via
// SetPersistedCallback; it receives a single error argument.
// NOTE(review): presumably invoked once the batch has been persisted, with a
// nil error on success — confirm against the index implementations.
type BatchCallback func(error)
 | 
						|
 | 
						|
// Batch collects index operations so they can be handed to Index.Batch
// together.
//
// IndexOps is keyed by document id; a non-nil value is a document to index
// and a nil value marks a delete. InternalOps is keyed by the string form of
// the raw key; a non-nil value is the bytes to set and a nil value marks a
// delete. persistedCallback is the optional callback installed through
// SetPersistedCallback.
type Batch struct {
 | 
						|
	IndexOps          map[string]*document.Document
 | 
						|
	InternalOps       map[string][]byte
 | 
						|
	persistedCallback BatchCallback
 | 
						|
}
 | 
						|
 | 
						|
func NewBatch() *Batch {
 | 
						|
	return &Batch{
 | 
						|
		IndexOps:    make(map[string]*document.Document),
 | 
						|
		InternalOps: make(map[string][]byte),
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (b *Batch) Update(doc *document.Document) {
 | 
						|
	b.IndexOps[doc.ID] = doc
 | 
						|
}
 | 
						|
 | 
						|
func (b *Batch) Delete(id string) {
 | 
						|
	b.IndexOps[id] = nil
 | 
						|
}
 | 
						|
 | 
						|
func (b *Batch) SetInternal(key, val []byte) {
 | 
						|
	b.InternalOps[string(key)] = val
 | 
						|
}
 | 
						|
 | 
						|
func (b *Batch) DeleteInternal(key []byte) {
 | 
						|
	b.InternalOps[string(key)] = nil
 | 
						|
}
 | 
						|
 | 
						|
// SetPersistedCallback stores f as the batch's persisted callback, replacing
// any callback set earlier. Reset clears it again.
func (b *Batch) SetPersistedCallback(f BatchCallback) {
 | 
						|
	b.persistedCallback = f
 | 
						|
}
 | 
						|
 | 
						|
// PersistedCallback returns the callback stored by SetPersistedCallback, or
// nil when none is set.
func (b *Batch) PersistedCallback() BatchCallback {
 | 
						|
	return b.persistedCallback
 | 
						|
}
 | 
						|
 | 
						|
func (b *Batch) String() string {
 | 
						|
	rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
 | 
						|
	for k, v := range b.IndexOps {
 | 
						|
		if v != nil {
 | 
						|
			rv += fmt.Sprintf("\tINDEX - '%s'\n", k)
 | 
						|
		} else {
 | 
						|
			rv += fmt.Sprintf("\tDELETE - '%s'\n", k)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	for k, v := range b.InternalOps {
 | 
						|
		if v != nil {
 | 
						|
			rv += fmt.Sprintf("\tSET INTERNAL - '%s'\n", k)
 | 
						|
		} else {
 | 
						|
			rv += fmt.Sprintf("\tDELETE INTERNAL - '%s'\n", k)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return rv
 | 
						|
}
 | 
						|
 | 
						|
func (b *Batch) Reset() {
 | 
						|
	b.IndexOps = make(map[string]*document.Document)
 | 
						|
	b.InternalOps = make(map[string][]byte)
 | 
						|
	b.persistedCallback = nil
 | 
						|
}
 | 
						|
 | 
						|
func (b *Batch) Merge(o *Batch) {
 | 
						|
	for k, v := range o.IndexOps {
 | 
						|
		b.IndexOps[k] = v
 | 
						|
	}
 | 
						|
	for k, v := range o.InternalOps {
 | 
						|
		b.InternalOps[k] = v
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
func (b *Batch) TotalDocSize() int {
 | 
						|
	var s int
 | 
						|
	for k, v := range b.IndexOps {
 | 
						|
		if v != nil {
 | 
						|
			s += v.Size() + size.SizeOfString
 | 
						|
		}
 | 
						|
		s += len(k)
 | 
						|
	}
 | 
						|
	return s
 | 
						|
}
 | 
						|
 | 
						|
// Optimizable represents an optional interface that is implementable by
 | 
						|
// optimizable resources (e.g., TermFieldReaders, Searchers).  These
 | 
						|
// optimizable resources are provided the same OptimizableContext
 | 
						|
// instance, so that they can coordinate via dynamic interface
 | 
						|
// casting.
 | 
						|
type Optimizable interface {
 | 
						|
	// Optimize receives a kind string and an OptimizableContext and returns
	// an OptimizableContext or an error.
	// NOTE(review): the set of valid kind values, and whether the returned
	// context is meant to be passed to the next resource, are not visible
	// here — confirm.
	Optimize(kind string, octx OptimizableContext) (OptimizableContext, error)
 | 
						|
}
 | 
						|
 | 
						|
// Optimized represents a result of optimization -- see the
// OptimizableContext.Finish() method. It is declared as the empty
// interface, so the concrete type depends on the optimization performed.
 | 
						|
type Optimized interface{}
 | 
						|
 | 
						|
// OptimizableContext is the value passed to, and returned from,
// Optimizable.Optimize; Finish completes the optimization it describes.
type OptimizableContext interface {
 | 
						|
	// Once all the optimizable resources have been provided the same
 | 
						|
	// OptimizableContext instance, the optimization preparations are
 | 
						|
	// finished or completed via the Finish() method.
 | 
						|
	//
 | 
						|
	// Depending on the optimization being performed, the Finish()
 | 
						|
	// method might return a non-nil Optimized instance.  For example,
 | 
						|
	// the Optimized instance might represent an optimized
 | 
						|
	// TermFieldReader instance.
 | 
						|
	Finish() (Optimized, error)
 | 
						|
}
 | 
						|
 | 
						|
// DocValueReader is the type returned by IndexReader.DocValueReader. Its
// VisitDocValues takes a document's internal id and a
// DocumentFieldTermVisitor callback.
// NOTE(review): presumably the visitor is called once per (field, term) of
// that document for the fields the reader was created with — confirm.
type DocValueReader interface {
 | 
						|
	VisitDocValues(id IndexInternalID, visitor DocumentFieldTermVisitor) error
 | 
						|
}
 | 
						|
 | 
						|
// IndexBuilder is an interface supported by some index schemes
 | 
						|
// to allow direct write-only index building
 | 
						|
type IndexBuilder interface {
 | 
						|
	// Index takes a single document and reports failure through the
	// returned error.
	Index(doc *document.Document) error
 | 
						|
	// Close reports failure through the returned error.
	// NOTE(review): presumably finalises the build and releases resources —
	// confirm against implementations.
	Close() error
 | 
						|
}
 |