mirror of
				https://gitee.com/gitea/gitea
				synced 2025-11-04 16:40:24 +08:00 
			
		
		
		
	Fix bug on elastic search (#12811)
* Fix bug on elastic search * Add more comments for elastic search result startIndex and endIndex * refactor indexPos * refactor indexPos * Fix bug
This commit is contained in:
		@@ -90,6 +90,7 @@ const (
 | 
				
			|||||||
				},
 | 
									},
 | 
				
			||||||
				"content": {
 | 
									"content": {
 | 
				
			||||||
					"type": "text",
 | 
										"type": "text",
 | 
				
			||||||
 | 
										"term_vector": "with_positions_offsets",
 | 
				
			||||||
					"index": true
 | 
										"index": true
 | 
				
			||||||
				},
 | 
									},
 | 
				
			||||||
				"commit_id": {
 | 
									"commit_id": {
 | 
				
			||||||
@@ -251,6 +252,22 @@ func (b *ElasticSearchIndexer) Delete(repoID int64) error {
 | 
				
			|||||||
	return err
 | 
						return err
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// indexPos finds the positions of start and of the following end in content. It will
 | 
				
			||||||
 | 
					// return the beginning position of the first start and the ending position of the
 | 
				
			||||||
 | 
					// first end following the start string.
 | 
				
			||||||
 | 
					// If either position is not found, it will return -1, -1.
 | 
				
			||||||
 | 
					func indexPos(content, start, end string) (int, int) {
 | 
				
			||||||
 | 
						startIdx := strings.Index(content, start)
 | 
				
			||||||
 | 
						if startIdx < 0 {
 | 
				
			||||||
 | 
							return -1, -1
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						endIdx := strings.Index(content[startIdx+len(start):], end)
 | 
				
			||||||
 | 
						if endIdx < 0 {
 | 
				
			||||||
 | 
							return -1, -1
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						return startIdx, startIdx + len(start) + endIdx + len(end)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
 | 
					func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) {
 | 
				
			||||||
	hits := make([]*SearchResult, 0, pageSize)
 | 
						hits := make([]*SearchResult, 0, pageSize)
 | 
				
			||||||
	for _, hit := range searchResult.Hits.Hits {
 | 
						for _, hit := range searchResult.Hits.Hits {
 | 
				
			||||||
@@ -260,18 +277,12 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
 | 
				
			|||||||
		var startIndex, endIndex int = -1, -1
 | 
							var startIndex, endIndex int = -1, -1
 | 
				
			||||||
		c, ok := hit.Highlight["content"]
 | 
							c, ok := hit.Highlight["content"]
 | 
				
			||||||
		if ok && len(c) > 0 {
 | 
							if ok && len(c) > 0 {
 | 
				
			||||||
			var subStr = make([]rune, 0, len(kw))
 | 
								// FIXME: Since the highlighted content will include <em> and </em> for the keywords,
 | 
				
			||||||
			startIndex = strings.IndexFunc(c[0], func(r rune) bool {
 | 
								// now we should find the positions. But how to avoid HTML content which contains the
 | 
				
			||||||
				if len(subStr) >= len(kw) {
 | 
								// <em> and </em> tags? Has elastic search handled that?
 | 
				
			||||||
					subStr = subStr[1:]
 | 
								startIndex, endIndex = indexPos(c[0], "<em>", "</em>")
 | 
				
			||||||
				}
 | 
								if startIndex == -1 {
 | 
				
			||||||
				subStr = append(subStr, r)
 | 
									panic(fmt.Sprintf("1===%s,,,%#v,,,%s", kw, hit.Highlight, c[0]))
 | 
				
			||||||
				return strings.EqualFold(kw, string(subStr))
 | 
					 | 
				
			||||||
			})
 | 
					 | 
				
			||||||
			if startIndex > -1 {
 | 
					 | 
				
			||||||
				endIndex = startIndex + len(kw)
 | 
					 | 
				
			||||||
			} else {
 | 
					 | 
				
			||||||
				panic(fmt.Sprintf("1===%#v", hit.Highlight))
 | 
					 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
		} else {
 | 
							} else {
 | 
				
			||||||
			panic(fmt.Sprintf("2===%#v", hit.Highlight))
 | 
								panic(fmt.Sprintf("2===%#v", hit.Highlight))
 | 
				
			||||||
@@ -293,7 +304,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int)
 | 
				
			|||||||
			UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
 | 
								UpdatedUnix: timeutil.TimeStamp(res["updated_at"].(float64)),
 | 
				
			||||||
			Language:    language,
 | 
								Language:    language,
 | 
				
			||||||
			StartIndex:  startIndex,
 | 
								StartIndex:  startIndex,
 | 
				
			||||||
			EndIndex:    endIndex,
 | 
								EndIndex:    endIndex - 9, // remove the length of <em></em> since we give Content the original data
 | 
				
			||||||
			Color:       enry.GetColor(language),
 | 
								Color:       enry.GetColor(language),
 | 
				
			||||||
		})
 | 
							})
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@@ -347,7 +358,12 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
 | 
				
			|||||||
			Index(b.indexerAliasName).
 | 
								Index(b.indexerAliasName).
 | 
				
			||||||
			Aggregation("language", aggregation).
 | 
								Aggregation("language", aggregation).
 | 
				
			||||||
			Query(query).
 | 
								Query(query).
 | 
				
			||||||
			Highlight(elastic.NewHighlight().Field("content")).
 | 
								Highlight(
 | 
				
			||||||
 | 
									elastic.NewHighlight().
 | 
				
			||||||
 | 
										Field("content").
 | 
				
			||||||
 | 
										NumOfFragments(0). // return all highlighted content on fragments
 | 
				
			||||||
 | 
										HighlighterType("fvh"),
 | 
				
			||||||
 | 
								).
 | 
				
			||||||
			Sort("repo_id", true).
 | 
								Sort("repo_id", true).
 | 
				
			||||||
			From(start).Size(pageSize).
 | 
								From(start).Size(pageSize).
 | 
				
			||||||
			Do(context.Background())
 | 
								Do(context.Background())
 | 
				
			||||||
@@ -373,7 +389,12 @@ func (b *ElasticSearchIndexer) Search(repoIDs []int64, language, keyword string,
 | 
				
			|||||||
	searchResult, err := b.client.Search().
 | 
						searchResult, err := b.client.Search().
 | 
				
			||||||
		Index(b.indexerAliasName).
 | 
							Index(b.indexerAliasName).
 | 
				
			||||||
		Query(query).
 | 
							Query(query).
 | 
				
			||||||
		Highlight(elastic.NewHighlight().Field("content")).
 | 
							Highlight(
 | 
				
			||||||
 | 
								elastic.NewHighlight().
 | 
				
			||||||
 | 
									Field("content").
 | 
				
			||||||
 | 
									NumOfFragments(0). // return all highlighted content on fragments
 | 
				
			||||||
 | 
									HighlighterType("fvh"),
 | 
				
			||||||
 | 
							).
 | 
				
			||||||
		Sort("repo_id", true).
 | 
							Sort("repo_id", true).
 | 
				
			||||||
		From(start).Size(pageSize).
 | 
							From(start).Size(pageSize).
 | 
				
			||||||
		Do(context.Background())
 | 
							Do(context.Background())
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,3 +34,9 @@ func TestESIndexAndSearch(t *testing.T) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	testIndexer("elastic_search", t, indexer)
 | 
						testIndexer("elastic_search", t, indexer)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func TestIndexPos(t *testing.T) {
 | 
				
			||||||
 | 
						startIdx, endIdx := indexPos("test index start and end", "start", "end")
 | 
				
			||||||
 | 
						assert.EqualValues(t, 11, startIdx)
 | 
				
			||||||
 | 
						assert.EqualValues(t, 24, endIdx)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user