Skip to content

Commit

Permalink
Revert "Determine fuzziness of bleve indexer by keyword length (go-gitea#29706)"
Browse files Browse the repository at this point in the history

This reverts commit b9c57fb.
  • Loading branch information
zjjhot committed Mar 24, 2024
1 parent 207fc43 commit 251d2f2
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 29 deletions.
15 changes: 8 additions & 7 deletions modules/indexer/code/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ import (
const (
unicodeNormalizeName = "unicodeNormalize"
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
)

func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
Expand Down Expand Up @@ -241,12 +239,15 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
keywordQuery query.Query
)

phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
if opts.IsKeywordFuzzy {
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
} else {
prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
prefixQuery.FieldVal = "Content"
keywordQuery = prefixQuery
}

if len(opts.RepoIDs) > 0 {
Expand Down
10 changes: 8 additions & 2 deletions modules/indexer/internal/bleve/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,17 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
}

// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
q := bleve.NewMatchPhraseQuery(matchPhrase)
q.FieldVal = field
q.Analyzer = analyzer
q.Fuzziness = fuzziness
return q
}

// PrefixQuery generates a match prefix query for the given prefix and field.
// It creates the query with bleve.NewPrefixQuery(matchPrefix) and stores field
// in the query's FieldVal. Unlike MatchPhraseQuery, no analyzer is assigned here.
// NOTE(review): presumably matchPrefix is therefore compared against indexed
// terms as given, without analysis — confirm against the bleve documentation.
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
q := bleve.NewPrefixQuery(matchPrefix)
// Record which field this query targets.
q.FieldVal = field
return q
}

Expand Down
25 changes: 12 additions & 13 deletions modules/indexer/issues/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
})
}

const (
maxBatchSize = 16
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
fuzzyDenominator = 4
)
const maxBatchSize = 16

// IndexerData an update to the issue indexer
type IndexerData internal.IndexerData
Expand Down Expand Up @@ -160,16 +156,19 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
var queries []query.Query

if options.Keyword != "" {
fuzziness := 0
if options.IsFuzzyKeyword {
fuzziness = len(options.Keyword) / fuzzyDenominator
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
}...))
} else {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.PrefixQuery(options.Keyword, "title"),
inner_bleve.PrefixQuery(options.Keyword, "content"),
inner_bleve.PrefixQuery(options.Keyword, "comments"),
}...))
}

queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
}...))
}

if len(options.RepoIDs) > 0 || options.AllPublic {
Expand Down
16 changes: 9 additions & 7 deletions tests/integration/repo_search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func TestSearchRepo(t *testing.T) {
repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
assert.NoError(t, err)

code_indexer.UpdateRepoIndexer(repo)
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)

testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})

Expand All @@ -42,14 +42,12 @@ func TestSearchRepo(t *testing.T) {
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
assert.NoError(t, err)

code_indexer.UpdateRepoIndexer(repo)
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)

testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"})
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"})
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
}

func testSearch(t *testing.T, url string, expected []string) {
Expand All @@ -59,3 +57,7 @@ func testSearch(t *testing.T, url string, expected []string) {
filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
assert.EqualValues(t, expected, filenames)
}

// executeIndexer invokes op on repo and returns once op has returned.
// The t parameter is accepted but not used by the body shown here.
// NOTE(review): presumably a seam for wrapping indexer calls (e.g.
// code_indexer.UpdateRepoIndexer) in tests — confirm intent with callers.
func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) {
op(repo)
}

0 comments on commit 251d2f2

Please sign in to comment.