gomod: Bump zoekt for UseKeywordScoring rename (#62504)

- https://github.com/sourcegraph/zoekt/commit/69068bff5b zoekt-mirror-gerrit: add an option to fetch meta/config branch
- https://github.com/sourcegraph/zoekt/commit/647e86c96a zoekt-indexserver: handle gerrit-mirror RepoNameFormat config
- https://github.com/sourcegraph/zoekt/commit/7c5b77843b gitindex: update remote.origin.url if clone already exists
- https://github.com/sourcegraph/zoekt/commit/9f35cb185d zoekt-mirror-gerrit: fix fetch for meta/config
- https://github.com/sourcegraph/zoekt/commit/4e674a4979 Rename UseKeywordScoring to mention BM25
2026-02-06 15:12:02 +00:00 · 2024-05-07 13:14:45 -07:00 · 2024-05-07 13:14:45 -07:00 · 1ef2f3f5d5
commit 1ef2f3f5d5
parent 9d762e658f
7 changed files with 22 additions and 25 deletions
--- a/deps.bzl
+++ b/deps.bzl
@ -5676,8 +5676,8 @@ def go_dependencies():
        patches = [
            "//third_party/com_github_sourcegraph_zoekt:x_defs_version.patch",
        ],
-        sum = "h1:aXHLpH1rhdvg4gQOiQWLkqVd3D/DG2li5Nnf6WE7mRs=",
-        version = "v0.0.0-20240501072156-72f95004e6d6",
+        sum = "h1:eQIFTvf8qZcSLhgu5NrprfwgtJEqU9tvGXG8sf1SSgU=",
+        version = "v0.0.0-20240507175742-4e674a49795c",
    )
    go_repository(
        name = "com_github_spaolacci_murmur3",
--- a/go.mod
+++ b/go.mod
@ -602,7 +602,7 @@ require (
 	github.com/scim2/filter-parser/v2 v2.2.0
 	github.com/sourcegraph/conc v0.3.1-0.20240108182409-4afefce20f9b
 	github.com/sourcegraph/mountinfo v0.0.0-20240201124957-b314c0befab1
-	github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6
+	github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c
 	github.com/spf13/cobra v1.8.0 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/stretchr/objx v0.5.2 // indirect
--- a/go.sum
+++ b/go.sum
@ -1743,8 +1743,8 @@ github.com/sourcegraph/sourcegraph-accounts-sdk-go v0.0.0-20240426173441-db5b0a1
 github.com/sourcegraph/sourcegraph-accounts-sdk-go v0.0.0-20240426173441-db5b0a145ceb/go.mod h1:xul4Fiph3Pvdx/1qsmhCUL2GBeYjTcnga0LXZEbKdGo=
 github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152 h1:z/MpntplPaW6QW95pzcAR/72Z5TWDyDnSo0EOcyij9o=
 github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I=
-github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6 h1:aXHLpH1rhdvg4gQOiQWLkqVd3D/DG2li5Nnf6WE7mRs=
-github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6/go.mod h1:K7dYKxtKLPBRwu55Useje/JUZEuWgzlu5O1F8VFHfwE=
+github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c h1:eQIFTvf8qZcSLhgu5NrprfwgtJEqU9tvGXG8sf1SSgU=
+github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c/go.mod h1:K7dYKxtKLPBRwu55Useje/JUZEuWgzlu5O1F8VFHfwE=
 github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
 github.com/spf13/afero v0.0.0-20170901052352-ee1bd8ee15a1/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
 github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
--- a/internal/conf/computed.go
+++ b/internal/conf/computed.go
@ -476,13 +476,13 @@ func RankingMaxQueueSizeBytes() int {

 // SearchFlushWallTime controls the amount of time that Zoekt shards collect and rank results. For
 // larger codebases, it can be helpful to increase this to improve the ranking stability and quality.
-func SearchFlushWallTime(keywordScoring bool) time.Duration {
+func SearchFlushWallTime(bm25Scoring bool) time.Duration {
 	ranking := ExperimentalFeatures().Ranking
 	if ranking != nil && ranking.FlushWallTimeMS > 0 {
 		return time.Duration(ranking.FlushWallTimeMS) * time.Millisecond
 	} else {
-		if keywordScoring {
-			// Keyword scoring takes longer than standard searches, so use a higher FlushWallTime
+		if bm25Scoring {
+			// BM25 scoring takes longer than standard searches, so use a higher FlushWallTime
 			// to help ensure ranking is stable
 			return 2 * time.Second
 		} else {
--- a/internal/search/backend/metered_searcher.go
+++ b/internal/search/backend/metered_searcher.go
@ -78,7 +78,7 @@ func (m *meteredSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zoe
 			attribute.Bool("opts.chunk_matches", opts.ChunkMatches),
 			attribute.Bool("opts.use_document_ranks", opts.UseDocumentRanks),
 			attribute.Float64("opts.document_ranks_weight", opts.DocumentRanksWeight),
-			attribute.Bool("opts.use_keyword_scoring", opts.UseKeywordScoring),
+			attribute.Bool("opts.use_bm25_scoring", opts.UseBM25Scoring),
 			attribute.Bool("opts.debug_score", opts.DebugScore),
 		)...)
 	}
--- a/internal/search/types.go
+++ b/internal/search/types.go
@ -185,24 +185,21 @@ func (o *ZoektParameters) ToSearchOptions(ctx context.Context) (searchOpts *zoek

 	defaultTimeout := 20 * time.Second
 	searchOpts = &zoekt.SearchOptions{
-		Trace:             policy.ShouldTrace(ctx),
-		MaxWallTime:       defaultTimeout,
-		ChunkMatches:      true,
-		UseKeywordScoring: o.PatternType == query.SearchTypeCodyContext,
-		NumContextLines:   o.NumContextLines,
+		Trace:           policy.ShouldTrace(ctx),
+		MaxWallTime:     defaultTimeout,
+		ChunkMatches:    true,
+		UseBM25Scoring:  o.PatternType == query.SearchTypeCodyContext,
+		NumContextLines: o.NumContextLines,
 	}

 	// These are reasonable default amounts of work to do per shard and
 	// replica respectively.
 	searchOpts.ShardMaxMatchCount = 10_000
 	searchOpts.TotalMaxMatchCount = 100_000
-	// KeywordScoring and Features.UseZoektParser represent different approaches we
-	// are evaluating to deliver a better keyword-based search experience. For now
-	// these are separate, but we might combine them in the future. Both profit from
-	// higher defaults.
-	if searchOpts.UseKeywordScoring || o.PatternType == query.SearchTypeKeyword {
-		// Keyword searches tends to match much more broadly than code searches, so we need to
-		// consider more candidates to ensure we don't miss highly-ranked documents
+	// Keyword searches tend to match much more broadly than code searches, so we need to
+	// consider more candidates to ensure we don't miss highly-ranked documents. The same
+	// holds for BM25 scoring, which is used for Cody context searches.
+	if searchOpts.UseBM25Scoring || o.PatternType == query.SearchTypeKeyword {
 		searchOpts.ShardMaxMatchCount *= 10
 		searchOpts.TotalMaxMatchCount *= 10
 	}
@ -232,7 +229,7 @@ func (o *ZoektParameters) ToSearchOptions(ctx context.Context) (searchOpts *zoek

 	// This enables our stream based ranking, where we wait a certain amount
 	// of time to collect results before ranking.
-	searchOpts.FlushWallTime = conf.SearchFlushWallTime(searchOpts.UseKeywordScoring)
+	searchOpts.FlushWallTime = conf.SearchFlushWallTime(searchOpts.UseBM25Scoring)

 	// Only use document ranks if the jobs to calculate the ranks are enabled. This
 	// is to make sure we don't use outdated ranks for scoring in Zoekt.
--- a/internal/search/types_test.go
+++ b/internal/search/types_test.go
@ -151,7 +151,7 @@ func TestZoektParameters(t *testing.T) {
 			},
 		},
 		{
-			name:    "test keyword scoring",
+			name:    "test bm25 scoring",
 			context: context.Background(),
 			params: &ZoektParameters{
 				FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
@ -161,11 +161,11 @@ func TestZoektParameters(t *testing.T) {
 				ShardMaxMatchCount:  100000,
 				TotalMaxMatchCount:  1000000,
 				MaxWallTime:         20000000000,
-				FlushWallTime:       2000000000, // for keyword search, default is 2 sec
+				FlushWallTime:       2000000000, // for bm25 scoring, default is 2 sec
 				MaxDocDisplayCount:  10000,
 				ChunkMatches:        true,
 				DocumentRanksWeight: 4500,
-				UseKeywordScoring:   true},
+				UseBM25Scoring:      true},
 		},
 	}