gomod: Bump zoekt for UseKeywordScoring rename (#62504)

- https://github.com/sourcegraph/zoekt/commit/69068bff5b zoekt-mirror-gerrit: add an option to fetch meta/config branch
- https://github.com/sourcegraph/zoekt/commit/647e86c96a zoekt-indexserver: handle gerrit-mirror RepoNameFormat config
- https://github.com/sourcegraph/zoekt/commit/7c5b77843b gitindex: update remote.origin.url if clone already exists
- https://github.com/sourcegraph/zoekt/commit/9f35cb185d zoekt-mirror-gerrit: fix fetch for meta/config
- https://github.com/sourcegraph/zoekt/commit/4e674a4979 Rename UseKeywordScoring to mention BM25
This commit is contained in:
Julie Tibshirani 2024-05-07 13:14:45 -07:00 committed by GitHub
parent 9d762e658f
commit 1ef2f3f5d5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 22 additions and 25 deletions

View File

@ -5676,8 +5676,8 @@ def go_dependencies():
patches = [
"//third_party/com_github_sourcegraph_zoekt:x_defs_version.patch",
],
sum = "h1:aXHLpH1rhdvg4gQOiQWLkqVd3D/DG2li5Nnf6WE7mRs=",
version = "v0.0.0-20240501072156-72f95004e6d6",
sum = "h1:eQIFTvf8qZcSLhgu5NrprfwgtJEqU9tvGXG8sf1SSgU=",
version = "v0.0.0-20240507175742-4e674a49795c",
)
go_repository(
name = "com_github_spaolacci_murmur3",

2
go.mod
View File

@ -602,7 +602,7 @@ require (
github.com/scim2/filter-parser/v2 v2.2.0
github.com/sourcegraph/conc v0.3.1-0.20240108182409-4afefce20f9b
github.com/sourcegraph/mountinfo v0.0.0-20240201124957-b314c0befab1
github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6
github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c
github.com/spf13/cobra v1.8.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.2 // indirect

4
go.sum
View File

@ -1743,8 +1743,8 @@ github.com/sourcegraph/sourcegraph-accounts-sdk-go v0.0.0-20240426173441-db5b0a1
github.com/sourcegraph/sourcegraph-accounts-sdk-go v0.0.0-20240426173441-db5b0a145ceb/go.mod h1:xul4Fiph3Pvdx/1qsmhCUL2GBeYjTcnga0LXZEbKdGo=
github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152 h1:z/MpntplPaW6QW95pzcAR/72Z5TWDyDnSo0EOcyij9o=
github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I=
github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6 h1:aXHLpH1rhdvg4gQOiQWLkqVd3D/DG2li5Nnf6WE7mRs=
github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6/go.mod h1:K7dYKxtKLPBRwu55Useje/JUZEuWgzlu5O1F8VFHfwE=
github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c h1:eQIFTvf8qZcSLhgu5NrprfwgtJEqU9tvGXG8sf1SSgU=
github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c/go.mod h1:K7dYKxtKLPBRwu55Useje/JUZEuWgzlu5O1F8VFHfwE=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v0.0.0-20170901052352-ee1bd8ee15a1/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=

View File

@ -476,13 +476,13 @@ func RankingMaxQueueSizeBytes() int {
// SearchFlushWallTime controls the amount of time that Zoekt shards collect and rank results. For
// larger codebases, it can be helpful to increase this to improve the ranking stability and quality.
func SearchFlushWallTime(keywordScoring bool) time.Duration {
func SearchFlushWallTime(bm25Scoring bool) time.Duration {
ranking := ExperimentalFeatures().Ranking
if ranking != nil && ranking.FlushWallTimeMS > 0 {
return time.Duration(ranking.FlushWallTimeMS) * time.Millisecond
} else {
if keywordScoring {
// Keyword scoring takes longer than standard searches, so use a higher FlushWallTime
if bm25Scoring {
// BM25 scoring takes longer than standard searches, so use a higher FlushWallTime
// to help ensure ranking is stable
return 2 * time.Second
} else {

View File

@ -78,7 +78,7 @@ func (m *meteredSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zoe
attribute.Bool("opts.chunk_matches", opts.ChunkMatches),
attribute.Bool("opts.use_document_ranks", opts.UseDocumentRanks),
attribute.Float64("opts.document_ranks_weight", opts.DocumentRanksWeight),
attribute.Bool("opts.use_keyword_scoring", opts.UseKeywordScoring),
attribute.Bool("opts.use_bm25_scoring", opts.UseBM25Scoring),
attribute.Bool("opts.debug_score", opts.DebugScore),
)...)
}

View File

@ -185,24 +185,21 @@ func (o *ZoektParameters) ToSearchOptions(ctx context.Context) (searchOpts *zoek
defaultTimeout := 20 * time.Second
searchOpts = &zoekt.SearchOptions{
Trace: policy.ShouldTrace(ctx),
MaxWallTime: defaultTimeout,
ChunkMatches: true,
UseKeywordScoring: o.PatternType == query.SearchTypeCodyContext,
NumContextLines: o.NumContextLines,
Trace: policy.ShouldTrace(ctx),
MaxWallTime: defaultTimeout,
ChunkMatches: true,
UseBM25Scoring: o.PatternType == query.SearchTypeCodyContext,
NumContextLines: o.NumContextLines,
}
// These are reasonable default amounts of work to do per shard and
// replica respectively.
searchOpts.ShardMaxMatchCount = 10_000
searchOpts.TotalMaxMatchCount = 100_000
// KeywordScoring and Features.UseZoektParser represent different approaches we
// are evaluating to deliver a better keyword-based search experience. For now
// these are separate, but we might combine them in the future. Both profit from
// higher defaults.
if searchOpts.UseKeywordScoring || o.PatternType == query.SearchTypeKeyword {
// Keyword searches tend to match much more broadly than code searches, so we need to
// consider more candidates to ensure we don't miss highly-ranked documents
// Keyword searches tend to match much more broadly than code searches, so we need to
// consider more candidates to ensure we don't miss highly-ranked documents. The same
// holds for BM25 scoring, which is used for Cody context searches.
if searchOpts.UseBM25Scoring || o.PatternType == query.SearchTypeKeyword {
searchOpts.ShardMaxMatchCount *= 10
searchOpts.TotalMaxMatchCount *= 10
}
@ -232,7 +229,7 @@ func (o *ZoektParameters) ToSearchOptions(ctx context.Context) (searchOpts *zoek
// This enables our stream based ranking, where we wait a certain amount
// of time to collect results before ranking.
searchOpts.FlushWallTime = conf.SearchFlushWallTime(searchOpts.UseKeywordScoring)
searchOpts.FlushWallTime = conf.SearchFlushWallTime(searchOpts.UseBM25Scoring)
// Only use document ranks if the jobs to calculate the ranks are enabled. This
// is to make sure we don't use outdated ranks for scoring in Zoekt.

View File

@ -151,7 +151,7 @@ func TestZoektParameters(t *testing.T) {
},
},
{
name: "test keyword scoring",
name: "test bm25 scoring",
context: context.Background(),
params: &ZoektParameters{
FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
@ -161,11 +161,11 @@ func TestZoektParameters(t *testing.T) {
ShardMaxMatchCount: 100000,
TotalMaxMatchCount: 1000000,
MaxWallTime: 20000000000,
FlushWallTime: 2000000000, // for keyword search, default is 2 sec
FlushWallTime: 2000000000, // for bm25 scoring, default is 2 sec
MaxDocDisplayCount: 10000,
ChunkMatches: true,
DocumentRanksWeight: 4500,
UseKeywordScoring: true},
UseBM25Scoring: true},
},
}