mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 15:12:02 +00:00
gomod: Bump zoekt for UseKeywordScoring rename (#62504)
- https://github.com/sourcegraph/zoekt/commit/69068bff5b zoekt-mirror-gerrit: add an option to fetch meta/config branch
- https://github.com/sourcegraph/zoekt/commit/647e86c96a zoekt-indexserver: handle gerrit-mirror RepoNameFormat config
- https://github.com/sourcegraph/zoekt/commit/7c5b77843b gitindex: update remote.origin.url if clone already exists
- https://github.com/sourcegraph/zoekt/commit/9f35cb185d zoekt-mirror-gerrit: fix fetch for meta/config
- https://github.com/sourcegraph/zoekt/commit/4e674a4979 Rename UseKeywordScoring to mention BM25
This commit is contained in:
parent
9d762e658f
commit
1ef2f3f5d5
4
deps.bzl
4
deps.bzl
@ -5676,8 +5676,8 @@ def go_dependencies():
|
||||
patches = [
|
||||
"//third_party/com_github_sourcegraph_zoekt:x_defs_version.patch",
|
||||
],
|
||||
sum = "h1:aXHLpH1rhdvg4gQOiQWLkqVd3D/DG2li5Nnf6WE7mRs=",
|
||||
version = "v0.0.0-20240501072156-72f95004e6d6",
|
||||
sum = "h1:eQIFTvf8qZcSLhgu5NrprfwgtJEqU9tvGXG8sf1SSgU=",
|
||||
version = "v0.0.0-20240507175742-4e674a49795c",
|
||||
)
|
||||
go_repository(
|
||||
name = "com_github_spaolacci_murmur3",
|
||||
|
||||
2
go.mod
2
go.mod
@ -602,7 +602,7 @@ require (
|
||||
github.com/scim2/filter-parser/v2 v2.2.0
|
||||
github.com/sourcegraph/conc v0.3.1-0.20240108182409-4afefce20f9b
|
||||
github.com/sourcegraph/mountinfo v0.0.0-20240201124957-b314c0befab1
|
||||
github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6
|
||||
github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c
|
||||
github.com/spf13/cobra v1.8.0 // indirect
|
||||
github.com/spf13/pflag v1.0.5 // indirect
|
||||
github.com/stretchr/objx v0.5.2 // indirect
|
||||
|
||||
4
go.sum
4
go.sum
@ -1743,8 +1743,8 @@ github.com/sourcegraph/sourcegraph-accounts-sdk-go v0.0.0-20240426173441-db5b0a1
|
||||
github.com/sourcegraph/sourcegraph-accounts-sdk-go v0.0.0-20240426173441-db5b0a145ceb/go.mod h1:xul4Fiph3Pvdx/1qsmhCUL2GBeYjTcnga0LXZEbKdGo=
|
||||
github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152 h1:z/MpntplPaW6QW95pzcAR/72Z5TWDyDnSo0EOcyij9o=
|
||||
github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I=
|
||||
github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6 h1:aXHLpH1rhdvg4gQOiQWLkqVd3D/DG2li5Nnf6WE7mRs=
|
||||
github.com/sourcegraph/zoekt v0.0.0-20240501072156-72f95004e6d6/go.mod h1:K7dYKxtKLPBRwu55Useje/JUZEuWgzlu5O1F8VFHfwE=
|
||||
github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c h1:eQIFTvf8qZcSLhgu5NrprfwgtJEqU9tvGXG8sf1SSgU=
|
||||
github.com/sourcegraph/zoekt v0.0.0-20240507175742-4e674a49795c/go.mod h1:K7dYKxtKLPBRwu55Useje/JUZEuWgzlu5O1F8VFHfwE=
|
||||
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
|
||||
github.com/spf13/afero v0.0.0-20170901052352-ee1bd8ee15a1/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
|
||||
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
|
||||
|
||||
@ -476,13 +476,13 @@ func RankingMaxQueueSizeBytes() int {
|
||||
|
||||
// SearchFlushWallTime controls the amount of time that Zoekt shards collect and rank results. For
|
||||
// larger codebases, it can be helpful to increase this to improve the ranking stability and quality.
|
||||
func SearchFlushWallTime(keywordScoring bool) time.Duration {
|
||||
func SearchFlushWallTime(bm25Scoring bool) time.Duration {
|
||||
ranking := ExperimentalFeatures().Ranking
|
||||
if ranking != nil && ranking.FlushWallTimeMS > 0 {
|
||||
return time.Duration(ranking.FlushWallTimeMS) * time.Millisecond
|
||||
} else {
|
||||
if keywordScoring {
|
||||
// Keyword scoring takes longer than standard searches, so use a higher FlushWallTime
|
||||
if bm25Scoring {
|
||||
// BM25 scoring takes longer than standard searches, so use a higher FlushWallTime
|
||||
// to help ensure ranking is stable
|
||||
return 2 * time.Second
|
||||
} else {
|
||||
|
||||
@ -78,7 +78,7 @@ func (m *meteredSearcher) StreamSearch(ctx context.Context, q query.Q, opts *zoe
|
||||
attribute.Bool("opts.chunk_matches", opts.ChunkMatches),
|
||||
attribute.Bool("opts.use_document_ranks", opts.UseDocumentRanks),
|
||||
attribute.Float64("opts.document_ranks_weight", opts.DocumentRanksWeight),
|
||||
attribute.Bool("opts.use_keyword_scoring", opts.UseKeywordScoring),
|
||||
attribute.Bool("opts.use_bm25_scoring", opts.UseBM25Scoring),
|
||||
attribute.Bool("opts.debug_score", opts.DebugScore),
|
||||
)...)
|
||||
}
|
||||
|
||||
@ -185,24 +185,21 @@ func (o *ZoektParameters) ToSearchOptions(ctx context.Context) (searchOpts *zoek
|
||||
|
||||
defaultTimeout := 20 * time.Second
|
||||
searchOpts = &zoekt.SearchOptions{
|
||||
Trace: policy.ShouldTrace(ctx),
|
||||
MaxWallTime: defaultTimeout,
|
||||
ChunkMatches: true,
|
||||
UseKeywordScoring: o.PatternType == query.SearchTypeCodyContext,
|
||||
NumContextLines: o.NumContextLines,
|
||||
Trace: policy.ShouldTrace(ctx),
|
||||
MaxWallTime: defaultTimeout,
|
||||
ChunkMatches: true,
|
||||
UseBM25Scoring: o.PatternType == query.SearchTypeCodyContext,
|
||||
NumContextLines: o.NumContextLines,
|
||||
}
|
||||
|
||||
// These are reasonable default amounts of work to do per shard and
|
||||
// replica respectively.
|
||||
searchOpts.ShardMaxMatchCount = 10_000
|
||||
searchOpts.TotalMaxMatchCount = 100_000
|
||||
// KeywordScoring and Features.UseZoektParser represent different approaches we
|
||||
// are evaluating to deliver a better keyword-based search experience. For now
|
||||
// these are separate, but we might combine them in the future. Both profit from
|
||||
// higher defaults.
|
||||
if searchOpts.UseKeywordScoring || o.PatternType == query.SearchTypeKeyword {
|
||||
// Keyword searches tends to match much more broadly than code searches, so we need to
|
||||
// consider more candidates to ensure we don't miss highly-ranked documents
|
||||
// Keyword searches tend to match much more broadly than code searches, so we need to
|
||||
// consider more candidates to ensure we don't miss highly-ranked documents. The same
|
||||
// holds for BM25 scoring, which is used for Cody context searches.
|
||||
if searchOpts.UseBM25Scoring || o.PatternType == query.SearchTypeKeyword {
|
||||
searchOpts.ShardMaxMatchCount *= 10
|
||||
searchOpts.TotalMaxMatchCount *= 10
|
||||
}
|
||||
@ -232,7 +229,7 @@ func (o *ZoektParameters) ToSearchOptions(ctx context.Context) (searchOpts *zoek
|
||||
|
||||
// This enables our stream based ranking, where we wait a certain amount
|
||||
// of time to collect results before ranking.
|
||||
searchOpts.FlushWallTime = conf.SearchFlushWallTime(searchOpts.UseKeywordScoring)
|
||||
searchOpts.FlushWallTime = conf.SearchFlushWallTime(searchOpts.UseBM25Scoring)
|
||||
|
||||
// Only use document ranks if the jobs to calculate the ranks are enabled. This
|
||||
// is to make sure we don't use outdated ranks for scoring in Zoekt.
|
||||
|
||||
@ -151,7 +151,7 @@ func TestZoektParameters(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "test keyword scoring",
|
||||
name: "test bm25 scoring",
|
||||
context: context.Background(),
|
||||
params: &ZoektParameters{
|
||||
FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
|
||||
@ -161,11 +161,11 @@ func TestZoektParameters(t *testing.T) {
|
||||
ShardMaxMatchCount: 100000,
|
||||
TotalMaxMatchCount: 1000000,
|
||||
MaxWallTime: 20000000000,
|
||||
FlushWallTime: 2000000000, // for keyword search, default is 2 sec
|
||||
FlushWallTime: 2000000000, // for bm25 scoring, default is 2 sec
|
||||
MaxDocDisplayCount: 10000,
|
||||
ChunkMatches: true,
|
||||
DocumentRanksWeight: 4500,
|
||||
UseKeywordScoring: true},
|
||||
UseBM25Scoring: true},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user