mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 19:21:50 +00:00
Ranking: add an experimental setting for tuning DocumentRanksWeight (#47135)
This PR adds an experimental feature setting that allows for adjusting DocumentRanksWeight. This will help us tune the contribution of the file ranks in the overall search ranking, by playing around with this setting on test data. It also fixes a bug where we accidentally disable a repo search optimization when file-based ranking is turned on.
This commit is contained in:
parent
89df7ea527
commit
e09308436d
@ -327,6 +327,19 @@ func StructuralSearchEnabled() bool {
|
||||
return val == "enabled"
|
||||
}
|
||||
|
||||
// SearchDocumentRanksWeight controls the impact of document ranks on the final ranking when
|
||||
// SearchOptions.UseDocumentRanks is enabled. The default is 0.5 * 9000 (half the zoekt default),
|
||||
// to match existing behavior where ranks are given half the priority as existing scoring signals.
|
||||
// We plan to eventually remove this, once we experiment on real data to find a good default.
|
||||
func SearchDocumentRanksWeight() float64 {
|
||||
ranking := ExperimentalFeatures().Ranking
|
||||
if ranking != nil && ranking.DocumentRanksWeight != nil {
|
||||
return *ranking.DocumentRanksWeight
|
||||
} else {
|
||||
return 4500
|
||||
}
|
||||
}
|
||||
|
||||
func ExperimentalFeatures() schema.ExperimentalFeatures {
|
||||
val := Get().ExperimentalFeatures
|
||||
if val == nil {
|
||||
|
||||
@ -20,16 +20,19 @@ import (
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/api"
|
||||
"github.com/sourcegraph/sourcegraph/internal/conf"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search"
|
||||
searchbackend "github.com/sourcegraph/sourcegraph/internal/search/backend"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/filter"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/job"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/limits"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/query"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/result"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/streaming"
|
||||
"github.com/sourcegraph/sourcegraph/internal/trace"
|
||||
"github.com/sourcegraph/sourcegraph/internal/types"
|
||||
"github.com/sourcegraph/sourcegraph/lib/errors"
|
||||
"github.com/sourcegraph/sourcegraph/schema"
|
||||
)
|
||||
|
||||
func TestIndexedSearch(t *testing.T) {
|
||||
@ -495,6 +498,107 @@ func TestZoektResultCountFactor(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestZoektSearchOptions(t *testing.T) {
|
||||
cases := []struct {
|
||||
name string
|
||||
context context.Context
|
||||
options *Options
|
||||
ranksWeight float64
|
||||
want *zoekt.SearchOptions
|
||||
}{
|
||||
{
|
||||
name: "test defaults",
|
||||
context: context.Background(),
|
||||
options: &Options{
|
||||
FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
|
||||
NumRepos: 3,
|
||||
},
|
||||
want: &zoekt.SearchOptions{
|
||||
ShardMaxMatchCount: 500000,
|
||||
TotalMaxMatchCount: 500000,
|
||||
MaxWallTime: 20000000000,
|
||||
MaxDocDisplayCount: 2500,
|
||||
ChunkMatches: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "test defaults with ranking feature enabled",
|
||||
context: context.Background(),
|
||||
options: &Options{
|
||||
FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
|
||||
NumRepos: 3,
|
||||
Features: search.Features{
|
||||
Ranking: true,
|
||||
},
|
||||
},
|
||||
want: &zoekt.SearchOptions{
|
||||
ShardMaxMatchCount: 10000,
|
||||
TotalMaxMatchCount: 100000,
|
||||
MaxWallTime: 20000000000,
|
||||
FlushWallTime: 500000000,
|
||||
MaxDocDisplayCount: 500,
|
||||
ChunkMatches: true,
|
||||
UseDocumentRanks: true,
|
||||
DocumentRanksWeight: 4500,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "test repo search defaults",
|
||||
context: context.Background(),
|
||||
options: &Options{
|
||||
Selector: []string{filter.Repository},
|
||||
FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
|
||||
NumRepos: 3,
|
||||
Features: search.Features{
|
||||
Ranking: true,
|
||||
},
|
||||
},
|
||||
want: &zoekt.SearchOptions{
|
||||
ShardRepoMaxMatchCount: 1,
|
||||
MaxWallTime: 20000000000,
|
||||
ChunkMatches: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "test document ranks weight",
|
||||
context: context.Background(),
|
||||
ranksWeight: 42.0,
|
||||
options: &Options{
|
||||
FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
|
||||
NumRepos: 3,
|
||||
Features: search.Features{
|
||||
Ranking: true,
|
||||
},
|
||||
},
|
||||
want: &zoekt.SearchOptions{
|
||||
ShardMaxMatchCount: 10000,
|
||||
TotalMaxMatchCount: 100000,
|
||||
MaxWallTime: 20000000000,
|
||||
FlushWallTime: 500000000,
|
||||
MaxDocDisplayCount: 500,
|
||||
ChunkMatches: true,
|
||||
UseDocumentRanks: true,
|
||||
DocumentRanksWeight: 42,
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if tt.ranksWeight > 0.0 {
|
||||
cfg := conf.Get()
|
||||
cfg.ExperimentalFeatures.Ranking = &schema.Ranking{
|
||||
DocumentRanksWeight: &tt.ranksWeight,
|
||||
}
|
||||
conf.Mock(cfg)
|
||||
}
|
||||
got := tt.options.ToSearch(tt.context)
|
||||
if diff := cmp.Diff(tt.want, got); diff != "" {
|
||||
t.Fatalf("search options mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestZoektIndexedRepos_single(t *testing.T) {
|
||||
branchesRepos := func(branch string, repo api.RepoID) map[string]*zoektquery.BranchRepos {
|
||||
return map[string]*zoektquery.BranchRepos{
|
||||
|
||||
@ -10,6 +10,7 @@ import (
|
||||
"github.com/sourcegraph/zoekt"
|
||||
zoektquery "github.com/sourcegraph/zoekt/query"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/conf"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/filter"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/limits"
|
||||
@ -99,6 +100,12 @@ func (o *Options) ToSearch(ctx context.Context) *zoekt.SearchOptions {
|
||||
ChunkMatches: true,
|
||||
}
|
||||
|
||||
// If we're searching repos, ignore the other options and only check one file per repo
|
||||
if o.Selector.Root() == filter.Repository {
|
||||
searchOpts.ShardRepoMaxMatchCount = 1
|
||||
return searchOpts
|
||||
}
|
||||
|
||||
if o.Features.Debug {
|
||||
searchOpts.DebugScore = true
|
||||
}
|
||||
@ -129,17 +136,7 @@ func (o *Options) ToSearch(ctx context.Context) *zoekt.SearchOptions {
|
||||
|
||||
// This enables the use of document ranks in scoring, if they are available.
|
||||
searchOpts.UseDocumentRanks = true
|
||||
|
||||
// This controls the impact of document ranks on the final ranking. The value is set to 0.5 * 9000 (half the
|
||||
// zoekt default), to match existing behavior where ranks are given half the priority as existing scoring
|
||||
// signals. We plan to eventually remove this, once we experiment on real data to find a good default.
|
||||
searchOpts.DocumentRanksWeight = 4500
|
||||
|
||||
return searchOpts
|
||||
}
|
||||
|
||||
if o.Selector.Root() == filter.Repository {
|
||||
searchOpts.ShardRepoMaxMatchCount = 1
|
||||
searchOpts.DocumentRanksWeight = conf.SearchDocumentRanksWeight()
|
||||
} else {
|
||||
k := o.resultCountFactor()
|
||||
searchOpts.ShardMaxMatchCount = 100 * k
|
||||
|
||||
@ -1730,6 +1730,8 @@ type QuickLink struct {
|
||||
|
||||
// Ranking description: Experimental search result ranking options.
|
||||
type Ranking struct {
|
||||
// DocumentRanksWeight description: Controls the impact of document ranks on the final ranking when the 'search-ranking' feature is enabled. This is intended for internal testing purposes only, it's not recommended for users to change this.
|
||||
DocumentRanksWeight *float64 `json:"documentRanksWeight,omitempty"`
|
||||
// MaxQueueMatchCount description: The maximum number of matches that can be buffered to sort results. The default is -1 (unbounded). Setting this to a positive integer protects frontend against OOMs for queries with extremely high count of matches per repository.
|
||||
MaxQueueMatchCount *int `json:"maxQueueMatchCount,omitempty"`
|
||||
// MaxQueueSizeBytes description: The maximum number of bytes that can be buffered to sort results. The default is -1 (unbounded). Setting this to a positive integer protects frontend against OOMs.
|
||||
|
||||
@ -407,6 +407,13 @@
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"group": "Search"
|
||||
},
|
||||
"documentRanksWeight": {
|
||||
"description": "Controls the impact of document ranks on the final ranking when the 'search-ranking' feature is enabled. This is intended for internal testing purposes only, it's not recommended for users to change this.",
|
||||
"type": "number",
|
||||
"default": 4500,
|
||||
"group": "Search",
|
||||
"!go": { "pointer": true }
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
Loading…
Reference in New Issue
Block a user