Ranking: add an experimental setting for tuning DocumentRanksWeight (#47135)

This PR adds an experimental feature setting that allows for adjusting
DocumentRanksWeight. This will help us tune the contribution of the file ranks in
the overall search ranking, by playing around with this setting on test data.

It also fixes a bug where we accidentally disable a repo search optimization
when file-based ranking is turned on.
This commit is contained in:
Julie Tibshirani 2023-01-31 08:58:06 -08:00 committed by GitHub
parent 89df7ea527
commit e09308436d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 134 additions and 11 deletions

View File

@ -327,6 +327,19 @@ func StructuralSearchEnabled() bool {
return val == "enabled"
}
// SearchDocumentRanksWeight controls the impact of document ranks on the final ranking when
// SearchOptions.UseDocumentRanks is enabled.
//
// It returns the DocumentRanksWeight value from the experimental Ranking settings when one
// is configured. Otherwise it falls back to 0.5 * 9000 (half the zoekt default), to match
// existing behavior where ranks are given half the priority as existing scoring signals.
// We plan to eventually remove this, once we experiment on real data to find a good default.
func SearchDocumentRanksWeight() float64 {
	// Fallback used when no override is configured: half of the zoekt default (0.5 * 9000).
	const defaultDocumentRanksWeight = 4500

	// Both the Ranking section and the weight itself are optional pointers, so each must be
	// checked before dereferencing.
	if ranking := ExperimentalFeatures().Ranking; ranking != nil && ranking.DocumentRanksWeight != nil {
		return *ranking.DocumentRanksWeight
	}
	return defaultDocumentRanksWeight
}
func ExperimentalFeatures() schema.ExperimentalFeatures {
val := Get().ExperimentalFeatures
if val == nil {

View File

@ -20,16 +20,19 @@ import (
"github.com/RoaringBitmap/roaring"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/conf"
"github.com/sourcegraph/sourcegraph/internal/search"
searchbackend "github.com/sourcegraph/sourcegraph/internal/search/backend"
"github.com/sourcegraph/sourcegraph/internal/search/filter"
"github.com/sourcegraph/sourcegraph/internal/search/job"
"github.com/sourcegraph/sourcegraph/internal/search/limits"
"github.com/sourcegraph/sourcegraph/internal/search/query"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/internal/search/streaming"
"github.com/sourcegraph/sourcegraph/internal/trace"
"github.com/sourcegraph/sourcegraph/internal/types"
"github.com/sourcegraph/sourcegraph/lib/errors"
"github.com/sourcegraph/sourcegraph/schema"
)
func TestIndexedSearch(t *testing.T) {
@ -495,6 +498,107 @@ func TestZoektResultCountFactor(t *testing.T) {
}
}
// TestZoektSearchOptions checks that Options.ToSearch produces the expected
// zoekt.SearchOptions for a table of inputs: plain defaults, defaults with the ranking
// feature enabled, repo-selector searches, and a custom document ranks weight supplied
// through the mocked site configuration.
func TestZoektSearchOptions(t *testing.T) {
	cases := []struct {
		name    string
		context context.Context
		options *Options
		// ranksWeight, when positive, is installed as the experimental
		// Ranking.DocumentRanksWeight setting for the duration of the subtest.
		ranksWeight float64
		want        *zoekt.SearchOptions
	}{
		{
			name:    "test defaults",
			context: context.Background(),
			options: &Options{
				FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
				NumRepos:       3,
			},
			want: &zoekt.SearchOptions{
				ShardMaxMatchCount: 500000,
				TotalMaxMatchCount: 500000,
				MaxWallTime:        20000000000,
				MaxDocDisplayCount: 2500,
				ChunkMatches:       true,
			},
		},
		{
			name:    "test defaults with ranking feature enabled",
			context: context.Background(),
			options: &Options{
				FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
				NumRepos:       3,
				Features: search.Features{
					Ranking: true,
				},
			},
			want: &zoekt.SearchOptions{
				ShardMaxMatchCount:  10000,
				TotalMaxMatchCount:  100000,
				MaxWallTime:         20000000000,
				FlushWallTime:       500000000,
				MaxDocDisplayCount:  500,
				ChunkMatches:        true,
				UseDocumentRanks:    true,
				DocumentRanksWeight: 4500,
			},
		},
		{
			name:    "test repo search defaults",
			context: context.Background(),
			options: &Options{
				Selector:       []string{filter.Repository},
				FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
				NumRepos:       3,
				Features: search.Features{
					Ranking: true,
				},
			},
			want: &zoekt.SearchOptions{
				ShardRepoMaxMatchCount: 1,
				MaxWallTime:            20000000000,
				ChunkMatches:           true,
			},
		},
		{
			name:        "test document ranks weight",
			context:     context.Background(),
			ranksWeight: 42.0,
			options: &Options{
				FileMatchLimit: limits.DefaultMaxSearchResultsStreaming,
				NumRepos:       3,
				Features: search.Features{
					Ranking: true,
				},
			},
			want: &zoekt.SearchOptions{
				ShardMaxMatchCount:  10000,
				TotalMaxMatchCount:  100000,
				MaxWallTime:         20000000000,
				FlushWallTime:       500000000,
				MaxDocDisplayCount:  500,
				ChunkMatches:        true,
				UseDocumentRanks:    true,
				DocumentRanksWeight: 42,
			},
		},
	}

	for _, tt := range cases {
		t.Run(tt.name, func(t *testing.T) {
			if tt.ranksWeight > 0.0 {
				// Copy the weight so the config does not hold a pointer into the loop variable.
				weight := tt.ranksWeight

				cfg := conf.Get()
				// ExperimentalFeatures is optional in the site config; make sure it exists
				// before writing through it, and remember what to put back afterwards.
				prevFeatures := cfg.ExperimentalFeatures
				if cfg.ExperimentalFeatures == nil {
					cfg.ExperimentalFeatures = &schema.ExperimentalFeatures{}
				}
				prevRanking := cfg.ExperimentalFeatures.Ranking

				cfg.ExperimentalFeatures.Ranking = &schema.Ranking{
					DocumentRanksWeight: &weight,
				}
				conf.Mock(cfg)

				// Undo the override so the custom weight does not leak into other tests.
				// NOTE(review): cfg is mutated in place, so the fields are restored
				// explicitly rather than relying on the mock alone — confirm whether
				// conf.Get returns a copy.
				t.Cleanup(func() {
					if prevFeatures == nil {
						cfg.ExperimentalFeatures = nil
					} else {
						cfg.ExperimentalFeatures.Ranking = prevRanking
					}
					conf.Mock(cfg)
				})
			}

			got := tt.options.ToSearch(tt.context)
			if diff := cmp.Diff(tt.want, got); diff != "" {
				t.Fatalf("search options mismatch (-want +got):\n%s", diff)
			}
		})
	}
}
func TestZoektIndexedRepos_single(t *testing.T) {
branchesRepos := func(branch string, repo api.RepoID) map[string]*zoektquery.BranchRepos {
return map[string]*zoektquery.BranchRepos{

View File

@ -10,6 +10,7 @@ import (
"github.com/sourcegraph/zoekt"
zoektquery "github.com/sourcegraph/zoekt/query"
"github.com/sourcegraph/sourcegraph/internal/conf"
"github.com/sourcegraph/sourcegraph/internal/search"
"github.com/sourcegraph/sourcegraph/internal/search/filter"
"github.com/sourcegraph/sourcegraph/internal/search/limits"
@ -99,6 +100,12 @@ func (o *Options) ToSearch(ctx context.Context) *zoekt.SearchOptions {
ChunkMatches: true,
}
// If we're searching repos, ignore the other options and only check one file per repo
if o.Selector.Root() == filter.Repository {
searchOpts.ShardRepoMaxMatchCount = 1
return searchOpts
}
if o.Features.Debug {
searchOpts.DebugScore = true
}
@ -129,17 +136,7 @@ func (o *Options) ToSearch(ctx context.Context) *zoekt.SearchOptions {
// This enables the use of document ranks in scoring, if they are available.
searchOpts.UseDocumentRanks = true
// This controls the impact of document ranks on the final ranking. The value is set to 0.5 * 9000 (half the
// zoekt default), to match existing behavior where ranks are given half the priority as existing scoring
// signals. We plan to eventually remove this, once we experiment on real data to find a good default.
searchOpts.DocumentRanksWeight = 4500
return searchOpts
}
if o.Selector.Root() == filter.Repository {
searchOpts.ShardRepoMaxMatchCount = 1
searchOpts.DocumentRanksWeight = conf.SearchDocumentRanksWeight()
} else {
k := o.resultCountFactor()
searchOpts.ShardMaxMatchCount = 100 * k

View File

@ -1730,6 +1730,8 @@ type QuickLink struct {
// Ranking description: Experimental search result ranking options.
type Ranking struct {
// DocumentRanksWeight description: Controls the impact of document ranks on the final ranking when the 'search-ranking' feature is enabled. This is intended for internal testing purposes only, it's not recommended for users to change this.
DocumentRanksWeight *float64 `json:"documentRanksWeight,omitempty"`
// MaxQueueMatchCount description: The maximum number of matches that can be buffered to sort results. The default is -1 (unbounded). Setting this to a positive integer protects frontend against OOMs for queries with extremely high count of matches per repository.
MaxQueueMatchCount *int `json:"maxQueueMatchCount,omitempty"`
// MaxQueueSizeBytes description: The maximum number of bytes that can be buffered to sort results. The default is -1 (unbounded). Setting this to a positive integer protects frontend against OOMs.

View File

@ -407,6 +407,13 @@
"type": "integer",
"default": 0,
"group": "Search"
},
"documentRanksWeight": {
"description": "Controls the impact of document ranks on the final ranking when the 'search-ranking' feature is enabled. This is intended for internal testing purposes only, it's not recommended for users to change this.",
"type": "number",
"default": 4500,
"group": "Search",
"!go": { "pointer": true }
}
}
},