search: Add search.index.revisions to site config (#28231)

This new config value under experimental features allows mapping a
regex on repo name to a list of revisions to be indexed.

Part of #28028
This commit is contained in:
Tomás Senart 2021-11-29 15:07:22 +01:00 committed by GitHub
parent 7142713108
commit f5cd52bc90
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 140 additions and 8 deletions

View File

@ -198,7 +198,12 @@ func (h *searchIndexerServer) serveConfiguration(w http.ResponseWriter, r *http.
repoIDs[i] = int32(indexedIDs[i])
}
b := searchbackend.GetIndexOptions(&siteConfig, getRepoIndexOptions, getSearchContextRevisions, repoIDs...)
b := searchbackend.GetIndexOptions(
&siteConfig,
getRepoIndexOptions,
getSearchContextRevisions,
repoIDs...,
)
_, _ = w.Write(b)
return nil
}

View File

@ -3,10 +3,11 @@ package backend
import (
"bytes"
"encoding/json"
"regexp"
"sort"
"github.com/google/zoekt"
"github.com/inconshreveable/log15"
"github.com/sourcegraph/sourcegraph/schema"
)
@ -90,13 +91,14 @@ func GetIndexOptions(
// strain on gitserver (as ported from zoekt-sourcegraph-indexserver). In
// future we want a more intelligent global limit based on scale.
sema := make(chan struct{}, 32)
results := make([][]byte, len(repos))
getSiteConfigRevisions := siteConfigRevisionsRuleFunc(c)
for i := range repos {
sema <- struct{}{}
go func(i int) {
defer func() { <-sema }()
results[i] = getIndexOptions(c, repos[i], getRepoIndexOptions, getSearchContextRevisions)
results[i] = getIndexOptions(c, repos[i], getRepoIndexOptions, getSearchContextRevisions, getSiteConfigRevisions)
}(i)
}
@ -113,6 +115,7 @@ func getIndexOptions(
repoID int32,
getRepoIndexOptions func(repoID int32) (*RepoIndexOptions, error),
getSearchContextRevisions func(repoID int32) ([]string, error),
getSiteConfigRevisions revsRuleFunc,
) []byte {
opts, err := getRepoIndexOptions(repoID)
if err != nil {
@ -133,9 +136,9 @@ func getIndexOptions(
// Set of branch names. Always index HEAD
branches := map[string]struct{}{"HEAD": {}}
// Add all branches that are referenced by version contexts
if c.ExperimentalFeatures != nil {
for _, rev := range c.ExperimentalFeatures.SearchIndexBranches[opts.Name] {
// Add all branches that are referenced by search.index.branches and search.index.revisions.
if getSiteConfigRevisions != nil {
for _, rev := range getSiteConfigRevisions(opts) {
branches[rev] = struct{}{}
}
}
@ -189,6 +192,48 @@ func getIndexOptions(
return marshal(o)
}
// revsRuleFunc returns the extra revisions that should be indexed for the
// repository described by the given index options.
type revsRuleFunc func(*RepoIndexOptions) (revs []string)
// siteConfigRevisionsRuleFunc builds a revsRuleFunc from the experimental
// search.index.branches and search.index.revisions site config settings.
//
// It returns nil when c or c.ExperimentalFeatures is nil. Every
// search.index.revisions rule name is compiled to a regexp once, up front, so
// the returned function only performs matching. Rules that are nil, that have
// an empty name, or whose name is not a valid regexp are skipped; an invalid
// regexp is additionally logged.
//
// The returned function yields the revisions configured for the exact
// repository name in search.index.branches, followed by the revisions of every
// matching search.index.revisions rule in configuration order. The result is
// not de-duplicated.
func siteConfigRevisionsRuleFunc(c *schema.SiteConfiguration) revsRuleFunc {
	if c == nil || c.ExperimentalFeatures == nil {
		return nil
	}

	rules := make([]revsRuleFunc, 0, len(c.ExperimentalFeatures.SearchIndexRevisions))
	for _, rule := range c.ExperimentalFeatures.SearchIndexRevisions {
		// The slice holds pointers, so an entry may be nil (for example a JSON
		// null in the array). Skip it instead of panicking on rule.Name.
		if rule == nil || rule.Name == "" {
			continue
		}

		namePattern, err := regexp.Compile(rule.Name)
		if err != nil {
			log15.Error("error compiling regex from search.index.revisions", "regex", rule.Name, "err", err)
			continue
		}

		// Copy the loop variable so each closure keeps its own rule.
		rule := rule
		rules = append(rules, func(o *RepoIndexOptions) []string {
			if !namePattern.MatchString(o.Name) {
				return nil
			}
			return rule.Revisions
		})
	}

	return func(o *RepoIndexOptions) (matched []string) {
		// Indexing a nil or empty map yields a nil slice, so no length check
		// is needed before the lookup.
		matched = append(matched, c.ExperimentalFeatures.SearchIndexBranches[o.Name]...)
		for _, rule := range rules {
			matched = append(matched, rule(o)...)
		}
		return matched
	}
}
func getBoolPtr(b *bool, default_ bool) bool {
if b == nil {
return default_

View File

@ -102,7 +102,8 @@ func TestGetIndexOptions(t *testing.T) {
}, {
name: "nosymbols",
conf: schema.SiteConfiguration{
SearchIndexSymbolsEnabled: boolPtr(false)},
SearchIndexSymbolsEnabled: boolPtr(false),
},
repo: REPO,
want: zoektIndexOptions{
RepoID: 1,
@ -139,6 +140,45 @@ func TestGetIndexOptions(t *testing.T) {
{Name: "a", Version: "!a"},
},
},
}, {
name: "conf index revisions",
conf: schema.SiteConfiguration{ExperimentalFeatures: &schema.ExperimentalFeatures{
SearchIndexRevisions: []*schema.SearchIndexRevisionsRule{
{Name: "repo-.*", Revisions: []string{"a"}},
},
}},
repo: REPO,
want: zoektIndexOptions{
RepoID: 1,
Name: "repo-01",
Symbols: true,
Branches: []zoekt.RepositoryBranch{
{Name: "HEAD", Version: "!HEAD"},
{Name: "a", Version: "!a"},
},
},
}, {
name: "conf index revisions and branches",
conf: schema.SiteConfiguration{ExperimentalFeatures: &schema.ExperimentalFeatures{
SearchIndexBranches: map[string][]string{
"repo-01": {"a", "b"},
},
SearchIndexRevisions: []*schema.SearchIndexRevisionsRule{
{Name: "repo-.*", Revisions: []string{"a", "c"}},
},
}},
repo: REPO,
want: zoektIndexOptions{
RepoID: 1,
Name: "repo-01",
Symbols: true,
Branches: []zoekt.RepositoryBranch{
{Name: "HEAD", Version: "!HEAD"},
{Name: "a", Version: "!a"},
{Name: "b", Version: "!b"},
{Name: "c", Version: "!c"},
},
},
}, {
name: "with search context revisions",
conf: schema.SiteConfiguration{},

View File

@ -587,6 +587,8 @@ type ExperimentalFeatures struct {
RateLimitAnonymous int `json:"rateLimitAnonymous,omitempty"`
// SearchIndexBranches description: A map from repository name to a list of extra revs (branch, ref, tag, commit sha, etc) to index for a repository. We always index the default branch ("HEAD") and revisions in version contexts. This allows specifying additional revisions. Sourcegraph can index up to 64 branches per repository.
SearchIndexBranches map[string][]string `json:"search.index.branches,omitempty"`
// SearchIndexRevisions description: An array of objects describing rules for extra revisions (branch, ref, tag, commit sha, etc) to be indexed for all repositories that match them. We always index the default branch ("HEAD") and revisions in version contexts. This allows specifying additional revisions. Sourcegraph can index up to 64 branches per repository.
SearchIndexRevisions []*SearchIndexRevisionsRule `json:"search.index.revisions,omitempty"`
// SearchMultipleRevisionsPerRepository description: DEPRECATED. Always on. Will be removed in 3.19.
SearchMultipleRevisionsPerRepository *bool `json:"searchMultipleRevisionsPerRepository,omitempty"`
// StructuralSearch description: Enables structural search.
@ -1364,6 +1366,12 @@ type SMTPServerConfig struct {
// Username description: The username to use when communicating with the SMTP server.
Username string `json:"username,omitempty"`
}
// SearchIndexRevisionsRule description: A rule pairing a regular expression on repository names with the revisions to index for matching repositories.
type SearchIndexRevisionsRule struct {
// Name description: Regular expression which matches against the name of a repository (e.g. "^github\.com/owner/name$").
Name string `json:"name,omitempty"`
// Revisions description: Revisions to index
Revisions []string `json:"revisions"`
}
// SearchLimits description: Limits that search applies for number of repositories searched and timeouts.
type SearchLimits struct {

View File

@ -209,6 +209,40 @@
]
]
},
"search.index.revisions": {
"description": "An array of objects describing rules for extra revisions (branch, ref, tag, commit sha, etc) to be indexed for all repositories that match them. We always index the default branch (\"HEAD\") and revisions in version contexts. This allows specifying additional revisions. Sourcegraph can index up to 64 branches per repository.",
"type": "array",
"items": {
"type": "object",
"title": "SearchIndexRevisionsRule",
"additionalProperties": false,
"required": ["revisions"],
"anyOf": [{ "required": ["name"] }],
"properties": {
"name": {
"description": "Regular expression which matches against the name of a repository (e.g. \"^github\\.com/owner/name$\").",
"type": "string",
"format": "regex"
},
"revisions": {
"description": "Revisions to index",
"type": "array",
"items": {
"type": "string",
"minLength": 1
}
}
}
},
"examples": [
[
{
"name": "^github.com/org/.*",
"revisions": ["3.17", "f6ca985c27486c2df5231ea3526caa4a4108ffb6", "v3.17.1"]
}
]
]
},
"search.index.branches": {
"description": "A map from repository name to a list of extra revs (branch, ref, tag, commit sha, etc) to index for a repository. We always index the default branch (\"HEAD\") and revisions in version contexts. This allows specifying additional revisions. Sourcegraph can index up to 64 branches per repository.",
"type": "object",