Structural search: fix precise lang filtering (#63791)

The feature `search-content-based-lang-detection` was never implemented
for structural search. This PR fills in the missing pieces.

Fixes #61714
Relates to
https://linear.app/sourcegraph/project/make-precise-lang-filtering-default-4f06cfa28567

## Test plan

Two new unit tests checking that the lang filters are now correctly
passed to Zoekt.
This commit is contained in:
Julie Tibshirani 2024-07-15 00:20:21 -07:00 committed by GitHub
parent 660d6866b5
commit cf05a9b2a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 70 additions and 5 deletions

View File

@ -22,7 +22,7 @@ import (
"github.com/sourcegraph/sourcegraph/lib/errors"
)
func handleFilePathPatterns(patternInfo *protocol.PatternInfo) (query.Q, error) {
func handleSearchFilters(patternInfo *protocol.PatternInfo) (query.Q, error) {
var and []query.Q
// Zoekt uses regular expressions for file paths.
@ -42,10 +42,18 @@ func handleFilePathPatterns(patternInfo *protocol.PatternInfo) (query.Q, error)
and = append(and, &query.Not{Child: q})
}
for _, lang := range patternInfo.IncludeLangs {
and = append(and, &query.Language{Language: lang})
}
for _, lang := range patternInfo.ExcludeLangs {
and = append(and, &query.Not{Child: &query.Language{Language: lang}})
}
return query.NewAnd(and...), nil
}
func buildQuery(pattern string, branchRepos []query.BranchRepos, filePathPatterns query.Q, shortcircuit bool) (query.Q, error) {
func buildQuery(pattern string, branchRepos []query.BranchRepos, filterQuery query.Q, shortcircuit bool) (query.Q, error) {
regexString := comby.StructuralPatToRegexpQuery(pattern, shortcircuit)
if len(regexString) == 0 {
return &query.Const{Value: true}, nil
@ -56,7 +64,7 @@ func buildQuery(pattern string, branchRepos []query.BranchRepos, filePathPattern
}
return query.NewAnd(
&query.BranchesRepos{List: branchRepos},
filePathPatterns,
filterQuery,
&query.Regexp{
Regexp: re,
CaseSensitive: true,
@ -87,13 +95,13 @@ func zoektSearch(ctx context.Context, logger log.Logger, client zoekt.Streamer,
}).ToSearchOptions(ctx)
searchOpts.Whole = true
filePathPatterns, err := handleFilePathPatterns(args)
filterQuery, err := handleSearchFilters(args)
if err != nil {
return err
}
t0 := time.Now()
q, err := buildQuery(atom.Value, branchRepos, filePathPatterns, false)
q, err := buildQuery(atom.Value, branchRepos, filterQuery, false)
if err != nil {
return err
}

View File

@ -62,3 +62,56 @@ func Test_zoektSearch(t *testing.T) {
)
require.Error(t, err)
}
func TestHandleSearchFilters(t *testing.T) {
tests := []struct {
name string
patternInfo *protocol.PatternInfo
expectedQ query.Q
expectedErr error
}{
{
name: "Include and exclude paths",
patternInfo: &protocol.PatternInfo{
IncludePaths: []string{"\\.go", "cmd"},
ExcludePaths: "vendor/",
IsCaseSensitive: false,
},
expectedQ: query.NewAnd(
&query.Substring{Pattern: ".go", FileName: true},
&query.Substring{Pattern: "cmd", FileName: true},
&query.Not{Child: &query.Substring{Pattern: "vendor/", FileName: true}},
),
expectedErr: nil,
},
{
name: "Include and exclude languages",
patternInfo: &protocol.PatternInfo{
IncludePaths: []string{"cmd"},
IncludeLangs: []string{"go", "typescript"},
ExcludeLangs: []string{"javascript"},
},
expectedQ: query.NewAnd(
&query.Substring{Pattern: "cmd", FileName: true},
&query.Language{Language: "go"},
&query.Language{Language: "typescript"},
&query.Not{Child: &query.Language{Language: "javascript"}},
),
expectedErr: nil,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
q, err := handleSearchFilters(tt.patternInfo)
if tt.expectedErr != nil {
require.Error(t, err)
require.Equal(t, tt.expectedErr.Error(), err.Error())
} else {
require.NoError(t, err)
require.Equal(t, tt.expectedQ, q)
}
})
}
}

View File

@ -1515,6 +1515,10 @@ func TestToTextPatternInfo(t *testing.T) {
}, {
input: `repo:^github\.com/sgtest/sourcegraph-typescript$ file:^README\.md "basic :[_] access :[_]" patterntype:structural`,
output: autogold.Expect(`{"Query":{"Value":"\"basic :[_] access :[_]\"","IsNegated":false,"IsRegExp":false},"IsStructuralPat":true,"CombyRule":"","IsCaseSensitive":false,"FileMatchLimit":30,"Index":"yes","Select":[],"IncludePaths":["^README\\.md"],"ExcludePaths":"","IncludeLangs":null,"ExcludeLangs":null,"PathPatternsAreCaseSensitive":false,"PatternMatchesContent":true,"PatternMatchesPath":true,"Languages":null}`),
}, {
input: `repo:^github\.com/sgtest/sourcegraph-typescript$ lang:typescript "basic :[_] access :[_]" patterntype:structural`,
feat: search.Features{ContentBasedLangFilters: true},
output: autogold.Expect(`{"Query":{"Value":"\"basic :[_] access :[_]\"","IsNegated":false,"IsRegExp":false},"IsStructuralPat":true,"CombyRule":"","IsCaseSensitive":false,"FileMatchLimit":30,"Index":"yes","Select":[],"IncludePaths":null,"ExcludePaths":"","IncludeLangs":["TypeScript"],"ExcludeLangs":null,"PathPatternsAreCaseSensitive":false,"PatternMatchesContent":true,"PatternMatchesPath":true,"Languages":["typescript"]}`),
}, {
input: `sgtest lang:magik type:file`,
feat: search.Features{ContentBasedLangFilters: true},