mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 14:51:44 +00:00
Context: match on filename (#61664)
Currently, Cody keyword context only matches on file contents. For many queries it's critical to match the filename too. The reason we only matched contents is purely historical -- I just chose the simplest implementation at first.

Examples of when this is important:
* "who owns third party licenses?" should match `third-party-licenses/CODEOWNERS`
* "grafana version deps.bzl" should clearly match `deps.bzl`

This PR extends the searches to match filename too. It doesn't immediately improve results for most Ownership or Changelog queries, because Zoekt does not count filename matches towards a result's score. I'll fix this in follow-up work.
This commit is contained in:
parent
b5014ccda9
commit
f63cda7b30
@ -40,16 +40,21 @@ go_library(
|
||||
|
||||
go_test(
|
||||
name = "codycontext_test",
|
||||
srcs = ["filter_test.go"],
|
||||
srcs = [
|
||||
"context_test.go",
|
||||
"filter_test.go",
|
||||
],
|
||||
embed = [":codycontext"],
|
||||
deps = [
|
||||
"//internal/api",
|
||||
"//internal/conf",
|
||||
"//internal/gitserver",
|
||||
"//internal/search/result",
|
||||
"//internal/types",
|
||||
"//lib/errors",
|
||||
"//lib/pointers",
|
||||
"//schema",
|
||||
"@com_github_google_go_cmp//cmp",
|
||||
"@com_github_sourcegraph_log//logtest",
|
||||
"@com_github_stretchr_testify//require",
|
||||
],
|
||||
|
||||
@ -3,7 +3,6 @@ package codycontext
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
@ -268,8 +267,8 @@ func (c *CodyContextClient) getKeywordContext(ctx context.Context, args GetConte
|
||||
regexEscapedRepoNames[i] = regexp.QuoteMeta(string(repo.Name))
|
||||
}
|
||||
|
||||
textQuery := fmt.Sprintf(`repo:^%s$ %s content:%s`, query.UnionRegExps(regexEscapedRepoNames), textFileFilter, strconv.Quote(args.Query))
|
||||
codeQuery := fmt.Sprintf(`repo:^%s$ -%s content:%s`, query.UnionRegExps(regexEscapedRepoNames), textFileFilter, strconv.Quote(args.Query))
|
||||
textQuery := fmt.Sprintf(`repo:^%s$ type:file type:path %s %s`, query.UnionRegExps(regexEscapedRepoNames), textFileFilter, args.Query)
|
||||
codeQuery := fmt.Sprintf(`repo:^%s$ type:file type:path -%s %s`, query.UnionRegExps(regexEscapedRepoNames), textFileFilter, args.Query)
|
||||
|
||||
doSearch := func(ctx context.Context, query string, limit int) ([]FileChunkContext, error) {
|
||||
if limit == 0 {
|
||||
@ -344,7 +343,15 @@ func (c *CodyContextClient) getKeywordContext(ctx context.Context, args GetConte
|
||||
|
||||
func fileMatchToContextMatches(fm *result.FileMatch) []FileChunkContext {
|
||||
if len(fm.ChunkMatches) == 0 {
|
||||
return nil
|
||||
// If this is a filename-only match, we return the first 20 lines of the file.
|
||||
return []FileChunkContext{{
|
||||
RepoName: fm.Repo.Name,
|
||||
RepoID: fm.Repo.ID,
|
||||
CommitID: fm.CommitID,
|
||||
Path: fm.Path,
|
||||
StartLine: 0,
|
||||
EndLine: 20,
|
||||
}}
|
||||
}
|
||||
|
||||
// To provide some context variety, we just use the top-ranked
|
||||
|
||||
75
internal/codycontext/context_test.go
Normal file
75
internal/codycontext/context_test.go
Normal file
@ -0,0 +1,75 @@
|
||||
package codycontext
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/result"
|
||||
"github.com/sourcegraph/sourcegraph/internal/types"
|
||||
)
|
||||
|
||||
func TestFileMatchToContextMatches(t *testing.T) {
|
||||
cases := []struct {
|
||||
fileMatch *result.FileMatch
|
||||
want []FileChunkContext
|
||||
}{
|
||||
{
|
||||
// No chunk matches returns first 20 lines
|
||||
fileMatch: &result.FileMatch{
|
||||
File: result.File{
|
||||
Path: "main.go",
|
||||
CommitID: "abc123",
|
||||
Repo: types.MinimalRepo{
|
||||
Name: "repo",
|
||||
ID: 1,
|
||||
},
|
||||
},
|
||||
ChunkMatches: nil,
|
||||
},
|
||||
want: []FileChunkContext{{
|
||||
RepoName: "repo",
|
||||
RepoID: 1,
|
||||
CommitID: "abc123",
|
||||
Path: "main.go",
|
||||
StartLine: 0,
|
||||
EndLine: 20,
|
||||
}},
|
||||
},
|
||||
{
|
||||
// With chunk match returns context around first chunk
|
||||
fileMatch: &result.FileMatch{
|
||||
File: result.File{
|
||||
Path: "main.go",
|
||||
CommitID: "abc123",
|
||||
Repo: types.MinimalRepo{
|
||||
Name: "repo",
|
||||
ID: 1,
|
||||
},
|
||||
},
|
||||
ChunkMatches: []result.ChunkMatch{{
|
||||
Content: "first chunk of content",
|
||||
ContentStart: result.Location{Line: 90, Column: 2},
|
||||
}, {
|
||||
Content: "second chunk of content",
|
||||
ContentStart: result.Location{Line: 37, Column: 10},
|
||||
}},
|
||||
},
|
||||
want: []FileChunkContext{{
|
||||
RepoName: "repo",
|
||||
RepoID: 1,
|
||||
CommitID: "abc123",
|
||||
Path: "main.go",
|
||||
StartLine: 85,
|
||||
EndLine: 105,
|
||||
}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
got := fileMatchToContextMatches(tc.fileMatch)
|
||||
if diff := cmp.Diff(tc.want, got); diff != "" {
|
||||
t.Errorf("mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -31,11 +31,8 @@ func nodeToPatternsAndParameters(rootNode query.Node) ([]string, []query.Paramet
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
patterns := []string{}
|
||||
parameters := []query.Parameter{
|
||||
// Only search file content
|
||||
{Field: query.FieldType, Value: "file"},
|
||||
}
|
||||
var patterns []string
|
||||
var parameters []query.Parameter
|
||||
|
||||
switch operator.Kind {
|
||||
case query.And:
|
||||
@ -49,7 +46,7 @@ func nodeToPatternsAndParameters(rootNode query.Node) ([]string, []query.Paramet
|
||||
if op.Field == query.FieldContent {
|
||||
// Split any content field on white space into a set of patterns
|
||||
patterns = append(patterns, strings.Fields(op.Value)...)
|
||||
} else if op.Field != query.FieldCase && op.Field != query.FieldType {
|
||||
} else if op.Field != query.FieldCase {
|
||||
parameters = append(parameters, op)
|
||||
}
|
||||
case query.Pattern:
|
||||
|
||||
@ -50,32 +50,32 @@ func TestQueryStringToKeywordQuery(t *testing.T) {
|
||||
}{
|
||||
{
|
||||
query: "context:global abc",
|
||||
wantQuery: autogold.Expect("type:file context:global abc"),
|
||||
wantQuery: autogold.Expect("context:global abc"),
|
||||
wantPatterns: autogold.Expect([]string{"abc"}),
|
||||
},
|
||||
{
|
||||
query: "abc def",
|
||||
wantQuery: autogold.Expect("type:file (abc OR def)"),
|
||||
wantQuery: autogold.Expect("(abc OR def)"),
|
||||
wantPatterns: autogold.Expect([]string{"abc", "def"}),
|
||||
},
|
||||
{
|
||||
query: "context:global lang:Go how to unzip file",
|
||||
wantQuery: autogold.Expect("type:file context:global lang:Go (unzip OR file)"),
|
||||
wantQuery: autogold.Expect("context:global lang:Go (unzip OR file)"),
|
||||
wantPatterns: autogold.Expect([]string{"unzip", "file"}),
|
||||
},
|
||||
{
|
||||
query: "K MEANS CLUSTERING in python",
|
||||
wantQuery: autogold.Expect("type:file (cluster OR python)"),
|
||||
wantQuery: autogold.Expect("(cluster OR python)"),
|
||||
wantPatterns: autogold.Expect([]string{"cluster", "python"}),
|
||||
},
|
||||
{
|
||||
query: "context:global the who",
|
||||
wantQuery: autogold.Expect("type:file context:global"),
|
||||
wantQuery: autogold.Expect("context:global"),
|
||||
wantPatterns: autogold.Expect([]string{}),
|
||||
},
|
||||
{
|
||||
query: `outer content:"inner {with} (special) ^characters$ and keywords like file or repo"`,
|
||||
wantQuery: autogold.Expect("type:file (special OR ^characters$ OR keyword OR file OR repo OR outer)"),
|
||||
wantQuery: autogold.Expect("(special OR ^characters$ OR keyword OR file OR repo OR outer)"),
|
||||
wantPatterns: autogold.Expect([]string{
|
||||
"special", "^characters$", "keyword", "file",
|
||||
"repo",
|
||||
|
||||
Loading…
Reference in New Issue
Block a user