mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 17:51:57 +00:00
codeintel: Speed up syntactic and search-based usages using batch APIs (#64078)
Closes https://linear.app/sourcegraph/issue/GRAPH-771/chunk-syntactic-usage-tasks-and-use-batch-apis-to-handle-chunks This is the last step for properly implementing #63971. We split the `candidateFiles` search produces into chunks and process these in parallel using the new batch api on `MappedIndex`. ## Test plan Existing tests continue passing --------- Co-authored-by: Varun Gandhi <varun.gandhi@sourcegraph.com>
This commit is contained in:
parent
51cf4dcba8
commit
3f8620508b
@ -27,12 +27,21 @@ import (
|
||||
"github.com/sourcegraph/sourcegraph/lib/errors"
|
||||
)
|
||||
|
||||
// SYNTACTIC_USAGES_DOCUMENTS_CHUNK_SIZE is the batch size for SCIP documents and git diffs we load at a time.
|
||||
//
|
||||
// I collected traces for various sizes (on my local machine) and 20 gave me "nice looking" ones.
|
||||
// In general I expect 100 documents to be on the "higher end" of the number of documents to retrieve
|
||||
// for a single syntactic usage search and 5 concurrent queries and git requests seems like a reasonable
|
||||
// trade-off for concurrency vs load.
|
||||
const SYNTACTIC_USAGES_DOCUMENTS_CHUNK_SIZE = 20
|
||||
|
||||
type candidateMatch struct {
|
||||
range_ scip.Range
|
||||
surroundindContent string
|
||||
}
|
||||
|
||||
type candidateFile struct {
|
||||
path core.RepoRelPath
|
||||
matches []candidateMatch // Guaranteed to be sorted by range
|
||||
didSearchEntireFile bool // Or did we hit the search count limit?
|
||||
}
|
||||
@ -60,16 +69,16 @@ func findCandidateOccurrencesViaSearch(
|
||||
trace observation.TraceLogger,
|
||||
client searchclient.SearchClient,
|
||||
args searchArgs,
|
||||
) (orderedmap.OrderedMap[core.RepoRelPath, candidateFile], error) {
|
||||
) ([]candidateFile, error) {
|
||||
if args.identifier == "" {
|
||||
return *orderedmap.New[core.RepoRelPath, candidateFile](), nil
|
||||
return []candidateFile{}, nil
|
||||
}
|
||||
resultMap := *orderedmap.New[core.RepoRelPath, candidateFile]()
|
||||
// TODO: countLimit should be dependent on the number of requested usages, with a configured global limit
|
||||
// For now we're matching the current web app with 500
|
||||
searchResults, err := executeQuery(ctx, client, trace, args, "file", 500, 0)
|
||||
if err != nil {
|
||||
return resultMap, err
|
||||
return []candidateFile{}, err
|
||||
}
|
||||
|
||||
nonFileMatches := 0
|
||||
@ -109,8 +118,10 @@ func findCandidateOccurrencesViaSearch(
|
||||
return s1.range_.CompareStrict(s2.range_)
|
||||
})
|
||||
// OK to use Unchecked method here as search API only returns repo-root relative paths
|
||||
_, alreadyPresent := resultMap.Set(core.NewRepoRelPathUnchecked(path), candidateFile{
|
||||
relPath := core.NewRepoRelPathUnchecked(path)
|
||||
_, alreadyPresent := resultMap.Set(relPath, candidateFile{
|
||||
matches: matches,
|
||||
path: relPath,
|
||||
didSearchEntireFile: !fileMatch.LimitHit,
|
||||
})
|
||||
if alreadyPresent {
|
||||
@ -129,7 +140,11 @@ func findCandidateOccurrencesViaSearch(
|
||||
trace.Warn("Saw mismatched file paths between chunk matches in the same FileMatch. Report this to the search-platform")
|
||||
}
|
||||
|
||||
return resultMap, nil
|
||||
results := make([]candidateFile, 0, resultMap.Len())
|
||||
for pair := resultMap.Oldest(); pair != nil; pair = pair.Next() {
|
||||
results = append(results, pair.Value)
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
type symbolData struct {
|
||||
@ -309,26 +324,12 @@ func symbolAtRange(
|
||||
func findSyntacticMatchesForCandidateFile(
|
||||
ctx context.Context,
|
||||
trace observation.TraceLogger,
|
||||
mappedIndex MappedIndex,
|
||||
filePath core.RepoRelPath,
|
||||
document MappedDocument,
|
||||
candidateFile candidateFile,
|
||||
) ([]SyntacticMatch, []SearchBasedMatch, *SyntacticUsagesError) {
|
||||
documentOpt, docErr := mappedIndex.GetDocument(ctx, filePath)
|
||||
if docErr != nil {
|
||||
return nil, nil, &SyntacticUsagesError{
|
||||
Code: SU_Fatal,
|
||||
UnderlyingError: docErr,
|
||||
}
|
||||
}
|
||||
document, isSome := documentOpt.Get()
|
||||
if !isSome {
|
||||
return nil, nil, &SyntacticUsagesError{
|
||||
Code: SU_NoSyntacticIndex,
|
||||
}
|
||||
}
|
||||
) ([]SyntacticMatch, []SearchBasedMatch) {
|
||||
filePath := candidateFile.path
|
||||
syntacticMatches := []SyntacticMatch{}
|
||||
searchBasedMatches := []SearchBasedMatch{}
|
||||
|
||||
failedTranslationCount := 0
|
||||
for _, candidateMatch := range candidateFile.matches {
|
||||
foundSyntacticMatch := false
|
||||
@ -361,7 +362,7 @@ func findSyntacticMatchesForCandidateFile(
|
||||
if failedTranslationCount != 0 {
|
||||
trace.Info("findSyntacticMatchesForCandidateFile", log.Int("failedTranslationCount", failedTranslationCount))
|
||||
}
|
||||
return syntacticMatches, searchBasedMatches, nil
|
||||
return syntacticMatches, searchBasedMatches
|
||||
}
|
||||
|
||||
func syntacticUsagesImpl(
|
||||
@ -406,21 +407,31 @@ func syntacticUsagesImpl(
|
||||
}
|
||||
}
|
||||
|
||||
tasks := make([]orderedmap.Pair[core.RepoRelPath, candidateFile], 0, candidateMatches.Len())
|
||||
for pair := candidateMatches.Oldest(); pair != nil; pair = pair.Next() {
|
||||
tasks = append(tasks, *pair)
|
||||
}
|
||||
results := conciter.Map(tasks, func(pair *orderedmap.Pair[core.RepoRelPath, candidateFile]) []SyntacticMatch {
|
||||
// We're assuming the index we found earlier contains the relevant SCIP document
|
||||
// see NOTE(id: single-syntactic-upload)
|
||||
syntacticMatches, _, err := findSyntacticMatchesForCandidateFile(ctx, trace, mappedIndex, (*pair).Key, (*pair).Value)
|
||||
tasks, _ := genslices.ChunkEvery(candidateMatches, SYNTACTIC_USAGES_DOCUMENTS_CHUNK_SIZE)
|
||||
results, err := conciter.MapErr(tasks, func(files *[]candidateFile) ([]SyntacticMatch, error) {
|
||||
paths := genslices.Map(*files, func(f candidateFile) core.RepoRelPath {
|
||||
return f.path
|
||||
})
|
||||
documents, err := mappedIndex.GetDocuments(ctx, paths)
|
||||
if err != nil {
|
||||
// TODO: Errors that are not "no index found in the DB" should be reported
|
||||
// TODO: Track metrics about how often this happens (GRAPH-693)
|
||||
return []SyntacticMatch{}
|
||||
return []SyntacticMatch{}, err
|
||||
}
|
||||
return syntacticMatches
|
||||
results := [][]SyntacticMatch{}
|
||||
for _, file := range *files {
|
||||
if document, ok := documents[file.path]; ok {
|
||||
syntacticMatches, _ := findSyntacticMatchesForCandidateFile(ctx, trace, document, file)
|
||||
results = append(results, syntacticMatches)
|
||||
}
|
||||
}
|
||||
return slices.Concat(results...), nil
|
||||
})
|
||||
if err != nil {
|
||||
return SyntacticUsagesResult{}, PreviousSyntacticSearch{}, &SyntacticUsagesError{
|
||||
Code: SU_Fatal,
|
||||
UnderlyingError: err,
|
||||
}
|
||||
}
|
||||
|
||||
return SyntacticUsagesResult{Matches: slices.Concat(results...)}, PreviousSyntacticSearch{
|
||||
MappedIndex: mappedIndex,
|
||||
SymbolName: symbolName,
|
||||
@ -447,7 +458,7 @@ func searchBasedUsagesImpl(
|
||||
}
|
||||
|
||||
var matchResults struct {
|
||||
candidateMatches orderedmap.OrderedMap[core.RepoRelPath, candidateFile]
|
||||
candidateMatches []candidateFile
|
||||
err error
|
||||
}
|
||||
var symbolResults struct {
|
||||
@ -471,30 +482,36 @@ func searchBasedUsagesImpl(
|
||||
candidateMatches := matchResults.candidateMatches
|
||||
candidateSymbols := symbolResults.candidateSymbols
|
||||
|
||||
tasks := make([]orderedmap.Pair[core.RepoRelPath, candidateFile], 0, candidateMatches.Len())
|
||||
for pair := candidateMatches.Oldest(); pair != nil; pair = pair.Next() {
|
||||
tasks = append(tasks, *pair)
|
||||
}
|
||||
|
||||
results := conciter.Map(tasks, func(pair *orderedmap.Pair[core.RepoRelPath, candidateFile]) []SearchBasedMatch {
|
||||
if index, ok := syntacticIndex.Get(); ok {
|
||||
_, searchBasedMatches, err := findSyntacticMatchesForCandidateFile(ctx, trace, index, pair.Key, pair.Value)
|
||||
tasks, _ := genslices.ChunkEvery(candidateMatches, SYNTACTIC_USAGES_DOCUMENTS_CHUNK_SIZE)
|
||||
results := conciter.Map(tasks, func(files *[]candidateFile) []SearchBasedMatch {
|
||||
documents := map[core.RepoRelPath]MappedDocument{}
|
||||
if mappedIndex, ok := syntacticIndex.Get(); ok {
|
||||
paths := genslices.Map(*files, func(f candidateFile) core.RepoRelPath {
|
||||
return f.path
|
||||
})
|
||||
documentsMap, err := mappedIndex.GetDocuments(ctx, paths)
|
||||
if err == nil {
|
||||
return searchBasedMatches
|
||||
} else {
|
||||
trace.Info("findSyntacticMatches failed, skipping filtering search-based results", log.Error(err))
|
||||
documents = documentsMap
|
||||
}
|
||||
}
|
||||
matches := []SearchBasedMatch{}
|
||||
for _, match := range pair.Value.matches {
|
||||
matches = append(matches, SearchBasedMatch{
|
||||
Path: pair.Key,
|
||||
Range: match.range_,
|
||||
SurroundingContent: match.surroundindContent,
|
||||
IsDefinition: candidateSymbols.Contains(pair.Key, match.range_),
|
||||
})
|
||||
results := [][]SearchBasedMatch{}
|
||||
for _, file := range *files {
|
||||
var searchBasedMatches []SearchBasedMatch
|
||||
if document, ok := documents[file.path]; ok {
|
||||
_, searchBasedMatches = findSyntacticMatchesForCandidateFile(ctx, trace, document, file)
|
||||
} else {
|
||||
for _, match := range file.matches {
|
||||
searchBasedMatches = append(searchBasedMatches, SearchBasedMatch{
|
||||
Path: file.path,
|
||||
Range: match.range_,
|
||||
SurroundingContent: match.surroundindContent,
|
||||
IsDefinition: candidateSymbols.Contains(file.path, match.range_),
|
||||
})
|
||||
}
|
||||
}
|
||||
results = append(results, searchBasedMatches)
|
||||
}
|
||||
return matches
|
||||
return slices.Concat(results...)
|
||||
})
|
||||
return slices.Concat(results...), nil
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user