chore: Make location fetching queries more uniform (#64026)

This patch changes the location querying code so that:
1. We're populating structures corresponding to SCIP instead of LSIF
   (with "scheme" and "identifier" inside "MonikerData")
2. We avoid repeatedly allocating a constant string 'scip' for the
    scheme only to throw it away later.
3. We make the two queries and their scanning code more similar for
    easier comparison. When I land precise usagesForSymbol, I will
    de-duplicate some of the scanning code between these two queries.

I have avoided renaming all of the local variables to avoid creating
more noise.

## Test plan

Covered by existing tests.
This commit is contained in:
Varun Gandhi 2024-07-24 17:14:22 +08:00 committed by GitHub
parent 9dd901f3c9
commit dc7da57edb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 111 additions and 87 deletions

View File

@ -7,6 +7,8 @@
# If this list is empty, all structs are tested.
# Default: []
include_types:
- '.+Locus$'
- '.+Loci$'
- '.+UploadMatchingOptions'
# Same as above, but for exclusion.

View File

@ -7,11 +7,13 @@ go_library(
"cleanup.go",
"data_store.go",
"insert.go",
"locus.go",
"observability.go",
],
importpath = "github.com/sourcegraph/sourcegraph/internal/codeintel/codegraph",
visibility = ["//:__subpackages__"],
deps = [
"//internal/codeintel/core",
"//internal/codeintel/shared",
"//internal/codeintel/shared/ranges",
"//internal/codeintel/shared/trie",

View File

@ -0,0 +1,27 @@
package codegraph
import (
"github.com/sourcegraph/scip/bindings/go/scip"
"github.com/sourcegraph/sourcegraph/internal/codeintel/core"
)
// Locus represents a source range within a document as found in the DB.
//
// We will eventually rename this to Location once we get rid of the
// existing Location, LocationData etc. types.
type Locus struct {
Path core.UploadRelPath
Range scip.Range
}
// UploadLoci groups the loci belonging to a single upload.
type UploadLoci struct {
UploadID int
Loci []Locus
}
// UploadSymbolLoci groups the loci for a single symbol within a single
// upload.
type UploadSymbolLoci struct {
UploadID int
Symbol string
Loci []Locus
}

View File

@ -17,6 +17,7 @@ go_library(
tags = [TAG_PLATFORM_GRAPH],
visibility = ["//:__subpackages__"],
deps = [
"//internal/codeintel/codegraph",
"//internal/codeintel/codenav/shared",
"//internal/codeintel/core",
"//internal/codeintel/shared",

View File

@ -11,7 +11,6 @@ import (
"go.opentelemetry.io/otel/attribute"
"github.com/sourcegraph/sourcegraph/internal/codeintel/codenav/shared"
"github.com/sourcegraph/sourcegraph/internal/codeintel/core"
"github.com/sourcegraph/sourcegraph/internal/collections"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/lib/codeintel/precise"
@ -42,43 +41,39 @@ func (s *store) GetBulkMonikerLocations(ctx context.Context, usageKind shared.Us
}
query := sqlf.Sprintf(
bulkMonikerResultsQuery,
bulkSymbolUsagesQuery,
pq.Array(symbolNames),
pq.Array(uploadIDs),
sqlf.Sprintf(usageKind.RangesColumnName()),
sqlf.Sprintf(usageKind.RangesColumnName()),
)
locationData, err := s.scanQualifiedMonikerLocations(s.db.Query(ctx, query))
locationData, err := s.scanUploadSymbolLoci(s.db.Query(ctx, query))
if err != nil {
return nil, 0, err
}
totalCount = 0
for _, monikerLocations := range locationData {
totalCount += len(monikerLocations.Locations)
for _, data := range locationData {
totalCount += len(data.Loci)
}
trace.AddEvent("TODO Domain Owner",
attribute.Int("numUploads", len(locationData)),
attribute.Int("totalCount", totalCount))
max := totalCount
if totalCount > limit {
max = limit
}
locations := make([]shared.Location, 0, max)
locations := make([]shared.Location, 0, min(totalCount, limit))
outer:
for _, monikerLocations := range locationData {
for _, row := range monikerLocations.Locations {
for _, uploadSymbolLoci := range locationData {
for _, locus := range uploadSymbolLoci.Loci {
offset--
if offset >= 0 {
continue
}
locations = append(locations, shared.Location{
UploadID: monikerLocations.UploadID,
Path: core.NewUploadRelPathUnchecked(row.DocumentPath),
Range: shared.NewRange(row.StartLine, row.StartCharacter, row.EndLine, row.EndCharacter),
UploadID: uploadSymbolLoci.UploadID,
Path: locus.Path,
Range: shared.TranslateRange(locus.Range),
})
if len(locations) >= limit {
@ -91,18 +86,20 @@ outer:
return locations, totalCount, nil
}
const bulkMonikerResultsQuery = `
const bulkSymbolUsagesQuery = `
WITH RECURSIVE
` + symbolIDsCTEs + `
SELECT
ss.upload_id,
'scip',
msn.symbol_name,
%s,
document_path
FROM matching_symbol_names msn
JOIN codeintel_scip_symbols ss ON ss.upload_id = msn.upload_id AND ss.symbol_id = msn.id
JOIN codeintel_scip_document_lookup dl ON dl.id = ss.document_lookup_id
FROM codeintel_scip_symbols ss
JOIN codeintel_scip_document_lookup dl
ON dl.id = ss.document_lookup_id
JOIN matching_symbol_names msn
ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id
WHERE ss.%s IS NOT NULL
ORDER BY ss.upload_id, msn.symbol_name
`
@ -409,7 +406,7 @@ func (s *store) GetMinimalBulkMonikerLocations(ctx context.Context, usageKind sh
}
query := sqlf.Sprintf(
minimalBulkMonikerResultsQuery,
minimalBulkSymbolUsagesQuery,
pq.Array(symbolNames),
pq.Array(uploadIDs),
sqlf.Sprintf(usageKind.RangesColumnName()),
@ -417,37 +414,32 @@ func (s *store) GetMinimalBulkMonikerLocations(ctx context.Context, usageKind sh
sqlf.Join(skipConds, ", "),
)
locationData, err := s.scanDeduplicatedQualifiedMonikerLocations(s.db.Query(ctx, query))
locationData, err := s.scanDeduplicatedUploadLoci(s.db.Query(ctx, query))
if err != nil {
return nil, 0, err
}
totalCount = 0
for _, monikerLocations := range locationData {
totalCount += len(monikerLocations.Locations)
for _, data := range locationData {
totalCount += len(data.Loci)
}
trace.AddEvent("TODO Domain Owner",
attribute.Int("numUploads", len(locationData)),
attribute.Int("totalCount", totalCount))
max := totalCount
if totalCount > limit {
max = limit
}
locations := make([]shared.Location, 0, max)
locations := make([]shared.Location, 0, min(totalCount, limit))
outer:
for _, monikerLocations := range locationData {
for _, row := range monikerLocations.Locations {
for _, uploadLoci := range locationData {
for _, locus := range uploadLoci.Loci {
offset--
if offset >= 0 {
continue
}
locations = append(locations, shared.Location{
UploadID: monikerLocations.UploadID,
Path: core.NewUploadRelPathUnchecked(row.DocumentPath),
Range: shared.NewRange(row.StartLine, row.StartCharacter, row.EndLine, row.EndCharacter),
UploadID: uploadLoci.UploadID,
Path: locus.Path,
Range: shared.TranslateRange(locus.Range),
})
if len(locations) >= limit {
@ -460,7 +452,7 @@ outer:
return locations, totalCount, nil
}
const minimalBulkMonikerResultsQuery = `
const minimalBulkSymbolUsagesQuery = `
WITH RECURSIVE
` + symbolIDsCTEs + `
SELECT
@ -468,8 +460,10 @@ SELECT
%s,
document_path
FROM codeintel_scip_symbols ss
JOIN codeintel_scip_document_lookup dl ON dl.id = ss.document_lookup_id
JOIN matching_symbol_names msn ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id
JOIN codeintel_scip_document_lookup dl
ON dl.id = ss.document_lookup_id
JOIN matching_symbol_names msn
ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id
WHERE
ss.%s IS NOT NULL AND
(ss.upload_id, dl.document_path) NOT IN (%s)

View File

@ -3,11 +3,12 @@ package lsifstore
import (
"bytes"
"database/sql"
"fmt"
"github.com/sourcegraph/scip/bindings/go/scip"
"google.golang.org/protobuf/proto"
"github.com/sourcegraph/sourcegraph/internal/codeintel/codegraph"
"github.com/sourcegraph/sourcegraph/internal/codeintel/core"
"github.com/sourcegraph/sourcegraph/internal/codeintel/shared/ranges"
"github.com/sourcegraph/sourcegraph/internal/codeintel/uploads/shared"
"github.com/sourcegraph/sourcegraph/internal/collections"
@ -18,7 +19,6 @@ import (
type qualifiedDocumentData struct {
UploadID int
Path string
LSIFData *precise.DocumentData
SCIPData *scip.Document
}
@ -91,15 +91,16 @@ type qualifiedMonikerLocations struct {
precise.MonikerLocations
}
func (s *store) scanQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_ []qualifiedMonikerLocations, err error) {
// Post-condition: number of entries == number of unique (upload, symbol, document) triples.
func (s *store) scanUploadSymbolLoci(rows *sql.Rows, queryErr error) (_ []codegraph.UploadSymbolLoci, err error) {
if queryErr != nil {
return nil, queryErr
}
defer func() { err = basestore.CloseRows(rows, err) }()
var values []qualifiedMonikerLocations
var values []codegraph.UploadSymbolLoci
for rows.Next() {
record, err := s.scanSingleQualifiedMonikerLocationsObject(rows)
record, err := s.scanSingleUploadSymbolLoci(rows)
if err != nil {
return nil, err
}
@ -110,93 +111,90 @@ func (s *store) scanQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_
return values, nil
}
func (s *store) scanSingleQualifiedMonikerLocationsObject(rows *sql.Rows) (qualifiedMonikerLocations, error) {
func (s *store) scanSingleUploadSymbolLoci(rows *sql.Rows) (codegraph.UploadSymbolLoci, error) {
var uploadID int
var symbol string
var customEncodedRanges []byte
var documentPath string
var scipPayload []byte
var record qualifiedMonikerLocations
if err := rows.Scan(&record.UploadID, &record.Scheme, &record.Identifier, &scipPayload, &documentPath); err != nil {
return qualifiedMonikerLocations{}, err
if err := rows.Scan(&uploadID, &symbol, &customEncodedRanges, &documentPath); err != nil {
return codegraph.UploadSymbolLoci{}, err
}
ranges, err := ranges.DecodeRanges(scipPayload)
ranges, err := ranges.DecodeRanges(customEncodedRanges)
if err != nil {
return qualifiedMonikerLocations{}, err
return codegraph.UploadSymbolLoci{}, err
}
locations := make([]precise.LocationData, 0, len(ranges))
locations := make([]codegraph.Locus, 0, len(ranges))
for _, r := range ranges {
locations = append(locations, precise.LocationData{
DocumentPath: documentPath,
StartLine: int(r.Start.Line),
StartCharacter: int(r.Start.Character),
EndLine: int(r.End.Line),
EndCharacter: int(r.End.Character),
locations = append(locations, codegraph.Locus{
Path: core.NewUploadRelPathUnchecked(documentPath),
Range: scip.NewRangeUnchecked([]int32{r.Start.Line, r.Start.Character, r.End.Line, r.End.Character}),
})
}
record.Locations = locations
return record, nil
return codegraph.UploadSymbolLoci{
UploadID: uploadID,
Symbol: symbol,
Loci: locations,
}, nil
}
//
//
func (s *store) scanDeduplicatedQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_ []qualifiedMonikerLocations, err error) {
// Post-condition: Returns one entry per upload.
func (s *store) scanDeduplicatedUploadLoci(rows *sql.Rows, queryErr error) (_ []codegraph.UploadLoci, err error) {
if queryErr != nil {
return nil, queryErr
}
defer func() { err = basestore.CloseRows(rows, err) }()
var values []qualifiedMonikerLocations
var values []codegraph.UploadLoci
for rows.Next() {
record, err := s.scanSingleMinimalQualifiedMonikerLocationsObject(rows)
record, err := s.scanSingleUploadLoci(rows)
if err != nil {
return nil, err
}
// TODO: Also use the ordering guarantees for document paths + range sorting
// on insertion to replace the Deduplicate with some simple checks here.
if n := len(values) - 1; n >= 0 && values[n].UploadID == record.UploadID {
values[n].Locations = append(values[n].Locations, record.Locations...)
values[n].Loci = append(values[n].Loci, record.Loci...)
} else {
values = append(values, record)
}
}
for i := range values {
values[i].Locations = collections.DeduplicateBy(values[i].Locations, locationDataKey)
values[i].Loci = collections.Deduplicate(values[i].Loci)
}
return values, nil
}
func (s *store) scanSingleMinimalQualifiedMonikerLocationsObject(rows *sql.Rows) (qualifiedMonikerLocations, error) {
func (s *store) scanSingleUploadLoci(rows *sql.Rows) (codegraph.UploadLoci, error) {
var uploadID int
var customEncodedRanges []byte
var documentPath string
var scipPayload []byte
var record qualifiedMonikerLocations
if err := rows.Scan(&record.UploadID, &scipPayload, &documentPath); err != nil {
return qualifiedMonikerLocations{}, err
if err := rows.Scan(&uploadID, &customEncodedRanges, &documentPath); err != nil {
return codegraph.UploadLoci{}, err
}
ranges, err := ranges.DecodeRanges(scipPayload)
ranges, err := ranges.DecodeRanges(customEncodedRanges)
if err != nil {
return qualifiedMonikerLocations{}, err
return codegraph.UploadLoci{}, err
}
locations := make([]precise.LocationData, 0, len(ranges))
locations := make([]codegraph.Locus, 0, len(ranges))
for _, r := range ranges {
locations = append(locations, precise.LocationData{
DocumentPath: documentPath,
StartLine: int(r.Start.Line),
StartCharacter: int(r.Start.Character),
EndLine: int(r.End.Line),
EndCharacter: int(r.End.Character),
locations = append(locations, codegraph.Locus{
Path: core.NewUploadRelPathUnchecked(documentPath),
Range: scip.NewRangeUnchecked([]int32{r.Start.Line, r.Start.Character, r.End.Line, r.End.Character}),
})
}
record.Locations = locations
return record, nil
}
func locationDataKey(v precise.LocationData) string {
return fmt.Sprintf("%s:%d:%d:%d:%d", v.DocumentPath, v.StartLine, v.StartCharacter, v.EndLine, v.EndCharacter)
return codegraph.UploadLoci{
UploadID: uploadID,
Loci: locations,
}, nil
}