mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 16:51:55 +00:00
chore: Make location fetching queries more uniform (#64026)
This patch changes the location querying code so that:
1. We're populating structures corresponding to SCIP instead of LSIF
(with "scheme" and "identifier" inside "MonikerData")
2. Avoid repeatedly allocating a constant string 'scip' for the scheme
only to throw it away later.
3. Makes the two queries and their scanning code more similar for easier
comparison. When I land precise usagesForSymbol, I will de-duplicate
some of the scanning code between these two queries.
I have avoided renaming all of the local variables to avoid creating
more noise.
## Test plan
Covered by existing tests.
This commit is contained in:
parent
9dd901f3c9
commit
dc7da57edb
@ -7,6 +7,8 @@
|
||||
# If this list is empty, all structs are tested.
|
||||
# Default: []
|
||||
include_types:
|
||||
- '.+Locus$'
|
||||
- '.+Loci$'
|
||||
- '.+UploadMatchingOptions'
|
||||
|
||||
# Same as above, but for exclusion.
|
||||
|
||||
@ -7,11 +7,13 @@ go_library(
|
||||
"cleanup.go",
|
||||
"data_store.go",
|
||||
"insert.go",
|
||||
"locus.go",
|
||||
"observability.go",
|
||||
],
|
||||
importpath = "github.com/sourcegraph/sourcegraph/internal/codeintel/codegraph",
|
||||
visibility = ["//:__subpackages__"],
|
||||
deps = [
|
||||
"//internal/codeintel/core",
|
||||
"//internal/codeintel/shared",
|
||||
"//internal/codeintel/shared/ranges",
|
||||
"//internal/codeintel/shared/trie",
|
||||
|
||||
27
internal/codeintel/codegraph/locus.go
Normal file
27
internal/codeintel/codegraph/locus.go
Normal file
@ -0,0 +1,27 @@
|
||||
package codegraph
|
||||
|
||||
import (
|
||||
"github.com/sourcegraph/scip/bindings/go/scip"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/codeintel/core"
|
||||
)
|
||||
|
||||
// Locus represents a source range within a document as found in the DB.
|
||||
//
|
||||
// We will eventually rename this to Location once we get rid of the
|
||||
// existing Location, LocationData etc. types.
|
||||
type Locus struct {
|
||||
Path core.UploadRelPath
|
||||
Range scip.Range
|
||||
}
|
||||
|
||||
type UploadLoci struct {
|
||||
UploadID int
|
||||
Loci []Locus
|
||||
}
|
||||
|
||||
type UploadSymbolLoci struct {
|
||||
UploadID int
|
||||
Symbol string
|
||||
Loci []Locus
|
||||
}
|
||||
@ -17,6 +17,7 @@ go_library(
|
||||
tags = [TAG_PLATFORM_GRAPH],
|
||||
visibility = ["//:__subpackages__"],
|
||||
deps = [
|
||||
"//internal/codeintel/codegraph",
|
||||
"//internal/codeintel/codenav/shared",
|
||||
"//internal/codeintel/core",
|
||||
"//internal/codeintel/shared",
|
||||
|
||||
@ -11,7 +11,6 @@ import (
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/codeintel/codenav/shared"
|
||||
"github.com/sourcegraph/sourcegraph/internal/codeintel/core"
|
||||
"github.com/sourcegraph/sourcegraph/internal/collections"
|
||||
"github.com/sourcegraph/sourcegraph/internal/observation"
|
||||
"github.com/sourcegraph/sourcegraph/lib/codeintel/precise"
|
||||
@ -42,43 +41,39 @@ func (s *store) GetBulkMonikerLocations(ctx context.Context, usageKind shared.Us
|
||||
}
|
||||
|
||||
query := sqlf.Sprintf(
|
||||
bulkMonikerResultsQuery,
|
||||
bulkSymbolUsagesQuery,
|
||||
pq.Array(symbolNames),
|
||||
pq.Array(uploadIDs),
|
||||
sqlf.Sprintf(usageKind.RangesColumnName()),
|
||||
sqlf.Sprintf(usageKind.RangesColumnName()),
|
||||
)
|
||||
|
||||
locationData, err := s.scanQualifiedMonikerLocations(s.db.Query(ctx, query))
|
||||
locationData, err := s.scanUploadSymbolLoci(s.db.Query(ctx, query))
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
totalCount = 0
|
||||
for _, monikerLocations := range locationData {
|
||||
totalCount += len(monikerLocations.Locations)
|
||||
for _, data := range locationData {
|
||||
totalCount += len(data.Loci)
|
||||
}
|
||||
trace.AddEvent("TODO Domain Owner",
|
||||
attribute.Int("numUploads", len(locationData)),
|
||||
attribute.Int("totalCount", totalCount))
|
||||
|
||||
max := totalCount
|
||||
if totalCount > limit {
|
||||
max = limit
|
||||
}
|
||||
|
||||
locations := make([]shared.Location, 0, max)
|
||||
locations := make([]shared.Location, 0, min(totalCount, limit))
|
||||
outer:
|
||||
for _, monikerLocations := range locationData {
|
||||
for _, row := range monikerLocations.Locations {
|
||||
for _, uploadSymbolLoci := range locationData {
|
||||
for _, locus := range uploadSymbolLoci.Loci {
|
||||
offset--
|
||||
if offset >= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
locations = append(locations, shared.Location{
|
||||
UploadID: monikerLocations.UploadID,
|
||||
Path: core.NewUploadRelPathUnchecked(row.DocumentPath),
|
||||
Range: shared.NewRange(row.StartLine, row.StartCharacter, row.EndLine, row.EndCharacter),
|
||||
UploadID: uploadSymbolLoci.UploadID,
|
||||
Path: locus.Path,
|
||||
Range: shared.TranslateRange(locus.Range),
|
||||
})
|
||||
|
||||
if len(locations) >= limit {
|
||||
@ -91,18 +86,20 @@ outer:
|
||||
return locations, totalCount, nil
|
||||
}
|
||||
|
||||
const bulkMonikerResultsQuery = `
|
||||
const bulkSymbolUsagesQuery = `
|
||||
WITH RECURSIVE
|
||||
` + symbolIDsCTEs + `
|
||||
SELECT
|
||||
ss.upload_id,
|
||||
'scip',
|
||||
msn.symbol_name,
|
||||
%s,
|
||||
document_path
|
||||
FROM matching_symbol_names msn
|
||||
JOIN codeintel_scip_symbols ss ON ss.upload_id = msn.upload_id AND ss.symbol_id = msn.id
|
||||
JOIN codeintel_scip_document_lookup dl ON dl.id = ss.document_lookup_id
|
||||
FROM codeintel_scip_symbols ss
|
||||
JOIN codeintel_scip_document_lookup dl
|
||||
ON dl.id = ss.document_lookup_id
|
||||
JOIN matching_symbol_names msn
|
||||
ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id
|
||||
WHERE ss.%s IS NOT NULL
|
||||
ORDER BY ss.upload_id, msn.symbol_name
|
||||
`
|
||||
|
||||
@ -409,7 +406,7 @@ func (s *store) GetMinimalBulkMonikerLocations(ctx context.Context, usageKind sh
|
||||
}
|
||||
|
||||
query := sqlf.Sprintf(
|
||||
minimalBulkMonikerResultsQuery,
|
||||
minimalBulkSymbolUsagesQuery,
|
||||
pq.Array(symbolNames),
|
||||
pq.Array(uploadIDs),
|
||||
sqlf.Sprintf(usageKind.RangesColumnName()),
|
||||
@ -417,37 +414,32 @@ func (s *store) GetMinimalBulkMonikerLocations(ctx context.Context, usageKind sh
|
||||
sqlf.Join(skipConds, ", "),
|
||||
)
|
||||
|
||||
locationData, err := s.scanDeduplicatedQualifiedMonikerLocations(s.db.Query(ctx, query))
|
||||
locationData, err := s.scanDeduplicatedUploadLoci(s.db.Query(ctx, query))
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
totalCount = 0
|
||||
for _, monikerLocations := range locationData {
|
||||
totalCount += len(monikerLocations.Locations)
|
||||
for _, data := range locationData {
|
||||
totalCount += len(data.Loci)
|
||||
}
|
||||
trace.AddEvent("TODO Domain Owner",
|
||||
attribute.Int("numUploads", len(locationData)),
|
||||
attribute.Int("totalCount", totalCount))
|
||||
|
||||
max := totalCount
|
||||
if totalCount > limit {
|
||||
max = limit
|
||||
}
|
||||
|
||||
locations := make([]shared.Location, 0, max)
|
||||
locations := make([]shared.Location, 0, min(totalCount, limit))
|
||||
outer:
|
||||
for _, monikerLocations := range locationData {
|
||||
for _, row := range monikerLocations.Locations {
|
||||
for _, uploadLoci := range locationData {
|
||||
for _, locus := range uploadLoci.Loci {
|
||||
offset--
|
||||
if offset >= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
locations = append(locations, shared.Location{
|
||||
UploadID: monikerLocations.UploadID,
|
||||
Path: core.NewUploadRelPathUnchecked(row.DocumentPath),
|
||||
Range: shared.NewRange(row.StartLine, row.StartCharacter, row.EndLine, row.EndCharacter),
|
||||
UploadID: uploadLoci.UploadID,
|
||||
Path: locus.Path,
|
||||
Range: shared.TranslateRange(locus.Range),
|
||||
})
|
||||
|
||||
if len(locations) >= limit {
|
||||
@ -460,7 +452,7 @@ outer:
|
||||
return locations, totalCount, nil
|
||||
}
|
||||
|
||||
const minimalBulkMonikerResultsQuery = `
|
||||
const minimalBulkSymbolUsagesQuery = `
|
||||
WITH RECURSIVE
|
||||
` + symbolIDsCTEs + `
|
||||
SELECT
|
||||
@ -468,8 +460,10 @@ SELECT
|
||||
%s,
|
||||
document_path
|
||||
FROM codeintel_scip_symbols ss
|
||||
JOIN codeintel_scip_document_lookup dl ON dl.id = ss.document_lookup_id
|
||||
JOIN matching_symbol_names msn ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id
|
||||
JOIN codeintel_scip_document_lookup dl
|
||||
ON dl.id = ss.document_lookup_id
|
||||
JOIN matching_symbol_names msn
|
||||
ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id
|
||||
WHERE
|
||||
ss.%s IS NOT NULL AND
|
||||
(ss.upload_id, dl.document_path) NOT IN (%s)
|
||||
|
||||
@ -3,11 +3,12 @@ package lsifstore
|
||||
import (
|
||||
"bytes"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
|
||||
"github.com/sourcegraph/scip/bindings/go/scip"
|
||||
"google.golang.org/protobuf/proto"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/codeintel/codegraph"
|
||||
"github.com/sourcegraph/sourcegraph/internal/codeintel/core"
|
||||
"github.com/sourcegraph/sourcegraph/internal/codeintel/shared/ranges"
|
||||
"github.com/sourcegraph/sourcegraph/internal/codeintel/uploads/shared"
|
||||
"github.com/sourcegraph/sourcegraph/internal/collections"
|
||||
@ -18,7 +19,6 @@ import (
|
||||
type qualifiedDocumentData struct {
|
||||
UploadID int
|
||||
Path string
|
||||
LSIFData *precise.DocumentData
|
||||
SCIPData *scip.Document
|
||||
}
|
||||
|
||||
@ -91,15 +91,16 @@ type qualifiedMonikerLocations struct {
|
||||
precise.MonikerLocations
|
||||
}
|
||||
|
||||
func (s *store) scanQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_ []qualifiedMonikerLocations, err error) {
|
||||
// Post-condition: number of entries == number of unique (upload, symbol, document) triples.
|
||||
func (s *store) scanUploadSymbolLoci(rows *sql.Rows, queryErr error) (_ []codegraph.UploadSymbolLoci, err error) {
|
||||
if queryErr != nil {
|
||||
return nil, queryErr
|
||||
}
|
||||
defer func() { err = basestore.CloseRows(rows, err) }()
|
||||
|
||||
var values []qualifiedMonikerLocations
|
||||
var values []codegraph.UploadSymbolLoci
|
||||
for rows.Next() {
|
||||
record, err := s.scanSingleQualifiedMonikerLocationsObject(rows)
|
||||
record, err := s.scanSingleUploadSymbolLoci(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -110,93 +111,90 @@ func (s *store) scanQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_
|
||||
return values, nil
|
||||
}
|
||||
|
||||
func (s *store) scanSingleQualifiedMonikerLocationsObject(rows *sql.Rows) (qualifiedMonikerLocations, error) {
|
||||
func (s *store) scanSingleUploadSymbolLoci(rows *sql.Rows) (codegraph.UploadSymbolLoci, error) {
|
||||
var uploadID int
|
||||
var symbol string
|
||||
var customEncodedRanges []byte
|
||||
var documentPath string
|
||||
var scipPayload []byte
|
||||
var record qualifiedMonikerLocations
|
||||
|
||||
if err := rows.Scan(&record.UploadID, &record.Scheme, &record.Identifier, &scipPayload, &documentPath); err != nil {
|
||||
return qualifiedMonikerLocations{}, err
|
||||
if err := rows.Scan(&uploadID, &symbol, &customEncodedRanges, &documentPath); err != nil {
|
||||
return codegraph.UploadSymbolLoci{}, err
|
||||
}
|
||||
|
||||
ranges, err := ranges.DecodeRanges(scipPayload)
|
||||
ranges, err := ranges.DecodeRanges(customEncodedRanges)
|
||||
if err != nil {
|
||||
return qualifiedMonikerLocations{}, err
|
||||
return codegraph.UploadSymbolLoci{}, err
|
||||
}
|
||||
|
||||
locations := make([]precise.LocationData, 0, len(ranges))
|
||||
locations := make([]codegraph.Locus, 0, len(ranges))
|
||||
for _, r := range ranges {
|
||||
locations = append(locations, precise.LocationData{
|
||||
DocumentPath: documentPath,
|
||||
StartLine: int(r.Start.Line),
|
||||
StartCharacter: int(r.Start.Character),
|
||||
EndLine: int(r.End.Line),
|
||||
EndCharacter: int(r.End.Character),
|
||||
locations = append(locations, codegraph.Locus{
|
||||
Path: core.NewUploadRelPathUnchecked(documentPath),
|
||||
Range: scip.NewRangeUnchecked([]int32{r.Start.Line, r.Start.Character, r.End.Line, r.End.Character}),
|
||||
})
|
||||
}
|
||||
|
||||
record.Locations = locations
|
||||
return record, nil
|
||||
return codegraph.UploadSymbolLoci{
|
||||
UploadID: uploadID,
|
||||
Symbol: symbol,
|
||||
Loci: locations,
|
||||
}, nil
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
|
||||
func (s *store) scanDeduplicatedQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_ []qualifiedMonikerLocations, err error) {
|
||||
// Post-condition: Returns one entry per upload.
|
||||
func (s *store) scanDeduplicatedUploadLoci(rows *sql.Rows, queryErr error) (_ []codegraph.UploadLoci, err error) {
|
||||
if queryErr != nil {
|
||||
return nil, queryErr
|
||||
}
|
||||
defer func() { err = basestore.CloseRows(rows, err) }()
|
||||
|
||||
var values []qualifiedMonikerLocations
|
||||
var values []codegraph.UploadLoci
|
||||
for rows.Next() {
|
||||
record, err := s.scanSingleMinimalQualifiedMonikerLocationsObject(rows)
|
||||
record, err := s.scanSingleUploadLoci(rows)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TODO: Also use the ordering guarantees for document paths + range sorting
|
||||
// on insertion to replace the Deduplicate with some simple checks here.
|
||||
if n := len(values) - 1; n >= 0 && values[n].UploadID == record.UploadID {
|
||||
values[n].Locations = append(values[n].Locations, record.Locations...)
|
||||
values[n].Loci = append(values[n].Loci, record.Loci...)
|
||||
} else {
|
||||
values = append(values, record)
|
||||
}
|
||||
}
|
||||
for i := range values {
|
||||
values[i].Locations = collections.DeduplicateBy(values[i].Locations, locationDataKey)
|
||||
values[i].Loci = collections.Deduplicate(values[i].Loci)
|
||||
}
|
||||
|
||||
return values, nil
|
||||
}
|
||||
|
||||
func (s *store) scanSingleMinimalQualifiedMonikerLocationsObject(rows *sql.Rows) (qualifiedMonikerLocations, error) {
|
||||
func (s *store) scanSingleUploadLoci(rows *sql.Rows) (codegraph.UploadLoci, error) {
|
||||
var uploadID int
|
||||
var customEncodedRanges []byte
|
||||
var documentPath string
|
||||
var scipPayload []byte
|
||||
var record qualifiedMonikerLocations
|
||||
|
||||
if err := rows.Scan(&record.UploadID, &scipPayload, &documentPath); err != nil {
|
||||
return qualifiedMonikerLocations{}, err
|
||||
if err := rows.Scan(&uploadID, &customEncodedRanges, &documentPath); err != nil {
|
||||
return codegraph.UploadLoci{}, err
|
||||
}
|
||||
|
||||
ranges, err := ranges.DecodeRanges(scipPayload)
|
||||
ranges, err := ranges.DecodeRanges(customEncodedRanges)
|
||||
if err != nil {
|
||||
return qualifiedMonikerLocations{}, err
|
||||
return codegraph.UploadLoci{}, err
|
||||
}
|
||||
|
||||
locations := make([]precise.LocationData, 0, len(ranges))
|
||||
locations := make([]codegraph.Locus, 0, len(ranges))
|
||||
for _, r := range ranges {
|
||||
locations = append(locations, precise.LocationData{
|
||||
DocumentPath: documentPath,
|
||||
StartLine: int(r.Start.Line),
|
||||
StartCharacter: int(r.Start.Character),
|
||||
EndLine: int(r.End.Line),
|
||||
EndCharacter: int(r.End.Character),
|
||||
locations = append(locations, codegraph.Locus{
|
||||
Path: core.NewUploadRelPathUnchecked(documentPath),
|
||||
Range: scip.NewRangeUnchecked([]int32{r.Start.Line, r.Start.Character, r.End.Line, r.End.Character}),
|
||||
})
|
||||
}
|
||||
|
||||
record.Locations = locations
|
||||
return record, nil
|
||||
}
|
||||
|
||||
func locationDataKey(v precise.LocationData) string {
|
||||
return fmt.Sprintf("%s:%d:%d:%d:%d", v.DocumentPath, v.StartLine, v.StartCharacter, v.EndLine, v.EndCharacter)
|
||||
return codegraph.UploadLoci{
|
||||
UploadID: uploadID,
|
||||
Loci: locations,
|
||||
}, nil
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user