From dc7da57edb1b5dbfb358b940d32cd40112fa967f Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Wed, 24 Jul 2024 17:14:22 +0800 Subject: [PATCH] chore: Make location fetching queries more uniform (#64026) This patch changes the location querying code so that: 1. We're populating structures corresponding to SCIP instead of LSIF (with "scheme" and "identifier" inside "MonikerData") 2. Avoid repeatedly allocating a constant string 'scip' for the scheme only to throw it away later. 3. Makes the two queries and their scanning code more similar for easier comparison. When I land precise usagesForSymbol, I will de-duplicate some of the scanning code between these two queries. I have avoided renaming all of the local variables to avoid creating more noise. ## Test plan Covered by existing tests. --- dev/linters/exhaustruct/lint-config.yaml | 2 + internal/codeintel/codegraph/BUILD.bazel | 2 + internal/codeintel/codegraph/locus.go | 27 ++++++ .../codenav/internal/lsifstore/BUILD.bazel | 1 + .../lsifstore/locations_by_position.go | 72 +++++++------- .../codenav/internal/lsifstore/scan.go | 94 +++++++++---------- 6 files changed, 111 insertions(+), 87 deletions(-) create mode 100644 internal/codeintel/codegraph/locus.go diff --git a/dev/linters/exhaustruct/lint-config.yaml b/dev/linters/exhaustruct/lint-config.yaml index b4224d006c5..5b38811bd08 100644 --- a/dev/linters/exhaustruct/lint-config.yaml +++ b/dev/linters/exhaustruct/lint-config.yaml @@ -7,6 +7,8 @@ # If this list is empty, all structs are tested. # Default: [] include_types: +- '.+Locus$' +- '.+Loci$' - '.+UploadMatchingOptions' # Same as above, but for exclusion. diff --git a/internal/codeintel/codegraph/BUILD.bazel b/internal/codeintel/codegraph/BUILD.bazel index 26f605dafb5..544be751bbb 100644 --- a/internal/codeintel/codegraph/BUILD.bazel +++ b/internal/codeintel/codegraph/BUILD.bazel @@ -7,11 +7,13 @@ go_library( "cleanup.go", "data_store.go", "insert.go", + "locus.go", "observability.go", ], importpath = "github.com/sourcegraph/sourcegraph/internal/codeintel/codegraph", visibility = ["//:__subpackages__"], deps = [ + "//internal/codeintel/core", "//internal/codeintel/shared", "//internal/codeintel/shared/ranges", "//internal/codeintel/shared/trie", diff --git a/internal/codeintel/codegraph/locus.go b/internal/codeintel/codegraph/locus.go new file mode 100644 index 00000000000..e139109854f --- /dev/null +++ b/internal/codeintel/codegraph/locus.go @@ -0,0 +1,27 @@ +package codegraph + +import ( + "github.com/sourcegraph/scip/bindings/go/scip" + + "github.com/sourcegraph/sourcegraph/internal/codeintel/core" +) + +// Locus represents a source range within a document as found in the DB. +// +// We will eventually rename this to Location once we get rid of the +// existing Location, LocationData etc. types. +type Locus struct { + Path core.UploadRelPath + Range scip.Range +} + +type UploadLoci struct { + UploadID int + Loci []Locus +} + +type UploadSymbolLoci struct { + UploadID int + Symbol string + Loci []Locus +} diff --git a/internal/codeintel/codenav/internal/lsifstore/BUILD.bazel b/internal/codeintel/codenav/internal/lsifstore/BUILD.bazel index 71c90e22d52..b9e4ed8b9bc 100644 --- a/internal/codeintel/codenav/internal/lsifstore/BUILD.bazel +++ b/internal/codeintel/codenav/internal/lsifstore/BUILD.bazel @@ -17,6 +17,7 @@ go_library( tags = [TAG_PLATFORM_GRAPH], visibility = ["//:__subpackages__"], deps = [ + "//internal/codeintel/codegraph", "//internal/codeintel/codenav/shared", "//internal/codeintel/core", "//internal/codeintel/shared", diff --git a/internal/codeintel/codenav/internal/lsifstore/locations_by_position.go b/internal/codeintel/codenav/internal/lsifstore/locations_by_position.go index 19a552afc84..ab2c4019d35 100644 --- a/internal/codeintel/codenav/internal/lsifstore/locations_by_position.go +++ b/internal/codeintel/codenav/internal/lsifstore/locations_by_position.go @@ -11,7 +11,6 @@ import ( "go.opentelemetry.io/otel/attribute" "github.com/sourcegraph/sourcegraph/internal/codeintel/codenav/shared" - "github.com/sourcegraph/sourcegraph/internal/codeintel/core" "github.com/sourcegraph/sourcegraph/internal/collections" "github.com/sourcegraph/sourcegraph/internal/observation" "github.com/sourcegraph/sourcegraph/lib/codeintel/precise" @@ -42,43 +41,39 @@ func (s *store) GetBulkMonikerLocations(ctx context.Context, usageKind shared.Us } query := sqlf.Sprintf( - bulkMonikerResultsQuery, + bulkSymbolUsagesQuery, pq.Array(symbolNames), pq.Array(uploadIDs), sqlf.Sprintf(usageKind.RangesColumnName()), + sqlf.Sprintf(usageKind.RangesColumnName()), ) - locationData, err := s.scanQualifiedMonikerLocations(s.db.Query(ctx, query)) + locationData, err := s.scanUploadSymbolLoci(s.db.Query(ctx, query)) if err != nil { return nil, 0, err } totalCount = 0 - for _, monikerLocations := range locationData { - totalCount += len(monikerLocations.Locations) + for _, data := range locationData { + totalCount += len(data.Loci) } trace.AddEvent("TODO Domain Owner", attribute.Int("numUploads", len(locationData)), attribute.Int("totalCount", totalCount)) - max := totalCount - if totalCount > limit { - max = limit - } - - locations := make([]shared.Location, 0, max) + locations := make([]shared.Location, 0, min(totalCount, limit)) outer: - for _, monikerLocations := range locationData { - for _, row := range monikerLocations.Locations { + for _, uploadSymbolLoci := range locationData { + for _, locus := range uploadSymbolLoci.Loci { offset-- if offset >= 0 { continue } locations = append(locations, shared.Location{ - UploadID: monikerLocations.UploadID, - Path: core.NewUploadRelPathUnchecked(row.DocumentPath), - Range: shared.NewRange(row.StartLine, row.StartCharacter, row.EndLine, row.EndCharacter), + UploadID: uploadSymbolLoci.UploadID, + Path: locus.Path, + Range: shared.TranslateRange(locus.Range), }) if len(locations) >= limit { @@ -91,18 +86,20 @@ outer: return locations, totalCount, nil } -const bulkMonikerResultsQuery = ` +const bulkSymbolUsagesQuery = ` WITH RECURSIVE ` + symbolIDsCTEs + ` SELECT ss.upload_id, - 'scip', msn.symbol_name, %s, document_path -FROM matching_symbol_names msn -JOIN codeintel_scip_symbols ss ON ss.upload_id = msn.upload_id AND ss.symbol_id = msn.id -JOIN codeintel_scip_document_lookup dl ON dl.id = ss.document_lookup_id +FROM codeintel_scip_symbols ss +JOIN codeintel_scip_document_lookup dl + ON dl.id = ss.document_lookup_id +JOIN matching_symbol_names msn + ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id +WHERE ss.%s IS NOT NULL ORDER BY ss.upload_id, msn.symbol_name ` @@ -409,7 +406,7 @@ func (s *store) GetMinimalBulkMonikerLocations(ctx context.Context, usageKind sh } query := sqlf.Sprintf( - minimalBulkMonikerResultsQuery, + minimalBulkSymbolUsagesQuery, pq.Array(symbolNames), pq.Array(uploadIDs), sqlf.Sprintf(usageKind.RangesColumnName()), @@ -417,37 +414,32 @@ func (s *store) GetMinimalBulkMonikerLocations(ctx context.Context, usageKind sh sqlf.Join(skipConds, ", "), ) - locationData, err := s.scanDeduplicatedQualifiedMonikerLocations(s.db.Query(ctx, query)) + locationData, err := s.scanDeduplicatedUploadLoci(s.db.Query(ctx, query)) if err != nil { return nil, 0, err } totalCount = 0 - for _, monikerLocations := range locationData { - totalCount += len(monikerLocations.Locations) + for _, data := range locationData { + totalCount += len(data.Loci) } trace.AddEvent("TODO Domain Owner", attribute.Int("numUploads", len(locationData)), attribute.Int("totalCount", totalCount)) - max := totalCount - if totalCount > limit { - max = limit - } - - locations := make([]shared.Location, 0, max) + locations := make([]shared.Location, 0, min(totalCount, limit)) outer: - for _, monikerLocations := range locationData { - for _, row := range monikerLocations.Locations { + for _, uploadLoci := range locationData { + for _, locus := range uploadLoci.Loci { offset-- if offset >= 0 { continue } locations = append(locations, shared.Location{ - UploadID: monikerLocations.UploadID, - Path: core.NewUploadRelPathUnchecked(row.DocumentPath), - Range: shared.NewRange(row.StartLine, row.StartCharacter, row.EndLine, row.EndCharacter), + UploadID: uploadLoci.UploadID, + Path: locus.Path, + Range: shared.TranslateRange(locus.Range), }) if len(locations) >= limit { @@ -460,7 +452,7 @@ outer: return locations, totalCount, nil } -const minimalBulkMonikerResultsQuery = ` +const minimalBulkSymbolUsagesQuery = ` WITH RECURSIVE ` + symbolIDsCTEs + ` SELECT @@ -468,8 +460,10 @@ SELECT %s, document_path FROM codeintel_scip_symbols ss -JOIN codeintel_scip_document_lookup dl ON dl.id = ss.document_lookup_id -JOIN matching_symbol_names msn ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id +JOIN codeintel_scip_document_lookup dl + ON dl.id = ss.document_lookup_id +JOIN matching_symbol_names msn + ON msn.upload_id = ss.upload_id AND msn.id = ss.symbol_id WHERE ss.%s IS NOT NULL AND (ss.upload_id, dl.document_path) NOT IN (%s) diff --git a/internal/codeintel/codenav/internal/lsifstore/scan.go b/internal/codeintel/codenav/internal/lsifstore/scan.go index d82297e1d97..70e7fbef5e7 100644 --- a/internal/codeintel/codenav/internal/lsifstore/scan.go +++ b/internal/codeintel/codenav/internal/lsifstore/scan.go @@ -3,11 +3,12 @@ package lsifstore import ( "bytes" "database/sql" - "fmt" "github.com/sourcegraph/scip/bindings/go/scip" "google.golang.org/protobuf/proto" + "github.com/sourcegraph/sourcegraph/internal/codeintel/codegraph" + "github.com/sourcegraph/sourcegraph/internal/codeintel/core" "github.com/sourcegraph/sourcegraph/internal/codeintel/shared/ranges" "github.com/sourcegraph/sourcegraph/internal/codeintel/uploads/shared" "github.com/sourcegraph/sourcegraph/internal/collections" @@ -18,7 +19,6 @@ import ( type qualifiedDocumentData struct { UploadID int Path string - LSIFData *precise.DocumentData SCIPData *scip.Document } @@ -91,15 +91,16 @@ type qualifiedMonikerLocations struct { precise.MonikerLocations } -func (s *store) scanQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_ []qualifiedMonikerLocations, err error) { +// Post-condition: number of entries == number of unique (upload, symbol, document) triples. +func (s *store) scanUploadSymbolLoci(rows *sql.Rows, queryErr error) (_ []codegraph.UploadSymbolLoci, err error) { if queryErr != nil { return nil, queryErr } defer func() { err = basestore.CloseRows(rows, err) }() - var values []qualifiedMonikerLocations + var values []codegraph.UploadSymbolLoci for rows.Next() { - record, err := s.scanSingleQualifiedMonikerLocationsObject(rows) + record, err := s.scanSingleUploadSymbolLoci(rows) if err != nil { return nil, err } @@ -110,93 +111,90 @@ func (s *store) scanQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_ return values, nil } -func (s *store) scanSingleQualifiedMonikerLocationsObject(rows *sql.Rows) (qualifiedMonikerLocations, error) { +func (s *store) scanSingleUploadSymbolLoci(rows *sql.Rows) (codegraph.UploadSymbolLoci, error) { + var uploadID int + var symbol string + var customEncodedRanges []byte var documentPath string - var scipPayload []byte - var record qualifiedMonikerLocations - - if err := rows.Scan(&record.UploadID, &record.Scheme, &record.Identifier, &scipPayload, &documentPath); err != nil { - return qualifiedMonikerLocations{}, err + if err := rows.Scan(&uploadID, &symbol, &customEncodedRanges, &documentPath); err != nil { + return codegraph.UploadSymbolLoci{}, err } - ranges, err := ranges.DecodeRanges(scipPayload) + ranges, err := ranges.DecodeRanges(customEncodedRanges) if err != nil { - return qualifiedMonikerLocations{}, err + return codegraph.UploadSymbolLoci{}, err } - locations := make([]precise.LocationData, 0, len(ranges)) + locations := make([]codegraph.Locus, 0, len(ranges)) for _, r := range ranges { - locations = append(locations, precise.LocationData{ - DocumentPath: documentPath, - StartLine: int(r.Start.Line), - StartCharacter: int(r.Start.Character), - EndLine: int(r.End.Line), - EndCharacter: int(r.End.Character), + locations = append(locations, codegraph.Locus{ + Path: core.NewUploadRelPathUnchecked(documentPath), + Range: scip.NewRangeUnchecked([]int32{r.Start.Line, r.Start.Character, r.End.Line, r.End.Character}), }) } - record.Locations = locations - return record, nil + return codegraph.UploadSymbolLoci{ + UploadID: uploadID, + Symbol: symbol, + Loci: locations, + }, nil } // // -func (s *store) scanDeduplicatedQualifiedMonikerLocations(rows *sql.Rows, queryErr error) (_ []qualifiedMonikerLocations, err error) { +// Post-condition: Returns one entry per upload. +func (s *store) scanDeduplicatedUploadLoci(rows *sql.Rows, queryErr error) (_ []codegraph.UploadLoci, err error) { if queryErr != nil { return nil, queryErr } defer func() { err = basestore.CloseRows(rows, err) }() - var values []qualifiedMonikerLocations + var values []codegraph.UploadLoci for rows.Next() { - record, err := s.scanSingleMinimalQualifiedMonikerLocationsObject(rows) + record, err := s.scanSingleUploadLoci(rows) if err != nil { return nil, err } + // TODO: Also use the ordering guarantees for document paths + range sorting + // on insertion to replace the Deduplicate with some simple checks here. if n := len(values) - 1; n >= 0 && values[n].UploadID == record.UploadID { - values[n].Locations = append(values[n].Locations, record.Locations...) + values[n].Loci = append(values[n].Loci, record.Loci...) } else { values = append(values, record) } } for i := range values { - values[i].Locations = collections.DeduplicateBy(values[i].Locations, locationDataKey) + values[i].Loci = collections.Deduplicate(values[i].Loci) } return values, nil } -func (s *store) scanSingleMinimalQualifiedMonikerLocationsObject(rows *sql.Rows) (qualifiedMonikerLocations, error) { +func (s *store) scanSingleUploadLoci(rows *sql.Rows) (codegraph.UploadLoci, error) { + var uploadID int + var customEncodedRanges []byte var documentPath string - var scipPayload []byte - var record qualifiedMonikerLocations - - if err := rows.Scan(&record.UploadID, &scipPayload, &documentPath); err != nil { - return qualifiedMonikerLocations{}, err + if err := rows.Scan(&uploadID, &customEncodedRanges, &documentPath); err != nil { + return codegraph.UploadLoci{}, err } - ranges, err := ranges.DecodeRanges(scipPayload) + ranges, err := ranges.DecodeRanges(customEncodedRanges) if err != nil { - return qualifiedMonikerLocations{}, err + return codegraph.UploadLoci{}, err } - locations := make([]precise.LocationData, 0, len(ranges)) + locations := make([]codegraph.Locus, 0, len(ranges)) for _, r := range ranges { - locations = append(locations, precise.LocationData{ - DocumentPath: documentPath, - StartLine: int(r.Start.Line), - StartCharacter: int(r.Start.Character), - EndLine: int(r.End.Line), - EndCharacter: int(r.End.Character), + locations = append(locations, codegraph.Locus{ + Path: core.NewUploadRelPathUnchecked(documentPath), + Range: scip.NewRangeUnchecked([]int32{r.Start.Line, r.Start.Character, r.End.Line, r.End.Character}), }) } - record.Locations = locations - return record, nil -} - -func locationDataKey(v precise.LocationData) string { - return fmt.Sprintf("%s:%d:%d:%d:%d", v.DocumentPath, v.StartLine, v.StartCharacter, v.EndLine, v.EndCharacter) + return codegraph.UploadLoci{ + UploadID: uploadID, + Loci: locations, + }, nil }