diff --git a/cmd/frontend/graphqlbackend/codeintel.codenav.graphql b/cmd/frontend/graphqlbackend/codeintel.codenav.graphql index 1de7bbdd597..7d55a31e3a5 100644 --- a/cmd/frontend/graphqlbackend/codeintel.codenav.graphql +++ b/cmd/frontend/graphqlbackend/codeintel.codenav.graphql @@ -47,6 +47,170 @@ extend type GitBlob { Experimental: This API is likely to change in the future. """ symbolInfo(line: Int!, character: Int!): SymbolInfo + + """ + Return the code graph data associated with this blob. + + If there are multiple tools (i.e. name and version pairs) which + have uploaded precise indexes for this blob, then this API will + return multiple results even if + filter == { provenance: { equals: Precise } }. + + Commit matching is done based on graph order. + For merge commits or their children, it is possible that the + same blob may have code graph data at different ancestors, + in which case this API will return multiple results. + + EXPERIMENTAL: This API may change in the future. + """ + codeGraphData(filter: CodeGraphDataFilter): [CodeGraphData!] +} + +""" +EXPERIMENTAL: This type may change in a backwards-incompatible way. +""" +input CodeGraphDataFilter { + """ + If this field is not set, then the codeGraphData API + will go through each provenance one by one + in the order Precise -> Syntactic -> SearchBased + and stop when some data is available. + """ + provenance: CodeGraphDataProvenanceComparator +} + +""" +EXPERIMENTAL: This type may change in a backwards-incompatible way. +""" +input CodeGraphDataProvenanceComparator { + """ + Checks for exact equality. + """ + equals: CodeGraphDataProvenance +} + +""" +EXPERIMENTAL: This type may change in a backwards-incompatible way. + +TODO(issue: GRAPH-614): 'commit' field should have type GitCommit +before stabilizing this API. +""" +type CodeGraphData { + """ + Coarse description of the data source for this code graph data. + """ + provenance: CodeGraphDataProvenance! 
+ + """ + The commit associated with this code graph data. + + In general, this will be an ancestor of the commit at which code + graph data was requested, as code graph data may not be available + at the exact commit for the blob. + """ + commit: String! + + """ + Information about the tool which generated this code graph data + """ + toolInfo: CodeGraphToolInfo + + """ + Occurrences are guaranteed to be sorted by range. It is possible + for there to be multiple occurrences for the same exact source range. + """ + occurrences(first: Int, after: String): SCIPOccurrenceConnection +} + +""" +Identifies the tool which generated some code graph data. +""" +type CodeGraphToolInfo { + """ + The name of the tool, e.g. scip-java. + """ + name: String + """ + The version of the tool, e.g. v1.0.0 or some SHA. + """ + version: String +} + +""" +Connection type for a list of occurrences. +""" +type SCIPOccurrenceConnection { + """ + List of occurrences within a given page. + """ + nodes: [SCIPOccurrence!]! + + """ + Pagination information. + """ + pageInfo: PageInfo! +} + +""" +EXPERIMENTAL: This type may change in a backwards-incompatible way. +""" +enum CodeGraphDataProvenance { + """ + Based on a compiler, a type-checker or a similar data source + which doesn't have false positives. + Generally, the results are specific to a particular build configuration, + such as for a specific OS or CPU, which can matter for + codebases having a large amount of platform-specific code. + """ + PRECISE + """ + Based on a data source that uses an abstract or concrete syntax + tree, but without access to reliable type information. + """ + SYNTACTIC + """ + Based on a data source that only does textual analysis, say + using regular expressions. + """ + SEARCH_BASED +} + +""" +Metadata for a given (repo, commit, path, range) tuple. + +EXPERIMENTAL: This type may change in a backwards-incompatible way. +""" +type SCIPOccurrence { + """ + Symbol name using syntax specified by the SCIP schema. 
+ https://github.com/sourcegraph/scip/blob/main/scip.proto#L147-L188 + """ + symbol: String + """ + Source range associated with this occurrence. + """ + range: Range! + """ + Description of the role a symbol plays at a particular source range. + + The 'Definition' and 'Reference' roles are mutually exclusive (exactly + one of those will be present in the array below). + """ + roles: [SymbolRole!] + # We can add diagnostics etc. here in the future if needed. +} + +""" +EXPERIMENTAL: This type may change in a backwards-incompatible way. +""" +enum SymbolRole { + DEFINITION + REFERENCE + """ + Applicable for forward declarations in languages with header files (C, C++ etc.) + as well as standalone signatures in languages with separate interface files (OCaml etc.). + """ + FORWARD_DEFINITION } """ diff --git a/cmd/frontend/graphqlbackend/git_tree_entry.go b/cmd/frontend/graphqlbackend/git_tree_entry.go index 077d9534f9c..32a30570ef1 100644 --- a/cmd/frontend/graphqlbackend/git_tree_entry.go +++ b/cmd/frontend/graphqlbackend/git_tree_entry.go @@ -472,6 +472,19 @@ func (r *GitTreeEntryResolver) LSIF(ctx context.Context, args *struct{ ToolName }) } +func (r *GitTreeEntryResolver) CodeGraphData(ctx context.Context, args *resolverstubs.CodeGraphDataArgs) (*[]resolverstubs.CodeGraphDataResolver, error) { + repo, err := r.commit.repoResolver.getRepo(ctx) + if err != nil { + return nil, err + } + return EnterpriseResolvers.codeIntelResolver.CodeGraphData(ctx, &resolverstubs.CodeGraphDataOpts{ + Args: args, + Repo: repo, + Commit: api.CommitID(r.Commit().OID()), + Path: r.Path(), + }) +} + func (r *GitTreeEntryResolver) LocalCodeIntel(ctx context.Context) (*JSONValue, error) { repo, err := r.commit.repoResolver.getRepo(ctx) if err != nil { diff --git a/deps.bzl b/deps.bzl index 78037a9536b..60fc792f8e2 100644 --- a/deps.bzl +++ b/deps.bzl @@ -5683,6 +5683,12 @@ def go_dependencies(): ], build_file_proto_mode = "disable_global", importpath = "github.com/sourcegraph/scip", 
patch_args = ["-p1"], + # For some reason, this isn't being included automatically, causing a test failure + # when we try to use the dependency. + patches = [ + "//third_party/com_github_sourcegraph_scip:add_parser_h_to_srcs.patch", + ], sum = "h1:3EOkChYOntwHl0pPSAju7rj0oRuujh8owC4vjGDEr0s=", version = "v0.3.3", ) diff --git a/internal/codeintel/codenav/internal/lsifstore/store.go b/internal/codeintel/codenav/internal/lsifstore/store.go index 01a41d4a46b..4dd62cc028f 100644 --- a/internal/codeintel/codenav/internal/lsifstore/store.go +++ b/internal/codeintel/codenav/internal/lsifstore/store.go @@ -13,10 +13,11 @@ import ( ) type LsifStore interface { - // Whole-document metadata + // Whole-document data GetPathExists(ctx context.Context, bundleID int, path string) (bool, error) GetStencil(ctx context.Context, bundleID int, path string) ([]shared.Range, error) GetRanges(ctx context.Context, bundleID int, path string, startLine, endLine int) ([]shared.CodeIntelligenceRange, error) + SCIPDocument(ctx context.Context, uploadID int, path string) (_ *scip.Document, err error) // Fetch symbol names by position GetMonikersByPosition(ctx context.Context, uploadID int, path string, line, character int) ([][]precise.MonikerData, error) @@ -33,7 +34,6 @@ type LsifStore interface { // Metadata by position GetHover(ctx context.Context, bundleID int, path string, line, character int) (string, shared.Range, bool, error) GetDiagnostics(ctx context.Context, bundleID int, prefix string, limit, offset int) ([]shared.Diagnostic, int, error) - SCIPDocument(ctx context.Context, uploadID int, path string) (_ *scip.Document, err error) // Extraction methods ExtractDefinitionLocationsFromPosition(ctx context.Context, locationKey LocationKey) ([]shared.Location, []string, error) diff --git a/internal/codeintel/codenav/service.go b/internal/codeintel/codenav/service.go index 4d14ddc870c..c6f34a375b5 100644 --- a/internal/codeintel/codenav/service.go +++ 
b/internal/codeintel/codenav/service.go @@ -945,3 +945,7 @@ func (s *Service) SnapshotForDocument(ctx context.Context, repositoryID int, com return } + +func (s *Service) SCIPDocument(ctx context.Context, uploadID int, path string) (*scip.Document, error) { + return s.lsifstore.SCIPDocument(ctx, uploadID, path) +} diff --git a/internal/codeintel/codenav/transport/graphql/BUILD.bazel b/internal/codeintel/codenav/transport/graphql/BUILD.bazel index b053833df75..ba396896b77 100644 --- a/internal/codeintel/codenav/transport/graphql/BUILD.bazel +++ b/internal/codeintel/codenav/transport/graphql/BUILD.bazel @@ -8,6 +8,7 @@ go_library( "iface.go", "observability.go", "root_resolver.go", + "root_resolver_code_graph.go", "root_resolver_definitions.go", "root_resolver_diagnostics.go", "root_resolver_hover.go", @@ -23,6 +24,7 @@ go_library( tags = [TAG_PLATFORM_GRAPH], visibility = ["//:__subpackages__"], deps = [ + "//cmd/frontend/graphqlbackend/graphqlutil", "//internal/api", "//internal/authz", "//internal/codeintel/codenav", @@ -42,6 +44,8 @@ go_library( "@com_github_graph_gophers_graphql_go//:graphql-go", "@com_github_sourcegraph_go_lsp//:go-lsp", "@com_github_sourcegraph_log//:log", + "@com_github_sourcegraph_scip//bindings/go/scip", + "@com_github_wk8_go_ordered_map_v2//:go-ordered-map", "@io_opentelemetry_go_otel//attribute", ], ) @@ -67,7 +71,13 @@ go_test( "//internal/gitserver/gitdomain", "//internal/observation", "//internal/types", + "//lib/errors", + "//lib/pointers", "@com_github_derision_test_go_mockgen_v2//testutil/require", + "@com_github_hexops_autogold_v2//:autogold", + "@com_github_sourcegraph_scip//bindings/go/scip", + "@com_github_sourcegraph_scip//cmd/scip/tests/reprolang/bindings/go/repro", + "@com_github_stretchr_testify//require", ], ) diff --git a/internal/codeintel/codenav/transport/graphql/iface.go b/internal/codeintel/codenav/transport/graphql/iface.go index 6f2b08cb274..df2734420c7 100644 --- 
a/internal/codeintel/codenav/transport/graphql/iface.go +++ b/internal/codeintel/codenav/transport/graphql/iface.go @@ -3,6 +3,8 @@ package graphql import ( "context" + "github.com/sourcegraph/scip/bindings/go/scip" + "github.com/sourcegraph/sourcegraph/internal/codeintel/codenav" "github.com/sourcegraph/sourcegraph/internal/codeintel/codenav/shared" uploadsshared "github.com/sourcegraph/sourcegraph/internal/codeintel/uploads/shared" @@ -22,6 +24,7 @@ type CodeNavService interface { GetClosestCompletedUploadsForBlob(context.Context, uploadsshared.UploadMatchingOptions) (_ []uploadsshared.CompletedUpload, err error) VisibleUploadsForPath(ctx context.Context, requestState codenav.RequestState) ([]uploadsshared.CompletedUpload, error) SnapshotForDocument(ctx context.Context, repositoryID int, commit, path string, uploadID int) (data []shared.SnapshotData, err error) + SCIPDocument(ctx context.Context, uploadID int, path string) (*scip.Document, error) } type AutoIndexingService interface { diff --git a/internal/codeintel/codenav/transport/graphql/mocks_test.go b/internal/codeintel/codenav/transport/graphql/mocks_test.go index 056fd9f3291..c1910827cbd 100644 --- a/internal/codeintel/codenav/transport/graphql/mocks_test.go +++ b/internal/codeintel/codenav/transport/graphql/mocks_test.go @@ -10,6 +10,7 @@ import ( "context" "sync" + scip "github.com/sourcegraph/scip/bindings/go/scip" codenav "github.com/sourcegraph/sourcegraph/internal/codeintel/codenav" shared1 "github.com/sourcegraph/sourcegraph/internal/codeintel/codenav/shared" shared "github.com/sourcegraph/sourcegraph/internal/codeintel/uploads/shared" @@ -206,6 +207,9 @@ type MockCodeNavService struct { // GetStencilFunc is an instance of a mock function object controlling // the behavior of the method GetStencil. GetStencilFunc *CodeNavServiceGetStencilFunc + // SCIPDocumentFunc is an instance of a mock function object controlling + // the behavior of the method SCIPDocument. 
+ SCIPDocumentFunc *CodeNavServiceSCIPDocumentFunc // SnapshotForDocumentFunc is an instance of a mock function object // controlling the behavior of the method SnapshotForDocument. SnapshotForDocumentFunc *CodeNavServiceSnapshotForDocumentFunc @@ -263,6 +267,11 @@ func NewMockCodeNavService() *MockCodeNavService { return }, }, + SCIPDocumentFunc: &CodeNavServiceSCIPDocumentFunc{ + defaultHook: func(context.Context, int, string) (r0 *scip.Document, r1 error) { + return + }, + }, SnapshotForDocumentFunc: &CodeNavServiceSnapshotForDocumentFunc{ defaultHook: func(context.Context, int, string, string, int) (r0 []shared1.SnapshotData, r1 error) { return @@ -325,6 +334,11 @@ func NewStrictMockCodeNavService() *MockCodeNavService { panic("unexpected invocation of MockCodeNavService.GetStencil") }, }, + SCIPDocumentFunc: &CodeNavServiceSCIPDocumentFunc{ + defaultHook: func(context.Context, int, string) (*scip.Document, error) { + panic("unexpected invocation of MockCodeNavService.SCIPDocument") + }, + }, SnapshotForDocumentFunc: &CodeNavServiceSnapshotForDocumentFunc{ defaultHook: func(context.Context, int, string, string, int) ([]shared1.SnapshotData, error) { panic("unexpected invocation of MockCodeNavService.SnapshotForDocument") @@ -370,6 +384,9 @@ func NewMockCodeNavServiceFrom(i CodeNavService) *MockCodeNavService { GetStencilFunc: &CodeNavServiceGetStencilFunc{ defaultHook: i.GetStencil, }, + SCIPDocumentFunc: &CodeNavServiceSCIPDocumentFunc{ + defaultHook: i.SCIPDocument, + }, SnapshotForDocumentFunc: &CodeNavServiceSnapshotForDocumentFunc{ defaultHook: i.SnapshotForDocument, }, @@ -1419,6 +1436,117 @@ func (c CodeNavServiceGetStencilFuncCall) Results() []interface{} { return []interface{}{c.Result0, c.Result1} } +// CodeNavServiceSCIPDocumentFunc describes the behavior when the +// SCIPDocument method of the parent MockCodeNavService instance is invoked. 
+type CodeNavServiceSCIPDocumentFunc struct { + defaultHook func(context.Context, int, string) (*scip.Document, error) + hooks []func(context.Context, int, string) (*scip.Document, error) + history []CodeNavServiceSCIPDocumentFuncCall + mutex sync.Mutex +} + +// SCIPDocument delegates to the next hook function in the queue and stores +// the parameter and result values of this invocation. +func (m *MockCodeNavService) SCIPDocument(v0 context.Context, v1 int, v2 string) (*scip.Document, error) { + r0, r1 := m.SCIPDocumentFunc.nextHook()(v0, v1, v2) + m.SCIPDocumentFunc.appendCall(CodeNavServiceSCIPDocumentFuncCall{v0, v1, v2, r0, r1}) + return r0, r1 +} + +// SetDefaultHook sets function that is called when the SCIPDocument method +// of the parent MockCodeNavService instance is invoked and the hook queue +// is empty. +func (f *CodeNavServiceSCIPDocumentFunc) SetDefaultHook(hook func(context.Context, int, string) (*scip.Document, error)) { + f.defaultHook = hook +} + +// PushHook adds a function to the end of hook queue. Each invocation of the +// SCIPDocument method of the parent MockCodeNavService instance invokes the +// hook at the front of the queue and discards it. After the queue is empty, +// the default hook function is invoked for any future action. +func (f *CodeNavServiceSCIPDocumentFunc) PushHook(hook func(context.Context, int, string) (*scip.Document, error)) { + f.mutex.Lock() + f.hooks = append(f.hooks, hook) + f.mutex.Unlock() +} + +// SetDefaultReturn calls SetDefaultHook with a function that returns the +// given values. +func (f *CodeNavServiceSCIPDocumentFunc) SetDefaultReturn(r0 *scip.Document, r1 error) { + f.SetDefaultHook(func(context.Context, int, string) (*scip.Document, error) { + return r0, r1 + }) +} + +// PushReturn calls PushHook with a function that returns the given values. 
+func (f *CodeNavServiceSCIPDocumentFunc) PushReturn(r0 *scip.Document, r1 error) { + f.PushHook(func(context.Context, int, string) (*scip.Document, error) { + return r0, r1 + }) +} + +func (f *CodeNavServiceSCIPDocumentFunc) nextHook() func(context.Context, int, string) (*scip.Document, error) { + f.mutex.Lock() + defer f.mutex.Unlock() + + if len(f.hooks) == 0 { + return f.defaultHook + } + + hook := f.hooks[0] + f.hooks = f.hooks[1:] + return hook +} + +func (f *CodeNavServiceSCIPDocumentFunc) appendCall(r0 CodeNavServiceSCIPDocumentFuncCall) { + f.mutex.Lock() + f.history = append(f.history, r0) + f.mutex.Unlock() +} + +// History returns a sequence of CodeNavServiceSCIPDocumentFuncCall objects +// describing the invocations of this function. +func (f *CodeNavServiceSCIPDocumentFunc) History() []CodeNavServiceSCIPDocumentFuncCall { + f.mutex.Lock() + history := make([]CodeNavServiceSCIPDocumentFuncCall, len(f.history)) + copy(history, f.history) + f.mutex.Unlock() + + return history +} + +// CodeNavServiceSCIPDocumentFuncCall is an object that describes an +// invocation of method SCIPDocument on an instance of MockCodeNavService. +type CodeNavServiceSCIPDocumentFuncCall struct { + // Arg0 is the value of the 1st argument passed to this method + // invocation. + Arg0 context.Context + // Arg1 is the value of the 2nd argument passed to this method + // invocation. + Arg1 int + // Arg2 is the value of the 3rd argument passed to this method + // invocation. + Arg2 string + // Result0 is the value of the 1st result returned from this method + // invocation. + Result0 *scip.Document + // Result1 is the value of the 2nd result returned from this method + // invocation. + Result1 error +} + +// Args returns an interface slice containing the arguments of this +// invocation. 
+func (c CodeNavServiceSCIPDocumentFuncCall) Args() []interface{} { + return []interface{}{c.Arg0, c.Arg1, c.Arg2} +} + +// Results returns an interface slice containing the results of this +// invocation. +func (c CodeNavServiceSCIPDocumentFuncCall) Results() []interface{} { + return []interface{}{c.Result0, c.Result1} +} + // CodeNavServiceSnapshotForDocumentFunc describes the behavior when the // SnapshotForDocument method of the parent MockCodeNavService instance is // invoked. diff --git a/internal/codeintel/codenav/transport/graphql/observability.go b/internal/codeintel/codenav/transport/graphql/observability.go index 80800e3320f..3e9971cd0b0 100644 --- a/internal/codeintel/codenav/transport/graphql/observability.go +++ b/internal/codeintel/codenav/transport/graphql/observability.go @@ -15,6 +15,8 @@ import ( type operations struct { gitBlobLsifData *observation.Operation + codeGraphData *observation.Operation + occurrences *observation.Operation hover *observation.Operation definitions *observation.Operation references *observation.Operation @@ -45,6 +47,8 @@ func newOperations(observationCtx *observation.Context) *operations { return &operations{ gitBlobLsifData: op("GitBlobLsifData"), + codeGraphData: op("CodeGraphData"), + occurrences: op("Occurrences"), hover: op("Hover"), definitions: op("Definitions"), references: op("References"), diff --git a/internal/codeintel/codenav/transport/graphql/root_resolver.go b/internal/codeintel/codenav/transport/graphql/root_resolver.go index 4d8f360d471..1580befc293 100644 --- a/internal/codeintel/codenav/transport/graphql/root_resolver.go +++ b/internal/codeintel/codenav/transport/graphql/root_resolver.go @@ -2,15 +2,21 @@ package graphql import ( "context" + "fmt" "strings" + "sync" + orderedmap "github.com/wk8/go-ordered-map/v2" "go.opentelemetry.io/otel/attribute" + "github.com/sourcegraph/scip/bindings/go/scip" + "github.com/sourcegraph/sourcegraph/internal/authz" 
"github.com/sourcegraph/sourcegraph/internal/codeintel/codenav" resolverstubs "github.com/sourcegraph/sourcegraph/internal/codeintel/resolvers" sharedresolvers "github.com/sourcegraph/sourcegraph/internal/codeintel/shared/resolvers" "github.com/sourcegraph/sourcegraph/internal/codeintel/shared/resolvers/gitresolvers" + "github.com/sourcegraph/sourcegraph/internal/codeintel/uploads/shared" uploadsgraphql "github.com/sourcegraph/sourcegraph/internal/codeintel/uploads/transport/graphql" "github.com/sourcegraph/sourcegraph/internal/database" "github.com/sourcegraph/sourcegraph/internal/dotcom" @@ -111,6 +117,79 @@ func (r *rootResolver) GitBlobLSIFData(ctx context.Context, args *resolverstubs. ), nil } +func (r *rootResolver) CodeGraphData(ctx context.Context, opts *resolverstubs.CodeGraphDataOpts) (_ *[]resolverstubs.CodeGraphDataResolver, err error) { + ctx, _, endObservation := r.operations.codeGraphData.WithErrors(ctx, &err, observation.Args{Attrs: opts.Attrs()}) + endObservation.OnCancel(ctx, 1, observation.Args{}) + + makeResolvers := func(prov resolverstubs.CodeGraphDataProvenance) ([]resolverstubs.CodeGraphDataResolver, error) { + indexer := "" + if prov == resolverstubs.ProvenanceSyntactic { + indexer = shared.SyntacticIndexer + } + uploads, err := r.svc.GetClosestCompletedUploadsForBlob(ctx, shared.UploadMatchingOptions{ + RepositoryID: int(opts.Repo.ID), + Commit: string(opts.Commit), + Path: opts.Path, + RootToPathMatching: shared.RootMustEnclosePath, + Indexer: indexer, + }) + if err != nil || len(uploads) == 0 { + return nil, err + } + resolvers := []resolverstubs.CodeGraphDataResolver{} + for _, upload := range preferUploadsWithLongestRoots(uploads) { + resolvers = append(resolvers, newCodeGraphDataResolver(r.svc, upload, opts, prov, r.operations)) + } + return resolvers, nil + } + + provs := opts.Args.ProvenancesForSCIPData() + if provs.Precise { + preciseResolvers, err := makeResolvers(resolverstubs.ProvenancePrecise) + if len(preciseResolvers) != 0 || 
err != nil { + return &preciseResolvers, err + } + } + + if provs.Syntactic { + syntacticResolvers, err := makeResolvers(resolverstubs.ProvenanceSyntactic) + if len(syntacticResolvers) != 0 || err != nil { + return &syntacticResolvers, err + } + + // Enhancement idea: if a syntactic SCIP index is unavailable, + // but the language is supported by scip-syntax, we could generate + // a syntactic SCIP index on-the-fly by having the syntax-highlighter + // analyze the file. + } + + // We do not currently have any way of generating SCIP data + // through purely textual means. + + return &[]resolverstubs.CodeGraphDataResolver{}, nil +} + +func preferUploadsWithLongestRoots(uploads []shared.CompletedUpload) []shared.CompletedUpload { + // Use orderedmap instead of a map to preserve the order of the uploads + // and to avoid introducing non-determinism. + sortedMap := orderedmap.New[string, shared.CompletedUpload]() + for _, upload := range uploads { + key := fmt.Sprintf("%s:%s", upload.Indexer, upload.Commit) + if val, found := sortedMap.Get(key); found { + if len(val.Root) < len(upload.Root) { + sortedMap.Set(key, upload) + } + } else { + sortedMap.Set(key, upload) + } + } + out := make([]shared.CompletedUpload, 0, sortedMap.Len()) + for pair := sortedMap.Oldest(); pair != nil; pair = pair.Next() { + out = append(out, pair.Value) + } + return out +} + // gitBlobLSIFDataResolver is the main interface to bundle-related operations exposed to the GraphQL API. This // resolver concerns itself with GraphQL/API-specific behaviors (auth, validation, marshaling, etc.). 
// All code intel-specific behavior is delegated to the underlying resolver instance, which is defined @@ -189,3 +268,60 @@ func (r *gitBlobLSIFDataResolver) VisibleIndexes(ctx context.Context) (_ *[]reso return &resolvers, nil } + +type codeGraphDataResolver struct { + // Retrieved data/state + retrievedDocument sync.Once + document *scip.Document + documentRetrievalError error + + // Arguments + svc CodeNavService + upload shared.CompletedUpload + opts *resolverstubs.CodeGraphDataOpts + provenance resolverstubs.CodeGraphDataProvenance + + // O11y + operations *operations +} + +func newCodeGraphDataResolver( + svc CodeNavService, + upload shared.CompletedUpload, + opts *resolverstubs.CodeGraphDataOpts, + provenance resolverstubs.CodeGraphDataProvenance, + operations *operations, +) resolverstubs.CodeGraphDataResolver { + return &codeGraphDataResolver{ + sync.Once{}, + /*document*/ nil, + /*documentRetrievalError*/ nil, + svc, + upload, + opts, + provenance, + operations, + } +} + +func (c *codeGraphDataResolver) tryRetrieveDocument(ctx context.Context) (*scip.Document, error) { + // NOTE(id: scip-doc-optimization): In the case of pagination, if we retrieve the document ID + // from the database, we can avoid performing a JOIN between codeintel_scip_document_lookup + // and codeintel_scip_documents + c.retrievedDocument.Do(func() { + c.document, c.documentRetrievalError = c.svc.SCIPDocument(ctx, c.upload.ID, c.opts.Path) + }) + return c.document, c.documentRetrievalError +} + +func (c *codeGraphDataResolver) Provenance(_ context.Context) (resolverstubs.CodeGraphDataProvenance, error) { + return c.provenance, nil +} + +func (c *codeGraphDataResolver) Commit(_ context.Context) (string, error) { + return c.upload.Commit, nil +} + +func (c *codeGraphDataResolver) ToolInfo(_ context.Context) (*resolverstubs.CodeGraphToolInfo, error) { + return &resolverstubs.CodeGraphToolInfo{Name_: &c.upload.Indexer, Version_: &c.upload.IndexerVersion}, nil +} diff --git 
a/internal/codeintel/codenav/transport/graphql/root_resolver_code_graph.go b/internal/codeintel/codenav/transport/graphql/root_resolver_code_graph.go new file mode 100644 index 00000000000..76b1125b195 --- /dev/null +++ b/internal/codeintel/codenav/transport/graphql/root_resolver_code_graph.go @@ -0,0 +1,158 @@ +package graphql + +import ( + "context" + "encoding/base64" + "encoding/json" + "github.com/sourcegraph/go-lsp" + "github.com/sourcegraph/scip/bindings/go/scip" + + "github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/graphqlutil" + resolverstubs "github.com/sourcegraph/sourcegraph/internal/codeintel/resolvers" + "github.com/sourcegraph/sourcegraph/internal/database" + "github.com/sourcegraph/sourcegraph/internal/observation" + "github.com/sourcegraph/sourcegraph/lib/pointers" +) + +func (c *codeGraphDataResolver) Occurrences(ctx context.Context, args *resolverstubs.OccurrencesArgs) (_ resolverstubs.SCIPOccurrenceConnectionResolver, err error) { + _, _, endObservation := c.operations.occurrences.WithErrors(ctx, &err, observation.Args{Attrs: c.opts.Attrs()}) + defer endObservation(1, observation.Args{}) + + const maxPageSize = 100000 + args.Normalize(maxPageSize) + + impl, err := graphqlutil.NewConnectionResolver[resolverstubs.SCIPOccurrenceResolver]( + &occurrenceConnectionStore{c}, + &graphqlutil.ConnectionResolverArgs{First: args.First, After: args.After}, + &graphqlutil.ConnectionResolverOptions{MaxPageSize: maxPageSize, Reverse: pointers.Ptr(false)}) + if err != nil { + return nil, err + } + return &occurrenceConnectionResolver{impl, c, args}, nil +} + +type occurrenceConnectionResolver struct { + impl *graphqlutil.ConnectionResolver[resolverstubs.SCIPOccurrenceResolver] + + // Arguments + graphData *codeGraphDataResolver + args *resolverstubs.OccurrencesArgs +} + +var _ resolverstubs.SCIPOccurrenceConnectionResolver = &occurrenceConnectionResolver{} + +func (o *occurrenceConnectionResolver) Nodes(ctx context.Context) 
([]resolverstubs.SCIPOccurrenceResolver, error) { + return o.impl.Nodes(ctx) +} + +func (o *occurrenceConnectionResolver) PageInfo(ctx context.Context) (*graphqlutil.ConnectionPageInfo[resolverstubs.SCIPOccurrenceResolver], error) { + return o.impl.PageInfo(ctx) +} + +var _ graphqlutil.ConnectionResolverStore[resolverstubs.SCIPOccurrenceResolver] = &occurrenceConnectionStore{} + +type scipOccurrence struct { + impl *scip.Occurrence + + // For cursor state, because a single value is passed to MarshalCursor + cursor +} + +func (s scipOccurrence) Roles() (*[]resolverstubs.SymbolRole, error) { + roles := s.impl.GetSymbolRoles() + out := []resolverstubs.SymbolRole{} + if roles&int32(scip.SymbolRole_Definition) != 0 { + out = append(out, resolverstubs.SymbolRoleDefinition) + } else { + out = append(out, resolverstubs.SymbolRoleReference) + } + if roles&int32(scip.SymbolRole_ForwardDefinition) != 0 { + out = append(out, resolverstubs.SymbolRoleForwardDefinition) + } + return &out, nil +} + +var _ resolverstubs.SCIPOccurrenceResolver = scipOccurrence{} + +type occurrenceConnectionStore struct { + graphData *codeGraphDataResolver +} + +var _ graphqlutil.ConnectionResolverStore[resolverstubs.SCIPOccurrenceResolver] = &occurrenceConnectionStore{} + +func (o *occurrenceConnectionStore) ComputeTotal(ctx context.Context) (int32, error) { + doc, err := o.graphData.tryRetrieveDocument(ctx) + if doc == nil || err != nil { + return 0, err + } + return int32(len(doc.Occurrences)), nil +} + +func (o *occurrenceConnectionStore) ComputeNodes(ctx context.Context, paginationArgs *database.PaginationArgs) ([]resolverstubs.SCIPOccurrenceResolver, error) { + doc, err := o.graphData.tryRetrieveDocument(ctx) + if err != nil { + return nil, err + } + if paginationArgs != nil { + // Strictly speaking, 'After' is expected to have length 0 or 1, + // but handling the general case to avoid panicking or returning an + // error in the multiple element case. 
+ for i := range paginationArgs.After { + if c, ok := paginationArgs.After[i].(cursor); ok { + paginationArgs.After[i] = c.Index + } + } + } + occs, _, err2 := database.OffsetBasedCursorSlice(doc.Occurrences, paginationArgs) + if err2 != nil { + return nil, err2 + } + + out := make([]resolverstubs.SCIPOccurrenceResolver, 0, len(occs)) + for idx, occ := range occs { + out = append(out, scipOccurrence{occ, cursor{idx}}) + } + return out, nil +} + +func (o *occurrenceConnectionStore) MarshalCursor(n resolverstubs.SCIPOccurrenceResolver, _ database.OrderBy) (*string, error) { + return marshalCursor(n.(scipOccurrence).cursor) +} + +func marshalCursor(c cursor) (*string, error) { + buf, err := json.Marshal(c) + if err != nil { + return nil, err + } + return pointers.Ptr(base64.StdEncoding.EncodeToString(buf)), nil +} + +func (o *occurrenceConnectionStore) UnmarshalCursor(s string, _ database.OrderBy) ([]any, error) { + buf, err := base64.StdEncoding.DecodeString(s) + if err != nil { + return nil, err + } + var c cursor + if err = json.Unmarshal(buf, &c); err != nil { + return nil, err + } + return []any{c}, nil +} + +type cursor struct { + // Index inside occurrences array in document + Index int +} + +func (s scipOccurrence) Symbol() (*string, error) { + return pointers.Ptr(s.impl.Symbol), nil +} + +func (s scipOccurrence) Range() (resolverstubs.RangeResolver, error) { + // FIXME(issue: GRAPH-571): Below code is correct iff the indexer uses UTF-16 offsets + r := scip.NewRange(s.impl.Range) + return newRangeResolver(lsp.Range{ + Start: lsp.Position{Line: int(r.Start.Line), Character: int(r.Start.Character)}, + End: lsp.Position{Line: int(r.End.Line), Character: int(r.End.Character)}, + }), nil +} diff --git a/internal/codeintel/codenav/transport/graphql/root_resolver_test.go b/internal/codeintel/codenav/transport/graphql/root_resolver_test.go index d34ea2ee3ab..3bd394d9019 100644 --- a/internal/codeintel/codenav/transport/graphql/root_resolver_test.go +++ 
b/internal/codeintel/codenav/transport/graphql/root_resolver_test.go @@ -4,9 +4,15 @@ import ( "context" "encoding/base64" "fmt" + "strings" "testing" mockrequire "github.com/derision-test/go-mockgen/v2/testutil/require" + "github.com/hexops/autogold/v2" + "github.com/stretchr/testify/require" + + "github.com/sourcegraph/scip/bindings/go/scip" + "github.com/sourcegraph/scip/cmd/scip/tests/reprolang/bindings/go/repro" "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/codeintel/codenav" @@ -19,6 +25,8 @@ import ( "github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain" "github.com/sourcegraph/sourcegraph/internal/observation" sgtypes "github.com/sourcegraph/sourcegraph/internal/types" + "github.com/sourcegraph/sourcegraph/lib/errors" + "github.com/sourcegraph/sourcegraph/lib/pointers" ) func TestRanges(t *testing.T) { @@ -398,3 +406,196 @@ func TestResolveLocations(t *testing.T) { t.Errorf("unexpected canonical url. want=%s have=%s", "/repo53@deadbeef4/-/blob/p4?L42:43-44:45", url) } } + +func sampleSourceFiles() []*scip.SourceFile { + testFiles := []struct { + path string + content string + }{ + { + path: "locals.repro", + content: `definition local_a +reference local_a +`, + }, + } + out := []*scip.SourceFile{} + for _, testFile := range testFiles { + out = append(out, &scip.SourceFile{ + AbsolutePath: "/var/myproject/" + testFile.path, + RelativePath: testFile.path, + Text: testFile.content, + Lines: strings.Split(testFile.content, "\n"), + }) + } + return out +} + +func unwrap[T any](v T, err error) func(*testing.T) T { + return func(t *testing.T) T { + require.NoError(t, err) + return v + } +} + +func makeTestResolver(t *testing.T) resolverstubs.CodeGraphDataResolver { + codeNavSvc := NewStrictMockCodeNavService() + index := unwrap(repro.Index("", "testpkg", sampleSourceFiles(), nil))(t) + errUploadNotFound := errors.New("upload not found") + errDocumentNotFound := errors.New("document not found") + 
testUpload := uploadsshared.CompletedUpload{ID: 82} + codeNavSvc.SCIPDocumentFunc.SetDefaultHook(func(_ context.Context, uploadID int, path string) (*scip.Document, error) { + if uploadID != testUpload.ID { + return nil, errUploadNotFound + } + for _, d := range index.Documents { + if path == d.RelativePath { + return d, nil + } + } + return nil, errDocumentNotFound + }) + + return newCodeGraphDataResolver( + codeNavSvc, testUpload, + &resolverstubs.CodeGraphDataOpts{Repo: &sgtypes.Repo{}, Path: "locals.repro"}, + resolverstubs.ProvenancePrecise, newOperations(&observation.TestContext)) +} + +func TestOccurrences_BadArgs(t *testing.T) { + resolver := makeTestResolver(t) + bgCtx := context.Background() + + t.Run("fetching with undeserializable 'after'", func(t *testing.T) { + badArgs := resolverstubs.OccurrencesArgs{After: pointers.Ptr("not-a-cursor")} + badArgs.Normalize(10) + occs := unwrap(resolver.Occurrences(bgCtx, &badArgs))(t) + _, err := occs.Nodes(bgCtx) + require.Error(t, err) + }) + + t.Run("fetching with out-of-bounds 'after'", func(t *testing.T) { + oobCursor := unwrap(marshalCursor(cursor{100}))(t) + badArgs := resolverstubs.OccurrencesArgs{After: oobCursor} + badArgs.Normalize(10) + occs := unwrap(resolver.Occurrences(bgCtx, &badArgs))(t) + nodes, err := occs.Nodes(bgCtx) + // TODO: I think this should be an out-of-bounds error, Slack discussion: + // https://sourcegraph.slack.com/archives/C02UC4WUX1Q/p1716378462737019 + require.NoError(t, err) + require.Equal(t, 0, len(nodes)) + }) +} + +func TestOccurrences_Pages(t *testing.T) { + resolver := makeTestResolver(t) + bgCtx := context.Background() + + type TestCase struct { + name string + initialArgs *resolverstubs.OccurrencesArgs + // Run with go test -update to update the wantPages values + wantPages autogold.Value + } + + type occurrenceNode struct { + Symbol string + Range []int32 + Roles []string + } + + testCases := []TestCase{ + { + name: "Single page", + initialArgs: 
(&resolverstubs.OccurrencesArgs{}).Normalize(10), + wantPages: autogold.Expect([][]occurrenceNode{{ + { + Symbol: "local _a", + Range: []int32{ + 0, + 11, + 0, + 18, + }, + Roles: []string{"DEFINITION"}, + }, + { + Symbol: "local _a", + Range: []int32{ + 1, + 10, + 1, + 17, + }, + Roles: []string{"REFERENCE"}, + }, + }}), + }, + { + name: "Multiple pages", + initialArgs: (&resolverstubs.OccurrencesArgs{}).Normalize(1), + wantPages: autogold.Expect([][]occurrenceNode{ + { + { + Symbol: "local _a", + Range: []int32{ + 0, + 11, + 0, + 18, + }, + Roles: []string{"DEFINITION"}, + }, + }, + {{ + Symbol: "local _a", + Range: []int32{ + 1, + 10, + 1, + 17, + }, + Roles: []string{"REFERENCE"}, + }}, + }), + }, + } + + for _, testCase := range testCases { + t.Run(testCase.name, func(t *testing.T) { + allOccurrences := [][]occurrenceNode{} + args := testCase.initialArgs + const maxIters = 10 + i := 0 + for ; i < maxIters; i++ { + connx := unwrap(resolver.Occurrences(bgCtx, args))(t) + occs := unwrap(connx.Nodes(bgCtx))(t) + var nodes []occurrenceNode + for _, occ := range occs { + s := unwrap(occ.Symbol())(t) + r := unwrap(occ.Range())(t) + roles := unwrap(occ.Roles())(t) + var rolesStrs []string + for _, role := range *roles { + rolesStrs = append(rolesStrs, string(role)) + } + nodes = append(nodes, occurrenceNode{ + Symbol: *s, + Range: []int32{r.Start().Line(), r.Start().Character(), r.End().Line(), r.End().Character()}, + Roles: rolesStrs, + }) + } + allOccurrences = append(allOccurrences, nodes) + pages := unwrap(connx.PageInfo(bgCtx))(t) + if pages.HasNextPage() { + endCursor := unwrap(pages.EndCursor())(t) + args.After = endCursor + } else { + break + } + } + require.Less(t, i, maxIters) + testCase.wantPages.Equal(t, allOccurrences) + }) + } +} diff --git a/internal/codeintel/resolvers/BUILD.bazel b/internal/codeintel/resolvers/BUILD.bazel index 5c7616f758e..46c96568c9e 100644 --- a/internal/codeintel/resolvers/BUILD.bazel +++ b/internal/codeintel/resolvers/BUILD.bazel 
@@ -16,6 +16,7 @@ go_library( tags = [TAG_PLATFORM_GRAPH], visibility = ["//:__subpackages__"], deps = [ + "//cmd/frontend/graphqlbackend/graphqlutil", "//internal/api", "//internal/codeintel/uploads/shared", "//internal/gitserver/gitdomain", @@ -26,5 +27,6 @@ go_library( "//lib/pointers", "@com_github_graph_gophers_graphql_go//:graphql-go", "@com_github_graph_gophers_graphql_go//relay", + "@io_opentelemetry_go_otel//attribute", ], ) diff --git a/internal/codeintel/resolvers/codenav.go b/internal/codeintel/resolvers/codenav.go index 0e33e8a08df..94588bbcfca 100644 --- a/internal/codeintel/resolvers/codenav.go +++ b/internal/codeintel/resolvers/codenav.go @@ -2,9 +2,12 @@ package resolvers import ( "context" + "fmt" "github.com/graph-gophers/graphql-go" + "go.opentelemetry.io/otel/attribute" + "github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/graphqlutil" "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/codeintel/uploads/shared" "github.com/sourcegraph/sourcegraph/internal/markdown" @@ -13,6 +16,11 @@ import ( type CodeNavServiceResolver interface { GitBlobLSIFData(ctx context.Context, args *GitBlobLSIFDataArgs) (GitBlobLSIFDataResolver, error) + // CodeGraphData is a newer API that is more SCIP-oriented. + // The second parameter is called 'opts' and not 'args' to reflect + // that it is not what is exactly provided as input from the GraphQL + // client. + CodeGraphData(ctx context.Context, opts *CodeGraphDataOpts) (*[]CodeGraphDataResolver, error) } type GitBlobLSIFDataArgs struct { @@ -140,3 +148,132 @@ type DiagnosticResolver interface { Message() (*string, error) Location(ctx context.Context) (LocationResolver, error) } + +type CodeGraphDataResolver interface { + Provenance(ctx context.Context) (CodeGraphDataProvenance, error) + Commit(ctx context.Context) (string, error) + ToolInfo(ctx context.Context) (*CodeGraphToolInfo, error) + // Pre-condition: args are Normalized. 
+ Occurrences(ctx context.Context, args *OccurrencesArgs) (SCIPOccurrenceConnectionResolver, error) +} + +type CodeGraphDataProvenance string + +const ( + ProvenancePrecise CodeGraphDataProvenance = "PRECISE" + ProvenanceSyntactic CodeGraphDataProvenance = "SYNTACTIC" + ProvenanceSearchBased CodeGraphDataProvenance = "SEARCH_BASED" +) + +type CodeGraphDataProvenanceComparator struct { + Equals *CodeGraphDataProvenance +} + +type CodeGraphDataFilter struct { + Provenance *CodeGraphDataProvenanceComparator +} + +// String is meant as a debugging-only representation without round-trippability +func (f *CodeGraphDataFilter) String() string { + if f != nil && f.Provenance != nil && f.Provenance.Equals != nil { + return fmt.Sprintf("provenance == %s", string(*f.Provenance.Equals)) + } + return "" +} + +type CodeGraphDataArgs struct { + Filter *CodeGraphDataFilter +} + +func (args *CodeGraphDataArgs) Attrs() []attribute.KeyValue { + if args == nil { + return nil + } + return []attribute.KeyValue{attribute.String("args.filter", args.Filter.String())} +} + +type ForEachProvenance[T any] struct { + SearchBased T + Syntactic T + Precise T +} + +func (a *CodeGraphDataArgs) ProvenancesForSCIPData() ForEachProvenance[bool] { + var out ForEachProvenance[bool] + if a == nil || a.Filter == nil || a.Filter.Provenance == nil || a.Filter.Provenance.Equals == nil { + out.Syntactic = true + out.Precise = true + } else { + p := *a.Filter.Provenance.Equals + switch p { + case ProvenancePrecise: + out.Precise = true + case ProvenanceSyntactic: + out.Syntactic = true + case ProvenanceSearchBased: + } + } + return out +} + +type CodeGraphDataOpts struct { + Args *CodeGraphDataArgs + Repo *types.Repo + Commit api.CommitID + Path string +} + +func (opts *CodeGraphDataOpts) Attrs() []attribute.KeyValue { + return append([]attribute.KeyValue{attribute.String("repo", opts.Repo.String()), + opts.Commit.Attr(), + attribute.String("path", opts.Path)}, opts.Args.Attrs()...) 
+} + +type CodeGraphToolInfo struct { + Name_ *string + Version_ *string +} + +func (ti *CodeGraphToolInfo) Name() *string { + return ti.Name_ +} + +func (ti *CodeGraphToolInfo) Version() *string { + return ti.Version_ +} + +type OccurrencesArgs struct { + First *int32 + After *string +} + +// Normalize returns args for convenience of chaining +func (args *OccurrencesArgs) Normalize(maxPageSize int32) *OccurrencesArgs { + if args == nil { + *args = OccurrencesArgs{} + } + if args.First == nil || *args.First > maxPageSize { + args.First = &maxPageSize + } + return args +} + +type SCIPOccurrenceConnectionResolver interface { + ConnectionResolver[SCIPOccurrenceResolver] + PageInfo(ctx context.Context) (*graphqlutil.ConnectionPageInfo[SCIPOccurrenceResolver], error) +} + +type SCIPOccurrenceResolver interface { + Symbol() (*string, error) + Range() (RangeResolver, error) + Roles() (*[]SymbolRole, error) +} + +type SymbolRole string + +// ⚠️ CAUTION: These constants are part of the public GraphQL API +const ( + SymbolRoleDefinition SymbolRole = "DEFINITION" + SymbolRoleReference SymbolRole = "REFERENCE" + SymbolRoleForwardDefinition SymbolRole = "FORWARD_DEFINITION" +) diff --git a/internal/codeintel/resolvers/root_resolver.go b/internal/codeintel/resolvers/root_resolver.go index a4d1a037531..f1255efca8e 100644 --- a/internal/codeintel/resolvers/root_resolver.go +++ b/internal/codeintel/resolvers/root_resolver.go @@ -125,6 +125,10 @@ func (r *Resolver) GitBlobLSIFData(ctx context.Context, args *GitBlobLSIFDataArg return r.codenavResolver.GitBlobLSIFData(ctx, args) } +func (r *Resolver) CodeGraphData(ctx context.Context, opts *CodeGraphDataOpts) (*[]CodeGraphDataResolver, error) { + return r.codenavResolver.CodeGraphData(ctx, opts) +} + func (r *Resolver) ConfigurationPolicyByID(ctx context.Context, id graphql.ID) (_ CodeIntelligenceConfigurationPolicyResolver, err error) { return r.policiesRootResolver.ConfigurationPolicyByID(ctx, id) } diff --git 
a/internal/codeintel/uploads/shared/types.go b/internal/codeintel/uploads/shared/types.go index dc63dcce8d4..6343d3b5a7e 100644 --- a/internal/codeintel/uploads/shared/types.go +++ b/internal/codeintel/uploads/shared/types.go @@ -358,7 +358,7 @@ type UploadMatchingOptions struct { // // Indexer must be shared.SyntacticIndexer for syntactic indexes to be considered. // - // If Indexer is empty, then all uploads will be considered. + // If Indexer is empty, then all precise indexes will be considered. Indexer string } diff --git a/internal/database/helpers.go b/internal/database/helpers.go index 7665b98b1ee..7184fb481ff 100644 --- a/internal/database/helpers.go +++ b/internal/database/helpers.go @@ -198,6 +198,7 @@ func (p *PaginationArgs) SQL() *QueryArgs { return queryArgs } +// Pre-condition: values in args.After and args.Before should have type 'int'. func OffsetBasedCursorSlice[T any](nodes []T, args *PaginationArgs) ([]T, int, error) { start := 0 end := 0 diff --git a/third_party/com_github_sourcegraph_scip/BUILD.bazel b/third_party/com_github_sourcegraph_scip/BUILD.bazel new file mode 100644 index 00000000000..d518110449e --- /dev/null +++ b/third_party/com_github_sourcegraph_scip/BUILD.bazel @@ -0,0 +1 @@ +exports_files(glob(["*.patch"])) diff --git a/third_party/com_github_sourcegraph_scip/add_parser_h_to_srcs.patch b/third_party/com_github_sourcegraph_scip/add_parser_h_to_srcs.patch new file mode 100644 index 00000000000..12615400651 --- /dev/null +++ b/third_party/com_github_sourcegraph_scip/add_parser_h_to_srcs.patch @@ -0,0 +1,12 @@ +diff --git a/cmd/scip/tests/reprolang/src/BUILD.bazel b/cmd/scip/tests/reprolang/src/BUILD.bazel +index e75c26d..3b53842 100644 +--- a/cmd/scip/tests/reprolang/src/BUILD.bazel ++++ b/cmd/scip/tests/reprolang/src/BUILD.bazel +@@ -4,6 +4,7 @@ go_library( + name = "src", + srcs = [ + "binding.go", ++ "tree_sitter/parser.h", + "parser.c", + "workaround.go", + ],