From e2ff2b1f3fa8e33ae79f8560e63650bffdee0b6f Mon Sep 17 00:00:00 2001 From: Eric Fritz Date: Tue, 30 Nov 2021 11:56:45 -0600 Subject: [PATCH] symbols: Bring this baby into 2021 (#27986) Co-authored-by: Noah Santschi-Cooney --- cmd/symbols/config.go | 38 + cmd/symbols/internal/api/gen.go | 3 + cmd/symbols/internal/api/handler.go | 79 ++ cmd/symbols/internal/api/handler_test.go | 154 ++++ cmd/symbols/internal/api/mock_iface_test.go | 304 +++++++ cmd/symbols/internal/api/observability.go | 34 + cmd/symbols/internal/api/search.go | 52 ++ cmd/symbols/internal/database/init.go | 17 + .../database/janitor/cache_evicter.go | 57 ++ .../internal/database/janitor/metrics.go | 42 + cmd/symbols/internal/database/sanity_check.go | 20 + cmd/symbols/internal/database/store/meta.go | 30 + cmd/symbols/internal/database/store/search.go | 143 ++++ .../internal/database/store/search_test.go | 41 + cmd/symbols/internal/database/store/store.go | 87 ++ .../internal/database/store/symbols.go | 107 +++ cmd/symbols/internal/database/writer/cache.go | 53 ++ cmd/symbols/internal/database/writer/fs.go | 59 ++ .../internal/database/writer/writer.go | 156 ++++ cmd/symbols/internal/fetcher/gen.go | 3 + .../internal/fetcher/mock_iface_test.go | 304 +++++++ cmd/symbols/internal/fetcher/observability.go | 53 ++ .../internal/fetcher/repository_fetcher.go | 196 +++++ .../fetcher/repository_fetcher_test.go | 82 ++ cmd/symbols/internal/gitserver/client.go | 104 +++ cmd/symbols/internal/gitserver/client_test.go | 65 ++ .../internal/gitserver/observability.go | 35 + cmd/symbols/internal/gitserver/tar.go | 49 ++ cmd/symbols/internal/parser/observability.go | 74 ++ cmd/symbols/internal/parser/parser.go | 205 +++++ cmd/symbols/internal/parser/parser_factory.go | 5 + .../internal/parser/parser_factory_ctags.go | 23 + .../parser_factory_ctags_test.go} | 8 +- cmd/symbols/internal/parser/parser_pool.go | 57 ++ cmd/symbols/internal/symbols/ctags.go | 48 -- cmd/symbols/internal/symbols/fetch.go | 151 ---- 
cmd/symbols/internal/symbols/parse.go | 212 ----- cmd/symbols/internal/symbols/search.go | 599 ------------- cmd/symbols/internal/symbols/search_test.go | 82 -- cmd/symbols/internal/symbols/service.go | 131 --- cmd/symbols/internal/symbols/service_test.go | 184 ---- .../symbols.go => types/search_args.go} | 2 +- cmd/symbols/main.go | 185 ++-- dev/check/go-dbconn-import.sh | 2 + doc/admin/observability/alert_solutions.md | 46 - doc/admin/observability/dashboards.md | 797 +++++++++++++++++- .../batches/store/batch_spec_workspaces.go | 1 + .../internal/batches/store/changeset_jobs.go | 1 + .../internal/batches/store/changeset_specs.go | 1 + .../batch_spec_workspace_execution_jobs.go | 1 + .../codeintel/stores/dbstore/commits.go | 3 + .../codeintel/stores/dbstore/packages.go | 1 + .../codeintel/stores/dbstore/references.go | 1 + .../codeintel/stores/lsifstore/data_write.go | 2 +- .../lsifstore/data_write_documentation.go | 2 + .../stores/lsifstore/migration/migrator.go | 1 + internal/database/batch/batch.go | 31 +- internal/database/batch/batch_test.go | 10 +- internal/database/event_logs.go | 1 + monitoring/definitions/shared/codeintel.go | 218 +++++ monitoring/definitions/symbols.go | 30 +- sg.config.yaml | 3 + 62 files changed, 3852 insertions(+), 1633 deletions(-) create mode 100644 cmd/symbols/config.go create mode 100644 cmd/symbols/internal/api/gen.go create mode 100644 cmd/symbols/internal/api/handler.go create mode 100644 cmd/symbols/internal/api/handler_test.go create mode 100644 cmd/symbols/internal/api/mock_iface_test.go create mode 100644 cmd/symbols/internal/api/observability.go create mode 100644 cmd/symbols/internal/api/search.go create mode 100644 cmd/symbols/internal/database/init.go create mode 100644 cmd/symbols/internal/database/janitor/cache_evicter.go create mode 100644 cmd/symbols/internal/database/janitor/metrics.go create mode 100644 cmd/symbols/internal/database/sanity_check.go create mode 100644 cmd/symbols/internal/database/store/meta.go 
create mode 100644 cmd/symbols/internal/database/store/search.go create mode 100644 cmd/symbols/internal/database/store/search_test.go create mode 100644 cmd/symbols/internal/database/store/store.go create mode 100644 cmd/symbols/internal/database/store/symbols.go create mode 100644 cmd/symbols/internal/database/writer/cache.go create mode 100644 cmd/symbols/internal/database/writer/fs.go create mode 100644 cmd/symbols/internal/database/writer/writer.go create mode 100644 cmd/symbols/internal/fetcher/gen.go create mode 100644 cmd/symbols/internal/fetcher/mock_iface_test.go create mode 100644 cmd/symbols/internal/fetcher/observability.go create mode 100644 cmd/symbols/internal/fetcher/repository_fetcher.go create mode 100644 cmd/symbols/internal/fetcher/repository_fetcher_test.go create mode 100644 cmd/symbols/internal/gitserver/client.go create mode 100644 cmd/symbols/internal/gitserver/client_test.go create mode 100644 cmd/symbols/internal/gitserver/observability.go create mode 100644 cmd/symbols/internal/gitserver/tar.go create mode 100644 cmd/symbols/internal/parser/observability.go create mode 100644 cmd/symbols/internal/parser/parser.go create mode 100644 cmd/symbols/internal/parser/parser_factory.go create mode 100644 cmd/symbols/internal/parser/parser_factory_ctags.go rename cmd/symbols/internal/{symbols/ctags_test.go => parser/parser_factory_ctags_test.go} (93%) create mode 100644 cmd/symbols/internal/parser/parser_pool.go delete mode 100644 cmd/symbols/internal/symbols/ctags.go delete mode 100644 cmd/symbols/internal/symbols/fetch.go delete mode 100644 cmd/symbols/internal/symbols/parse.go delete mode 100644 cmd/symbols/internal/symbols/search.go delete mode 100644 cmd/symbols/internal/symbols/search_test.go delete mode 100644 cmd/symbols/internal/symbols/service.go delete mode 100644 cmd/symbols/internal/symbols/service_test.go rename cmd/symbols/internal/{protocol/symbols.go => types/search_args.go} (98%) diff --git a/cmd/symbols/config.go 
b/cmd/symbols/config.go new file mode 100644 index 00000000000..4c192f7bb37 --- /dev/null +++ b/cmd/symbols/config.go @@ -0,0 +1,38 @@ +package main + +import ( + "os" + "runtime" + "strconv" + + "github.com/sourcegraph/sourcegraph/internal/env" +) + +type Config struct { + env.BaseConfig + + ctagsCommand string + ctagsPatternLengthLimit int + ctagsLogErrors bool + ctagsDebugLogs bool + + sanityCheck bool + cacheDir string + cacheSizeMB int + numCtagsProcesses int +} + +var config = &Config{} + +// Load reads from the environment and stores the transformed data on the config object for later retrieval. +func (c *Config) Load() { + c.ctagsCommand = c.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)") + c.ctagsPatternLengthLimit = c.GetInt("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags") + c.ctagsLogErrors = os.Getenv("DEPLOY_TYPE") == "dev" + c.ctagsDebugLogs = false + + c.sanityCheck = c.GetBool("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not") + c.cacheDir = c.Get("CACHE_DIR", "/tmp/symbols-cache", "directory in which to store cached symbols") + c.cacheSizeMB = c.GetInt("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache (in megabytes)") + c.numCtagsProcesses = c.GetInt("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of concurrent parser processes to run") +} diff --git a/cmd/symbols/internal/api/gen.go b/cmd/symbols/internal/api/gen.go new file mode 100644 index 00000000000..54c920d6cc3 --- /dev/null +++ b/cmd/symbols/internal/api/gen.go @@ -0,0 +1,3 @@ +package api + +//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go diff --git a/cmd/symbols/internal/api/handler.go b/cmd/symbols/internal/api/handler.go new file mode 100644 index 00000000000..dafd2bca006 --- 
/dev/null +++ b/cmd/symbols/internal/api/handler.go @@ -0,0 +1,79 @@ +package api + +import ( + "context" + "encoding/json" + "net/http" + + "github.com/cockroachdb/errors" + "github.com/inconshreveable/log15" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/internal/observation" +) + +type apiHandler struct { + cachedDatabaseWriter writer.CachedDatabaseWriter + operations *operations +} + +func NewHandler( + cachedDatabaseWriter writer.CachedDatabaseWriter, + observationContext *observation.Context, +) http.Handler { + h := newAPIHandler(cachedDatabaseWriter, observationContext) + + mux := http.NewServeMux() + mux.HandleFunc("/search", h.handleSearch) + mux.HandleFunc("/healthz", h.handleHealthCheck) + return mux +} + +func newAPIHandler( + cachedDatabaseWriter writer.CachedDatabaseWriter, + observationContext *observation.Context, +) *apiHandler { + return &apiHandler{ + cachedDatabaseWriter: cachedDatabaseWriter, + operations: newOperations(observationContext), + } +} + +const maxNumSymbolResults = 500 + +func (h *apiHandler) handleSearch(w http.ResponseWriter, r *http.Request) { + var args types.SearchArgs + if err := json.NewDecoder(r.Body).Decode(&args); err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return + } + + if args.First < 0 || args.First > maxNumSymbolResults { + args.First = maxNumSymbolResults + } + + result, err := h.handleSearchInternal(r.Context(), args) + if err != nil { + // Ignore reporting errors where client disconnected + if r.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) { + return + } + + log15.Error("Symbol search failed", "args", args, "error", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + if err := json.NewEncoder(w).Encode(result); err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + } +} + +func (h 
*apiHandler) handleHealthCheck(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + + if _, err := w.Write([]byte("OK")); err != nil { + log15.Error("failed to write response to health check, err: %s", err) + } +} diff --git a/cmd/symbols/internal/api/handler_test.go b/cmd/symbols/internal/api/handler_test.go new file mode 100644 index 00000000000..45cb1a9faa4 --- /dev/null +++ b/cmd/symbols/internal/api/handler_test.go @@ -0,0 +1,154 @@ +package api + +import ( + "context" + "net/http/httptest" + "os" + "reflect" + "testing" + "time" + + "github.com/sourcegraph/go-ctags" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser" + "github.com/sourcegraph/sourcegraph/internal/diskcache" + "github.com/sourcegraph/sourcegraph/internal/httpcli" + "github.com/sourcegraph/sourcegraph/internal/observation" + "github.com/sourcegraph/sourcegraph/internal/search" + "github.com/sourcegraph/sourcegraph/internal/search/result" + symbolsclient "github.com/sourcegraph/sourcegraph/internal/symbols" +) + +func init() { + database.Init() +} + +func TestHandler(t *testing.T) { + tmpDir, err := os.MkdirTemp("", "") + if err != nil { + t.Fatal(err) + } + defer func() { os.RemoveAll(tmpDir) }() + + cache := &diskcache.Store{ + Dir: tmpDir, + Component: "symbols", + BackgroundTimeout: 20 * time.Minute, + } + + parserFactory := func() (ctags.Parser, error) { + return newMockParser("x", "y"), nil + } + parserPool, err := parser.NewParserPool(parserFactory, 15) + if err != nil { + t.Fatal(err) + } + + files := map[string]string{ + "a.js": "var x = 1", + } + gitserverClient := NewMockGitserverClient() + 
gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(files)) + + parser := parser.NewParser(parserPool, fetcher.NewRepositoryFetcher(gitserverClient, 15, &observation.TestContext), &observation.TestContext) + databaseWriter := writer.NewDatabaseWriter(tmpDir, gitserverClient, parser) + cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache) + handler := NewHandler(cachedDatabaseWriter, &observation.TestContext) + + server := httptest.NewServer(handler) + defer server.Close() + + client := symbolsclient.Client{ + URL: server.URL, + HTTPClient: httpcli.InternalDoer, + } + + x := result.Symbol{Name: "x", Path: "a.js"} + y := result.Symbol{Name: "y", Path: "a.js"} + + testCases := map[string]struct { + args search.SymbolsParameters + expected result.Symbols + }{ + "simple": { + args: search.SymbolsParameters{First: 10}, + expected: []result.Symbol{x, y}, + }, + "onematch": { + args: search.SymbolsParameters{Query: "x", First: 10}, + expected: []result.Symbol{x}, + }, + "nomatches": { + args: search.SymbolsParameters{Query: "foo", First: 10}, + expected: nil, + }, + "caseinsensitiveexactmatch": { + args: search.SymbolsParameters{Query: "^X$", First: 10}, + expected: []result.Symbol{x}, + }, + "casesensitiveexactmatch": { + args: search.SymbolsParameters{Query: "^x$", IsCaseSensitive: true, First: 10}, + expected: []result.Symbol{x}, + }, + "casesensitivenoexactmatch": { + args: search.SymbolsParameters{Query: "^X$", IsCaseSensitive: true, First: 10}, + expected: nil, + }, + "caseinsensitiveexactpathmatch": { + args: search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, First: 10}, + expected: []result.Symbol{x, y}, + }, + "casesensitiveexactpathmatch": { + args: search.SymbolsParameters{IncludePatterns: []string{"^a.js$"}, IsCaseSensitive: true, First: 10}, + expected: []result.Symbol{x, y}, + }, + "casesensitivenoexactpathmatch": { + args: search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, 
IsCaseSensitive: true, First: 10}, + expected: nil, + }, + "exclude": { + args: search.SymbolsParameters{ExcludePattern: "a.js", IsCaseSensitive: true, First: 10}, + expected: nil, + }, + } + + for label, testCase := range testCases { + t.Run(label, func(t *testing.T) { + result, err := client.Search(context.Background(), testCase.args) + if err != nil { + t.Fatalf("unexpected error performing search: %s", err) + } + + if result == nil { + if testCase.expected != nil { + t.Errorf("unexpected search result. want=%+v, have=nil", testCase.expected) + } + } else if !reflect.DeepEqual(*result, testCase.expected) { + t.Errorf("unexpected search result. want=%+v, have=%+v", testCase.expected, *result) + } + }) + } +} + +type mockParser struct { + names []string +} + +func newMockParser(names ...string) ctags.Parser { + return &mockParser{names: names} +} + +func (m *mockParser) Parse(name string, content []byte) ([]*ctags.Entry, error) { + entries := make([]*ctags.Entry, 0, len(m.names)) + for _, name := range m.names { + entries = append(entries, &ctags.Entry{Name: name, Path: "a.js"}) + } + + return entries, nil +} + +func (m *mockParser) Close() {} diff --git a/cmd/symbols/internal/api/mock_iface_test.go b/cmd/symbols/internal/api/mock_iface_test.go new file mode 100644 index 00000000000..9cd9567c326 --- /dev/null +++ b/cmd/symbols/internal/api/mock_iface_test.go @@ -0,0 +1,304 @@ +// Code generated by go-mockgen 1.1.2; DO NOT EDIT. + +package api + +import ( + "context" + "io" + "sync" + + gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver" + api "github.com/sourcegraph/sourcegraph/internal/api" +) + +// MockGitserverClient is a mock implementation of the GitserverClient +// interface (from the package +// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used +// for unit testing. 
+type MockGitserverClient struct { + // FetchTarFunc is an instance of a mock function object controlling the + // behavior of the method FetchTar. + FetchTarFunc *GitserverClientFetchTarFunc + // GitDiffFunc is an instance of a mock function object controlling the + // behavior of the method GitDiff. + GitDiffFunc *GitserverClientGitDiffFunc +} + +// NewMockGitserverClient creates a new mock of the GitserverClient +// interface. All methods return zero values for all results, unless +// overwritten. +func NewMockGitserverClient() *MockGitserverClient { + return &MockGitserverClient{ + FetchTarFunc: &GitserverClientFetchTarFunc{ + defaultHook: func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + return nil, nil + }, + }, + GitDiffFunc: &GitserverClientGitDiffFunc{ + defaultHook: func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + return gitserver.Changes{}, nil + }, + }, + } +} + +// NewStrictMockGitserverClient creates a new mock of the GitserverClient +// interface. All methods panic on invocation, unless overwritten. +func NewStrictMockGitserverClient() *MockGitserverClient { + return &MockGitserverClient{ + FetchTarFunc: &GitserverClientFetchTarFunc{ + defaultHook: func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + panic("unexpected invocation of MockGitserverClient.FetchTar") + }, + }, + GitDiffFunc: &GitserverClientGitDiffFunc{ + defaultHook: func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + panic("unexpected invocation of MockGitserverClient.GitDiff") + }, + }, + } +} + +// NewMockGitserverClientFrom creates a new mock of the MockGitserverClient +// interface. All methods delegate to the given implementation, unless +// overwritten. 
+func NewMockGitserverClientFrom(i gitserver.GitserverClient) *MockGitserverClient { + return &MockGitserverClient{ + FetchTarFunc: &GitserverClientFetchTarFunc{ + defaultHook: i.FetchTar, + }, + GitDiffFunc: &GitserverClientGitDiffFunc{ + defaultHook: i.GitDiff, + }, + } +} + +// GitserverClientFetchTarFunc describes the behavior when the FetchTar +// method of the parent MockGitserverClient instance is invoked. +type GitserverClientFetchTarFunc struct { + defaultHook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) + hooks []func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) + history []GitserverClientFetchTarFuncCall + mutex sync.Mutex +} + +// FetchTar delegates to the next hook function in the queue and stores the +// parameter and result values of this invocation. +func (m *MockGitserverClient) FetchTar(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 []string) (io.ReadCloser, error) { + r0, r1 := m.FetchTarFunc.nextHook()(v0, v1, v2, v3) + m.FetchTarFunc.appendCall(GitserverClientFetchTarFuncCall{v0, v1, v2, v3, r0, r1}) + return r0, r1 +} + +// SetDefaultHook sets function that is called when the FetchTar method of +// the parent MockGitserverClient instance is invoked and the hook queue is +// empty. +func (f *GitserverClientFetchTarFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)) { + f.defaultHook = hook +} + +// PushHook adds a function to the end of hook queue. Each invocation of the +// FetchTar method of the parent MockGitserverClient instance invokes the +// hook at the front of the queue and discards it. After the queue is empty, +// the default hook function is invoked for any future action. 
+func (f *GitserverClientFetchTarFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)) { + f.mutex.Lock() + f.hooks = append(f.hooks, hook) + f.mutex.Unlock() +} + +// SetDefaultReturn calls SetDefaultDefaultHook with a function that returns +// the given values. +func (f *GitserverClientFetchTarFunc) SetDefaultReturn(r0 io.ReadCloser, r1 error) { + f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + return r0, r1 + }) +} + +// PushReturn calls PushDefaultHook with a function that returns the given +// values. +func (f *GitserverClientFetchTarFunc) PushReturn(r0 io.ReadCloser, r1 error) { + f.PushHook(func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + return r0, r1 + }) +} + +func (f *GitserverClientFetchTarFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + f.mutex.Lock() + defer f.mutex.Unlock() + + if len(f.hooks) == 0 { + return f.defaultHook + } + + hook := f.hooks[0] + f.hooks = f.hooks[1:] + return hook +} + +func (f *GitserverClientFetchTarFunc) appendCall(r0 GitserverClientFetchTarFuncCall) { + f.mutex.Lock() + f.history = append(f.history, r0) + f.mutex.Unlock() +} + +// History returns a sequence of GitserverClientFetchTarFuncCall objects +// describing the invocations of this function. +func (f *GitserverClientFetchTarFunc) History() []GitserverClientFetchTarFuncCall { + f.mutex.Lock() + history := make([]GitserverClientFetchTarFuncCall, len(f.history)) + copy(history, f.history) + f.mutex.Unlock() + + return history +} + +// GitserverClientFetchTarFuncCall is an object that describes an invocation +// of method FetchTar on an instance of MockGitserverClient. +type GitserverClientFetchTarFuncCall struct { + // Arg0 is the value of the 1st argument passed to this method + // invocation. 
+ Arg0 context.Context + // Arg1 is the value of the 2nd argument passed to this method + // invocation. + Arg1 api.RepoName + // Arg2 is the value of the 3rd argument passed to this method + // invocation. + Arg2 api.CommitID + // Arg3 is the value of the 4th argument passed to this method + // invocation. + Arg3 []string + // Result0 is the value of the 1st result returned from this method + // invocation. + Result0 io.ReadCloser + // Result1 is the value of the 2nd result returned from this method + // invocation. + Result1 error +} + +// Args returns an interface slice containing the arguments of this +// invocation. +func (c GitserverClientFetchTarFuncCall) Args() []interface{} { + return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3} +} + +// Results returns an interface slice containing the results of this +// invocation. +func (c GitserverClientFetchTarFuncCall) Results() []interface{} { + return []interface{}{c.Result0, c.Result1} +} + +// GitserverClientGitDiffFunc describes the behavior when the GitDiff method +// of the parent MockGitserverClient instance is invoked. +type GitserverClientGitDiffFunc struct { + defaultHook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) + hooks []func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) + history []GitserverClientGitDiffFuncCall + mutex sync.Mutex +} + +// GitDiff delegates to the next hook function in the queue and stores the +// parameter and result values of this invocation. +func (m *MockGitserverClient) GitDiff(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 api.CommitID) (gitserver.Changes, error) { + r0, r1 := m.GitDiffFunc.nextHook()(v0, v1, v2, v3) + m.GitDiffFunc.appendCall(GitserverClientGitDiffFuncCall{v0, v1, v2, v3, r0, r1}) + return r0, r1 +} + +// SetDefaultHook sets function that is called when the GitDiff method of +// the parent MockGitserverClient instance is invoked and the hook queue is +// empty. 
+func (f *GitserverClientGitDiffFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)) { + f.defaultHook = hook +} + +// PushHook adds a function to the end of hook queue. Each invocation of the +// GitDiff method of the parent MockGitserverClient instance invokes the +// hook at the front of the queue and discards it. After the queue is empty, +// the default hook function is invoked for any future action. +func (f *GitserverClientGitDiffFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)) { + f.mutex.Lock() + f.hooks = append(f.hooks, hook) + f.mutex.Unlock() +} + +// SetDefaultReturn calls SetDefaultDefaultHook with a function that returns +// the given values. +func (f *GitserverClientGitDiffFunc) SetDefaultReturn(r0 gitserver.Changes, r1 error) { + f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + return r0, r1 + }) +} + +// PushReturn calls PushDefaultHook with a function that returns the given +// values. +func (f *GitserverClientGitDiffFunc) PushReturn(r0 gitserver.Changes, r1 error) { + f.PushHook(func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + return r0, r1 + }) +} + +func (f *GitserverClientGitDiffFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + f.mutex.Lock() + defer f.mutex.Unlock() + + if len(f.hooks) == 0 { + return f.defaultHook + } + + hook := f.hooks[0] + f.hooks = f.hooks[1:] + return hook +} + +func (f *GitserverClientGitDiffFunc) appendCall(r0 GitserverClientGitDiffFuncCall) { + f.mutex.Lock() + f.history = append(f.history, r0) + f.mutex.Unlock() +} + +// History returns a sequence of GitserverClientGitDiffFuncCall objects +// describing the invocations of this function. 
+func (f *GitserverClientGitDiffFunc) History() []GitserverClientGitDiffFuncCall { + f.mutex.Lock() + history := make([]GitserverClientGitDiffFuncCall, len(f.history)) + copy(history, f.history) + f.mutex.Unlock() + + return history +} + +// GitserverClientGitDiffFuncCall is an object that describes an invocation +// of method GitDiff on an instance of MockGitserverClient. +type GitserverClientGitDiffFuncCall struct { + // Arg0 is the value of the 1st argument passed to this method + // invocation. + Arg0 context.Context + // Arg1 is the value of the 2nd argument passed to this method + // invocation. + Arg1 api.RepoName + // Arg2 is the value of the 3rd argument passed to this method + // invocation. + Arg2 api.CommitID + // Arg3 is the value of the 4th argument passed to this method + // invocation. + Arg3 api.CommitID + // Result0 is the value of the 1st result returned from this method + // invocation. + Result0 gitserver.Changes + // Result1 is the value of the 2nd result returned from this method + // invocation. + Result1 error +} + +// Args returns an interface slice containing the arguments of this +// invocation. +func (c GitserverClientGitDiffFuncCall) Args() []interface{} { + return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3} +} + +// Results returns an interface slice containing the results of this +// invocation. 
+func (c GitserverClientGitDiffFuncCall) Results() []interface{} { + return []interface{}{c.Result0, c.Result1} +} diff --git a/cmd/symbols/internal/api/observability.go b/cmd/symbols/internal/api/observability.go new file mode 100644 index 00000000000..3038d90418f --- /dev/null +++ b/cmd/symbols/internal/api/observability.go @@ -0,0 +1,34 @@ +package api + +import ( + "fmt" + + "github.com/sourcegraph/sourcegraph/internal/metrics" + "github.com/sourcegraph/sourcegraph/internal/observation" +) + +type operations struct { + search *observation.Operation +} + +func newOperations(observationContext *observation.Context) *operations { + metrics := metrics.NewREDMetrics( + observationContext.Registerer, + "codeintel_symbols_api", + metrics.WithLabels("op"), + metrics.WithCountHelp("Total number of method invocations."), + metrics.WithDurationBuckets([]float64{1, 2, 5, 10, 30, 60}), + ) + + op := func(name string) *observation.Operation { + return observationContext.Operation(observation.Op{ + Name: fmt.Sprintf("codeintel.symbols.api.%s", name), + MetricLabelValues: []string{name}, + Metrics: metrics, + }) + } + + return &operations{ + search: op("Search"), + } +} diff --git a/cmd/symbols/internal/api/search.go b/cmd/symbols/internal/api/search.go new file mode 100644 index 00000000000..3fbed7f6c36 --- /dev/null +++ b/cmd/symbols/internal/api/search.go @@ -0,0 +1,52 @@ +package api + +import ( + "context" + "strings" + "time" + + "github.com/cockroachdb/errors" + "github.com/opentracing/opentracing-go/log" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/internal/observation" + "github.com/sourcegraph/sourcegraph/internal/search/result" +) + +const searchTimeout = 60 * time.Second + +func (h *apiHandler) handleSearchInternal(ctx context.Context, args types.SearchArgs) (_ *result.Symbols, err error) { + ctx, traceLog, endObservation := 
h.operations.search.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{ + log.String("repo", string(args.Repo)), + log.String("commitID", string(args.CommitID)), + log.String("query", args.Query), + log.Bool("isRegExp", args.IsRegExp), + log.Bool("isCaseSensitive", args.IsCaseSensitive), + log.Int("numIncludePatterns", len(args.IncludePatterns)), + log.String("includePatterns", strings.Join(args.IncludePatterns, ":")), + log.String("excludePattern", args.ExcludePattern), + log.Int("first", args.First), + }}) + defer endObservation(1, observation.Args{}) + + ctx, cancel := context.WithTimeout(ctx, searchTimeout) + defer cancel() + + dbFile, err := h.cachedDatabaseWriter.GetOrCreateDatabaseFile(ctx, args) + if err != nil { + return nil, errors.Wrap(err, "databaseWriter.GetOrCreateDatabaseFile") + } + traceLog(log.String("dbFile", dbFile)) + + var results result.Symbols + err = store.WithSQLiteStore(dbFile, func(db store.Store) (err error) { + if results, err = db.Search(ctx, args); err != nil { + return errors.Wrap(err, "store.Search") + } + + return nil + }) + + return &results, err +} diff --git a/cmd/symbols/internal/database/init.go b/cmd/symbols/internal/database/init.go new file mode 100644 index 00000000000..34fc375309e --- /dev/null +++ b/cmd/symbols/internal/database/init.go @@ -0,0 +1,17 @@ +package database + +import ( + "database/sql" + "regexp" + + "github.com/mattn/go-sqlite3" +) + +func Init() { + sql.Register("sqlite3_with_regexp", + &sqlite3.SQLiteDriver{ + ConnectHook: func(conn *sqlite3.SQLiteConn) error { + return conn.RegisterFunc("REGEXP", regexp.MatchString, true) + }, + }) +} diff --git a/cmd/symbols/internal/database/janitor/cache_evicter.go b/cmd/symbols/internal/database/janitor/cache_evicter.go new file mode 100644 index 00000000000..68ee68bee89 --- /dev/null +++ b/cmd/symbols/internal/database/janitor/cache_evicter.go @@ -0,0 +1,57 @@ +package janitor + +import ( + "context" + "time" + + "github.com/cockroachdb/errors" + 
"github.com/inconshreveable/log15" + + "github.com/sourcegraph/sourcegraph/internal/diskcache" + "github.com/sourcegraph/sourcegraph/internal/goroutine" +) + +type cacheEvicter struct { + // cache is the disk backed cache. + cache *diskcache.Store + + // maxCacheSizeBytes is the maximum size of the cache in bytes. Note that we can + // be larger than maxCacheSizeBytes temporarily between runs of this handler. + // When we go over maxCacheSizeBytes we trigger delete files until we get below + // maxCacheSizeBytes. + maxCacheSizeBytes int64 + + metrics *Metrics +} + +var _ goroutine.Handler = &cacheEvicter{} +var _ goroutine.ErrorHandler = &cacheEvicter{} + +func NewCacheEvicter(interval time.Duration, cache *diskcache.Store, maxCacheSizeBytes int64, metrics *Metrics) goroutine.BackgroundRoutine { + return goroutine.NewPeriodicGoroutine(context.Background(), interval, &cacheEvicter{ + cache: cache, + maxCacheSizeBytes: maxCacheSizeBytes, + metrics: metrics, + }) +} + +// Handle periodically checks the size of the cache and evicts/deletes items. 
+func (e *cacheEvicter) Handle(ctx context.Context) error { + if e.maxCacheSizeBytes == 0 { + return nil + } + + stats, err := e.cache.Evict(e.maxCacheSizeBytes) + if err != nil { + return errors.Wrap(err, "cache.Evict") + } + + e.metrics.cacheSizeBytes.Set(float64(stats.CacheSize)) + e.metrics.evictions.Add(float64(stats.Evicted)) + return nil +} + +func (e *cacheEvicter) HandleError(err error) { + e.metrics.errors.Inc() + log15.Error("Failed to evict items from cache", "error", err) +} diff --git a/cmd/symbols/internal/database/janitor/metrics.go b/cmd/symbols/internal/database/janitor/metrics.go new file mode 100644 index 00000000000..3b8bcdf7f2b --- /dev/null +++ b/cmd/symbols/internal/database/janitor/metrics.go @@ -0,0 +1,42 @@ +package janitor + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/sourcegraph/sourcegraph/internal/observation" +) + +type Metrics struct { + cacheSizeBytes prometheus.Gauge + evictions prometheus.Counter + errors prometheus.Counter +} + +func NewMetrics(observationContext *observation.Context) *Metrics { + cacheSizeBytes := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "src", + Name: "codeintel_symbols_store_cache_size_bytes", + Help: "The total size of items in the on disk cache.", + }) + observationContext.Registerer.MustRegister(cacheSizeBytes) + + evictions := prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "src", + Name: "codeintel_symbols_store_evictions_total", + Help: "The total number of items evicted from the cache.", + }) + observationContext.Registerer.MustRegister(evictions) + + errors := prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "src", + Name: "codeintel_symbols_store_errors_total", + Help: "The total number of failures evicting items from the cache.", + }) + observationContext.Registerer.MustRegister(errors) + + return &Metrics{ + cacheSizeBytes: cacheSizeBytes, + evictions: evictions, + errors: errors, + } +} diff --git 
a/cmd/symbols/internal/database/sanity_check.go b/cmd/symbols/internal/database/sanity_check.go new file mode 100644 index 00000000000..19d5809be8f --- /dev/null +++ b/cmd/symbols/internal/database/sanity_check.go @@ -0,0 +1,20 @@ +package database + +import "github.com/jmoiron/sqlx" + +// SanityCheck makes sure that go-sqlite3 was compiled with cgo by seeing if we can actually create a table. +func SanityCheck() error { + db, err := sqlx.Open("sqlite3_with_regexp", ":memory:") + if err != nil { + return err + } + defer db.Close() + + // If go-sqlite3 was not compiled with cgo, the error will be: + // > Binary was compiled with 'CGO_ENABLED=0', go-sqlite3 requires cgo to work. This is a stub + if _, err := db.Exec("CREATE TABLE test (col TEXT);"); err != nil { + return err + } + + return nil +} diff --git a/cmd/symbols/internal/database/store/meta.go b/cmd/symbols/internal/database/store/meta.go new file mode 100644 index 00000000000..db0e001975c --- /dev/null +++ b/cmd/symbols/internal/database/store/meta.go @@ -0,0 +1,30 @@ +package store + +import ( + "context" + + "github.com/keegancsmith/sqlf" + + "github.com/sourcegraph/sourcegraph/internal/database/basestore" +) + +func (w *store) CreateMetaTable(ctx context.Context) error { + return w.Exec(ctx, sqlf.Sprintf(` + CREATE TABLE IF NOT EXISTS meta ( + id INTEGER PRIMARY KEY CHECK (id = 0), + revision TEXT NOT NULL + ) + `)) +} + +func (s *store) GetCommit(ctx context.Context) (string, bool, error) { + return basestore.ScanFirstString(s.Query(ctx, sqlf.Sprintf(`SELECT revision FROM meta`))) +} + +func (s *store) InsertMeta(ctx context.Context, commitID string) error { + return s.Exec(ctx, sqlf.Sprintf(`INSERT INTO meta (id, revision) VALUES (0, %s)`, commitID)) +} + +func (s *store) UpdateMeta(ctx context.Context, commitID string) error { + return s.Exec(ctx, sqlf.Sprintf(`UPDATE meta SET revision = %s`, commitID)) +} diff --git a/cmd/symbols/internal/database/store/search.go 
b/cmd/symbols/internal/database/store/search.go new file mode 100644 index 00000000000..3dfe03ab7e4 --- /dev/null +++ b/cmd/symbols/internal/database/store/search.go @@ -0,0 +1,143 @@ +package store + +import ( + "context" + "database/sql" + "regexp/syntax" + "strings" + + "github.com/cockroachdb/errors" + "github.com/keegancsmith/sqlf" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/internal/database/basestore" + "github.com/sourcegraph/sourcegraph/internal/search/result" +) + +func scanSymbols(rows *sql.Rows, queryErr error) (symbols []result.Symbol, err error) { + if queryErr != nil { + return nil, queryErr + } + defer func() { err = basestore.CloseRows(rows, err) }() + + for rows.Next() { + var symbol result.Symbol + if err := rows.Scan( + &symbol.Name, + &symbol.Path, + &symbol.Line, + &symbol.Kind, + &symbol.Language, + &symbol.Parent, + &symbol.ParentKind, + &symbol.Signature, + &symbol.Pattern, + &symbol.FileLimited, + ); err != nil { + return nil, err + } + + symbols = append(symbols, symbol) + } + + return symbols, nil +} + +func (s *store) Search(ctx context.Context, args types.SearchArgs) ([]result.Symbol, error) { + return scanSymbols(s.Query(ctx, sqlf.Sprintf( + ` + SELECT + name, + path, + line, + kind, + language, + parent, + parentkind, + signature, + pattern, + filelimited + FROM symbols + WHERE %s + LIMIT %s + `, + sqlf.Join(makeSearchConditions(args), "AND"), + args.First, + ))) +} + +func makeSearchConditions(args types.SearchArgs) []*sqlf.Query { + conditions := make([]*sqlf.Query, 0, 2+len(args.IncludePatterns)) + conditions = append(conditions, makeSearchCondition("name", args.Query, args.IsCaseSensitive)) + conditions = append(conditions, negate(makeSearchCondition("path", args.ExcludePattern, args.IsCaseSensitive))) + for _, includePattern := range args.IncludePatterns { + conditions = append(conditions, makeSearchCondition("path", includePattern, args.IsCaseSensitive)) + } + + 
filtered := conditions[:0] + for _, condition := range conditions { + if condition != nil { + filtered = append(filtered, condition) + } + } + + if len(filtered) == 0 { + // Ensure we have at least one condition + filtered = append(filtered, sqlf.Sprintf("TRUE")) + } + + return filtered +} + +func makeSearchCondition(column string, regex string, isCaseSensitive bool) *sqlf.Query { + if regex == "" { + return nil + } + + if symbolName, isExact, err := isLiteralEquality(regex); err == nil && isExact { + if isCaseSensitive { + return sqlf.Sprintf(column+" = %s", symbolName) + } else { + return sqlf.Sprintf(column+"lowercase = %s", strings.ToLower(symbolName)) + } + } + + if !isCaseSensitive { + regex = "(?i:" + regex + ")" + } + return sqlf.Sprintf(column+" REGEXP %s", regex) +} + +// isLiteralEquality returns true if the given regex matches literal strings exactly. +// If so, this function returns true along with the literal search query. If not, this +// function returns false. +func isLiteralEquality(expr string) (string, bool, error) { + regexp, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + return "", false, errors.Wrap(err, "regexp/syntax.Parse") + } + + // want a concat of size 3 which is [begin, literal, end] + if regexp.Op == syntax.OpConcat && len(regexp.Sub) == 3 { + // starts with ^ + if regexp.Sub[0].Op == syntax.OpBeginLine || regexp.Sub[0].Op == syntax.OpBeginText { + // is a literal + if regexp.Sub[1].Op == syntax.OpLiteral { + // ends with $ + if regexp.Sub[2].Op == syntax.OpEndLine || regexp.Sub[2].Op == syntax.OpEndText { + return string(regexp.Sub[1].Rune), true, nil + } + } + } + } + + return "", false, nil +} + +func negate(query *sqlf.Query) *sqlf.Query { + if query == nil { + return nil + } + + return sqlf.Sprintf("NOT %s", query) +} diff --git a/cmd/symbols/internal/database/store/search_test.go b/cmd/symbols/internal/database/store/search_test.go new file mode 100644 index 00000000000..168ea75fa7b --- /dev/null +++ 
b/cmd/symbols/internal/database/store/search_test.go @@ -0,0 +1,41 @@ +package store + +import "testing" + +func TestIsLiteralEquality(t *testing.T) { + for _, test := range []struct { + regex string + noMatch bool + expectedLiteral string + }{ + {regex: `^foo$`, expectedLiteral: "foo"}, + {regex: `^[f]oo$`, expectedLiteral: `foo`}, + {regex: `^\\$`, expectedLiteral: `\`}, + {regex: `^\$`, noMatch: true}, + {regex: `^\($`, expectedLiteral: `(`}, + {regex: `\\`, noMatch: true}, + {regex: `\$`, noMatch: true}, + {regex: `\(`, noMatch: true}, + {regex: `foo$`, noMatch: true}, + {regex: `(^foo$|^bar$)`, noMatch: true}, + } { + literal, ok, err := isLiteralEquality(test.regex) + if err != nil { + t.Fatal(err) + } + if !ok { + if !test.noMatch { + t.Errorf("expected a match") + } + } else if test.noMatch { + t.Errorf("did not expect a match") + } else if literal != test.expectedLiteral { + t.Errorf( + "unexpected literal for %q. want=%q have=%q", + test.regex, + test.expectedLiteral, + literal, + ) + } + } +} diff --git a/cmd/symbols/internal/database/store/store.go b/cmd/symbols/internal/database/store/store.go new file mode 100644 index 00000000000..c9ba0fbfbc4 --- /dev/null +++ b/cmd/symbols/internal/database/store/store.go @@ -0,0 +1,87 @@ +package store + +import ( + "context" + "database/sql" + + "github.com/inconshreveable/log15" + "github.com/jmoiron/sqlx" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/internal/database/basestore" + "github.com/sourcegraph/sourcegraph/internal/search/result" +) + +type Store interface { + Close() error + Transact(ctx context.Context) (Store, error) + Done(err error) error + + Search(ctx context.Context, args types.SearchArgs) ([]result.Symbol, error) + + CreateMetaTable(ctx context.Context) error + GetCommit(ctx context.Context) (string, bool, error) + InsertMeta(ctx context.Context, commitID string) error + UpdateMeta(ctx context.Context, commitID string) error + + 
CreateSymbolsTable(ctx context.Context) error + CreateSymbolIndexes(ctx context.Context) error + DeletePaths(ctx context.Context, paths []string) error + WriteSymbols(ctx context.Context, symbols <-chan result.Symbol) error +} + +type store struct { + db *sqlx.DB + *basestore.Store +} + +func NewStore(dbFile string) (Store, error) { + db, err := sqlx.Open("sqlite3_with_regexp", dbFile) + if err != nil { + return nil, err + } + + return &store{ + db: db, + Store: basestore.NewWithDB(db, sql.TxOptions{}), + }, nil +} + +func (s *store) Close() error { + return s.db.Close() +} + +func (s *store) Transact(ctx context.Context) (Store, error) { + tx, err := s.Store.Transact(ctx) + if err != nil { + return nil, err + } + + return &store{db: s.db, Store: tx}, nil +} + +func WithSQLiteStore(dbFile string, callback func(db Store) error) error { + db, err := NewStore(dbFile) + if err != nil { + return err + } + defer func() { + if err := db.Close(); err != nil { + log15.Error("Failed to close database", "filename", dbFile, "error", err) + } + }() + + return callback(db) +} + +func WithSQLiteStoreTransaction(ctx context.Context, dbFile string, callback func(db Store) error) error { + return WithSQLiteStore(dbFile, func(db Store) (err error) { + tx, err := db.Transact(ctx) + if err != nil { + return err + } + defer func() { err = tx.Done(err) }() + + return callback(tx) + }) +} diff --git a/cmd/symbols/internal/database/store/symbols.go b/cmd/symbols/internal/database/store/symbols.go new file mode 100644 index 00000000000..1b4da5aa4e2 --- /dev/null +++ b/cmd/symbols/internal/database/store/symbols.go @@ -0,0 +1,107 @@ +package store + +import ( + "context" + "strings" + + "github.com/keegancsmith/sqlf" + + "github.com/sourcegraph/sourcegraph/internal/database/batch" + "github.com/sourcegraph/sourcegraph/internal/search/result" +) + +func (s *store) CreateSymbolsTable(ctx context.Context) error { + return s.Exec(ctx, sqlf.Sprintf(` + CREATE TABLE IF NOT EXISTS symbols ( + name 
VARCHAR(256) NOT NULL, + namelowercase VARCHAR(256) NOT NULL, + path VARCHAR(4096) NOT NULL, + pathlowercase VARCHAR(4096) NOT NULL, + line INT NOT NULL, + kind VARCHAR(255) NOT NULL, + language VARCHAR(255) NOT NULL, + parent VARCHAR(255) NOT NULL, + parentkind VARCHAR(255) NOT NULL, + signature VARCHAR(255) NOT NULL, + pattern VARCHAR(255) NOT NULL, + filelimited BOOLEAN NOT NULL + ) + `)) +} + +func (s *store) CreateSymbolIndexes(ctx context.Context) error { + createIndexQueries := []string{ + `CREATE INDEX idx_name ON symbols(name)`, + `CREATE INDEX idx_path ON symbols(path)`, + `CREATE INDEX idx_namelowercase ON symbols(namelowercase)`, + `CREATE INDEX idx_pathlowercase ON symbols(pathlowercase)`, + } + + for _, query := range createIndexQueries { + if err := s.Exec(ctx, sqlf.Sprintf(query)); err != nil { + return err + } + } + + return nil +} + +func (s *store) DeletePaths(ctx context.Context, paths []string) error { + if len(paths) == 0 { + return nil + } + + pathQueries := make([]*sqlf.Query, 0, len(paths)) + for _, path := range paths { + pathQueries = append(pathQueries, sqlf.Sprintf("%s", path)) + } + + return s.Exec(ctx, sqlf.Sprintf(`DELETE FROM symbols WHERE path IN (%s)`, sqlf.Join(pathQueries, ","))) +} + +func (s *store) WriteSymbols(ctx context.Context, symbols <-chan result.Symbol) (err error) { + rows := make(chan []interface{}) + + go func() { + defer close(rows) + + for symbol := range symbols { + rows <- []interface{}{ + symbol.Name, + strings.ToLower(symbol.Name), + symbol.Path, + strings.ToLower(symbol.Path), + symbol.Line, + symbol.Kind, + symbol.Language, + symbol.Parent, + symbol.ParentKind, + symbol.Signature, + symbol.Pattern, + symbol.FileLimited, + } + } + }() + + return batch.InsertValues( + ctx, + s.Handle().DB(), + "symbols", + batch.MaxNumSQLiteParameters, + []string{ + "name", + "namelowercase", + "path", + "pathlowercase", + "line", + "kind", + "language", + "parent", + "parentkind", + "signature", + "pattern", + "filelimited", 
+ }, + rows, + ) +} diff --git a/cmd/symbols/internal/database/writer/cache.go b/cmd/symbols/internal/database/writer/cache.go new file mode 100644 index 00000000000..5b77bdd92ac --- /dev/null +++ b/cmd/symbols/internal/database/writer/cache.go @@ -0,0 +1,53 @@ +package writer + +import ( + "context" + "fmt" + + "github.com/cockroachdb/errors" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/internal/diskcache" +) + +type CachedDatabaseWriter interface { + GetOrCreateDatabaseFile(ctx context.Context, args types.SearchArgs) (string, error) +} + +type cachedDatabaseWriter struct { + databaseWriter DatabaseWriter + cache *diskcache.Store +} + +func NewCachedDatabaseWriter(databaseWriter DatabaseWriter, cache *diskcache.Store) CachedDatabaseWriter { + return &cachedDatabaseWriter{ + databaseWriter: databaseWriter, + cache: cache, + } +} + +// The version of the symbols database schema. This is included in the database filenames to prevent a +// newer version of the symbols service from attempting to read from a database created by an older and +// likely incompatible symbols service. Increment this when you change the database schema. 
+const symbolsDBVersion = 4 + +func (w *cachedDatabaseWriter) GetOrCreateDatabaseFile(ctx context.Context, args types.SearchArgs) (string, error) { + key := []string{ + string(args.Repo), + fmt.Sprintf("%s-%d", args.CommitID, symbolsDBVersion), + } + + cacheFile, err := w.cache.OpenWithPath(ctx, key, func(fetcherCtx context.Context, tempDBFile string) error { + if err := w.databaseWriter.WriteDBFile(fetcherCtx, args, tempDBFile); err != nil { + return errors.Wrap(err, "databaseWriter.WriteDBFile") + } + + return nil + }) + if err != nil { + return "", err + } + defer cacheFile.File.Close() + + return cacheFile.File.Name(), err +} diff --git a/cmd/symbols/internal/database/writer/fs.go b/cmd/symbols/internal/database/writer/fs.go new file mode 100644 index 00000000000..a3ef4d6ce37 --- /dev/null +++ b/cmd/symbols/internal/database/writer/fs.go @@ -0,0 +1,59 @@ +package writer + +import ( + "io" + "os" + "path/filepath" + "strings" + "time" +) + +// findNewestFile lists the directory and returns the newest file's path, prepended with dir. 
+func findNewestFile(dir string) (string, error) { + files, err := os.ReadDir(dir) + if err != nil { + return "", nil + } + + var mostRecentTime time.Time + newest := "" + for _, fi := range files { + if fi.Type().IsRegular() { + if !strings.HasSuffix(fi.Name(), ".zip") { + continue + } + + info, err := fi.Info() + if err != nil { + return "", err + } + + if newest == "" || info.ModTime().After(mostRecentTime) { + mostRecentTime = info.ModTime() + newest = filepath.Join(dir, fi.Name()) + } + } + } + + return newest, nil +} + +func copyFile(from string, to string) error { + fromFile, err := os.Open(from) + if err != nil { + return err + } + defer fromFile.Close() + + toFile, err := os.OpenFile(to, os.O_RDWR|os.O_CREATE, 0666) + if err != nil { + return err + } + defer toFile.Close() + + if _, err := io.Copy(toFile, fromFile); err != nil { + return err + } + + return nil +} diff --git a/cmd/symbols/internal/database/writer/writer.go b/cmd/symbols/internal/database/writer/writer.go new file mode 100644 index 00000000000..719420ac4ad --- /dev/null +++ b/cmd/symbols/internal/database/writer/writer.go @@ -0,0 +1,156 @@ +package writer + +import ( + "context" + "path/filepath" + + "github.com/cockroachdb/errors" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/internal/diskcache" + "github.com/sourcegraph/sourcegraph/internal/search/result" +) + +type DatabaseWriter interface { + WriteDBFile(ctx context.Context, args types.SearchArgs, tempDBFile string) error +} + +type databaseWriter struct { + path string + gitserverClient gitserver.GitserverClient + parser parser.Parser +} + +func NewDatabaseWriter( + path string, + gitserverClient 
gitserver.GitserverClient, + parser parser.Parser, +) DatabaseWriter { + return &databaseWriter{ + path: path, + gitserverClient: gitserverClient, + parser: parser, + } +} + +func (w *databaseWriter) WriteDBFile(ctx context.Context, args types.SearchArgs, dbFile string) error { + if newestDBFile, oldCommit, ok, err := w.getNewestCommit(ctx, args); err != nil { + return err + } else if ok { + if ok, err := w.writeFileIncrementally(ctx, args, dbFile, newestDBFile, oldCommit); err != nil || ok { + return err + } + } + + return w.writeDBFile(ctx, args, dbFile) +} + +func (w *databaseWriter) getNewestCommit(ctx context.Context, args types.SearchArgs) (dbFile string, commit string, ok bool, err error) { + newest, err := findNewestFile(filepath.Join(w.path, diskcache.EncodeKeyComponent(string(args.Repo)))) + if err != nil || newest == "" { + return "", "", false, err + } + + err = store.WithSQLiteStore(newest, func(db store.Store) (err error) { + if commit, ok, err = db.GetCommit(ctx); err != nil { + return errors.Wrap(err, "store.GetCommit") + } + + return nil + }) + + return newest, commit, ok, err +} + +func (w *databaseWriter) writeDBFile(ctx context.Context, args types.SearchArgs, dbFile string) error { + return w.parseAndWriteInTransaction(ctx, args, nil, dbFile, func(tx store.Store, symbols <-chan result.Symbol) error { + if err := tx.CreateMetaTable(ctx); err != nil { + return errors.Wrap(err, "store.CreateMetaTable") + } + if err := tx.CreateSymbolsTable(ctx); err != nil { + return errors.Wrap(err, "store.CreateSymbolsTable") + } + if err := tx.InsertMeta(ctx, string(args.CommitID)); err != nil { + return errors.Wrap(err, "store.InsertMeta") + } + if err := tx.WriteSymbols(ctx, symbols); err != nil { + return errors.Wrap(err, "store.WriteSymbols") + } + if err := tx.CreateSymbolIndexes(ctx); err != nil { + return errors.Wrap(err, "store.CreateSymbolIndexes") + } + + return nil + }) +} + +// The maximum number of paths when doing incremental indexing. 
Diffs with more paths than this will +// not be incrementally indexed, and instead we will process all symbols. +const maxTotalPaths = 999 + +// The maximum sum of bytes in paths in a diff when doing incremental indexing. Diffs bigger than this +// will not be incrementally indexed, and instead we will process all symbols. Without this limit, we +// could hit HTTP 431 (header fields too large) when sending the list of paths `git archive paths...`. +// The actual limit is somewhere between 372KB and 450KB, and we want to be well under that. +// 100KB seems safe. +const maxTotalPathsLength = 100000 + +func (w *databaseWriter) writeFileIncrementally(ctx context.Context, args types.SearchArgs, dbFile, newestDBFile, oldCommit string) (bool, error) { + changes, err := w.gitserverClient.GitDiff(ctx, args.Repo, api.CommitID(oldCommit), args.CommitID) + if err != nil { + return false, errors.Wrap(err, "gitserverClient.GitDiff") + } + + // Paths to re-parse + addedOrModifiedPaths := append(changes.Added, changes.Modified...) + + // Paths to modify in the database + addedModifiedOrDeletedPaths := append(addedOrModifiedPaths, changes.Deleted...) 
+ + // Too many entries + if len(addedModifiedOrDeletedPaths) > maxTotalPaths { + return false, nil + } + + totalPathsLength := 0 + for _, path := range addedModifiedOrDeletedPaths { + totalPathsLength += len(path) + } + // Argument lists too long + if totalPathsLength > maxTotalPathsLength { + return false, nil + } + + if err := copyFile(newestDBFile, dbFile); err != nil { + return false, err + } + + return true, w.parseAndWriteInTransaction(ctx, args, addedOrModifiedPaths, dbFile, func(tx store.Store, symbols <-chan result.Symbol) error { + if err := tx.UpdateMeta(ctx, string(args.CommitID)); err != nil { + return errors.Wrap(err, "store.UpdateMeta") + } + if err := tx.DeletePaths(ctx, addedModifiedOrDeletedPaths); err != nil { + return errors.Wrap(err, "store.DeletePaths") + } + if err := tx.WriteSymbols(ctx, symbols); err != nil { + return errors.Wrap(err, "store.WriteSymbols") + } + + return nil + }) +} + +func (w *databaseWriter) parseAndWriteInTransaction(ctx context.Context, args types.SearchArgs, paths []string, dbFile string, callback func(tx store.Store, symbols <-chan result.Symbol) error) error { + symbols, err := w.parser.Parse(ctx, args, paths) + if err != nil { + return errors.Wrap(err, "parser.Parse") + } + + return store.WithSQLiteStoreTransaction(ctx, dbFile, func(tx store.Store) error { + return callback(tx, symbols) + }) +} diff --git a/cmd/symbols/internal/fetcher/gen.go b/cmd/symbols/internal/fetcher/gen.go new file mode 100644 index 00000000000..e1aace05b69 --- /dev/null +++ b/cmd/symbols/internal/fetcher/gen.go @@ -0,0 +1,3 @@ +package fetcher + +//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go diff --git a/cmd/symbols/internal/fetcher/mock_iface_test.go b/cmd/symbols/internal/fetcher/mock_iface_test.go new file mode 100644 index 00000000000..4ea6335dfb2 --- /dev/null +++ b/cmd/symbols/internal/fetcher/mock_iface_test.go @@ -0,0 +1,304 @@ +// 
Code generated by go-mockgen 1.1.2; DO NOT EDIT. + +package fetcher + +import ( + "context" + "io" + "sync" + + gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver" + api "github.com/sourcegraph/sourcegraph/internal/api" +) + +// MockGitserverClient is a mock implementation of the GitserverClient +// interface (from the package +// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used +// for unit testing. +type MockGitserverClient struct { + // FetchTarFunc is an instance of a mock function object controlling the + // behavior of the method FetchTar. + FetchTarFunc *GitserverClientFetchTarFunc + // GitDiffFunc is an instance of a mock function object controlling the + // behavior of the method GitDiff. + GitDiffFunc *GitserverClientGitDiffFunc +} + +// NewMockGitserverClient creates a new mock of the GitserverClient +// interface. All methods return zero values for all results, unless +// overwritten. +func NewMockGitserverClient() *MockGitserverClient { + return &MockGitserverClient{ + FetchTarFunc: &GitserverClientFetchTarFunc{ + defaultHook: func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + return nil, nil + }, + }, + GitDiffFunc: &GitserverClientGitDiffFunc{ + defaultHook: func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + return gitserver.Changes{}, nil + }, + }, + } +} + +// NewStrictMockGitserverClient creates a new mock of the GitserverClient +// interface. All methods panic on invocation, unless overwritten. 
+func NewStrictMockGitserverClient() *MockGitserverClient { + return &MockGitserverClient{ + FetchTarFunc: &GitserverClientFetchTarFunc{ + defaultHook: func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + panic("unexpected invocation of MockGitserverClient.FetchTar") + }, + }, + GitDiffFunc: &GitserverClientGitDiffFunc{ + defaultHook: func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + panic("unexpected invocation of MockGitserverClient.GitDiff") + }, + }, + } +} + +// NewMockGitserverClientFrom creates a new mock of the MockGitserverClient +// interface. All methods delegate to the given implementation, unless +// overwritten. +func NewMockGitserverClientFrom(i gitserver.GitserverClient) *MockGitserverClient { + return &MockGitserverClient{ + FetchTarFunc: &GitserverClientFetchTarFunc{ + defaultHook: i.FetchTar, + }, + GitDiffFunc: &GitserverClientGitDiffFunc{ + defaultHook: i.GitDiff, + }, + } +} + +// GitserverClientFetchTarFunc describes the behavior when the FetchTar +// method of the parent MockGitserverClient instance is invoked. +type GitserverClientFetchTarFunc struct { + defaultHook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) + hooks []func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) + history []GitserverClientFetchTarFuncCall + mutex sync.Mutex +} + +// FetchTar delegates to the next hook function in the queue and stores the +// parameter and result values of this invocation. 
+func (m *MockGitserverClient) FetchTar(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 []string) (io.ReadCloser, error) { + r0, r1 := m.FetchTarFunc.nextHook()(v0, v1, v2, v3) + m.FetchTarFunc.appendCall(GitserverClientFetchTarFuncCall{v0, v1, v2, v3, r0, r1}) + return r0, r1 +} + +// SetDefaultHook sets function that is called when the FetchTar method of +// the parent MockGitserverClient instance is invoked and the hook queue is +// empty. +func (f *GitserverClientFetchTarFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)) { + f.defaultHook = hook +} + +// PushHook adds a function to the end of hook queue. Each invocation of the +// FetchTar method of the parent MockGitserverClient instance invokes the +// hook at the front of the queue and discards it. After the queue is empty, +// the default hook function is invoked for any future action. +func (f *GitserverClientFetchTarFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)) { + f.mutex.Lock() + f.hooks = append(f.hooks, hook) + f.mutex.Unlock() +} + +// SetDefaultReturn calls SetDefaultDefaultHook with a function that returns +// the given values. +func (f *GitserverClientFetchTarFunc) SetDefaultReturn(r0 io.ReadCloser, r1 error) { + f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + return r0, r1 + }) +} + +// PushReturn calls PushDefaultHook with a function that returns the given +// values. 
+func (f *GitserverClientFetchTarFunc) PushReturn(r0 io.ReadCloser, r1 error) { + f.PushHook(func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + return r0, r1 + }) +} + +func (f *GitserverClientFetchTarFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + f.mutex.Lock() + defer f.mutex.Unlock() + + if len(f.hooks) == 0 { + return f.defaultHook + } + + hook := f.hooks[0] + f.hooks = f.hooks[1:] + return hook +} + +func (f *GitserverClientFetchTarFunc) appendCall(r0 GitserverClientFetchTarFuncCall) { + f.mutex.Lock() + f.history = append(f.history, r0) + f.mutex.Unlock() +} + +// History returns a sequence of GitserverClientFetchTarFuncCall objects +// describing the invocations of this function. +func (f *GitserverClientFetchTarFunc) History() []GitserverClientFetchTarFuncCall { + f.mutex.Lock() + history := make([]GitserverClientFetchTarFuncCall, len(f.history)) + copy(history, f.history) + f.mutex.Unlock() + + return history +} + +// GitserverClientFetchTarFuncCall is an object that describes an invocation +// of method FetchTar on an instance of MockGitserverClient. +type GitserverClientFetchTarFuncCall struct { + // Arg0 is the value of the 1st argument passed to this method + // invocation. + Arg0 context.Context + // Arg1 is the value of the 2nd argument passed to this method + // invocation. + Arg1 api.RepoName + // Arg2 is the value of the 3rd argument passed to this method + // invocation. + Arg2 api.CommitID + // Arg3 is the value of the 4th argument passed to this method + // invocation. + Arg3 []string + // Result0 is the value of the 1st result returned from this method + // invocation. + Result0 io.ReadCloser + // Result1 is the value of the 2nd result returned from this method + // invocation. + Result1 error +} + +// Args returns an interface slice containing the arguments of this +// invocation. 
+func (c GitserverClientFetchTarFuncCall) Args() []interface{} { + return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3} +} + +// Results returns an interface slice containing the results of this +// invocation. +func (c GitserverClientFetchTarFuncCall) Results() []interface{} { + return []interface{}{c.Result0, c.Result1} +} + +// GitserverClientGitDiffFunc describes the behavior when the GitDiff method +// of the parent MockGitserverClient instance is invoked. +type GitserverClientGitDiffFunc struct { + defaultHook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) + hooks []func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) + history []GitserverClientGitDiffFuncCall + mutex sync.Mutex +} + +// GitDiff delegates to the next hook function in the queue and stores the +// parameter and result values of this invocation. +func (m *MockGitserverClient) GitDiff(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 api.CommitID) (gitserver.Changes, error) { + r0, r1 := m.GitDiffFunc.nextHook()(v0, v1, v2, v3) + m.GitDiffFunc.appendCall(GitserverClientGitDiffFuncCall{v0, v1, v2, v3, r0, r1}) + return r0, r1 +} + +// SetDefaultHook sets function that is called when the GitDiff method of +// the parent MockGitserverClient instance is invoked and the hook queue is +// empty. +func (f *GitserverClientGitDiffFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)) { + f.defaultHook = hook +} + +// PushHook adds a function to the end of hook queue. Each invocation of the +// GitDiff method of the parent MockGitserverClient instance invokes the +// hook at the front of the queue and discards it. After the queue is empty, +// the default hook function is invoked for any future action. 
+func (f *GitserverClientGitDiffFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)) { + f.mutex.Lock() + f.hooks = append(f.hooks, hook) + f.mutex.Unlock() +} + +// SetDefaultReturn calls SetDefaultDefaultHook with a function that returns +// the given values. +func (f *GitserverClientGitDiffFunc) SetDefaultReturn(r0 gitserver.Changes, r1 error) { + f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + return r0, r1 + }) +} + +// PushReturn calls PushDefaultHook with a function that returns the given +// values. +func (f *GitserverClientGitDiffFunc) PushReturn(r0 gitserver.Changes, r1 error) { + f.PushHook(func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + return r0, r1 + }) +} + +func (f *GitserverClientGitDiffFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) { + f.mutex.Lock() + defer f.mutex.Unlock() + + if len(f.hooks) == 0 { + return f.defaultHook + } + + hook := f.hooks[0] + f.hooks = f.hooks[1:] + return hook +} + +func (f *GitserverClientGitDiffFunc) appendCall(r0 GitserverClientGitDiffFuncCall) { + f.mutex.Lock() + f.history = append(f.history, r0) + f.mutex.Unlock() +} + +// History returns a sequence of GitserverClientGitDiffFuncCall objects +// describing the invocations of this function. +func (f *GitserverClientGitDiffFunc) History() []GitserverClientGitDiffFuncCall { + f.mutex.Lock() + history := make([]GitserverClientGitDiffFuncCall, len(f.history)) + copy(history, f.history) + f.mutex.Unlock() + + return history +} + +// GitserverClientGitDiffFuncCall is an object that describes an invocation +// of method GitDiff on an instance of MockGitserverClient. +type GitserverClientGitDiffFuncCall struct { + // Arg0 is the value of the 1st argument passed to this method + // invocation. 
+ Arg0 context.Context + // Arg1 is the value of the 2nd argument passed to this method + // invocation. + Arg1 api.RepoName + // Arg2 is the value of the 3rd argument passed to this method + // invocation. + Arg2 api.CommitID + // Arg3 is the value of the 4th argument passed to this method + // invocation. + Arg3 api.CommitID + // Result0 is the value of the 1st result returned from this method + // invocation. + Result0 gitserver.Changes + // Result1 is the value of the 2nd result returned from this method + // invocation. + Result1 error +} + +// Args returns an interface slice containing the arguments of this +// invocation. +func (c GitserverClientGitDiffFuncCall) Args() []interface{} { + return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3} +} + +// Results returns an interface slice containing the results of this +// invocation. +func (c GitserverClientGitDiffFuncCall) Results() []interface{} { + return []interface{}{c.Result0, c.Result1} +} diff --git a/cmd/symbols/internal/fetcher/observability.go b/cmd/symbols/internal/fetcher/observability.go new file mode 100644 index 00000000000..c7541ec1353 --- /dev/null +++ b/cmd/symbols/internal/fetcher/observability.go @@ -0,0 +1,53 @@ +package fetcher + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/sourcegraph/sourcegraph/internal/metrics" + "github.com/sourcegraph/sourcegraph/internal/observation" +) + +type operations struct { + fetching prometheus.Gauge + fetchQueueSize prometheus.Gauge + fetchRepositoryArchive *observation.Operation +} + +func newOperations(observationContext *observation.Context) *operations { + fetching := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "src", + Name: "codeintel_symbols_fetching", + Help: "The number of fetches currently running.", + }) + observationContext.Registerer.MustRegister(fetching) + + fetchQueueSize := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "src", + Name: "codeintel_symbols_fetch_queue_size", + Help: 
"The number of fetch jobs enqueued.", + }) + observationContext.Registerer.MustRegister(fetchQueueSize) + + operationMetrics := metrics.NewREDMetrics( + observationContext.Registerer, + "codeintel_symbols_repository_fetcher", + metrics.WithLabels("op"), + metrics.WithCountHelp("Total number of method invocations."), + ) + + op := func(name string) *observation.Operation { + return observationContext.Operation(observation.Op{ + Name: fmt.Sprintf("codeintel.symbols.parser.%s", name), + MetricLabelValues: []string{name}, + Metrics: operationMetrics, + }) + } + + return &operations{ + fetching: fetching, + fetchQueueSize: fetchQueueSize, + fetchRepositoryArchive: op("FetchRepositoryArchive"), + } +} diff --git a/cmd/symbols/internal/fetcher/repository_fetcher.go b/cmd/symbols/internal/fetcher/repository_fetcher.go new file mode 100644 index 00000000000..01dd60282c2 --- /dev/null +++ b/cmd/symbols/internal/fetcher/repository_fetcher.go @@ -0,0 +1,196 @@ +package fetcher + +import ( + "archive/tar" + "bytes" + "context" + "io" + "path" + "strings" + + "github.com/cockroachdb/errors" + "github.com/opentracing/opentracing-go/log" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/internal/observation" +) + +type RepositoryFetcher interface { + FetchRepositoryArchive(ctx context.Context, args types.SearchArgs, paths []string) <-chan parseRequestOrError +} + +type repositoryFetcher struct { + gitserverClient gitserver.GitserverClient + fetchSem chan int + operations *operations +} + +type ParseRequest struct { + Path string + Data []byte +} + +type parseRequestOrError struct { + ParseRequest ParseRequest + Err error +} + +func NewRepositoryFetcher(gitserverClient gitserver.GitserverClient, maximumConcurrentFetches int, observationContext *observation.Context) RepositoryFetcher { + return &repositoryFetcher{ + gitserverClient: gitserverClient, + 
fetchSem: make(chan int, maximumConcurrentFetches), + operations: newOperations(observationContext), + } +} + +func (f *repositoryFetcher) FetchRepositoryArchive(ctx context.Context, args types.SearchArgs, paths []string) <-chan parseRequestOrError { + requestCh := make(chan parseRequestOrError) + + go func() { + defer close(requestCh) + + if err := f.fetchRepositoryArchive(ctx, args, paths, func(request ParseRequest) { + requestCh <- parseRequestOrError{ParseRequest: request} + }); err != nil { + requestCh <- parseRequestOrError{Err: err} + } + }() + + return requestCh +} + +func (f *repositoryFetcher) fetchRepositoryArchive(ctx context.Context, args types.SearchArgs, paths []string, callback func(request ParseRequest)) (err error) { + ctx, traceLog, endObservation := f.operations.fetchRepositoryArchive.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{ + log.String("repo", string(args.Repo)), + log.String("commitID", string(args.CommitID)), + log.Int("paths", len(paths)), + log.String("paths", strings.Join(paths, ":")), + }}) + defer endObservation(1, observation.Args{}) + + onDefer, err := f.limitConcurrentFetches(ctx) + if err != nil { + return err + } + defer onDefer() + traceLog(log.Event("acquired fetch semaphore")) + + f.operations.fetching.Inc() + defer f.operations.fetching.Dec() + + rc, err := f.gitserverClient.FetchTar(ctx, args.Repo, args.CommitID, paths) + if err != nil { + return errors.Wrap(err, "gitserverClient.FetchTar") + } + defer rc.Close() + + return readTar(ctx, tar.NewReader(rc), callback, traceLog) +} + +func (f *repositoryFetcher) limitConcurrentFetches(ctx context.Context) (func(), error) { + f.operations.fetchQueueSize.Inc() + defer f.operations.fetchQueueSize.Dec() + + select { + case f.fetchSem <- 1: + return func() { <-f.fetchSem }, nil + + case <-ctx.Done(): + return func() {}, ctx.Err() + } +} + +func readTar(ctx context.Context, tarReader *tar.Reader, callback func(request ParseRequest), traceLog 
observation.TraceLogger) error {
+	for {
+		// tar.Reader.Next does not take a context, so honor cancellation
+		// between entries.
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
+
+		tarHeader, err := tarReader.Next()
+		if err != nil {
+			if err == io.EOF {
+				// End of archive: every entry was consumed.
+				return nil
+			}
+
+			return err
+		}
+
+		// Propagate read failures: a truncated or unreadable entry must fail
+		// the whole fetch rather than being silently skipped (the error was
+		// previously discarded).
+		if err := readTarHeader(tarReader, tarHeader, callback, traceLog); err != nil {
+			return err
+		}
+	}
+}
+
+// readTarHeader reads the contents of a single tar entry and, when the entry
+// passes the shouldParse filters and its contents look like text, invokes
+// callback with a ParseRequest holding the full file contents.
+func readTarHeader(tarReader *tar.Reader, tarHeader *tar.Header, callback func(request ParseRequest), traceLog observation.TraceLogger) error {
+	if !shouldParse(tarHeader) {
+		return nil
+	}
+
+	// 32KB is the same buffer size used internally by io.Copy.
+	buffer := make([]byte, 32*1024)
+
+	traceLog(log.Event("reading tar header prefix"))
+
+	// Read first chunk of tar header contents. A short read without EOF is
+	// possible; any shortfall is completed by the ReadFull below.
+	n, err := tarReader.Read(buffer)
+	if err != nil && err != io.EOF {
+		return err
+	}
+	traceLog(log.Int("n", n))
+
+	if n == 0 {
+		// Empty file, nothing to parse
+		return nil
+	}
+
+	// Check to see if first 256 bytes contain a 0x00. If so, we'll assume that
+	// the file is binary and skip parsing. Otherwise, we'll have some non-zero
+	// contents that passed our filters above to parse.
+
+	m := 256
+	if n < m {
+		m = n
+	}
+	if bytes.IndexByte(buffer[:m], 0x00) >= 0 {
+		return nil
+	}
+
+	// Copy buffer into appropriately-sized slice for return
+	data := make([]byte, int(tarHeader.Size))
+	copy(data, buffer[:n])
+
+	if n < int(tarHeader.Size) {
+		traceLog(log.Event("reading remaining tar header content"))
+
+		// Read the remaining contents
+		if _, err := io.ReadFull(tarReader, data[n:]); err != nil {
+			return err
+		}
+		traceLog(log.Int("n", int(tarHeader.Size)-n))
+	}
+
+	request := ParseRequest{Path: tarHeader.Name, Data: data}
+	callback(request)
+	return nil
+}
+
+// maxFileSize (512KB) is the maximum size of files we attempt to parse.
+const maxFileSize = 1 << 19
+
+// shouldParse reports whether a tar entry's contents should be read and
+// handed to the parser: regular files only, at most maxFileSize bytes, and
+// not JSON.
+func shouldParse(tarHeader *tar.Header) bool {
+	// We do not search large files
+	if tarHeader.Size > maxFileSize {
+		return false
+	}
+
+	// We only care about files
+	if tarHeader.Typeflag != tar.TypeReg && tarHeader.Typeflag != tar.TypeRegA {
+		return false
+	}
+
+	// JSON files are symbol-less
+	if path.Ext(tarHeader.Name) == ".json" {
+		return false
+	}
+
+	return true
+}
diff --git a/cmd/symbols/internal/fetcher/repository_fetcher_test.go b/cmd/symbols/internal/fetcher/repository_fetcher_test.go
new file mode 100644
index 00000000000..d538beb4234
--- /dev/null
+++ b/cmd/symbols/internal/fetcher/repository_fetcher_test.go
@@ -0,0 +1,82 @@
+package fetcher
+
+import (
+	"context"
+	"strings"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+
+	"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
+	"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
+	"github.com/sourcegraph/sourcegraph/internal/api"
+	"github.com/sourcegraph/sourcegraph/internal/observation"
+)
+
+func TestRepositoryFetcher(t *testing.T) {
+	// Files of assorted sizes, all under maxFileSize, that should survive
+	// every filter and be emitted as parse requests.
+	validParseRequests := map[string]string{
+		"a.txt": strings.Repeat("payload a", 1<<8),
+		"b.txt": strings.Repeat("payload b", 1<<9),
+		"c.txt": strings.Repeat("payload c", 1<<10),
+		"d.txt": strings.Repeat("payload d", 1<<11),
+		"e.txt": strings.Repeat("payload e", 1<<12),
+		"f.txt": strings.Repeat("payload f", 1<<13),
+		"g.txt": strings.Repeat("payload g", 1<<14),
+	}
+
+	tarContents := map[string]string{}
+	for name, content := range validParseRequests {
+		tarContents[name] = content
+	}
+
+	// JSON is ignored
+	tarContents["ignored.json"] = "{}"
+
+	// Large files are ignored
+	tarContents["payloads.txt"] = strings.Repeat("oversized load", maxFileSize)
+
+	gitserverClient := NewMockGitserverClient()
+	gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(tarContents))
+
+	repositoryFetcher := NewRepositoryFetcher(gitserverClient, 15, &observation.TestContext)
+	args := types.SearchArgs{Repo: api.RepoName("foo"), CommitID: api.CommitID("deadbeef")}
+
+	t.Run("all paths", func(t *testing.T) {
+		paths := []string(nil)
+		ch := repositoryFetcher.FetchRepositoryArchive(context.Background(), args, paths)
+		parseRequests := consumeParseRequests(t, ch)
+
+		expectedParseRequests := validParseRequests
+		if diff := cmp.Diff(expectedParseRequests, parseRequests); diff != "" {
+			t.Errorf("unexpected parse requests (-want +got):\n%s", diff)
+		}
+	})
+
+	t.Run("selected paths", func(t *testing.T) {
+		paths := []string{"a.txt", "b.txt", "c.txt"}
+		ch := repositoryFetcher.FetchRepositoryArchive(context.Background(), args, paths)
+		parseRequests := consumeParseRequests(t, ch)
+
+		expectedParseRequests := map[string]string{
+			"a.txt": validParseRequests["a.txt"],
+			"b.txt": validParseRequests["b.txt"],
+			"c.txt": validParseRequests["c.txt"],
+		}
+		if diff := cmp.Diff(expectedParseRequests, parseRequests); diff != "" {
+			t.Errorf("unexpected parse requests (-want +got):\n%s", diff)
+		}
+	})
+}
+
+// consumeParseRequests drains ch, failing the test on any fetch error, and
+// returns the received file contents keyed by path.
+func consumeParseRequests(t *testing.T, ch <-chan parseRequestOrError) map[string]string {
+	parseRequests := map[string]string{}
+	for v := range ch {
+		if v.Err != nil {
+			t.Fatalf("unexpected fetch error: %s", v.Err)
+		}
+
+		parseRequests[v.ParseRequest.Path] = string(v.ParseRequest.Data)
+	}
+
+	return parseRequests
+}
diff --git a/cmd/symbols/internal/gitserver/client.go b/cmd/symbols/internal/gitserver/client.go
new file mode 100644
index 00000000000..f86844be5df
--- /dev/null
+++ b/cmd/symbols/internal/gitserver/client.go
@@ -0,0 +1,104 @@
+package gitserver
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/cockroachdb/errors"
+	"github.com/opentracing/opentracing-go/log"
+
+	"github.com/sourcegraph/sourcegraph/internal/api"
+	"github.com/sourcegraph/sourcegraph/internal/gitserver"
+	"github.com/sourcegraph/sourcegraph/internal/observation"
+	"github.com/sourcegraph/sourcegraph/internal/vcs/git"
+)
+
+type GitserverClient
interface { + // FetchTar returns an io.ReadCloser to a tar archive of a repository at the specified Git + // remote URL and commit ID. If the error implements "BadRequest() bool", it will be used to + // determine if the error is a bad request (eg invalid repo). + FetchTar(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) + + // GitDiff returns the paths that have changed between two commits. + GitDiff(context.Context, api.RepoName, api.CommitID, api.CommitID) (Changes, error) +} + +// Changes are added, deleted, and modified paths. +type Changes struct { + Added []string + Modified []string + Deleted []string +} + +type gitserverClient struct { + operations *operations +} + +func NewClient(observationContext *observation.Context) GitserverClient { + return &gitserverClient{ + operations: newOperations(observationContext), + } +} + +func (c *gitserverClient) FetchTar(ctx context.Context, repo api.RepoName, commit api.CommitID, paths []string) (_ io.ReadCloser, err error) { + ctx, endObservation := c.operations.fetchTar.With(ctx, &err, observation.Args{LogFields: []log.Field{ + log.String("repo", string(repo)), + log.String("commit", string(commit)), + log.Int("paths", len(paths)), + log.String("paths", strings.Join(paths, ":")), + }}) + defer endObservation(1, observation.Args{}) + + opts := gitserver.ArchiveOptions{ + Treeish: string(commit), + Format: "tar", + Paths: paths, + } + + return gitserver.DefaultClient.Archive(ctx, repo, opts) +} + +func (c *gitserverClient) GitDiff(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) (_ Changes, err error) { + ctx, endObservation := c.operations.gitDiff.With(ctx, &err, observation.Args{LogFields: []log.Field{ + log.String("repo", string(repo)), + log.String("commitA", string(commitA)), + log.String("commitB", string(commitB)), + }}) + defer endObservation(1, observation.Args{}) + + output, err := git.DiffSymbols(ctx, repo, commitA, commitB) + + changes, err := 
parseGitDiffOutput(output) + if err != nil { + return Changes{}, errors.Wrap(err, "failed to parse git diff output") + } + + return changes, nil +} + +var NUL = []byte{0} + +// parseGitDiffOutput parses the output of a git diff command, which consists +// of a repeated sequence of ` NUL NUL` where NUL is the 0 byte. +func parseGitDiffOutput(output []byte) (changes Changes, _ error) { + slices := bytes.Split(bytes.TrimRight(output, string(NUL)), NUL) + if len(slices)%2 != 0 { + return changes, fmt.Errorf("uneven pairs") + } + + for i := 0; i < len(slices); i += 2 { + switch slices[i][0] { + case 'A': + changes.Added = append(changes.Added, string(slices[i+1])) + case 'M': + changes.Modified = append(changes.Modified, string(slices[i+1])) + case 'D': + changes.Deleted = append(changes.Deleted, string(slices[i+1])) + } + } + + return changes, nil +} diff --git a/cmd/symbols/internal/gitserver/client_test.go b/cmd/symbols/internal/gitserver/client_test.go new file mode 100644 index 00000000000..5172d2e9923 --- /dev/null +++ b/cmd/symbols/internal/gitserver/client_test.go @@ -0,0 +1,65 @@ +package gitserver + +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func TestParseGitDiffOutput(t *testing.T) { + testCases := []struct { + output []byte + expectedChanges Changes + shouldError bool + }{ + { + output: combineBytes( + []byte("A"), NUL, []byte("added1.json"), NUL, + []byte("M"), NUL, []byte("modified1.json"), NUL, + []byte("D"), NUL, []byte("deleted1.json"), NUL, + []byte("A"), NUL, []byte("added2.json"), NUL, + []byte("M"), NUL, []byte("modified2.json"), NUL, + []byte("D"), NUL, []byte("deleted2.json"), NUL, + []byte("A"), NUL, []byte("added3.json"), NUL, + []byte("M"), NUL, []byte("modified3.json"), NUL, + []byte("D"), NUL, []byte("deleted3.json"), NUL, + ), + expectedChanges: Changes{ + Added: []string{"added1.json", "added2.json", "added3.json"}, + Modified: []string{"modified1.json", "modified2.json", "modified3.json"}, + Deleted: 
// combineBytes concatenates the given byte slices into a single slice,
// preserving argument order. It returns nil when given no input.
func combineBytes(bss ...[]byte) []byte {
	var out []byte
	for _, chunk := range bss {
		out = append(out, chunk...)
	}

	return out
}
b/cmd/symbols/internal/gitserver/tar.go new file mode 100644 index 00000000000..06921b0ceb1 --- /dev/null +++ b/cmd/symbols/internal/gitserver/tar.go @@ -0,0 +1,49 @@ +package gitserver + +import ( + "archive/tar" + "bytes" + "context" + "io" + + "github.com/sourcegraph/sourcegraph/internal/api" +) + +func CreateTestFetchTarFunc(tarContents map[string]string) func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) { + return func(ctx context.Context, repo api.RepoName, commit api.CommitID, paths []string) (io.ReadCloser, error) { + var buffer bytes.Buffer + tarWriter := tar.NewWriter(&buffer) + + for name, content := range tarContents { + if paths != nil { + found := false + for _, path := range paths { + if path == name { + found = true + } + } + if !found { + continue + } + } + + tarHeader := &tar.Header{ + Name: name, + Mode: 0o600, + Size: int64(len(content)), + } + if err := tarWriter.WriteHeader(tarHeader); err != nil { + return nil, err + } + if _, err := tarWriter.Write([]byte(content)); err != nil { + return nil, err + } + } + + if err := tarWriter.Close(); err != nil { + return nil, err + } + + return io.NopCloser(bytes.NewReader(buffer.Bytes())), nil + } +} diff --git a/cmd/symbols/internal/parser/observability.go b/cmd/symbols/internal/parser/observability.go new file mode 100644 index 00000000000..2fe782fc97d --- /dev/null +++ b/cmd/symbols/internal/parser/observability.go @@ -0,0 +1,74 @@ +package parser + +import ( + "fmt" + + "github.com/prometheus/client_golang/prometheus" + + "github.com/sourcegraph/sourcegraph/internal/metrics" + "github.com/sourcegraph/sourcegraph/internal/observation" +) + +type operations struct { + parsing prometheus.Gauge + parseQueueSize prometheus.Gauge + parseQueueTimeouts prometheus.Counter + parseFailed prometheus.Counter + parse *observation.Operation + handleParseRequest *observation.Operation +} + +func newOperations(observationContext *observation.Context) *operations { + parsing := 
prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "src", + Name: "codeintel_symbols_parsing", + Help: "The number of parse jobs currently running.", + }) + observationContext.Registerer.MustRegister(parsing) + + parseQueueSize := prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "src", + Name: "codeintel_symbols_parse_queue_size", + Help: "The number of parse jobs enqueued.", + }) + observationContext.Registerer.MustRegister(parseQueueSize) + + parseQueueTimeouts := prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "src", + Name: "codeintel_symbols_parse_queue_timeouts_total", + Help: "The total number of parse jobs that timed out while enqueued.", + }) + observationContext.Registerer.MustRegister(parseQueueTimeouts) + + parseFailed := prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: "src", + Name: "codeintel_symbols_parse_failed_total", + Help: "The total number of parse jobs that failed.", + }) + observationContext.Registerer.MustRegister(parseFailed) + + operationMetrics := metrics.NewREDMetrics( + observationContext.Registerer, + "codeintel_symbols_parser", + metrics.WithLabels("op"), + metrics.WithCountHelp("Total number of method invocations."), + metrics.WithDurationBuckets([]float64{1, 5, 10, 60, 300, 1200}), + ) + + op := func(name string) *observation.Operation { + return observationContext.Operation(observation.Op{ + Name: fmt.Sprintf("codeintel.symbols.parser.%s", name), + MetricLabelValues: []string{name}, + Metrics: operationMetrics, + }) + } + + return &operations{ + parsing: parsing, + parseQueueSize: parseQueueSize, + parseQueueTimeouts: parseQueueTimeouts, + parseFailed: parseFailed, + parse: op("Parse"), + handleParseRequest: op("HandleParseRequest"), + } +} diff --git a/cmd/symbols/internal/parser/parser.go b/cmd/symbols/internal/parser/parser.go new file mode 100644 index 00000000000..3aa10e24d90 --- /dev/null +++ b/cmd/symbols/internal/parser/parser.go @@ -0,0 +1,205 @@ +package parser + +import ( + "context" + 
"strings" + "sync" + "sync/atomic" + + "github.com/cockroachdb/errors" + "github.com/inconshreveable/log15" + "github.com/opentracing/opentracing-go/log" + + "github.com/sourcegraph/go-ctags" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/internal/observation" + "github.com/sourcegraph/sourcegraph/internal/search/result" +) + +type Parser interface { + Parse(ctx context.Context, args types.SearchArgs, paths []string) (<-chan result.Symbol, error) +} + +type parser struct { + parserPool ParserPool + repositoryFetcher fetcher.RepositoryFetcher + operations *operations +} + +func NewParser( + parserPool ParserPool, + repositoryFetcher fetcher.RepositoryFetcher, + observationContext *observation.Context, +) Parser { + return &parser{ + parserPool: parserPool, + repositoryFetcher: repositoryFetcher, + operations: newOperations(observationContext), + } +} + +func (p *parser) Parse(ctx context.Context, args types.SearchArgs, paths []string) (_ <-chan result.Symbol, err error) { + ctx, traceLog, endObservation := p.operations.parse.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{ + log.String("repo", string(args.Repo)), + log.String("commitID", string(args.CommitID)), + log.Int("paths", len(paths)), + log.String("paths", strings.Join(paths, ":")), + }}) + // NOTE: We call endObservation synchronously within this function when we + // return an error. Once we get on the success-only path, we install it to + // run on defer of a background routine, which indicates when the returned + // symbols channel is closed. 
+ + parseRequestOrErrors := p.repositoryFetcher.FetchRepositoryArchive(ctx, args, paths) + if err != nil { + endObservation(1, observation.Args{}) + return nil, errors.Wrap(err, "repositoryFetcher.FetchRepositoryArchive") + } + defer func() { + if err != nil { + go func() { + // Drain channel on early exit + for range parseRequestOrErrors { + } + }() + } + }() + + var wg sync.WaitGroup + var totalSymbols uint32 + symbols := make(chan result.Symbol) + + defer func() { + go func() { + defer func() { + endObservation(1, observation.Args{LogFields: []log.Field{ + log.Int("numSymbols", int(totalSymbols)), + }}) + }() + + wg.Wait() + close(symbols) + }() + }() + + totalRequests := 0 + for v := range parseRequestOrErrors { + if v.Err != nil { + return nil, v.Err + } + + wg.Add(1) + totalRequests++ + + go func(parseRequest fetcher.ParseRequest) { + defer wg.Done() + + _ = p.handleParseRequest(ctx, symbols, parseRequest, &totalSymbols) + }(v.ParseRequest) + } + traceLog(log.Int("numRequests", totalRequests)) + + return symbols, nil +} + +func (p *parser) handleParseRequest(ctx context.Context, symbols chan<- result.Symbol, parseRequest fetcher.ParseRequest, totalSymbols *uint32) (err error) { + ctx, traceLog, endObservation := p.operations.handleParseRequest.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{ + log.Int("fileSize", len(parseRequest.Data)), + }}) + defer endObservation(1, observation.Args{}) + + parser, err := p.parserFromPool(ctx) + if err != nil { + return err + } + traceLog(log.Event("acquired parser from pool")) + + defer func() { + if err == nil { + if e := recover(); e != nil { + err = errors.Errorf("panic: %s", e) + } + } + + if err == nil { + p.parserPool.Done(parser) + } else { + // Close parser and return nil to pool, indicating that the next receiver should create a new parser + log15.Error("Closing failed parser", "error", err) + parser.Close() + p.parserPool.Done(nil) + p.operations.parseFailed.Inc() + } + }() + + 
p.operations.parsing.Inc() + defer p.operations.parsing.Dec() + + entries, err := parser.Parse(parseRequest.Path, parseRequest.Data) + if err != nil { + return errors.Wrap(err, "parser.Parse") + } + traceLog(log.Int("numEntries", len(entries))) + + for _, e := range entries { + if !shouldPersistEntry(e) { + continue + } + + symbol := result.Symbol{ + Name: e.Name, + Path: e.Path, + Line: e.Line, + Kind: e.Kind, + Language: e.Language, + Parent: e.Parent, + ParentKind: e.ParentKind, + Signature: e.Signature, + Pattern: e.Pattern, + FileLimited: e.FileLimited, + } + + select { + case symbols <- symbol: + atomic.AddUint32(totalSymbols, 1) + + case <-ctx.Done(): + return ctx.Err() + } + } + + return nil +} + +func (p *parser) parserFromPool(ctx context.Context) (ctags.Parser, error) { + p.operations.parseQueueSize.Inc() + defer p.operations.parseQueueSize.Dec() + + parser, err := p.parserPool.Get(ctx) + if err != nil { + if err == context.DeadlineExceeded { + p.operations.parseQueueTimeouts.Inc() + } + if err != ctx.Err() { + err = errors.Wrap(err, "failed to create parser") + } + } + + return parser, err +} + +func shouldPersistEntry(e *ctags.Entry) bool { + if e.Name == "" { + return false + } + + for _, value := range []string{"__anon", "AnonymousFunction"} { + if strings.HasPrefix(e.Name, value) || strings.HasPrefix(e.Parent, value) { + return false + } + } + + return true +} diff --git a/cmd/symbols/internal/parser/parser_factory.go b/cmd/symbols/internal/parser/parser_factory.go new file mode 100644 index 00000000000..6798f48ef3c --- /dev/null +++ b/cmd/symbols/internal/parser/parser_factory.go @@ -0,0 +1,5 @@ +package parser + +import "github.com/sourcegraph/go-ctags" + +type ParserFactory func() (ctags.Parser, error) diff --git a/cmd/symbols/internal/parser/parser_factory_ctags.go b/cmd/symbols/internal/parser/parser_factory_ctags.go new file mode 100644 index 00000000000..5e089095494 --- /dev/null +++ b/cmd/symbols/internal/parser/parser_factory_ctags.go @@ 
-0,0 +1,23 @@ +package parser + +import ( + "log" + "os" + + "github.com/sourcegraph/go-ctags" +) + +func NewCtagsParserFactory(ctagsCommand string, patternLengthLimit int, logErrors, debugLogs bool) ParserFactory { + options := ctags.Options{ + Bin: ctagsCommand, + PatternLengthLimit: patternLengthLimit, + } + if logErrors { + options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags) + } + if debugLogs { + options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags) + } + + return func() (ctags.Parser, error) { return ctags.New(options) } +} diff --git a/cmd/symbols/internal/symbols/ctags_test.go b/cmd/symbols/internal/parser/parser_factory_ctags_test.go similarity index 93% rename from cmd/symbols/internal/symbols/ctags_test.go rename to cmd/symbols/internal/parser/parser_factory_ctags_test.go index 6b910f3263a..109aa2189b0 100644 --- a/cmd/symbols/internal/symbols/ctags_test.go +++ b/cmd/symbols/internal/parser/parser_factory_ctags_test.go @@ -1,4 +1,4 @@ -package symbols +package parser import ( "os/exec" @@ -10,13 +10,13 @@ import ( "github.com/sourcegraph/go-ctags" ) -func TestParser(t *testing.T) { +func TestCtagsParser(t *testing.T) { // TODO(sqs): find a way to make it easy to run these tests in local dev (w/o needing to install universal-ctags) and CI - if _, err := exec.LookPath(ctagsCommand); err != nil { + if _, err := exec.LookPath("universal-ctags"); err != nil { t.Skip("command not in PATH: universal-ctags") } - p, err := NewParser() + p, err := NewCtagsParserFactory("universal-ctags", 250, false, false)() if err != nil { t.Fatal(err) } diff --git a/cmd/symbols/internal/parser/parser_pool.go b/cmd/symbols/internal/parser/parser_pool.go new file mode 100644 index 00000000000..199ff8b2e63 --- /dev/null +++ b/cmd/symbols/internal/parser/parser_pool.go @@ -0,0 +1,57 @@ +package parser + +import ( + "context" + + "github.com/sourcegraph/go-ctags" +) + +type ParserPool interface { + Get(ctx context.Context) (ctags.Parser, error) + Done(parser 
ctags.Parser) +} + +type parserPool struct { + newParser ParserFactory + pool chan ctags.Parser +} + +func NewParserPool(newParser ParserFactory, numParserProcesses int) (ParserPool, error) { + pool := make(chan ctags.Parser, numParserProcesses) + for i := 0; i < numParserProcesses; i++ { + parser, err := newParser() + if err != nil { + return nil, err + } + pool <- parser + } + + return &parserPool{ + newParser: newParser, + pool: pool, + }, nil +} + +// Get a parser from the pool. Once this parser is no longer in use, the Done method +// MUST be called with either the parser or a nil value (when countering an error). +// Nil values will be recreated on-demand via the factory supplied when constructing +// the pool. This method always returns a non-nil parser with a nil error value. +// +// This method blocks until a parser is available or the given context is canceled. +func (p *parserPool) Get(ctx context.Context) (ctags.Parser, error) { + select { + case parser := <-p.pool: + if parser != nil { + return parser, nil + } + + return p.newParser() + + case <-ctx.Done(): + return nil, ctx.Err() + } +} + +func (p *parserPool) Done(parser ctags.Parser) { + p.pool <- parser +} diff --git a/cmd/symbols/internal/symbols/ctags.go b/cmd/symbols/internal/symbols/ctags.go deleted file mode 100644 index a1a9c9a6dfd..00000000000 --- a/cmd/symbols/internal/symbols/ctags.go +++ /dev/null @@ -1,48 +0,0 @@ -package symbols - -import ( - "log" - "os" - "strconv" - - "github.com/cockroachdb/errors" - "github.com/sourcegraph/go-ctags" - - "github.com/sourcegraph/sourcegraph/internal/env" -) - -const debugLogs = false - -var logErrors = os.Getenv("DEPLOY_TYPE") == "dev" - -var ctagsCommand = env.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)") - -// Increasing this value may increase the size of the symbols cache, but will also stop long lines containing symbols from -// being highlighted 
improperly. See https://github.com/sourcegraph/sourcegraph/issues/7668. -var rawPatternLengthLimit = env.Get("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags") - -// NewParser runs the ctags command from the CTAGS_COMMAND environment -// variable, falling back to `universal-ctags`. -func NewParser() (ctags.Parser, error) { - patternLengthLimit, err := strconv.Atoi(rawPatternLengthLimit) - if err != nil { - return nil, errors.Errorf("invalid pattern length limit: %s", rawPatternLengthLimit) - } - - var info *log.Logger - if logErrors { - info = log.New(os.Stderr, "ctags: ", log.LstdFlags) - } - - var debug *log.Logger - if debugLogs { - debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags) - } - - return ctags.New(ctags.Options{ - Bin: ctagsCommand, - PatternLengthLimit: patternLengthLimit, - Info: info, - Debug: debug, - }) -} diff --git a/cmd/symbols/internal/symbols/fetch.go b/cmd/symbols/internal/symbols/fetch.go deleted file mode 100644 index 8da5a104521..00000000000 --- a/cmd/symbols/internal/symbols/fetch.go +++ /dev/null @@ -1,151 +0,0 @@ -package symbols - -import ( - "archive/tar" - "bytes" - "context" - "io" - "path" - - "github.com/opentracing/opentracing-go/ext" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - - "github.com/sourcegraph/sourcegraph/internal/api" - "github.com/sourcegraph/sourcegraph/internal/trace/ot" -) - -type parseRequest struct { - path string - data []byte -} - -func (s *Service) fetchRepositoryArchive(ctx context.Context, repo api.RepoName, commitID api.CommitID, paths []string) (<-chan parseRequest, <-chan error, error) { - fetchQueueSize.Inc() - s.fetchSem <- 1 // acquire concurrent fetches semaphore - fetchQueueSize.Dec() - - fetching.Inc() - span, ctx := ot.StartSpanFromContext(ctx, "Store.fetch") - ext.Component.Set(span, "store") - span.SetTag("repo", repo) - span.SetTag("commit", commitID) - - requestCh := make(chan 
parseRequest, s.NumParserProcesses) - errCh := make(chan error, 1) - - // Done is called when the returned reader is closed, or if this function - // returns an error. It should always be called once. - doneCalled := false - done := func(err error) { - if doneCalled { - panic("Store.fetch.done called twice") - } - doneCalled = true - - if err != nil { - errCh <- err - } - - <-s.fetchSem // release concurrent fetches semaphore - close(requestCh) - close(errCh) - - if err != nil { - ext.Error.Set(span, true) - span.SetTag("err", err.Error()) - fetchFailed.Inc() - } - fetching.Dec() - span.Finish() - } - - r, err := s.FetchTar(ctx, repo, commitID, paths) - if err != nil { - done(err) - return nil, nil, err - } - - // After this point we are not allowed to return an error. Instead we can - // return an error via the errChan we return. If you do want to update this - // code please ensure we still always call done once. - - go func() { - defer r.Close() - buf := make([]byte, 32*1024) // 32*1024 is the same size used by io.Copy - tr := tar.NewReader(r) - for { - if ctx.Err() != nil { - done(ctx.Err()) - return - } - - hdr, err := tr.Next() - if err == io.EOF { - done(nil) - return - } - if err != nil { - done(err) - return - } - - if path.Ext(hdr.Name) == ".json" { - continue - } - - // We only care about files - if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA { - continue - } - // We do not search large files - if hdr.Size > maxFileSize { - continue - } - // Heuristic: Assume file is binary if first 256 bytes contain a 0x00. Best effort, so ignore err. - n, err := tr.Read(buf) - if n > 0 && bytes.IndexByte(buf[:n], 0x00) >= 0 { - continue - } - switch err { - case io.EOF: - if n == 0 { - continue - } - case nil: - default: - done(err) - return - } - - // Read the file's contents. 
- data := make([]byte, int(hdr.Size)) - copy(data, buf[:n]) - if n < int(hdr.Size) { - _, err = io.ReadFull(tr, data[n:]) - if err != nil { - done(err) - return - } - } - requestCh <- parseRequest{path: hdr.Name, data: data} - } - }() - - return requestCh, errCh, nil -} - -var ( - fetching = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "symbols_store_fetching", - Help: "The number of fetches currently running.", - }) - fetchQueueSize = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "symbols_store_fetch_queue_size", - Help: "The number of fetch jobs enqueued.", - }) - fetchFailed = promauto.NewCounter(prometheus.CounterOpts{ - Name: "symbols_store_fetch_failed", - Help: "The total number of archive fetches that failed.", - }) -) diff --git a/cmd/symbols/internal/symbols/parse.go b/cmd/symbols/internal/symbols/parse.go deleted file mode 100644 index 677a0eaea2c..00000000000 --- a/cmd/symbols/internal/symbols/parse.go +++ /dev/null @@ -1,212 +0,0 @@ -package symbols - -import ( - "context" - "runtime" - "strings" - "sync" - - "github.com/cockroachdb/errors" - "github.com/inconshreveable/log15" - "github.com/opentracing/opentracing-go/ext" - otlog "github.com/opentracing/opentracing-go/log" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - nettrace "golang.org/x/net/trace" - - "github.com/sourcegraph/go-ctags" - - "github.com/sourcegraph/sourcegraph/internal/api" - "github.com/sourcegraph/sourcegraph/internal/search/result" - "github.com/sourcegraph/sourcegraph/internal/trace/ot" -) - -// startParsers starts the parser process pool. 
-func (s *Service) startParsers() error { - n := s.NumParserProcesses - if n == 0 { - n = runtime.GOMAXPROCS(0) - } - - s.parsers = make(chan ctags.Parser, n) - for i := 0; i < n; i++ { - parser, err := s.NewParser() - if err != nil { - return errors.Wrap(err, "NewParser") - } - s.parsers <- parser - } - return nil -} - -func (s *Service) parseUncached(ctx context.Context, repo api.RepoName, commitID api.CommitID, paths []string, callback func(symbol result.Symbol) error) (err error) { - span, ctx := ot.StartSpanFromContext(ctx, "parseUncached") - defer func() { - if err != nil { - ext.Error.Set(span, true) - span.LogFields(otlog.Error(err)) - } - span.Finish() - }() - span.SetTag("repo", string(repo)) - span.SetTag("commit", string(commitID)) - - tr := nettrace.New("parseUncached", string(repo)) - tr.LazyPrintf("commitID: %s", commitID) - - totalSymbols := 0 - defer func() { - tr.LazyPrintf("symbols=%d", totalSymbols) - if err != nil { - tr.LazyPrintf("error: %s", err) - tr.SetError() - } - tr.Finish() - }() - - tr.LazyPrintf("fetch") - parseRequests, errChan, err := s.fetchRepositoryArchive(ctx, repo, commitID, paths) - tr.LazyPrintf("fetch (returned chans)") - if err != nil { - return err - } - - ctx, cancel := context.WithCancel(ctx) - defer cancel() - - var ( - mu sync.Mutex // protects symbols and err - wg sync.WaitGroup - sem = make(chan struct{}, runtime.GOMAXPROCS(0)) - ) - tr.LazyPrintf("parse") - totalParseRequests := 0 - for req := range parseRequests { - totalParseRequests++ - if ctx.Err() != nil { - // Drain parseRequests - go func() { - for range parseRequests { - } - }() - return ctx.Err() - } - sem <- struct{}{} - wg.Add(1) - go func(req parseRequest) { - defer func() { - wg.Done() - <-sem - }() - entries, parseErr := s.parse(ctx, req) - if parseErr != nil && parseErr != context.Canceled && parseErr != context.DeadlineExceeded { - log15.Error("Error parsing symbols.", "repo", repo, "commitID", commitID, "path", req.path, "dataSize", len(req.data), 
"error", parseErr) - } - if len(entries) > 0 { - mu.Lock() - defer mu.Unlock() - for _, e := range entries { - if e.Name == "" || strings.HasPrefix(e.Name, "__anon") || strings.HasPrefix(e.Parent, "__anon") || strings.HasPrefix(e.Name, "AnonymousFunction") || strings.HasPrefix(e.Parent, "AnonymousFunction") { - continue - } - totalSymbols++ - err = callback(entryToSymbol(e)) - if err != nil { - log15.Error("Failed to add symbol", "symbol", e, "error", err) - return - } - } - } - }(req) - } - wg.Wait() - tr.LazyPrintf("parse (done) totalParseRequests=%d symbols=%d", totalParseRequests, totalSymbols) - - return <-errChan -} - -// parse gets a parser from the pool and uses it to satisfy the parse request. -func (s *Service) parse(ctx context.Context, req parseRequest) (entries []*ctags.Entry, err error) { - parseQueueSize.Inc() - - select { - case <-ctx.Done(): - parseQueueSize.Dec() - if ctx.Err() == context.DeadlineExceeded { - parseQueueTimeouts.Inc() - } - return nil, ctx.Err() - case parser, ok := <-s.parsers: - parseQueueSize.Dec() - - if !ok { - return nil, nil - } - - if parser == nil { - // The parser failed for some previous receiver (who returned a nil parser to the channel). Try - // creating a parser. - var err error - parser, err = s.NewParser() - if err != nil { - return nil, err - } - } - - defer func() { - if err == nil { - if e := recover(); e != nil { - err = errors.Errorf("panic: %s", e) - } - } - if err == nil { - // Return parser to pool. - s.parsers <- parser - } else { - // Close parser and return nil to pool, indicating that the next receiver should create a new - // parser. 
- log15.Error("Closing failed parser and creating a new one.", "path", req.path, "error", err) - parseFailed.Inc() - parser.Close() - s.parsers <- nil - } - }() - parsing.Inc() - defer parsing.Dec() - return parser.Parse(req.path, req.data) - } -} - -func entryToSymbol(e *ctags.Entry) result.Symbol { - return result.Symbol{ - Name: e.Name, - Path: e.Path, - Line: e.Line, - Kind: e.Kind, - Language: e.Language, - Parent: e.Parent, - ParentKind: e.ParentKind, - Signature: e.Signature, - Pattern: e.Pattern, - FileLimited: e.FileLimited, - } -} - -var ( - parsing = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "symbols_parse_parsing", - Help: "The number of parse jobs currently running.", - }) - parseQueueSize = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "symbols_parse_parse_queue_size", - Help: "The number of parse jobs enqueued.", - }) - parseQueueTimeouts = promauto.NewCounter(prometheus.CounterOpts{ - Name: "symbols_parse_parse_queue_timeouts", - Help: "The total number of parse jobs that timed out while enqueued.", - }) - parseFailed = promauto.NewCounter(prometheus.CounterOpts{ - Name: "symbols_parse_parse_failed", - Help: "The total number of parse jobs that failed.", - }) -) diff --git a/cmd/symbols/internal/symbols/search.go b/cmd/symbols/internal/symbols/search.go deleted file mode 100644 index e4ea6781fa6..00000000000 --- a/cmd/symbols/internal/symbols/search.go +++ /dev/null @@ -1,599 +0,0 @@ -package symbols - -import ( - "context" - "database/sql" - "encoding/json" - "fmt" - "io" - "net/http" - "os" - "path/filepath" - "regexp" - "regexp/syntax" - "strings" - "time" - - "github.com/mattn/go-sqlite3" - - "github.com/sourcegraph/sourcegraph/internal/api" - "github.com/sourcegraph/sourcegraph/internal/diskcache" - - "github.com/inconshreveable/log15" - "github.com/jmoiron/sqlx" - "github.com/keegancsmith/sqlf" - "github.com/opentracing/opentracing-go/ext" - otlog "github.com/opentracing/opentracing-go/log" - nettrace "golang.org/x/net/trace" - - 
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/protocol" - "github.com/sourcegraph/sourcegraph/internal/trace/ot" - - "github.com/sourcegraph/sourcegraph/internal/search/result" -) - -func init() { - sql.Register("sqlite3_with_regexp", - &sqlite3.SQLiteDriver{ - ConnectHook: func(conn *sqlite3.SQLiteConn) error { - return conn.RegisterFunc("REGEXP", regexp.MatchString, true) - }, - }) -} - -// maxFileSize is the limit on file size in bytes. Only files smaller than this are processed. -const maxFileSize = 1 << 19 // 512KB - -func (s *Service) handleSearch(w http.ResponseWriter, r *http.Request) { - var args protocol.SearchArgs - if err := json.NewDecoder(r.Body).Decode(&args); err != nil { - http.Error(w, err.Error(), http.StatusBadRequest) - return - } - - result, err := s.search(r.Context(), args) - if err != nil { - if err == context.Canceled && r.Context().Err() == context.Canceled { - return // client went away - } - log15.Error("Symbol search failed", "args", args, "error", err) - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } - - if err := json.NewEncoder(w).Encode(result); err != nil { - http.Error(w, err.Error(), http.StatusInternalServerError) - return - } -} - -func (s *Service) search(ctx context.Context, args protocol.SearchArgs) (*result.Symbols, error) { - var err error - ctx, cancel := context.WithTimeout(ctx, 60*time.Second) - defer cancel() - - log15.Debug("Symbol search", "repo", args.Repo, "query", args.Query) - span, ctx := ot.StartSpanFromContext(ctx, "search") - span.SetTag("repo", args.Repo) - span.SetTag("commitID", args.CommitID) - span.SetTag("query", args.Query) - span.SetTag("first", args.First) - defer func() { - if err != nil { - ext.Error.Set(span, true) - span.LogFields(otlog.Error(err)) - } - span.Finish() - }() - - tr := nettrace.New("symbols.search", fmt.Sprintf("args:%+v", args)) - defer func() { - if err != nil { - tr.LazyPrintf("error: %v", err) - tr.SetError() - } - tr.Finish() - }() - - 
dbFile, err := s.getDBFile(ctx, args) - if err != nil { - return nil, err - } - db, err := sqlx.Open("sqlite3_with_regexp", dbFile) - if err != nil { - return nil, err - } - defer db.Close() - - result, err := filterSymbols(ctx, db, args) - if err != nil { - return nil, err - } - return &result, nil -} - -// getDBFile returns the path to the sqlite3 database for the repo@commit -// specified in `args`. If the database doesn't already exist in the disk cache, -// it will create a new one and write all the symbols into it. -func (s *Service) getDBFile(ctx context.Context, args protocol.SearchArgs) (string, error) { - diskcacheFile, err := s.cache.OpenWithPath(ctx, []string{string(args.Repo), fmt.Sprintf("%s-%d", args.CommitID, symbolsDBVersion)}, func(fetcherCtx context.Context, tempDBFile string) error { - newest, commit, err := findNewestFile(filepath.Join(s.cache.Dir, diskcache.EncodeKeyComponent(string(args.Repo)))) - if err != nil { - return err - } - - var changes *Changes - if commit != "" && s.GitDiff != nil { - var err error - changes, err = s.GitDiff(ctx, args.Repo, commit, args.CommitID) - if err != nil { - return err - } - - // Avoid sending more files than will fit in HTTP headers. - totalPathsLength := 0 - paths := []string{} - paths = append(paths, changes.Added...) - paths = append(paths, changes.Modified...) - paths = append(paths, changes.Deleted...) - for _, path := range paths { - totalPathsLength += len(path) - } - - if totalPathsLength > MAX_TOTAL_PATHS_LENGTH { - changes = nil - } - } - - if changes == nil { - // There are no existing SQLite DBs to reuse, or the diff is too big, so write a completely - // new one. 
- err := s.writeAllSymbolsToNewDB(fetcherCtx, tempDBFile, args.Repo, args.CommitID) - if err != nil { - if err == context.Canceled { - log15.Error("Unable to parse repository symbols within the context", "repo", args.Repo, "commit", args.CommitID, "query", args.Query) - } - return err - } - } else { - // Copy the existing DB to a new DB and update the new DB - err = copyFile(newest, tempDBFile) - if err != nil { - return err - } - - err = s.updateSymbols(fetcherCtx, tempDBFile, args.Repo, args.CommitID, *changes) - if err != nil { - if err == context.Canceled { - log15.Error("updateSymbols: unable to parse repository symbols within the context", "repo", args.Repo, "commit", args.CommitID, "query", args.Query) - } - return err - } - } - - return nil - }) - if err != nil { - return "", err - } - defer diskcacheFile.File.Close() - - return diskcacheFile.File.Name(), err -} - -// isLiteralEquality checks if the given regex matches literal strings exactly. -// Returns whether or not the regex is exact, along with the literal string if -// so. 
-func isLiteralEquality(expr string) (ok bool, lit string, err error) { - r, err := syntax.Parse(expr, syntax.Perl) - if err != nil { - return false, "", err - } - // Want a Concat of size 3 which is [Begin, Literal, End] - if r.Op != syntax.OpConcat || len(r.Sub) != 3 || // size 3 concat - !(r.Sub[0].Op == syntax.OpBeginLine || r.Sub[0].Op == syntax.OpBeginText) || // Starts with ^ - !(r.Sub[2].Op == syntax.OpEndLine || r.Sub[2].Op == syntax.OpEndText) || // Ends with $ - r.Sub[1].Op != syntax.OpLiteral { // is a literal - return false, "", nil - } - return true, string(r.Sub[1].Rune), nil -} - -func filterSymbols(ctx context.Context, db *sqlx.DB, args protocol.SearchArgs) (res []result.Symbol, err error) { - span, _ := ot.StartSpanFromContext(ctx, "filterSymbols") - defer func() { - if err != nil { - ext.Error.Set(span, true) - span.LogFields(otlog.Error(err)) - } - span.Finish() - }() - - const maxFirst = 500 - if args.First < 0 || args.First > maxFirst { - args.First = maxFirst - } - - makeCondition := func(column string, regex string) []*sqlf.Query { - conditions := []*sqlf.Query{} - - if regex == "" { - return conditions - } - - if isExact, symbolName, err := isLiteralEquality(regex); isExact && err == nil { - // It looks like the user is asking for exact matches, so use `=` to - // get the speed boost from the index on the column. 
- if args.IsCaseSensitive { - conditions = append(conditions, sqlf.Sprintf(column+" = %s", symbolName)) - } else { - conditions = append(conditions, sqlf.Sprintf(column+"lowercase = %s", strings.ToLower(symbolName))) - } - } else { - if !args.IsCaseSensitive { - regex = "(?i:" + regex + ")" - } - conditions = append(conditions, sqlf.Sprintf(column+" REGEXP %s", regex)) - } - - return conditions - } - - negateAll := func(oldConditions []*sqlf.Query) []*sqlf.Query { - newConditions := []*sqlf.Query{} - - for _, oldCondition := range oldConditions { - newConditions = append(newConditions, sqlf.Sprintf("NOT %s", oldCondition)) - } - - return newConditions - } - - var conditions []*sqlf.Query - conditions = append(conditions, makeCondition("name", args.Query)...) - for _, includePattern := range args.IncludePatterns { - conditions = append(conditions, makeCondition("path", includePattern)...) - } - conditions = append(conditions, negateAll(makeCondition("path", args.ExcludePattern))...) - - var sqlQuery *sqlf.Query - if len(conditions) == 0 { - sqlQuery = sqlf.Sprintf("SELECT * FROM symbols LIMIT %s", args.First) - } else { - sqlQuery = sqlf.Sprintf("SELECT * FROM symbols WHERE %s LIMIT %s", sqlf.Join(conditions, "AND"), args.First) - } - - var symbolsInDB []symbolInDB - err = db.Select(&symbolsInDB, sqlQuery.Query(sqlf.PostgresBindVar), sqlQuery.Args()...) - if err != nil { - return nil, err - } - - for _, symbolInDB := range symbolsInDB { - res = append(res, symbolInDBToSymbol(symbolInDB)) - } - - span.SetTag("hits", len(res)) - return res, nil -} - -// The version of the symbols database schema. This is included in the database -// filenames to prevent a newer version of the symbols service from attempting -// to read from a database created by an older (and likely incompatible) symbols -// service. Increment this when you change the database schema. 
-const symbolsDBVersion = 4 - -// symbolInDB is the same as `protocol.Symbol`, but with two additional columns: -// namelowercase and pathlowercase, which enable indexed case insensitive -// queries. -type symbolInDB struct { - Name string - NameLowercase string // derived from `Name` - Path string - PathLowercase string // derived from `Path` - Line int - Kind string - Language string - Parent string - ParentKind string - Signature string - Pattern string - - // Whether or not the symbol is local to the file. - FileLimited bool -} - -func symbolToSymbolInDB(symbol result.Symbol) symbolInDB { - return symbolInDB{ - Name: symbol.Name, - NameLowercase: strings.ToLower(symbol.Name), - Path: symbol.Path, - PathLowercase: strings.ToLower(symbol.Path), - Line: symbol.Line, - Kind: symbol.Kind, - Language: symbol.Language, - Parent: symbol.Parent, - ParentKind: symbol.ParentKind, - Signature: symbol.Signature, - Pattern: symbol.Pattern, - - FileLimited: symbol.FileLimited, - } -} - -func symbolInDBToSymbol(symbolInDB symbolInDB) result.Symbol { - return result.Symbol{ - Name: symbolInDB.Name, - Path: symbolInDB.Path, - Line: symbolInDB.Line, - Kind: symbolInDB.Kind, - Language: symbolInDB.Language, - Parent: symbolInDB.Parent, - ParentKind: symbolInDB.ParentKind, - Signature: symbolInDB.Signature, - Pattern: symbolInDB.Pattern, - - FileLimited: symbolInDB.FileLimited, - } -} - -// writeAllSymbolsToNewDB fetches the repo@commit from gitserver, parses all the -// symbols, and writes them to the blank database file `dbFile`. -func (s *Service) writeAllSymbolsToNewDB(ctx context.Context, dbFile string, repoName api.RepoName, commitID api.CommitID) (err error) { - db, err := sqlx.Open("sqlite3_with_regexp", dbFile) - if err != nil { - return err - } - defer db.Close() - - // Writing a bunch of rows into sqlite3 is much faster in a transaction. 
- tx, err := db.Beginx() - if err != nil { - return err - } - defer func() { - if err != nil { - _ = tx.Rollback() - return - } - err = tx.Commit() - }() - - _, err = tx.Exec( - `CREATE TABLE IF NOT EXISTS meta ( - id INTEGER PRIMARY KEY CHECK (id = 0), - revision TEXT NOT NULL - )`) - if err != nil { - return err - } - - _, err = tx.Exec( - `INSERT INTO meta (id, revision) VALUES (0, ?)`, - string(commitID)) - if err != nil { - return err - } - - // The column names are the lowercase version of fields in `symbolInDB` - // because sqlx lowercases struct fields by default. See - // http://jmoiron.github.io/sqlx/#query - _, err = tx.Exec( - `CREATE TABLE IF NOT EXISTS symbols ( - name VARCHAR(256) NOT NULL, - namelowercase VARCHAR(256) NOT NULL, - path VARCHAR(4096) NOT NULL, - pathlowercase VARCHAR(4096) NOT NULL, - line INT NOT NULL, - kind VARCHAR(255) NOT NULL, - language VARCHAR(255) NOT NULL, - parent VARCHAR(255) NOT NULL, - parentkind VARCHAR(255) NOT NULL, - signature VARCHAR(255) NOT NULL, - pattern VARCHAR(255) NOT NULL, - filelimited BOOLEAN NOT NULL - )`) - if err != nil { - return err - } - - _, err = tx.Exec(`CREATE INDEX name_index ON symbols(name);`) - if err != nil { - return err - } - - _, err = tx.Exec(`CREATE INDEX path_index ON symbols(path);`) - if err != nil { - return err - } - - // `*lowercase_index` enables indexed case insensitive queries. 
- _, err = tx.Exec(`CREATE INDEX namelowercase_index ON symbols(namelowercase);`) - if err != nil { - return err - } - - _, err = tx.Exec(`CREATE INDEX pathlowercase_index ON symbols(pathlowercase);`) - if err != nil { - return err - } - - insertStatement, err := tx.PrepareNamed(insertQuery) - if err != nil { - return err - } - - return s.parseUncached(ctx, repoName, commitID, []string{}, func(symbol result.Symbol) error { - symbolInDBValue := symbolToSymbolInDB(symbol) - _, err := insertStatement.Exec(&symbolInDBValue) - return err - }) -} - -// updateSymbols adds/removes rows from the DB based on a `git diff` between the meta.revision within the -// DB and the given commitID. -func (s *Service) updateSymbols(ctx context.Context, dbFile string, repoName api.RepoName, commitID api.CommitID, changes Changes) (err error) { - db, err := sqlx.Open("sqlite3_with_regexp", dbFile) - if err != nil { - return err - } - defer db.Close() - - // Writing a bunch of rows into sqlite3 is much faster in a transaction. - tx, err := db.Beginx() - if err != nil { - return err - } - defer func() { - if err != nil { - _ = tx.Rollback() - return - } - err = tx.Commit() - }() - - // Write new commit - _, err = tx.Exec(`UPDATE meta SET revision = ?`, string(commitID)) - if err != nil { - return err - } - - deleteStatement, err := tx.Prepare("DELETE FROM symbols WHERE path = ?") - if err != nil { - return err - } - - insertStatement, err := tx.PrepareNamed(insertQuery) - if err != nil { - return err - } - - paths := []string{} - paths = append(paths, changes.Added...) - paths = append(paths, changes.Modified...) - paths = append(paths, changes.Deleted...) 
- - for _, path := range paths { - _, err := deleteStatement.Exec(path) - if err != nil { - return err - } - } - - return s.parseUncached(ctx, repoName, commitID, append(changes.Added, changes.Modified...), func(symbol result.Symbol) error { - symbolInDBValue := symbolToSymbolInDB(symbol) - _, err := insertStatement.Exec(&symbolInDBValue) - return err - }) -} - -const insertQuery = ` - INSERT INTO symbols ( name, namelowercase, path, pathlowercase, line, kind, language, parent, parentkind, signature, pattern, filelimited) - VALUES (:name, :namelowercase, :path, :pathlowercase, :line, :kind, :language, :parent, :parentkind, :signature, :pattern, :filelimited)` - -// SanityCheck makes sure that go-sqlite3 was compiled with cgo by -// seeing if we can actually create a table. -func SanityCheck() error { - db, err := sqlx.Open("sqlite3_with_regexp", ":memory:") - if err != nil { - return err - } - defer db.Close() - _, err = db.Exec("CREATE TABLE test (col TEXT);") - if err != nil { - // If go-sqlite3 was not compiled with cgo, the error will be: - // - // > Binary was compiled with 'CGO_ENABLED=0', go-sqlite3 requires cgo to work. This is a stub - return err - } - - return nil -} - -// findNewestFile lists the directory and returns the newest file's path (prepended with dir) and the -// commit. 
-func findNewestFile(dir string) (string, api.CommitID, error) { - files, err := os.ReadDir(dir) - if err != nil { - return "", "", nil - } - - var mostRecentTime time.Time - newest := "" - for _, fi := range files { - if fi.Type().IsRegular() { - if !strings.HasSuffix(fi.Name(), ".zip") { - continue - } - - info, err := fi.Info() - if err != nil { - return "", "", err - } - - if newest == "" || info.ModTime().After(mostRecentTime) { - mostRecentTime = info.ModTime() - newest = filepath.Join(dir, fi.Name()) - } - } - } - - if newest == "" { - return "", "", nil - } - - db, err := sqlx.Open("sqlite3_with_regexp", newest) - if err != nil { - return "", "", err - } - defer db.Close() - - // Read old commit - row := db.QueryRow(`SELECT revision FROM meta`) - commit := api.CommitID("") - if err = row.Scan(&commit); err != nil { - return "", "", err - } - - return newest, commit, nil -} - -// copyFile is like the cp command. -func copyFile(from string, to string) error { - fromFile, err := os.Open(from) - if err != nil { - return err - } - defer fromFile.Close() - - toFile, err := os.OpenFile(to, os.O_RDWR|os.O_CREATE, 0666) - if err != nil { - return err - } - defer toFile.Close() - - _, err = io.Copy(toFile, fromFile) - return err -} - -// Changes are added and deleted paths. -type Changes struct { - Added []string - Modified []string - Deleted []string -} - -func NewChanges() Changes { - return Changes{ - Added: []string{}, - Modified: []string{}, - Deleted: []string{}, - } -} - -// The maximum sum of bytes in paths in a diff when doing incremental indexing. Diffs bigger than this -// will not be incrementally indexed, and instead we will process all symbols. Without this limit, we -// could hit HTTP 431 (header fields too large) when sending the list of paths `git archive paths...`. -// The actual limit is somewhere between 372KB and 450KB, and we want to be well under that. 100KB seems -// safe. 
-const MAX_TOTAL_PATHS_LENGTH = 100000 diff --git a/cmd/symbols/internal/symbols/search_test.go b/cmd/symbols/internal/symbols/search_test.go deleted file mode 100644 index 336bdcee3ba..00000000000 --- a/cmd/symbols/internal/symbols/search_test.go +++ /dev/null @@ -1,82 +0,0 @@ -package symbols - -import ( - "context" - "fmt" - "os" - "path" - "testing" - - "github.com/inconshreveable/log15" - - "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/protocol" - "github.com/sourcegraph/sourcegraph/internal/testutil" -) - -func BenchmarkSearch(b *testing.B) { - log15.Root().SetHandler(log15.LvlFilterHandler(log15.LvlError, log15.Root().GetHandler())) - - service := Service{ - FetchTar: testutil.FetchTarFromGithubWithPaths, - NewParser: NewParser, - Path: "/tmp/symbols-cache", - } - if err := service.Start(); err != nil { - b.Fatal(err) - } - - ctx := context.Background() - b.ResetTimer() - - indexTests := []protocol.SearchArgs{ - {Repo: "github.com/sourcegraph/go-langserver", CommitID: "391a062a7d9977510e7e883e412769b07fed8b5e"}, - {Repo: "github.com/moby/moby", CommitID: "6e5c2d639f67ae70f54d9f2285f3261440b074aa"}, - } - - queryTests := []protocol.SearchArgs{ - {Repo: "github.com/sourcegraph/go-langserver", CommitID: "391a062a7d9977510e7e883e412769b07fed8b5e", Query: "^sortedImportRecord$", First: 10}, - {Repo: "github.com/sourcegraph/go-langserver", CommitID: "391a062a7d9977510e7e883e412769b07fed8b5e", Query: "1234doesnotexist1234", First: 1}, - {Repo: "github.com/moby/moby", CommitID: "6e5c2d639f67ae70f54d9f2285f3261440b074aa", Query: "^fsCache$", First: 10}, - {Repo: "github.com/moby/moby", CommitID: "6e5c2d639f67ae70f54d9f2285f3261440b074aa", Query: "1234doesnotexist1234", First: 1}, - } - - runIndexTest := func(test protocol.SearchArgs) { - b.Run(fmt.Sprintf("indexing %s@%s", path.Base(string(test.Repo)), test.CommitID[:3]), func(b *testing.B) { - for n := 0; n < b.N; n++ { - tempFile, err := os.CreateTemp("", "") - if err != nil { - b.Fatal(err) - } - defer 
os.Remove(tempFile.Name()) - err = service.writeAllSymbolsToNewDB(ctx, tempFile.Name(), test.Repo, test.CommitID) - if err != nil { - b.Fatal(err) - } - } - }) - } - - runQueryTest := func(test protocol.SearchArgs) { - b.Run(fmt.Sprintf("searching %s@%s %s", path.Base(string(test.Repo)), test.CommitID[:3], test.Query), func(b *testing.B) { - _, err := service.search(ctx, test) - if err != nil { - b.Fatal(err) - } - b.ResetTimer() - for n := 0; n < b.N; n++ { - _, err := service.search(ctx, test) - if err != nil { - b.Fatal(err) - } - } - }) - } - - for _, test := range indexTests { - runIndexTest(test) - } - - for _, test := range queryTests { - runQueryTest(test) - } -} diff --git a/cmd/symbols/internal/symbols/service.go b/cmd/symbols/internal/symbols/service.go deleted file mode 100644 index a05d2e99709..00000000000 --- a/cmd/symbols/internal/symbols/service.go +++ /dev/null @@ -1,131 +0,0 @@ -// Package symbols implements the symbol search service. -package symbols - -import ( - "context" - "io" - "log" - "net/http" - "time" - - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - - "github.com/sourcegraph/go-ctags" - - "github.com/sourcegraph/sourcegraph/internal/api" - "github.com/sourcegraph/sourcegraph/internal/diskcache" -) - -// Service is the symbols service. -type Service struct { - // FetchTar returns an io.ReadCloser to a tar archive of a repository at the specified Git - // remote URL and commit ID. If the error implements "BadRequest() bool", it will be used to - // determine if the error is a bad request (eg invalid repo). - FetchTar func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) - - // GitDiff returns the paths that have changed between two commits. - GitDiff func(context.Context, api.RepoName, api.CommitID, api.CommitID) (*Changes, error) - - // MaxConcurrentFetchTar is the maximum number of concurrent calls allowed - // to FetchTar. It defaults to 15. 
- MaxConcurrentFetchTar int - - NewParser func() (ctags.Parser, error) - - // NumParserProcesses is the maximum number of ctags parser child processes to run. - NumParserProcesses int - - // Path is the directory in which to store the cache. - Path string - - // MaxCacheSizeBytes is the maximum size of the cache in bytes. Note: - // We can temporarily be larger than MaxCacheSizeBytes. When we go - // over MaxCacheSizeBytes we trigger delete files until we get below - // MaxCacheSizeBytes. - MaxCacheSizeBytes int64 - - // cache is the disk backed cache. - cache *diskcache.Store - - // fetchSem is a semaphore to limit concurrent calls to FetchTar. The - // semaphore size is controlled by MaxConcurrentFetchTar - fetchSem chan int - - // pool of ctags parser child processes - parsers chan ctags.Parser -} - -// Start must be called before any requests are handled. -func (s *Service) Start() error { - if err := s.startParsers(); err != nil { - return err - } - - if s.MaxConcurrentFetchTar == 0 { - s.MaxConcurrentFetchTar = 15 - } - s.fetchSem = make(chan int, s.MaxConcurrentFetchTar) - - s.cache = &diskcache.Store{ - Dir: s.Path, - Component: "symbols", - BackgroundTimeout: 20 * time.Minute, - } - go s.watchAndEvict() - - return nil -} - -// Handler returns the http.Handler that should be used to serve requests. -func (s *Service) Handler() http.Handler { - if s.parsers == nil { - panic("must call StartParserPool first") - } - - mux := http.NewServeMux() - - mux.HandleFunc("/search", s.handleSearch) - mux.HandleFunc("/healthz", s.handleHealthCheck) - - return mux -} - -func (s *Service) handleHealthCheck(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(http.StatusOK) - - _, err := w.Write([]byte("Ok")) - if err != nil { - log.Printf("failed to write response to health check, err: %s", err) - } -} - -// watchAndEvict is a loop which periodically checks the size of the cache and -// evicts/deletes items if the store gets too large. 
-func (s *Service) watchAndEvict() { - if s.MaxCacheSizeBytes == 0 { - return - } - - for { - time.Sleep(10 * time.Second) - stats, err := s.cache.Evict(s.MaxCacheSizeBytes) - if err != nil { - log.Printf("failed to Evict: %s", err) - continue - } - cacheSizeBytes.Set(float64(stats.CacheSize)) - evictions.Add(float64(stats.Evicted)) - } -} - -var ( - cacheSizeBytes = promauto.NewGauge(prometheus.GaugeOpts{ - Name: "symbols_store_cache_size_bytes", - Help: "The total size of items in the on disk cache.", - }) - evictions = promauto.NewCounter(prometheus.CounterOpts{ - Name: "symbols_store_evictions", - Help: "The total number of items evicted from the cache.", - }) -) diff --git a/cmd/symbols/internal/symbols/service_test.go b/cmd/symbols/internal/symbols/service_test.go deleted file mode 100644 index e2db68accb3..00000000000 --- a/cmd/symbols/internal/symbols/service_test.go +++ /dev/null @@ -1,184 +0,0 @@ -package symbols - -import ( - "archive/tar" - "bytes" - "context" - "io" - "net/http/httptest" - "os" - "reflect" - "testing" - - "github.com/sourcegraph/go-ctags" - - "github.com/sourcegraph/sourcegraph/internal/api" - "github.com/sourcegraph/sourcegraph/internal/httpcli" - "github.com/sourcegraph/sourcegraph/internal/search" - "github.com/sourcegraph/sourcegraph/internal/search/result" - symbolsclient "github.com/sourcegraph/sourcegraph/internal/symbols" -) - -func TestIsLiteralEquality(t *testing.T) { - type TestCase struct { - Regex string - WantOk bool - WantLiteral string - } - - for _, test := range []TestCase{ - {Regex: `^foo$`, WantLiteral: "foo", WantOk: true}, - {Regex: `^[f]oo$`, WantLiteral: `foo`, WantOk: true}, - {Regex: `^\\$`, WantLiteral: `\`, WantOk: true}, - {Regex: `^\$`, WantOk: false}, - {Regex: `^\($`, WantLiteral: `(`, WantOk: true}, - {Regex: `\\`, WantOk: false}, - {Regex: `\$`, WantOk: false}, - {Regex: `\(`, WantOk: false}, - {Regex: `foo$`, WantOk: false}, - {Regex: `(^foo$|^bar$)`, WantOk: false}, - } { - gotOk, gotLiteral, err := 
isLiteralEquality(test.Regex) - if err != nil { - t.Fatal(err) - } - if gotOk != test.WantOk { - t.Errorf("isLiteralEquality(%s) returned %t, wanted %t", test.Regex, gotOk, test.WantOk) - } - if gotLiteral != test.WantLiteral { - t.Errorf( - "isLiteralEquality(%s) returned the literal %s, wanted %s", - test.Regex, - gotLiteral, - test.WantLiteral, - ) - } - } -} - -func TestService(t *testing.T) { - tmpDir, err := os.MkdirTemp("", "") - if err != nil { - t.Fatal(err) - } - defer func() { os.RemoveAll(tmpDir) }() - - files := map[string]string{"a.js": "var x = 1"} - service := Service{ - FetchTar: func(ctx context.Context, repo api.RepoName, commit api.CommitID, paths []string) (io.ReadCloser, error) { - return createTar(files) - }, - NewParser: func() (ctags.Parser, error) { - return mockParser{"x", "y"}, nil - }, - Path: tmpDir, - } - - if err := service.Start(); err != nil { - t.Fatal(err) - } - server := httptest.NewServer(service.Handler()) - defer server.Close() - client := symbolsclient.Client{ - URL: server.URL, - HTTPClient: httpcli.InternalDoer, - } - x := result.Symbol{Name: "x", Path: "a.js"} - y := result.Symbol{Name: "y", Path: "a.js"} - - tests := map[string]struct { - args search.SymbolsParameters - want result.Symbols - }{ - "simple": { - args: search.SymbolsParameters{First: 10}, - want: []result.Symbol{x, y}, - }, - "onematch": { - args: search.SymbolsParameters{Query: "x", First: 10}, - want: []result.Symbol{x}, - }, - "nomatches": { - args: search.SymbolsParameters{Query: "foo", First: 10}, - want: nil, - }, - "caseinsensitiveexactmatch": { - args: search.SymbolsParameters{Query: "^X$", First: 10}, - want: []result.Symbol{x}, - }, - "casesensitiveexactmatch": { - args: search.SymbolsParameters{Query: "^x$", IsCaseSensitive: true, First: 10}, - want: []result.Symbol{x}, - }, - "casesensitivenoexactmatch": { - args: search.SymbolsParameters{Query: "^X$", IsCaseSensitive: true, First: 10}, - want: nil, - }, - "caseinsensitiveexactpathmatch": { - 
args: search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, First: 10}, - want: []result.Symbol{x, y}, - }, - "casesensitiveexactpathmatch": { - args: search.SymbolsParameters{IncludePatterns: []string{"^a.js$"}, IsCaseSensitive: true, First: 10}, - want: []result.Symbol{x, y}, - }, - "casesensitivenoexactpathmatch": { - args: search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, IsCaseSensitive: true, First: 10}, - want: nil, - }, - "exclude": { - args: search.SymbolsParameters{ExcludePattern: "a.js", IsCaseSensitive: true, First: 10}, - want: nil, - }, - } - for label, test := range tests { - t.Run(label, func(t *testing.T) { - result, err := client.Search(context.Background(), test.args) - if err != nil { - t.Fatal(err) - } - if result != nil && !reflect.DeepEqual(*result, test.want) { - t.Errorf("got %+v, want %+v", *result, test.want) - } - if result == nil && test.want != nil { - t.Errorf("got nil, want %+v", test.want) - } - }) - } -} - -func createTar(files map[string]string) (io.ReadCloser, error) { - buf := new(bytes.Buffer) - w := tar.NewWriter(buf) - for name, body := range files { - hdr := &tar.Header{ - Name: name, - Mode: 0o600, - Size: int64(len(body)), - } - if err := w.WriteHeader(hdr); err != nil { - return nil, err - } - if _, err := w.Write([]byte(body)); err != nil { - return nil, err - } - } - - err := w.Close() - if err != nil { - return nil, err - } - return io.NopCloser(bytes.NewReader(buf.Bytes())), nil -} - -type mockParser []string - -func (m mockParser) Parse(name string, content []byte) ([]*ctags.Entry, error) { - entries := make([]*ctags.Entry, len(m)) - for i, name := range m { - entries[i] = &ctags.Entry{Name: name, Path: "a.js"} - } - return entries, nil -} - -func (mockParser) Close() {} diff --git a/cmd/symbols/internal/protocol/symbols.go b/cmd/symbols/internal/types/search_args.go similarity index 98% rename from cmd/symbols/internal/protocol/symbols.go rename to cmd/symbols/internal/types/search_args.go index 
3ca1c0acf2e..553f9795eb2 100644 --- a/cmd/symbols/internal/protocol/symbols.go +++ b/cmd/symbols/internal/types/search_args.go @@ -1,4 +1,4 @@ -package protocol +package types import ( "github.com/sourcegraph/sourcegraph/internal/api" diff --git a/cmd/symbols/main.go b/cmd/symbols/main.go index fa53a3f1940..34c3c5a892c 100644 --- a/cmd/symbols/main.go +++ b/cmd/symbols/main.go @@ -3,48 +3,65 @@ package main import ( - "bytes" "context" "fmt" - "io" "log" - "net" "net/http" "os" - "os/signal" - "runtime" - "strconv" "time" "github.com/inconshreveable/log15" + "github.com/opentracing/opentracing-go" + "github.com/prometheus/client_golang/prometheus" - "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/symbols" - "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database" + sqlite "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/janitor" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver" + "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser" "github.com/sourcegraph/sourcegraph/internal/conf" "github.com/sourcegraph/sourcegraph/internal/debugserver" + "github.com/sourcegraph/sourcegraph/internal/diskcache" "github.com/sourcegraph/sourcegraph/internal/env" - "github.com/sourcegraph/sourcegraph/internal/gitserver" + "github.com/sourcegraph/sourcegraph/internal/goroutine" + "github.com/sourcegraph/sourcegraph/internal/httpserver" "github.com/sourcegraph/sourcegraph/internal/logging" + "github.com/sourcegraph/sourcegraph/internal/observation" + "github.com/sourcegraph/sourcegraph/internal/profiler" "github.com/sourcegraph/sourcegraph/internal/sentry" 
"github.com/sourcegraph/sourcegraph/internal/trace" "github.com/sourcegraph/sourcegraph/internal/trace/ot" "github.com/sourcegraph/sourcegraph/internal/tracer" - "github.com/sourcegraph/sourcegraph/internal/vcs/git" ) -const port = "3184" +const addr = ":3184" func main() { - var ( - cacheDir = env.Get("CACHE_DIR", "/tmp/symbols-cache", "directory to store cached symbols") - cacheSizeMB = env.Get("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache in megabytes") - ctagsProcesses = env.Get("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of ctags child processes to run") - sanityCheck = env.Get("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not") - ) + config.Load() - if sanityCheck == "true" { + // Set up Google Cloud Profiler when running in Cloud + if err := profiler.Init(); err != nil { + log.Fatalf("Failed to start profiler: %v", err) + } + + env.Lock() + env.HandleHelpFlag() + conf.Init() + logging.Init() + tracer.Init(conf.DefaultClient()) + sentry.Init(conf.DefaultClient()) + trace.Init() + + if err := config.Validate(); err != nil { + log.Fatalf("Failed to load configuration: %s", err) + } + + if config.sanityCheck { fmt.Print("Running sanity check...") - if err := symbols.SanityCheck(); err != nil { + if err := sqlite.SanityCheck(); err != nil { fmt.Println("failed ❌", err) os.Exit(1) } @@ -53,112 +70,54 @@ func main() { os.Exit(0) } - env.Lock() - env.HandleHelpFlag() - log.SetFlags(0) - conf.Init() - logging.Init() - tracer.Init(conf.DefaultClient()) - sentry.Init(conf.DefaultClient()) - trace.Init() + // Initialize tracing/metrics + observationContext := &observation.Context{ + Logger: log15.Root(), + Tracer: &trace.Tracer{Tracer: opentracing.GlobalTracer()}, + Registerer: prometheus.DefaultRegisterer, + } - // Ready immediately + // Start debug server ready := make(chan struct{}) - close(ready) go debugserver.NewServerRoutine(ready).Start() - service := symbols.Service{ - FetchTar: 
func(ctx context.Context, repo api.RepoName, commit api.CommitID, paths []string) (io.ReadCloser, error) { - return gitserver.DefaultClient.Archive(ctx, repo, gitserver.ArchiveOptions{Treeish: string(commit), Format: "tar", Paths: paths}) - }, - GitDiff: func(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) (*symbols.Changes, error) { - output, err := git.DiffSymbols(ctx, repo, commitA, commitB) - if err != nil { - return nil, err - } + ctagsParserFactory := parser.NewCtagsParserFactory( + config.ctagsCommand, + config.ctagsPatternLengthLimit, + config.ctagsLogErrors, + config.ctagsDebugLogs, + ) - // The output is a repeated sequence of: - // - // NUL NUL - // - // where NUL is the 0 byte. - // - // Example: - // - // M NUL cmd/symbols/internal/symbols/fetch.go NUL - - changes := symbols.NewChanges() - slices := bytes.Split(output, []byte{0}) - for i := 0; i < len(slices)-1; i += 2 { - statusIdx := i - fileIdx := i + 1 - - if len(slices[statusIdx]) == 0 { - return nil, fmt.Errorf("unrecognized git diff output (from repo %q, commitA %q, commitB %q): status was empty at index %d", repo, commitA, commitB, i) - } - - status := slices[statusIdx][0] - path := string(slices[fileIdx]) - - switch status { - case 'A': - changes.Added = append(changes.Added, path) - case 'M': - changes.Modified = append(changes.Modified, path) - case 'D': - changes.Deleted = append(changes.Deleted, path) - } - } - - return &changes, nil - }, - NewParser: symbols.NewParser, - Path: cacheDir, + cache := &diskcache.Store{ + Dir: config.cacheDir, + Component: "symbols", + BackgroundTimeout: 20 * time.Minute, } - if mb, err := strconv.ParseInt(cacheSizeMB, 10, 64); err != nil { - log.Fatalf("Invalid SYMBOLS_CACHE_SIZE_MB: %s", err) - } else { - service.MaxCacheSizeBytes = mb * 1000 * 1000 - } - var err error - service.NumParserProcesses, err = strconv.Atoi(ctagsProcesses) + + parserPool, err := parser.NewParserPool(ctagsParserFactory, config.numCtagsProcesses) if err != nil 
{ - log.Fatalf("Invalid CTAGS_PROCESSES: %s", err) - } - if err := service.Start(); err != nil { - log.Fatalln("Start:", err) + log.Fatalf("Failed to create parser pool: %s", err) } - handler := ot.Middleware(trace.HTTPTraceMiddleware(service.Handler(), conf.DefaultClient())) + database.Init() + gitserverClient := gitserver.NewClient(observationContext) + repositoryFetcher := fetcher.NewRepositoryFetcher(gitserverClient, 15, observationContext) + parser := parser.NewParser(parserPool, repositoryFetcher, observationContext) + databaseWriter := writer.NewDatabaseWriter(config.cacheDir, gitserverClient, parser) + cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache) + apiHandler := api.NewHandler(cachedDatabaseWriter, observationContext) - host := "" - if env.InsecureDev { - host = "127.0.0.1" - } - addr := net.JoinHostPort(host, port) - server := &http.Server{ + server := httpserver.NewFromAddr(addr, &http.Server{ ReadTimeout: 75 * time.Second, WriteTimeout: 10 * time.Minute, - Addr: addr, - Handler: handler, - } - go shutdownOnSIGINT(server) + Handler: ot.Middleware(trace.HTTPTraceMiddleware(apiHandler, conf.DefaultClient())), + }) - log15.Info("symbols: listening", "addr", addr) - err = server.ListenAndServe() - if err != http.ErrServerClosed { - log.Fatal(err) - } -} + evictionInterval := time.Second * 10 + cacheSizeBytes := int64(config.cacheSizeMB) * 1000 * 1000 + cacheEvicter := janitor.NewCacheEvicter(evictionInterval, cache, cacheSizeBytes, janitor.NewMetrics(observationContext)) -func shutdownOnSIGINT(s *http.Server) { - c := make(chan os.Signal, 1) - signal.Notify(c, os.Interrupt) - <-c - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - err := s.Shutdown(ctx) - if err != nil { - log.Fatal("graceful server shutdown failed, will exit:", err) - } + // Mark health server as ready and go!
+ close(ready) + goroutine.MonitorBackgroundRoutines(context.Background(), server, cacheEvicter) } diff --git a/dev/check/go-dbconn-import.sh b/dev/check/go-dbconn-import.sh index a4788cfa98e..f4a949a65e1 100755 --- a/dev/check/go-dbconn-import.sh +++ b/dev/check/go-dbconn-import.sh @@ -19,6 +19,8 @@ allowed_prefix=( github.com/sourcegraph/sourcegraph/enterprise/cmd/worker github.com/sourcegraph/sourcegraph/enterprise/cmd/repo-updater github.com/sourcegraph/sourcegraph/enterprise/cmd/precise-code-intel- + # Doesn't connect but uses db internals for use with sqlite + github.com/sourcegraph/sourcegraph/cmd/symbols ) # Create regex ^(a|b|c) diff --git a/doc/admin/observability/alert_solutions.md b/doc/admin/observability/alert_solutions.md index c8263efb92f..5f8dd7b7d4e 100644 --- a/doc/admin/observability/alert_solutions.md +++ b/doc/admin/observability/alert_solutions.md @@ -4145,52 +4145,6 @@ with your code hosts connections or networking issues affecting communication wi
-## symbols: store_fetch_failures - -

store fetch failures every 5m

- -**Descriptions** - -- warning symbols: 5+ store fetch failures every 5m - -**Possible solutions** - -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-store-fetch-failures). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_symbols_store_fetch_failures" -] -``` - -*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* - -
- -## symbols: current_fetch_queue_size - -

current fetch queue size

- -**Descriptions** - -- warning symbols: 25+ current fetch queue size - -**Possible solutions** - -- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-current-fetch-queue-size). -- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert: - -```json -"observability.silenceAlerts": [ - "warning_symbols_current_fetch_queue_size" -] -``` - -*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* - -
- ## symbols: frontend_internal_api_error_responses

frontend-internal API error responses every 5m by route

diff --git a/doc/admin/observability/dashboards.md b/doc/admin/observability/dashboards.md index b9f4bcd85ca..5d77917f9e7 100644 --- a/doc/admin/observability/dashboards.md +++ b/doc/admin/observability/dashboards.md @@ -10689,11 +10689,13 @@ Query: `sum by(app) (up{app=~".*searcher"}) / count by (app) (up{app=~".*searche To see this dashboard, visit `/-/debug/grafana/d/symbols/symbols` on your Sourcegraph instance. -#### symbols: store_fetch_failures +### Symbols: Codeintel: Symbols API -

Store fetch failures every 5m

+#### symbols: codeintel_symbols_api_total -Refer to the [alert solutions reference](./alert_solutions.md#symbols-store-fetch-failures) for 1 alert related to this panel. +

Aggregate API operations every 5m

+ +This panel has no related alerts. To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100000` on your Sourcegraph instance. @@ -10702,17 +10704,17 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100000` o
Technical details -Query: `sum(increase(symbols_store_fetch_failed[5m]))` +Query: `sum(increase(src_codeintel_symbols_api_total{job=~"^symbols.*"}[5m]))`

-#### symbols: current_fetch_queue_size +#### symbols: codeintel_symbols_api_99th_percentile_duration -

Current fetch queue size

+

Aggregate successful API operation duration distribution over 5m

-Refer to the [alert solutions reference](./alert_solutions.md#symbols-current-fetch-queue-size) for 1 alert related to this panel. +This panel has no related alerts. To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100001` on your Sourcegraph instance. @@ -10721,7 +10723,762 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100001` o
Technical details -Query: `sum(symbols_store_fetch_queue_size)` +Query: `sum by (le)(rate(src_codeintel_symbols_api_duration_seconds_bucket{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_api_errors_total + +

Aggregate API operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100002` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_api_error_rate + +

Aggregate API operation error rate over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100003` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m])) / (sum(increase(src_codeintel_symbols_api_total{job=~"^symbols.*"}[5m])) + sum(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m]))) * 100` + +
+ +
+ +#### symbols: codeintel_symbols_api_total + +

API operations every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100010` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_api_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_api_99th_percentile_duration + +

99th percentile successful API operation duration over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100011` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_symbols_api_duration_seconds_bucket{job=~"^symbols.*"}[5m])))` + +
+ +
+ +#### symbols: codeintel_symbols_api_errors_total + +

API operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100012` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_api_error_rate + +

API operation error rate over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100013` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m])) / (sum by (op)(increase(src_codeintel_symbols_api_total{job=~"^symbols.*"}[5m])) + sum by (op)(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m]))) * 100` + +
+ +
+ +### Symbols: Codeintel: Symbols parser + +#### symbols: symbols + +

In-flight parse jobs

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100100` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `max(src_codeintel_symbols_parsing{job=~"^symbols.*"})` + +
+ +
+ +#### symbols: symbols + +

Parser queue size

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100101` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `max(src_codeintel_symbols_parse_queue_size{job=~"^symbols.*"})` + +
+ +
+ +#### symbols: symbols + +

Parse queue timeouts

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100102` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `max(src_codeintel_symbols_parse_queue_timeouts_total{job=~"^symbols.*"})` + +
+ +
+ +#### symbols: symbols + +

Parse failures every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100103` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `rate(src_codeintel_symbols_parse_failed_total{job=~"^symbols.*"}[5m])` + +
+ +
+ +#### symbols: codeintel_symbols_parser_total + +

Aggregate parser operations every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100110` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_parser_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_parser_99th_percentile_duration + +

Aggregate successful parser operation duration distribution over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100111` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (le)(rate(src_codeintel_symbols_parser_duration_seconds_bucket{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_parser_errors_total + +

Aggregate parser operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100112` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_parser_error_rate + +

Aggregate parser operation error rate over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100113` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m])) / (sum(increase(src_codeintel_symbols_parser_total{job=~"^symbols.*"}[5m])) + sum(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m]))) * 100` + +
+ +
+ +#### symbols: codeintel_symbols_parser_total + +

Parser operations every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100120` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_parser_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_parser_99th_percentile_duration + +

99th percentile successful parser operation duration over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100121` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_symbols_parser_duration_seconds_bucket{job=~"^symbols.*"}[5m])))` + +
+ +
+ +#### symbols: codeintel_symbols_parser_errors_total + +

Parser operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100122` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_parser_error_rate + +

Parser operation error rate over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100123` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m])) / (sum by (op)(increase(src_codeintel_symbols_parser_total{job=~"^symbols.*"}[5m])) + sum by (op)(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m]))) * 100` + +
+ +
+ +### Symbols: Codeintel: Symbols cache janitor + +#### symbols: symbols + +

Size in bytes of the on-disk cache

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100200` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `src_codeintel_symbols_store_cache_size_bytes` + +
+ +
+ +#### symbols: symbols + +

Cache eviction operations every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100201` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `rate(src_codeintel_symbols_store_evictions_total[5m])` + +
+ +
+ +#### symbols: symbols + +

Cache eviction operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100202` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `rate(src_codeintel_symbols_store_errors_total[5m])` + +
+ +
+ +### Symbols: Codeintel: Symbols repository fetcher + +#### symbols: symbols + +

In-flight repository fetch operations

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100300` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `src_codeintel_symbols_fetching` + +
+ +
+ +#### symbols: symbols + +

Repository fetch queue size

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100301` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `max(src_codeintel_symbols_fetch_queue_size{job=~"^symbols.*"})` + +
+ +
+ +#### symbols: codeintel_symbols_repository_fetcher_total + +

Aggregate fetcher operations every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100310` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_repository_fetcher_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_repository_fetcher_99th_percentile_duration + +

Aggregate successful fetcher operation duration distribution over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100311` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (le)(rate(src_codeintel_symbols_repository_fetcher_duration_seconds_bucket{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_repository_fetcher_errors_total + +

Aggregate fetcher operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100312` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_repository_fetcher_error_rate + +

Aggregate fetcher operation error rate over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100313` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m])) / (sum(increase(src_codeintel_symbols_repository_fetcher_total{job=~"^symbols.*"}[5m])) + sum(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m]))) * 100` + +
+ +
+ +#### symbols: codeintel_symbols_repository_fetcher_total + +

Fetcher operations every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100320` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_repository_fetcher_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_repository_fetcher_99th_percentile_duration + +

99th percentile successful fetcher operation duration over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100321` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_symbols_repository_fetcher_duration_seconds_bucket{job=~"^symbols.*"}[5m])))` + +
+ +
+ +#### symbols: codeintel_symbols_repository_fetcher_errors_total + +

Fetcher operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100322` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_repository_fetcher_error_rate + +

Fetcher operation error rate over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100323` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m])) / (sum by (op)(increase(src_codeintel_symbols_repository_fetcher_total{job=~"^symbols.*"}[5m])) + sum by (op)(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m]))) * 100` + +
+ +
+ +### Symbols: Codeintel: Symbols gitserver client + +#### symbols: codeintel_symbols_gitserver_total + +

Aggregate gitserver client operations every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100400` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_gitserver_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_gitserver_99th_percentile_duration + +

Aggregate successful gitserver client operation duration distribution over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100401` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (le)(rate(src_codeintel_symbols_gitserver_duration_seconds_bucket{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_gitserver_errors_total + +

Aggregate gitserver client operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100402` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_gitserver_error_rate + +

Aggregate gitserver client operation error rate over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100403` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m])) / (sum(increase(src_codeintel_symbols_gitserver_total{job=~"^symbols.*"}[5m])) + sum(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m]))) * 100` + +
+ +
+ +#### symbols: codeintel_symbols_gitserver_total + +

Gitserver client operations every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100410` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_gitserver_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_gitserver_99th_percentile_duration + +

99th percentile successful gitserver client operation duration over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100411` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_symbols_gitserver_duration_seconds_bucket{job=~"^symbols.*"}[5m])))` + +
+ +
+ +#### symbols: codeintel_symbols_gitserver_errors_total + +

Gitserver client operation errors every 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100412` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m]))` + +
+ +
+ +#### symbols: codeintel_symbols_gitserver_error_rate + +

Gitserver client operation error rate over 5m

+ +This panel has no related alerts. + +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100413` on your Sourcegraph instance. + +*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* + +
+Technical details + +Query: `sum by (op)(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m])) / (sum by (op)(increase(src_codeintel_symbols_gitserver_total{job=~"^symbols.*"}[5m])) + sum by (op)(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m]))) * 100`
@@ -10735,7 +11492,7 @@ Query: `sum(symbols_store_fetch_queue_size)` Refer to the [alert solutions reference](./alert_solutions.md#symbols-frontend-internal-api-error-responses) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100100` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100500` on your Sourcegraph instance. *Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10766,7 +11523,7 @@ value change independent of deployment events (such as an upgrade), it could ind This panel has no related alerts. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100200` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100600` on your Sourcegraph instance. *Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10785,7 +11542,7 @@ Query: `count by(name) ((time() - container_last_seen{name=~"^symbols.*"}) > 60) Refer to the [alert solutions reference](./alert_solutions.md#symbols-container-cpu-usage) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100201` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100601` on your Sourcegraph instance. *Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10804,7 +11561,7 @@ Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^symbols.*"}` Refer to the [alert solutions reference](./alert_solutions.md#symbols-container-memory-usage) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100202` on your Sourcegraph instance. 
+To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100602` on your Sourcegraph instance. *Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10826,7 +11583,7 @@ When extremely high, this can indicate a resource usage problem, or can cause pr This panel has no related alerts. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100203` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100603` on your Sourcegraph instance. *Managed by the [Sourcegraph Core application team](https://handbook.sourcegraph.com/engineering/core-application).* @@ -10847,7 +11604,7 @@ Query: `sum by(name) (rate(container_fs_reads_total{name=~"^symbols.*"}[1h]) + r Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100300` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100700` on your Sourcegraph instance. *Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10866,7 +11623,7 @@ Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{na Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-memory-usage-long-term) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100301` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100701` on your Sourcegraph instance. 
*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10885,7 +11642,7 @@ Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^s Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100310` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100710` on your Sourcegraph instance. *Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10904,7 +11661,7 @@ Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^symb Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-memory-usage-short-term) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100311` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100711` on your Sourcegraph instance. *Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10927,7 +11684,7 @@ A high value here indicates a possible goroutine leak. Refer to the [alert solutions reference](./alert_solutions.md#symbols-go-goroutines) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100400` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100800` on your Sourcegraph instance. 
*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10946,7 +11703,7 @@ Query: `max by(instance) (go_goroutines{job=~".*symbols"})` Refer to the [alert solutions reference](./alert_solutions.md#symbols-go-gc-duration-seconds) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100401` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100801` on your Sourcegraph instance. *Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* @@ -10967,7 +11724,7 @@ Query: `max by(instance) (go_gc_duration_seconds{job=~".*symbols"})` Refer to the [alert solutions reference](./alert_solutions.md#symbols-pods-available-percentage) for 1 alert related to this panel. -To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100500` on your Sourcegraph instance. +To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100900` on your Sourcegraph instance. 
*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).* diff --git a/enterprise/internal/batches/store/batch_spec_workspaces.go b/enterprise/internal/batches/store/batch_spec_workspaces.go index 3993c6df773..e808d068688 100644 --- a/enterprise/internal/batches/store/batch_spec_workspaces.go +++ b/enterprise/internal/batches/store/batch_spec_workspaces.go @@ -134,6 +134,7 @@ func (s *Store) CreateBatchSpecWorkspace(ctx context.Context, ws ...*btypes.Batc ctx, s.Handle().DB(), "batch_spec_workspaces", + batch.MaxNumPostgresParameters, batchSpecWorkspaceInsertColumns, "", BatchSpecWorkspaceColums, diff --git a/enterprise/internal/batches/store/changeset_jobs.go b/enterprise/internal/batches/store/changeset_jobs.go index e910cf0a821..1fff3915dab 100644 --- a/enterprise/internal/batches/store/changeset_jobs.go +++ b/enterprise/internal/batches/store/changeset_jobs.go @@ -107,6 +107,7 @@ func (s *Store) CreateChangesetJob(ctx context.Context, cs ...*btypes.ChangesetJ ctx, s.Handle().DB(), "changeset_jobs", + batch.MaxNumPostgresParameters, changesetJobInsertColumns, "", changesetJobColumns, diff --git a/enterprise/internal/batches/store/changeset_specs.go b/enterprise/internal/batches/store/changeset_specs.go index f2babf07a04..7197f1d6724 100644 --- a/enterprise/internal/batches/store/changeset_specs.go +++ b/enterprise/internal/batches/store/changeset_specs.go @@ -127,6 +127,7 @@ func (s *Store) CreateChangesetSpec(ctx context.Context, cs ...*btypes.Changeset ctx, s.Handle().DB(), "changeset_specs", + batch.MaxNumPostgresParameters, changesetSpecInsertColumns, "", changesetSpecColumns, diff --git a/enterprise/internal/batches/testing/batch_spec_workspace_execution_jobs.go b/enterprise/internal/batches/testing/batch_spec_workspace_execution_jobs.go index efd208440bd..a2773829d73 100644 --- a/enterprise/internal/batches/testing/batch_spec_workspace_execution_jobs.go +++ 
b/enterprise/internal/batches/testing/batch_spec_workspace_execution_jobs.go @@ -84,6 +84,7 @@ func CreateBatchSpecWorkspaceExecutionJob(ctx context.Context, s createBatchSpec ctx, s.Handle().DB(), "batch_spec_workspace_execution_jobs", + batch.MaxNumPostgresParameters, []string{"batch_spec_workspace_id", "created_at", "updated_at"}, "", []string{ diff --git a/enterprise/internal/codeintel/stores/dbstore/commits.go b/enterprise/internal/codeintel/stores/dbstore/commits.go index 761caa123ef..5beffdcc4ae 100644 --- a/enterprise/internal/codeintel/stores/dbstore/commits.go +++ b/enterprise/internal/codeintel/stores/dbstore/commits.go @@ -423,6 +423,7 @@ func (s *Store) writeVisibleUploads(ctx context.Context, sanitizedInput *sanitiz ctx, s.Handle().DB(), "t_lsif_nearest_uploads", + batch.MaxNumPostgresParameters, []string{"commit_bytea", "uploads"}, sanitizedInput.nearestUploadsRowValues, ) @@ -436,6 +437,7 @@ func (s *Store) writeVisibleUploads(ctx context.Context, sanitizedInput *sanitiz ctx, s.Handle().DB(), "t_lsif_nearest_uploads_links", + batch.MaxNumPostgresParameters, []string{"commit_bytea", "ancestor_commit_bytea", "distance"}, sanitizedInput.nearestUploadsLinksRowValues, ) @@ -448,6 +450,7 @@ func (s *Store) writeVisibleUploads(ctx context.Context, sanitizedInput *sanitiz ctx, s.Handle().DB(), "t_lsif_uploads_visible_at_tip", + batch.MaxNumPostgresParameters, []string{"upload_id", "branch_or_tag_name", "is_default_branch"}, sanitizedInput.uploadsVisibleAtTipRowValues, ) diff --git a/enterprise/internal/codeintel/stores/dbstore/packages.go b/enterprise/internal/codeintel/stores/dbstore/packages.go index b17d1182ce7..135df724154 100644 --- a/enterprise/internal/codeintel/stores/dbstore/packages.go +++ b/enterprise/internal/codeintel/stores/dbstore/packages.go @@ -38,6 +38,7 @@ func (s *Store) UpdatePackages(ctx context.Context, dumpID int, packages []preci ctx, tx.Handle().DB(), "t_lsif_packages", + batch.MaxNumPostgresParameters, []string{"scheme", "name", 
"version"}, loadPackagesChannel(packages), ); err != nil { diff --git a/enterprise/internal/codeintel/stores/dbstore/references.go b/enterprise/internal/codeintel/stores/dbstore/references.go index cefda506622..b281bf7d90f 100644 --- a/enterprise/internal/codeintel/stores/dbstore/references.go +++ b/enterprise/internal/codeintel/stores/dbstore/references.go @@ -38,6 +38,7 @@ func (s *Store) UpdatePackageReferences(ctx context.Context, dumpID int, referen ctx, tx.Handle().DB(), "t_lsif_references", + batch.MaxNumPostgresParameters, []string{"scheme", "name", "version", "filter"}, loadReferencesChannel(references), ); err != nil { diff --git a/enterprise/internal/codeintel/stores/lsifstore/data_write.go b/enterprise/internal/codeintel/stores/lsifstore/data_write.go index 0195d8778aa..cf342898c5e 100644 --- a/enterprise/internal/codeintel/stores/lsifstore/data_write.go +++ b/enterprise/internal/codeintel/stores/lsifstore/data_write.go @@ -303,6 +303,6 @@ ON CONFLICT DO NOTHING // the maximum number of CPUs that can be executing simultaneously. 
func withBatchInserter(ctx context.Context, db dbutil.DB, tableName string, columns []string, f func(inserter *batch.Inserter) error) (err error) { return goroutine.RunWorkers(goroutine.SimplePoolWorker(func() error { - return batch.WithInserter(ctx, db, tableName, columns, f) + return batch.WithInserter(ctx, db, tableName, batch.MaxNumPostgresParameters, columns, f) })) } diff --git a/enterprise/internal/codeintel/stores/lsifstore/data_write_documentation.go b/enterprise/internal/codeintel/stores/lsifstore/data_write_documentation.go index 59914fa4f24..b427658089a 100644 --- a/enterprise/internal/codeintel/stores/lsifstore/data_write_documentation.go +++ b/enterprise/internal/codeintel/stores/lsifstore/data_write_documentation.go @@ -434,6 +434,7 @@ func (s *Store) upsertTags(ctx context.Context, tags []string, tableSuffix strin ctx, tx.Handle().DB(), "t_lsif_data_docs_search_tags_"+tableSuffix, + batch.MaxNumPostgresParameters, []string{"tags", "tsv"}, inserter, ); err != nil { @@ -566,6 +567,7 @@ func (s *Store) replaceSearchRecords( ctx, tx.Handle().DB(), "t_lsif_data_docs_search_"+tableSuffix, + batch.MaxNumPostgresParameters, []string{ "path_id", "detail", diff --git a/enterprise/internal/codeintel/stores/lsifstore/migration/migrator.go b/enterprise/internal/codeintel/stores/lsifstore/migration/migrator.go index 17f1b099384..ebbb4c143d3 100644 --- a/enterprise/internal/codeintel/stores/lsifstore/migration/migrator.go +++ b/enterprise/internal/codeintel/stores/lsifstore/migration/migrator.go @@ -366,6 +366,7 @@ func (m *Migrator) updateBatch(ctx context.Context, tx *lsifstore.Store, dumpID, ctx, tx.Handle().DB(), temporaryTableName, + batch.MaxNumPostgresParameters, m.temporaryTableFieldNames, rowValues, ); err != nil { diff --git a/internal/database/batch/batch.go b/internal/database/batch/batch.go index d756ca3b005..94e6b127ce9 100644 --- a/internal/database/batch/batch.go +++ b/internal/database/batch/batch.go @@ -34,8 +34,8 @@ type ReturningScanner 
func(rows *sql.Rows) error // column names, then reads from the given channel as if they specify values for a single row. // The inserter will be flushed and any error that occurred during insertion or flush will be // returned. -func InsertValues(ctx context.Context, db dbutil.DB, tableName string, columnNames []string, values <-chan []interface{}) error { - return WithInserter(ctx, db, tableName, columnNames, func(inserter *Inserter) error { +func InsertValues(ctx context.Context, db dbutil.DB, tableName string, maxNumParameters int, columnNames []string, values <-chan []interface{}) error { + return WithInserter(ctx, db, tableName, maxNumParameters, columnNames, func(inserter *Inserter) error { outer: for { select { @@ -66,10 +66,11 @@ func WithInserter( ctx context.Context, db dbutil.DB, tableName string, + maxNumParameters int, columnNames []string, f func(inserter *Inserter) error, ) (err error) { - inserter := NewInserter(ctx, db, tableName, columnNames...) + inserter := NewInserter(ctx, db, tableName, maxNumParameters, columnNames...) return with(ctx, inserter, f) } @@ -82,13 +83,14 @@ func WithInserterWithReturn( ctx context.Context, db dbutil.DB, tableName string, + maxNumParameters int, columnNames []string, onConflictClause string, returningColumnNames []string, returningScanner ReturningScanner, f func(inserter *Inserter) error, ) (err error) { - inserter := NewInserterWithReturn(ctx, db, tableName, columnNames, onConflictClause, returningColumnNames, returningScanner) + inserter := NewInserterWithReturn(ctx, db, tableName, maxNumParameters, columnNames, onConflictClause, returningColumnNames, returningScanner) return with(ctx, inserter, f) } @@ -104,8 +106,8 @@ func with(ctx context.Context, inserter *Inserter, f func(inserter *Inserter) er // NewInserter creates a new batch inserter using the given database handle, table name, // and column names. For performance and atomicity, handle should be a transaction. 
-func NewInserter(ctx context.Context, db dbutil.DB, tableName string, columnNames ...string) *Inserter { - return NewInserterWithReturn(ctx, db, tableName, columnNames, "", nil, nil) +func NewInserter(ctx context.Context, db dbutil.DB, tableName string, maxNumParameters int, columnNames ...string) *Inserter { + return NewInserterWithReturn(ctx, db, tableName, maxNumParameters, columnNames, "", nil, nil) } // NewInserterWithReturn creates a new batch inserter using the given database handle, table @@ -118,15 +120,16 @@ func NewInserterWithReturn( ctx context.Context, db dbutil.DB, tableName string, + maxNumParameters int, columnNames []string, onConflictClause string, returningColumnNames []string, returningScanner ReturningScanner, ) *Inserter { numColumns := len(columnNames) - maxBatchSize := getMaxBatchSize(numColumns) + maxBatchSize := getMaxBatchSize(numColumns, maxNumParameters) queryPrefix := makeQueryPrefix(tableName, columnNames) - querySuffix := makeQuerySuffix(numColumns) + querySuffix := makeQuerySuffix(numColumns, maxNumParameters) onConflictSuffix := makeOnConflictSuffix(onConflictClause) returningSuffix := makeReturningSuffix(returningColumnNames) @@ -215,13 +218,17 @@ func (i *Inserter) makeQuery(numValues int) string { return i.queryPrefix + i.querySuffix[:suffixLength] + i.onConflictSuffix + i.returningSuffix } -// maxNumPostgresParameters is the maximum number of placeholder variables allowed by Postgres +// MaxNumPostgresParameters is the maximum number of placeholder variables allowed by Postgres // in a single insert statement. -const maxNumParameters = 32767 +const MaxNumPostgresParameters = 32767 + +// MaxNumSQLiteParameters is the maximum number of placeholder variables allowed by SQLite +// in a single insert statement. +const MaxNumSQLiteParameters = 999 // getMaxBatchSize returns the number of rows that can be inserted into a single table with the // given number of columns via a single insert statement. 
-func getMaxBatchSize(numColumns int) int { +func getMaxBatchSize(numColumns, maxNumParameters int) int { return (maxNumParameters / numColumns) * numColumns } @@ -249,7 +256,7 @@ var querySuffixCacheMutex sync.Mutex // substring index is efficient. // // This method is memoized. -func makeQuerySuffix(numColumns int) string { +func makeQuerySuffix(numColumns, maxNumParameters int) string { querySuffixCacheMutex.Lock() defer querySuffixCacheMutex.Unlock() if cache, ok := querySuffixCache[numColumns]; ok { diff --git a/internal/database/batch/batch_test.go b/internal/database/batch/batch_test.go index 936e84bfec0..0be8b23fec2 100644 --- a/internal/database/batch/batch_test.go +++ b/internal/database/batch/batch_test.go @@ -51,7 +51,7 @@ func TestBatchInserterWithReturn(t *testing.T) { setupTestTable(t, db) tableSizeFactor := 2 - numRows := maxNumParameters * tableSizeFactor + numRows := MaxNumPostgresParameters * tableSizeFactor expectedValues := makeTestValues(tableSizeFactor, 0) var expectedIDs []int @@ -73,7 +73,7 @@ func TestBatchInserterWithReturnWithConflicts(t *testing.T) { tableSizeFactor := 2 duplicationFactor := 2 - numRows := maxNumParameters * tableSizeFactor + numRows := MaxNumPostgresParameters * tableSizeFactor expectedValues := makeTestValues(tableSizeFactor, 0) var expectedIDs []int @@ -134,7 +134,7 @@ func setupTestTable(t testing.TB, db *sql.DB) { func makeTestValues(tableSizeFactor, payloadSize int) [][]interface{} { var expectedValues [][]interface{} - for i := 0; i < maxNumParameters*tableSizeFactor; i++ { + for i := 0; i < MaxNumPostgresParameters*tableSizeFactor; i++ { expectedValues = append(expectedValues, []interface{}{ i, i + 1, @@ -159,7 +159,7 @@ func makePayload(size int) string { func testInsert(t testing.TB, db *sql.DB, expectedValues [][]interface{}) { ctx := context.Background() - inserter := NewInserter(ctx, db, "batch_inserter_test", "col1", "col2", "col3", "col4", "col5") + inserter := NewInserter(ctx, db, "batch_inserter_test", 
MaxNumPostgresParameters, "col1", "col2", "col3", "col4", "col5") for _, values := range expectedValues { if err := inserter.Insert(ctx, values...); err != nil { t.Fatalf("unexpected error inserting values: %s", err) @@ -178,6 +178,7 @@ func testInsertWithReturn(t testing.TB, db *sql.DB, expectedValues [][]interface ctx, db, "batch_inserter_test", + MaxNumPostgresParameters, []string{"col1", "col2", "col3", "col4", "col5"}, "", []string{"id"}, @@ -212,6 +213,7 @@ func testInsertWithReturnWithConflicts(t testing.TB, db *sql.DB, n int, expected ctx, db, "batch_inserter_test", + MaxNumPostgresParameters, []string{"id", "col1", "col2", "col3", "col4", "col5"}, "ON CONFLICT DO NOTHING", []string{"id"}, diff --git a/internal/database/event_logs.go b/internal/database/event_logs.go index 41d5067bf1f..84c27a2ecc9 100644 --- a/internal/database/event_logs.go +++ b/internal/database/event_logs.go @@ -226,6 +226,7 @@ func (l *eventLogStore) BulkInsert(ctx context.Context, events []*Event) error { ctx, l.Handle().DB(), "event_logs", + batch.MaxNumPostgresParameters, []string{ "name", "url", diff --git a/monitoring/definitions/shared/codeintel.go b/monitoring/definitions/shared/codeintel.go index 7efc56e9b60..810cb42aea7 100644 --- a/monitoring/definitions/shared/codeintel.go +++ b/monitoring/definitions/shared/codeintel.go @@ -848,3 +848,221 @@ func (codeIntelligence) NewDependencyReposStoreGroup(containerName string) monit }, }) } + +func (codeIntelligence) NewSymbolsAPIGroup(containerName string) monitoring.Group { + return Observation.NewGroup(containerName, monitoring.ObservableOwnerCodeIntel, ObservationGroupOptions{ + GroupConstructorOptions: GroupConstructorOptions{ + Namespace: "codeintel", + DescriptionRoot: "Symbols API", + Hidden: false, + ObservableConstructorOptions: ObservableConstructorOptions{ + MetricNameRoot: "codeintel_symbols_api", + MetricDescriptionRoot: "API", + Filters: []string{}, + By: []string{"op"}, + }, + }, + SharedObservationGroupOptions: 
SharedObservationGroupOptions{ + Total: NoAlertsOption("none"), + Duration: NoAlertsOption("none"), + Errors: NoAlertsOption("none"), + ErrorRate: NoAlertsOption("none"), + }, + Aggregate: &SharedObservationGroupOptions{ + Total: NoAlertsOption("none"), + Duration: NoAlertsOption("none"), + Errors: NoAlertsOption("none"), + ErrorRate: NoAlertsOption("none"), + }, + }) +} + +func (codeIntelligence) NewSymbolsParserGroup(containerName string) monitoring.Group { + group := Observation.NewGroup(containerName, monitoring.ObservableOwnerCodeIntel, ObservationGroupOptions{ + GroupConstructorOptions: GroupConstructorOptions{ + Namespace: "codeintel", + DescriptionRoot: "Symbols parser", + Hidden: false, + ObservableConstructorOptions: ObservableConstructorOptions{ + MetricNameRoot: "codeintel_symbols_parser", + MetricDescriptionRoot: "parser", + Filters: []string{}, + By: []string{"op"}, + }, + }, + SharedObservationGroupOptions: SharedObservationGroupOptions{ + Total: NoAlertsOption("none"), + Duration: NoAlertsOption("none"), + Errors: NoAlertsOption("none"), + ErrorRate: NoAlertsOption("none"), + }, + Aggregate: &SharedObservationGroupOptions{ + Total: NoAlertsOption("none"), + Duration: NoAlertsOption("none"), + Errors: NoAlertsOption("none"), + ErrorRate: NoAlertsOption("none"), + }, + }) + + queueRow := monitoring.Row{ + { + Name: containerName, + Description: "in-flight parse jobs", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: "max(src_codeintel_symbols_parsing{job=~\"^symbols.*\"})", + NoAlert: true, + Interpretation: "none", + Panel: monitoring.Panel(), + }, + { + Name: containerName, + Description: "parser queue size", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: "max(src_codeintel_symbols_parse_queue_size{job=~\"^symbols.*\"})", + NoAlert: true, + Interpretation: "none", + Panel: monitoring.Panel(), + }, + { + Name: containerName, + Description: "parse queue timeouts", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: 
"max(src_codeintel_symbols_parse_queue_timeouts_total{job=~\"^symbols.*\"})", + NoAlert: true, + Interpretation: "none", + Panel: monitoring.Panel(), + }, + { + Name: containerName, + Description: "parse failures every 5m", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: "rate(src_codeintel_symbols_parse_failed_total{job=~\"^symbols.*\"}[5m])", + NoAlert: true, + Interpretation: "none", + Panel: monitoring.Panel(), + }, + } + + group.Rows = append([]monitoring.Row{queueRow}, group.Rows...) + + return group +} + +func (codeIntelligence) NewSymbolsCacheJanitorGroup(containerName string) monitoring.Group { + return monitoring.Group{ + Title: fmt.Sprintf("%s: %s", "Codeintel", "Symbols cache janitor"), + Hidden: true, + Rows: []monitoring.Row{ + { + { + Name: containerName, + Description: "size in bytes of the on-disk cache", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: "src_codeintel_symbols_store_cache_size_bytes", + NoAlert: true, + Interpretation: "no", + Panel: monitoring.Panel().Unit(monitoring.Bytes), + }, + { + Name: containerName, + Description: "cache eviction operations every 5m", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: "rate(src_codeintel_symbols_store_evictions_total[5m])", + NoAlert: true, + Interpretation: "no", + Panel: monitoring.Panel(), + }, + { + Name: containerName, + Description: "cache eviction operation errors every 5m", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: "rate(src_codeintel_symbols_store_errors_total[5m])", + NoAlert: true, + Interpretation: "no", + Panel: monitoring.Panel(), + }, + }, + }, + } +} + +func (codeIntelligence) NewSymbolsRepositoryFetcherGroup(containerName string) monitoring.Group { + group := Observation.NewGroup(containerName, monitoring.ObservableOwnerCodeIntel, ObservationGroupOptions{ + GroupConstructorOptions: GroupConstructorOptions{ + Namespace: "codeintel", + DescriptionRoot: "Symbols repository fetcher", + Hidden: true, + ObservableConstructorOptions: 
ObservableConstructorOptions{ + MetricNameRoot: "codeintel_symbols_repository_fetcher", + MetricDescriptionRoot: "fetcher", + Filters: []string{}, + By: []string{"op"}, + }, + }, + SharedObservationGroupOptions: SharedObservationGroupOptions{ + Total: NoAlertsOption("none"), + Duration: NoAlertsOption("none"), + Errors: NoAlertsOption("none"), + ErrorRate: NoAlertsOption("none"), + }, + Aggregate: &SharedObservationGroupOptions{ + Total: NoAlertsOption("none"), + Duration: NoAlertsOption("none"), + Errors: NoAlertsOption("none"), + ErrorRate: NoAlertsOption("none"), + }, + }) + + queueRow := monitoring.Row{ + { + Name: containerName, + Description: "in-flight repository fetch operations", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: "src_codeintel_symbols_fetching", + NoAlert: true, + Interpretation: "none", + Panel: monitoring.Panel(), + }, + { + Name: containerName, + Description: "repository fetch queue size", + Owner: monitoring.ObservableOwnerCodeIntel, + Query: "max(src_codeintel_symbols_fetch_queue_size{job=~\"^symbols.*\"})", + NoAlert: true, + Interpretation: "none", + Panel: monitoring.Panel(), + }, + } + + group.Rows = append([]monitoring.Row{queueRow}, group.Rows...) 
+ + return group +} + +func (codeIntelligence) NewSymbolsGitserverClientGroup(containerName string) monitoring.Group { + return Observation.NewGroup(containerName, monitoring.ObservableOwnerCodeIntel, ObservationGroupOptions{ + GroupConstructorOptions: GroupConstructorOptions{ + Namespace: "codeintel", + DescriptionRoot: "Symbols gitserver client", + Hidden: true, + ObservableConstructorOptions: ObservableConstructorOptions{ + MetricNameRoot: "codeintel_symbols_gitserver", + MetricDescriptionRoot: "gitserver client", + Filters: []string{}, + By: []string{"op"}, + }, + }, + SharedObservationGroupOptions: SharedObservationGroupOptions{ + Total: NoAlertsOption("none"), + Duration: NoAlertsOption("none"), + Errors: NoAlertsOption("none"), + ErrorRate: NoAlertsOption("none"), + }, + Aggregate: &SharedObservationGroupOptions{ + Total: NoAlertsOption("none"), + Duration: NoAlertsOption("none"), + Errors: NoAlertsOption("none"), + ErrorRate: NoAlertsOption("none"), + }, + }) +} diff --git a/monitoring/definitions/symbols.go b/monitoring/definitions/symbols.go index 9844621ba2c..c7c43c1b854 100644 --- a/monitoring/definitions/symbols.go +++ b/monitoring/definitions/symbols.go @@ -13,31 +13,11 @@ func Symbols() *monitoring.Container { Title: "Symbols", Description: "Handles symbol searches for unindexed branches.", Groups: []monitoring.Group{ - { - Title: "General", - Rows: []monitoring.Row{ - { - { - Name: "store_fetch_failures", - Description: "store fetch failures every 5m", - Query: `sum(increase(symbols_store_fetch_failed[5m]))`, - Warning: monitoring.Alert().GreaterOrEqual(5, nil), - Panel: monitoring.Panel().LegendFormat("failures"), - Owner: monitoring.ObservableOwnerCodeIntel, - PossibleSolutions: "none", - }, - { - Name: "current_fetch_queue_size", - Description: "current fetch queue size", - Query: `sum(symbols_store_fetch_queue_size)`, - Warning: monitoring.Alert().GreaterOrEqual(25, nil), - Panel: monitoring.Panel().LegendFormat("size"), - Owner: 
monitoring.ObservableOwnerCodeIntel, - PossibleSolutions: "none", - }, - }, - }, - }, + shared.CodeIntelligence.NewSymbolsAPIGroup(containerName), + shared.CodeIntelligence.NewSymbolsParserGroup(containerName), + shared.CodeIntelligence.NewSymbolsCacheJanitorGroup(containerName), + shared.CodeIntelligence.NewSymbolsRepositoryFetcherGroup(containerName), + shared.CodeIntelligence.NewSymbolsGitserverClientGroup(containerName), shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil), shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil), diff --git a/sg.config.yaml b/sg.config.yaml index fb3483d817c..de57a48f7fa 100644 --- a/sg.config.yaml +++ b/sg.config.yaml @@ -804,6 +804,9 @@ commandsets: - minio - precise-code-intel-worker - codeintel-executor + - jaeger + - grafana + - prometheus enterprise-codeinsights: requiresDevPrivate: true