From 7a6a2a062b86b5463706fe1eee3c5fa77a674386 Mon Sep 17 00:00:00 2001
From: Chris Wendt
Date: Wed, 2 Mar 2022 21:13:28 -0700
Subject: [PATCH] codeintel: Rockskip for symbols (#28719)

---
 cmd/symbols/config.go                         |  59 ---
 cmd/symbols/ctags-install-alpine.sh           |   6 +-
 cmd/symbols/fetcher/gen.go                    |   3 +
 .../{internal => }/fetcher/mock_iface_test.go |   6 +-
 .../{internal => }/fetcher/observability.go   |   0
 .../fetcher/repository_fetcher.go             | 119 +----
 .../fetcher/repository_fetcher_test.go        |  12 +-
 .../{internal => }/gitserver/client.go        |   2 -
 .../{internal => }/gitserver/client_test.go   |   0
 .../{internal => }/gitserver/observability.go |   0
 cmd/symbols/{internal => }/gitserver/tar.go   |   0
 cmd/symbols/go-build.sh                       |  29 +-
 cmd/symbols/internal/api/gen.go               |   2 +-
 cmd/symbols/internal/api/handler.go           | 101 ++--
 cmd/symbols/internal/api/handler_test.go      |  14 +-
 cmd/symbols/internal/api/mock_iface_test.go   |   6 +-
 cmd/symbols/internal/api/search.go            |  59 ---
 cmd/symbols/internal/api/search_sqlite.go     |  64 +++
 cmd/symbols/internal/database/store/search.go |   2 +-
 cmd/symbols/internal/database/store/store.go  |   4 +-
 .../internal/database/store/symbols.go        |   2 +-
 cmd/symbols/internal/database/writer/cache.go |   2 +-
 .../internal/database/writer/writer.go        |  14 +-
 cmd/symbols/internal/fetcher/gen.go           |   3 -
 .../internal/parser/parser_factory_ctags.go   |  23 -
 cmd/symbols/internal/types/search_args.go     |  40 --
 cmd/symbols/main.go                           | 123 +----
 .../api => observability}/observability.go    |  12 +-
 cmd/symbols/parser/filtering_parser.go        |  51 ++
 .../{internal => }/parser/observability.go    |   0
 cmd/symbols/{internal => }/parser/parser.go   |  11 +-
 .../{internal => }/parser/parser_factory.go   |   0
 cmd/symbols/parser/parser_factory_ctags.go    |  31 ++
 .../parser/parser_factory_ctags_test.go       |   4 +-
 .../{internal => }/parser/parser_pool.go      |   0
 cmd/symbols/shared/main.go                    |  87 ++++
 cmd/symbols/shared/sqlite.go                  |  71 +++
 cmd/symbols/types/types.go                    | 122 +++++
 dev/check/go-dbconn-import.sh                 |   1 +
 dev/sg/internal/db/db.go                      |   1 +
 .../background-information/ci/reference.md    |  25 +-
 doc/dev/background-information/sql/index.md   |   1 +
 .../sql/locking_behavior.md                   |  31 ++
 enterprise/cmd/symbols/Dockerfile             |  35 ++
 enterprise/cmd/symbols/build.sh               |  24 +
 enterprise/cmd/symbols/go-build.sh            |  60 +++
 enterprise/cmd/symbols/main.go                | 241 +++++++++
 enterprise/dev/ci/images/images.go            |   1 +
 enterprise/internal/rockskip/git.go           | 226 +++++++++
 enterprise/internal/rockskip/index.go         | 346 +++++++++++++
 enterprise/internal/rockskip/postgres.go      | 364 ++++++++++++++
 enterprise/internal/rockskip/search.go        | 469 ++++++++++++++++++
 enterprise/internal/rockskip/server.go        | 160 ++++++
 enterprise/internal/rockskip/server_test.go   | 309 ++++++++++++
 enterprise/internal/rockskip/status.go        | 312 ++++++++++++
 go.mod                                        |   2 +-
 internal/database/schema.codeintel.md         |  48 ++
 migrations/codeintel/1000000032/down.sql      |   3 +
 migrations/codeintel/1000000032/metadata.yaml |   2 +
 migrations/codeintel/1000000032/up.sql        |  71 +++
 sg.config.yaml                                |  50 +-
 61 files changed, 3319 insertions(+), 547 deletions(-)
 delete mode 100644 cmd/symbols/config.go
 create mode 100644 cmd/symbols/fetcher/gen.go
 rename cmd/symbols/{internal => }/fetcher/mock_iface_test.go (98%)
 rename cmd/symbols/{internal => }/fetcher/observability.go (100%)
 rename cmd/symbols/{internal => }/fetcher/repository_fetcher.go (56%)
 rename cmd/symbols/{internal => }/fetcher/repository_fetcher_test.go (88%)
 rename cmd/symbols/{internal => }/gitserver/client.go (98%)
 rename cmd/symbols/{internal => }/gitserver/client_test.go (100%)
 rename cmd/symbols/{internal => }/gitserver/observability.go (100%)
 rename cmd/symbols/{internal => }/gitserver/tar.go (100%)
 delete mode 100644 cmd/symbols/internal/api/search.go
 create mode 100644 cmd/symbols/internal/api/search_sqlite.go
 delete mode 100644 cmd/symbols/internal/fetcher/gen.go
 delete mode 100644 cmd/symbols/internal/parser/parser_factory_ctags.go
 delete mode 100644 cmd/symbols/internal/types/search_args.go
 rename cmd/symbols/{internal/api => observability}/observability.go (78%)
 create mode 100644 cmd/symbols/parser/filtering_parser.go
 rename cmd/symbols/{internal => }/parser/observability.go (100%)
 rename cmd/symbols/{internal => }/parser/parser.go (97%)
 rename cmd/symbols/{internal => }/parser/parser_factory.go (100%)
 create mode 100644 cmd/symbols/parser/parser_factory_ctags.go
 rename cmd/symbols/{internal => }/parser/parser_factory_ctags_test.go (94%)
 rename cmd/symbols/{internal => }/parser/parser_pool.go (100%)
 create mode 100644 cmd/symbols/shared/main.go
 create mode 100644 cmd/symbols/shared/sqlite.go
 create mode 100644 cmd/symbols/types/types.go
 create mode 100644 doc/dev/background-information/sql/locking_behavior.md
 create mode 100644 enterprise/cmd/symbols/Dockerfile
 create mode 100755 enterprise/cmd/symbols/build.sh
 create mode 100755 enterprise/cmd/symbols/go-build.sh
 create mode 100644 enterprise/cmd/symbols/main.go
 create mode 100644 enterprise/internal/rockskip/git.go
 create mode 100644 enterprise/internal/rockskip/index.go
 create mode 100644 enterprise/internal/rockskip/postgres.go
 create mode 100644 enterprise/internal/rockskip/search.go
 create mode 100644 enterprise/internal/rockskip/server.go
 create mode 100644 enterprise/internal/rockskip/server_test.go
 create mode 100644 enterprise/internal/rockskip/status.go
 create mode 100644 migrations/codeintel/1000000032/down.sql
 create mode 100644 migrations/codeintel/1000000032/metadata.yaml
 create mode 100644 migrations/codeintel/1000000032/up.sql

diff --git a/cmd/symbols/config.go b/cmd/symbols/config.go
deleted file mode 100644
index 5347abfde62..00000000000
--- a/cmd/symbols/config.go
+++ /dev/null
@@ -1,59 +0,0 @@
-package main
-
-import (
-    "os"
-    "runtime"
-    "strconv"
-    "time"
-
-    "github.com/sourcegraph/sourcegraph/internal/env"
-)
-
-type Config struct {
-    env.BaseConfig
-
-    ctagsCommand            string
-    ctagsPatternLengthLimit int
-    ctagsLogErrors          bool
-    ctagsDebugLogs          bool
-
-    sanityCheck       bool
-    cacheDir          string
-    cacheSizeMB       int
-    numCtagsProcesses int
-    requestBufferSize int
-    processingTimeout time.Duration
-
-    // The maximum sum of lengths of all paths in a single call to git archive. Without this limit, we
-    // could hit the error "argument list too long" by exceeding the limit on the number of arguments to
-    // a command enforced by the OS.
-    //
-    // Mac  : getconf ARG_MAX returns 1,048,576
-    // Linux: getconf ARG_MAX returns 2,097,152
-    //
-    // We want to remain well under that limit, so defaulting to 100,000 seems safe (see the
-    // MAX_TOTAL_PATHS_LENGTH environment variable below).
-    maxTotalPathsLength int
-}
-
-var config = &Config{}
-
-// Load reads from the environment and stores the transformed data on the config object for later retrieval.
-func (c *Config) Load() {
-    c.ctagsCommand = c.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)")
-    c.ctagsPatternLengthLimit = c.GetInt("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags")
-    logCtagsErrorsDefault := "false"
-    if os.Getenv("DEPLOY_TYPE") == "dev" {
-        logCtagsErrorsDefault = "true"
-    }
-    c.ctagsLogErrors = c.GetBool("LOG_CTAGS_ERRORS", logCtagsErrorsDefault, "log ctags errors")
-    c.ctagsDebugLogs = false
-
-    c.sanityCheck = c.GetBool("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not")
-    c.cacheDir = c.Get("CACHE_DIR", "/tmp/symbols-cache", "directory in which to store cached symbols")
-    c.cacheSizeMB = c.GetInt("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache (in megabytes)")
-    c.numCtagsProcesses = c.GetInt("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of concurrent parser processes to run")
-    c.requestBufferSize = c.GetInt("REQUEST_BUFFER_SIZE", "8192", "maximum size of buffered parser request channel")
-    c.processingTimeout = c.GetInterval("PROCESSING_TIMEOUT", "2h", "maximum time to spend processing a repository")
-    c.maxTotalPathsLength = c.GetInt("MAX_TOTAL_PATHS_LENGTH", "100000", "maximum sum of lengths of all paths in a single call to git archive")
-}
diff --git a/cmd/symbols/ctags-install-alpine.sh b/cmd/symbols/ctags-install-alpine.sh
index 216c3962a92..08478887e5a 100755
--- a/cmd/symbols/ctags-install-alpine.sh
+++ b/cmd/symbols/ctags-install-alpine.sh
@@ -3,10 +3,8 @@
 # This script installs ctags within an alpine container.

 # Commit hash of github.com/universal-ctags/ctags.
-# Last bumped 2022-02-28
-# This version includes a fix that hasn't landed on master yet:
-# https://github.com/universal-ctags/ctags/pull/3300
-CTAGS_VERSION=90a16c009c52a35578140c6c731bcd5faa104f11
+# Last bumped 2022-02-10
+CTAGS_VERSION=37a4b3601288bcdc02a387197ff8d9b971f7ab34

 cleanup() {
   apk --no-cache --purge del ctags-build-deps || true
diff --git a/cmd/symbols/fetcher/gen.go b/cmd/symbols/fetcher/gen.go
new file mode 100644
index 00000000000..3cc07366a86
--- /dev/null
+++ b/cmd/symbols/fetcher/gen.go
@@ -0,0 +1,3 @@
+package fetcher
+
+//go:generate ../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver -i GitServerClient -o mock_iface_test.go
diff --git a/cmd/symbols/internal/fetcher/mock_iface_test.go b/cmd/symbols/fetcher/mock_iface_test.go
similarity index 98%
rename from cmd/symbols/internal/fetcher/mock_iface_test.go
rename to cmd/symbols/fetcher/mock_iface_test.go
index edfc10f3546..5d74142a77a 100644
--- a/cmd/symbols/internal/fetcher/mock_iface_test.go
+++ b/cmd/symbols/fetcher/mock_iface_test.go
@@ -7,14 +7,14 @@ import (
     "io"
     "sync"

-    gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
+    gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
     api "github.com/sourcegraph/sourcegraph/internal/api"
 )

 // MockGitserverClient is a mock implementation of the GitserverClient
 // interface (from the package
-// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used
-// for unit testing.
+// github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver) used for unit
+// testing.
 type MockGitserverClient struct {
     // FetchTarFunc is an instance of a mock function object controlling the
     // behavior of the method FetchTar.
diff --git a/cmd/symbols/internal/fetcher/observability.go b/cmd/symbols/fetcher/observability.go
similarity index 100%
rename from cmd/symbols/internal/fetcher/observability.go
rename to cmd/symbols/fetcher/observability.go
diff --git a/cmd/symbols/internal/fetcher/repository_fetcher.go b/cmd/symbols/fetcher/repository_fetcher.go
similarity index 56%
rename from cmd/symbols/internal/fetcher/repository_fetcher.go
rename to cmd/symbols/fetcher/repository_fetcher.go
index 08824ac196f..17b2d1728b9 100644
--- a/cmd/symbols/internal/fetcher/repository_fetcher.go
+++ b/cmd/symbols/fetcher/repository_fetcher.go
@@ -2,16 +2,14 @@ package fetcher

 import (
     "archive/tar"
-    "bytes"
     "context"
     "io"
-    "path"
     "strings"

     "github.com/opentracing/opentracing-go/log"

-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
     "github.com/sourcegraph/sourcegraph/internal/observation"
     "github.com/sourcegraph/sourcegraph/lib/errors"
 )
@@ -22,7 +20,6 @@ type RepositoryFetcher interface {

 type repositoryFetcher struct {
     gitserverClient     gitserver.GitserverClient
-    fetchSem            chan int
     operations          *operations
     maxTotalPathsLength int
 }
@@ -37,10 +34,9 @@ type parseRequestOrError struct {
     Err error
 }

-func NewRepositoryFetcher(gitserverClient gitserver.GitserverClient, maximumConcurrentFetches int, maxTotalPathsLength int, observationContext *observation.Context) RepositoryFetcher {
+func NewRepositoryFetcher(gitserverClient gitserver.GitserverClient, maxTotalPathsLength int, observationContext *observation.Context) RepositoryFetcher {
     return &repositoryFetcher{
         gitserverClient:     gitserverClient,
-        fetchSem:            make(chan int, maximumConcurrentFetches),
         operations:          newOperations(observationContext),
         maxTotalPathsLength: maxTotalPathsLength,
     }
@@ -71,13 +67,6 @@ func (f *repositoryFetcher) fetchRepositoryArchive(ctx context.Context, args typ
     }})
     defer endObservation(1, observation.Args{})

-    onDefer, err := f.limitConcurrentFetches(ctx)
-    if err != nil {
-        return err
-    }
-    defer onDefer()
-    trace.Log(log.Event("acquired fetch semaphore"))
-
     f.operations.fetching.Inc()
     defer f.operations.fetching.Dec()
@@ -136,19 +125,6 @@ func batchByTotalLength(paths []string, maxTotalLength int) [][]string {
     return batches
 }

-func (f *repositoryFetcher) limitConcurrentFetches(ctx context.Context) (func(), error) {
-    f.operations.fetchQueueSize.Inc()
-    defer f.operations.fetchQueueSize.Dec()
-
-    select {
-    case f.fetchSem <- 1:
-        return func() { <-f.fetchSem }, nil
-
-    case <-ctx.Done():
-        return func() {}, ctx.Err()
-    }
-}
-
 func readTar(ctx context.Context, tarReader *tar.Reader, callback func(request ParseRequest), traceLog observation.TraceLogger) error {
     for {
         if ctx.Err() != nil {
@@ -156,89 +132,22 @@ func readTar(ctx context.Context, tarReader *tar.Reader, callback func(request P
         }

         tarHeader, err := tarReader.Next()
-        if err != nil {
-            if err == io.EOF {
-                return nil
-            }
-
+        if err == io.EOF {
+            return nil
+        } else if err != nil {
             return err
         }

-        readTarHeader(tarReader, tarHeader, callback, traceLog)
-    }
-}
+        if tarHeader.FileInfo().IsDir() || tarHeader.Typeflag == tar.TypeXGlobalHeader {
+            continue
+        }

-func readTarHeader(tarReader *tar.Reader, tarHeader *tar.Header, callback func(request ParseRequest), trace observation.TraceLogger) error {
-    if !shouldParse(tarHeader) {
-        return nil
-    }
-
-    // 32KB is the same size used by io.Copy
-    buffer := make([]byte, 32*1024)
-
-    trace.Log(log.Event("reading tar header prefix"))
-
-    // Read first chunk of tar header contents
-    n, err := tarReader.Read(buffer)
-    if err != nil && err != io.EOF {
-        return err
-    }
-    trace.Log(log.Int("n", n))
-
-    if n == 0 {
-        // Empty file, nothing to parse
-        return nil
-    }
-
-    // Check to see if first 256 bytes contain a 0x00. If so, we'll assume that
-    // the file is binary and skip parsing. Otherwise, we'll have some non-zero
-    // contents that passed our filters above to parse.
-
-    m := 256
-    if n < m {
-        m = n
-    }
-    if bytes.IndexByte(buffer[:m], 0x00) >= 0 {
-        return nil
-    }
-
-    // Copy buffer into appropriately-sized slice for return
-    data := make([]byte, int(tarHeader.Size))
-    copy(data, buffer[:n])
-
-    if n < int(tarHeader.Size) {
-        trace.Log(log.Event("reading remaining tar header content"))
-
-        // Read the remaining contents
-        if _, err := io.ReadFull(tarReader, data[n:]); err != nil {
+        data := make([]byte, int(tarHeader.Size))
+        traceLog.Log(log.Event("reading tar file contents"))
+        if _, err := io.ReadFull(tarReader, data); err != nil {
             return err
         }
-        trace.Log(log.Int("n", int(tarHeader.Size)-n))
+        traceLog.Log(log.Int("n", int(tarHeader.Size)))
+        callback(ParseRequest{Path: tarHeader.Name, Data: data})
     }
-
-    request := ParseRequest{Path: tarHeader.Name, Data: data}
-    callback(request)
-    return nil
-}
-
-// maxFileSize (512KB) is the maximum size of files we attempt to parse.
-const maxFileSize = 1 << 19
-
-func shouldParse(tarHeader *tar.Header) bool {
-    // We do not search large files
-    if tarHeader.Size > maxFileSize {
-        return false
-    }
-
-    // We only care about files
-    if tarHeader.Typeflag != tar.TypeReg && tarHeader.Typeflag != tar.TypeRegA {
-        return false
-    }
-
-    // JSON files are symbol-less
-    if path.Ext(tarHeader.Name) == ".json" {
-        return false
-    }
-
-    return true
 }
diff --git a/cmd/symbols/internal/fetcher/repository_fetcher_test.go b/cmd/symbols/fetcher/repository_fetcher_test.go
similarity index 88%
rename from cmd/symbols/internal/fetcher/repository_fetcher_test.go
rename to cmd/symbols/fetcher/repository_fetcher_test.go
index 2701a3fdd7a..ed884c1f5d2 100644
--- a/cmd/symbols/internal/fetcher/repository_fetcher_test.go
+++ b/cmd/symbols/fetcher/repository_fetcher_test.go
@@ -7,8 +7,8 @@ import (

     "github.com/google/go-cmp/cmp"

-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
     "github.com/sourcegraph/sourcegraph/internal/api"
     "github.com/sourcegraph/sourcegraph/internal/observation"
 )
@@ -29,16 +29,10 @@ func TestRepositoryFetcher(t *testing.T) {
         tarContents[name] = content
     }

-    // JSON is ignored
-    tarContents["ignored.json"] = "{}"
-
-    // Large files are ignored
-    tarContents["payloads.txt"] = strings.Repeat("oversized load", maxFileSize)
-
     gitserverClient := NewMockGitserverClient()
     gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(tarContents))

-    repositoryFetcher := NewRepositoryFetcher(gitserverClient, 15, 1000, &observation.TestContext)
+    repositoryFetcher := NewRepositoryFetcher(gitserverClient, 1000, &observation.TestContext)
     args := types.SearchArgs{Repo: api.RepoName("foo"), CommitID: api.CommitID("deadbeef")}

     t.Run("all paths", func(t *testing.T) {
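[Editor's note] The readTar rewrite above depends on batchByTotalLength, visible as context in this hunk, to keep the summed length of paths passed to a single git archive call under the OS ARG_MAX limit described in the config comments. A minimal sketch of such a greedy batching helper, assuming only the signature shown in the diff (the real implementation may differ):

    package fetcher

    // batchByTotalLength greedily packs paths into batches whose summed lengths
    // stay at or below maxTotalLength, so each batch is safe to pass as
    // arguments to a single `git archive` invocation.
    func batchByTotalLength(paths []string, maxTotalLength int) [][]string {
        batches := [][]string{}
        currentBatch := []string{}
        currentLength := 0

        for _, path := range paths {
            // Start a new batch when adding this path would exceed the limit.
            // An oversized single path still gets its own batch.
            if len(currentBatch) > 0 && currentLength+len(path) > maxTotalLength {
                batches = append(batches, currentBatch)
                currentBatch = []string{}
                currentLength = 0
            }
            currentBatch = append(currentBatch, path)
            currentLength += len(path)
        }
        if len(currentBatch) > 0 {
            batches = append(batches, currentBatch)
        }
        return batches
    }
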
diff --git a/cmd/symbols/internal/gitserver/client.go b/cmd/symbols/gitserver/client.go
similarity index 98%
rename from cmd/symbols/internal/gitserver/client.go
rename to cmd/symbols/gitserver/client.go
index 182c45c1100..9f8e712f71e 100644
--- a/cmd/symbols/internal/gitserver/client.go
+++ b/cmd/symbols/gitserver/client.go
@@ -4,7 +4,6 @@ import (
     "bytes"
     "context"
     "io"
-    "strings"

     "github.com/opentracing/opentracing-go/log"

@@ -47,7 +46,6 @@ func (c *gitserverClient) FetchTar(ctx context.Context, repo api.RepoName, commi
         log.String("repo", string(repo)),
         log.String("commit", string(commit)),
         log.Int("paths", len(paths)),
-        log.String("paths", strings.Join(paths, ":")),
     }})
     defer endObservation(1, observation.Args{})

diff --git a/cmd/symbols/internal/gitserver/client_test.go b/cmd/symbols/gitserver/client_test.go
similarity index 100%
rename from cmd/symbols/internal/gitserver/client_test.go
rename to cmd/symbols/gitserver/client_test.go
diff --git a/cmd/symbols/internal/gitserver/observability.go b/cmd/symbols/gitserver/observability.go
similarity index 100%
rename from cmd/symbols/internal/gitserver/observability.go
rename to cmd/symbols/gitserver/observability.go
diff --git a/cmd/symbols/internal/gitserver/tar.go b/cmd/symbols/gitserver/tar.go
similarity index 100%
rename from cmd/symbols/internal/gitserver/tar.go
rename to cmd/symbols/gitserver/tar.go
diff --git a/cmd/symbols/go-build.sh b/cmd/symbols/go-build.sh
index 377a28da509..90014f2724e 100755
--- a/cmd/symbols/go-build.sh
+++ b/cmd/symbols/go-build.sh
@@ -16,15 +16,36 @@ export GOOS=linux
 # go-sqlite3 depends on cgo. Without cgo, it will build but it'll throw an error at query time.
 export CGO_ENABLED=1

-# Ensure musl-gcc is available since we're building to run on Alpine, which uses musl.
-if ! command -v musl-gcc >/dev/null; then
-  echo "musl-gcc not found, which is needed for cgo for go-sqlite3. Run 'apt-get install -y musl-tools'."
+# Default CC to musl-gcc.
+export CC="${CC:-musl-gcc}"
+
+help() {
+  echo "You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine."
+  echo
+  echo "  Linux: run 'apt-get install -y musl-tools'"
+  echo "  macOS: download https://github.com/FiloSottile/homebrew-musl-cross/blob/6ee3329ee41231fe693306490f8e4d127c70e618/musl-cross.rb and run 'brew install ~/Downloads/musl-cross.rb'"
+}
+
+if ! command -v "$CC" >/dev/null; then
+  echo "$CC not found."
+  help
   exit 1
 fi

+# Make sure this is a musl compiler.
+case "$CC" in
+  *musl*)
+    ;;
+  *)
+    echo "$CC doesn't look like a musl compiler."
+    help
+    exit 1
+    ;;
+esac
+
 echo "--- go build"
 pkg="github.com/sourcegraph/sourcegraph/cmd/symbols"
-env CC=musl-gcc go build \
+env go build \
   -trimpath \
   -ldflags "-X github.com/sourcegraph/sourcegraph/internal/version.version=$VERSION -X github.com/sourcegraph/sourcegraph/internal/version.timestamp=$(date +%s)" \
   -buildmode exe \
diff --git a/cmd/symbols/internal/api/gen.go b/cmd/symbols/internal/api/gen.go
index 54c920d6cc3..53cf11f87ec 100644
--- a/cmd/symbols/internal/api/gen.go
+++ b/cmd/symbols/internal/api/gen.go
@@ -1,3 +1,3 @@
 package api

-//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go
+//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver -i GitServerClient -o mock_iface_test.go
diff --git a/cmd/symbols/internal/api/handler.go b/cmd/symbols/internal/api/handler.go
index 6ea25cf1de1..f5133256b1c 100644
--- a/cmd/symbols/internal/api/handler.go
+++ b/cmd/symbols/internal/api/handler.go
@@ -9,86 +9,71 @@ import (

     "github.com/sourcegraph/go-ctags"

-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
-    "github.com/sourcegraph/sourcegraph/internal/observation"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
     "github.com/sourcegraph/sourcegraph/lib/errors"
 )

-type apiHandler struct {
-    cachedDatabaseWriter writer.CachedDatabaseWriter
-    ctagsBinary          string
-    operations           *operations
-}
-
 func NewHandler(
-    cachedDatabaseWriter writer.CachedDatabaseWriter,
+    searchFunc types.SearchFunc,
+    handleStatus func(http.ResponseWriter, *http.Request),
     ctagsBinary string,
-    observationContext *observation.Context,
 ) http.Handler {
-    h := newAPIHandler(cachedDatabaseWriter, ctagsBinary, observationContext)
-
     mux := http.NewServeMux()
-    mux.HandleFunc("/search", h.handleSearch)
-    mux.HandleFunc("/healthz", h.handleHealthCheck)
-    mux.HandleFunc("/list-languages", h.handleListLanguages)
-
-    return mux
-}
-
-func newAPIHandler(
-    cachedDatabaseWriter writer.CachedDatabaseWriter,
-    ctagsBinary string,
-    observationContext *observation.Context,
-) *apiHandler {
-    return &apiHandler{
-        cachedDatabaseWriter: cachedDatabaseWriter,
-        ctagsBinary:          ctagsBinary,
-        operations:           newOperations(observationContext),
+    mux.HandleFunc("/search", handleSearchWith(searchFunc))
+    mux.HandleFunc("/healthz", handleHealthCheck)
+    mux.HandleFunc("/list-languages", handleListLanguages(ctagsBinary))
+    if handleStatus != nil {
+        mux.HandleFunc("/status", handleStatus)
     }
+    return mux
 }

 const maxNumSymbolResults = 500

-func (h *apiHandler) handleSearch(w http.ResponseWriter, r *http.Request) {
-    var args types.SearchArgs
-    if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
-        http.Error(w, err.Error(), http.StatusBadRequest)
-        return
-    }
-
-    if args.First < 0 || args.First > maxNumSymbolResults {
-        args.First = maxNumSymbolResults
-    }
-
-    result, err := h.handleSearchInternal(r.Context(), args)
-    if err != nil {
-        // Ignore reporting errors where client disconnected
-        if r.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) {
+func handleSearchWith(searchFunc types.SearchFunc) func(w http.ResponseWriter, r *http.Request) {
+    return func(w http.ResponseWriter, r *http.Request) {
+        var args types.SearchArgs
+        if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
+            http.Error(w, err.Error(), http.StatusBadRequest)
             return
         }

-        log15.Error("Symbol search failed", "args", args, "error", err)
-        http.Error(w, err.Error(), http.StatusInternalServerError)
-        return
-    }
+        if args.First < 0 || args.First > maxNumSymbolResults {
+            args.First = maxNumSymbolResults
+        }

-    if err := json.NewEncoder(w).Encode(result); err != nil {
-        http.Error(w, err.Error(), http.StatusInternalServerError)
+        result, err := searchFunc(r.Context(), args)
+        if err != nil {
+            // Ignore reporting errors where client disconnected
+            if r.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) {
+                return
+            }
+
+            log15.Error("Symbol search failed", "args", args, "error", err)
+            http.Error(w, err.Error(), http.StatusInternalServerError)
+            return
+        }
+
+        if err := json.NewEncoder(w).Encode(result); err != nil {
+            http.Error(w, err.Error(), http.StatusInternalServerError)
+        }
     }
 }

-func (h *apiHandler) handleListLanguages(w http.ResponseWriter, r *http.Request) {
-    mapping, err := ctags.ListLanguageMappings(r.Context(), h.ctagsBinary)
-    if err != nil {
-        http.Error(w, err.Error(), http.StatusInternalServerError)
-        return
-    }
-    if err := json.NewEncoder(w).Encode(mapping); err != nil {
-        http.Error(w, err.Error(), http.StatusInternalServerError)
+func handleListLanguages(ctagsBinary string) func(w http.ResponseWriter, r *http.Request) {
+    return func(w http.ResponseWriter, r *http.Request) {
+        mapping, err := ctags.ListLanguageMappings(r.Context(), ctagsBinary)
+        if err != nil {
+            http.Error(w, err.Error(), http.StatusInternalServerError)
+            return
+        }
+        if err := json.NewEncoder(w).Encode(mapping); err != nil {
+            http.Error(w, err.Error(), http.StatusInternalServerError)
+        }
     }
 }

-func (h *apiHandler) handleHealthCheck(w http.ResponseWriter, r *http.Request) {
+func handleHealthCheck(w http.ResponseWriter, r *http.Request) {
     w.WriteHeader(http.StatusOK)

     if _, err := w.Write([]byte("OK")); err != nil {
diff --git a/cmd/symbols/internal/api/handler_test.go b/cmd/symbols/internal/api/handler_test.go
index a827065cd87..1cecd1cb27a 100644
--- a/cmd/symbols/internal/api/handler_test.go
+++ b/cmd/symbols/internal/api/handler_test.go
@@ -9,12 +9,14 @@ import (
     "time"

     "github.com/sourcegraph/go-ctags"
+    "golang.org/x/sync/semaphore"

+    "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
     "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
     "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
+    sharedobservability "github.com/sourcegraph/sourcegraph/cmd/symbols/observability"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
     "github.com/sourcegraph/sourcegraph/internal/diskcache"
     "github.com/sourcegraph/sourcegraph/internal/httpcli"
     "github.com/sourcegraph/sourcegraph/internal/observation"
@@ -50,10 +52,10 @@ func TestHandler(t *testing.T) {
     gitserverClient := NewMockGitserverClient()
     gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(files))

-    parser := parser.NewParser(parserPool, fetcher.NewRepositoryFetcher(gitserverClient, 15, 1000, &observation.TestContext), 0, 10, &observation.TestContext)
-    databaseWriter := writer.NewDatabaseWriter(tmpDir, gitserverClient, parser)
+    parser := parser.NewParser(parserPool, fetcher.NewRepositoryFetcher(gitserverClient, 1000, &observation.TestContext), 0, 10, &observation.TestContext)
+    databaseWriter := writer.NewDatabaseWriter(tmpDir, gitserverClient, parser, semaphore.NewWeighted(1))
     cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
-    handler := NewHandler(cachedDatabaseWriter, "", &observation.TestContext)
+    handler := NewHandler(MakeSqliteSearchFunc(sharedobservability.NewOperations(&observation.TestContext), cachedDatabaseWriter), nil, "")

     server := httptest.NewServer(handler)
     defer server.Close()
diff --git a/cmd/symbols/internal/api/mock_iface_test.go b/cmd/symbols/internal/api/mock_iface_test.go
index 551b2146b33..4bd27fae32f 100644
--- a/cmd/symbols/internal/api/mock_iface_test.go
+++ b/cmd/symbols/internal/api/mock_iface_test.go
@@ -7,14 +7,14 @@ import (
     "io"
     "sync"

-    gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
+    gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
     api "github.com/sourcegraph/sourcegraph/internal/api"
 )

 // MockGitserverClient is a mock implementation of the GitserverClient
 // interface (from the package
-// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used
-// for unit testing.
+// github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver) used for unit
+// testing.
 type MockGitserverClient struct {
     // FetchTarFunc is an instance of a mock function object controlling the
     // behavior of the method FetchTar.
diff --git a/cmd/symbols/internal/api/search.go b/cmd/symbols/internal/api/search.go
deleted file mode 100644
index 3f5f0cfb89e..00000000000
--- a/cmd/symbols/internal/api/search.go
+++ /dev/null
@@ -1,59 +0,0 @@
-package api
-
-import (
-    "context"
-    "strings"
-    "time"
-
-    "github.com/opentracing/opentracing-go/log"
-
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
-    "github.com/sourcegraph/sourcegraph/internal/observation"
-    "github.com/sourcegraph/sourcegraph/internal/search/result"
-    "github.com/sourcegraph/sourcegraph/lib/errors"
-)
-
-const searchTimeout = 60 * time.Second
-
-func (h *apiHandler) handleSearchInternal(ctx context.Context, args types.SearchArgs) (_ *result.Symbols, err error) {
-    ctx, trace, endObservation := h.operations.search.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
-        log.String("repo", string(args.Repo)),
-        log.String("commitID", string(args.CommitID)),
-        log.String("query", args.Query),
-        log.Bool("isRegExp", args.IsRegExp),
-        log.Bool("isCaseSensitive", args.IsCaseSensitive),
-        log.Int("numIncludePatterns", len(args.IncludePatterns)),
-        log.String("includePatterns", strings.Join(args.IncludePatterns, ":")),
-        log.String("excludePattern", args.ExcludePattern),
-        log.Int("first", args.First),
-    }})
-    defer func() {
-        endObservation(1, observation.Args{
-            MetricLabelValues: []string{observability.GetParseAmount(ctx)},
-            LogFields:         []log.Field{log.String("parseAmount", observability.GetParseAmount(ctx))},
-        })
-    }()
-    ctx = observability.SeedParseAmount(ctx)
-
-    ctx, cancel := context.WithTimeout(ctx, searchTimeout)
-    defer cancel()
-
-    dbFile, err := h.cachedDatabaseWriter.GetOrCreateDatabaseFile(ctx, args)
-    if err != nil {
-        return nil, errors.Wrap(err, "databaseWriter.GetOrCreateDatabaseFile")
-    }
-    trace.Log(log.String("dbFile", dbFile))
-
-    var results result.Symbols
-    err = store.WithSQLiteStore(dbFile, func(db store.Store) (err error) {
-        if results, err = db.Search(ctx, args); err != nil {
-            return errors.Wrap(err, "store.Search")
-        }
-
-        return nil
-    })
-
-    return &results, err
-}
diff --git a/cmd/symbols/internal/api/search_sqlite.go b/cmd/symbols/internal/api/search_sqlite.go
new file mode 100644
index 00000000000..a481f2d03e7
--- /dev/null
+++ b/cmd/symbols/internal/api/search_sqlite.go
@@ -0,0 +1,64 @@
+package api
+
+import (
+    "context"
+    "strings"
+    "time"
+
+    "github.com/opentracing/opentracing-go/log"
+
+    "github.com/sourcegraph/sourcegraph/lib/errors"
+
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
+    sharedobservability "github.com/sourcegraph/sourcegraph/cmd/symbols/observability"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
+    "github.com/sourcegraph/sourcegraph/internal/observation"
+    "github.com/sourcegraph/sourcegraph/internal/search/result"
+)
+
+const searchTimeout = 60 * time.Second
+
+func MakeSqliteSearchFunc(operations *sharedobservability.Operations, cachedDatabaseWriter writer.CachedDatabaseWriter) types.SearchFunc {
+    // The closure's signature must match types.SearchFunc exactly
+    // (result.Symbols, not the unnamed []result.Symbol).
+    return func(ctx context.Context, args types.SearchArgs) (results result.Symbols, err error) {
+        ctx, trace, endObservation := operations.Search.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
+            log.String("repo", string(args.Repo)),
+            log.String("commitID", string(args.CommitID)),
+            log.String("query", args.Query),
+            log.Bool("isRegExp", args.IsRegExp),
+            log.Bool("isCaseSensitive", args.IsCaseSensitive),
+            log.Int("numIncludePatterns", len(args.IncludePatterns)),
+            log.String("includePatterns", strings.Join(args.IncludePatterns, ":")),
+            log.String("excludePattern", args.ExcludePattern),
+            log.Int("first", args.First),
+        }})
+        defer func() {
+            endObservation(1, observation.Args{
+                MetricLabelValues: []string{observability.GetParseAmount(ctx)},
+                LogFields:         []log.Field{log.String("parseAmount", observability.GetParseAmount(ctx))},
+            })
+        }()
+        ctx = observability.SeedParseAmount(ctx)
+
+        ctx, cancel := context.WithTimeout(ctx, searchTimeout)
+        defer cancel()
+
+        dbFile, err := cachedDatabaseWriter.GetOrCreateDatabaseFile(ctx, args)
+        if err != nil {
+            return nil, errors.Wrap(err, "databaseWriter.GetOrCreateDatabaseFile")
+        }
+        trace.Log(log.String("dbFile", dbFile))
+
+        var res result.Symbols
+        err = store.WithSQLiteStore(dbFile, func(db store.Store) (err error) {
+            if res, err = db.Search(ctx, args); err != nil {
+                return errors.Wrap(err, "store.Search")
+            }
+
+            return nil
+        })
+
+        return res, err
+    }
+}
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser" - "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/cmd/symbols/parser" + "github.com/sourcegraph/sourcegraph/cmd/symbols/types" "github.com/sourcegraph/sourcegraph/internal/database/basestore" "github.com/sourcegraph/sourcegraph/internal/search/result" ) diff --git a/cmd/symbols/internal/database/store/symbols.go b/cmd/symbols/internal/database/store/symbols.go index 411fb48093a..f13a168c618 100644 --- a/cmd/symbols/internal/database/store/symbols.go +++ b/cmd/symbols/internal/database/store/symbols.go @@ -7,7 +7,7 @@ import ( "github.com/keegancsmith/sqlf" "golang.org/x/sync/errgroup" - "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser" + "github.com/sourcegraph/sourcegraph/cmd/symbols/parser" "github.com/sourcegraph/sourcegraph/internal/database/batch" "github.com/sourcegraph/sourcegraph/internal/search/result" ) diff --git a/cmd/symbols/internal/database/writer/cache.go b/cmd/symbols/internal/database/writer/cache.go index 21b5535ec3b..4da05d3a09d 100644 --- a/cmd/symbols/internal/database/writer/cache.go +++ b/cmd/symbols/internal/database/writer/cache.go @@ -5,7 +5,7 @@ import ( "fmt" "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability" - "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/cmd/symbols/types" "github.com/sourcegraph/sourcegraph/internal/diskcache" "github.com/sourcegraph/sourcegraph/lib/errors" ) diff --git a/cmd/symbols/internal/database/writer/writer.go b/cmd/symbols/internal/database/writer/writer.go index f4a8762b082..227e6787825 100644 --- a/cmd/symbols/internal/database/writer/writer.go +++ b/cmd/symbols/internal/database/writer/writer.go @@ -4,11 +4,13 @@ import ( "context" "path/filepath" + "golang.org/x/sync/semaphore" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver" "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability" "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store" - "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver" - "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser" - "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types" + "github.com/sourcegraph/sourcegraph/cmd/symbols/parser" + "github.com/sourcegraph/sourcegraph/cmd/symbols/types" "github.com/sourcegraph/sourcegraph/internal/api" "github.com/sourcegraph/sourcegraph/internal/diskcache" "github.com/sourcegraph/sourcegraph/lib/errors" @@ -22,21 +24,27 @@ type databaseWriter struct { path string gitserverClient gitserver.GitserverClient parser parser.Parser + sem *semaphore.Weighted } func NewDatabaseWriter( path string, gitserverClient gitserver.GitserverClient, parser parser.Parser, + sem *semaphore.Weighted, ) DatabaseWriter { return &databaseWriter{ path: path, gitserverClient: gitserverClient, parser: parser, + sem: sem, } } func (w *databaseWriter) WriteDBFile(ctx context.Context, args types.SearchArgs, dbFile string) error { + w.sem.Acquire(ctx, 1) + defer w.sem.Release(1) + if newestDBFile, oldCommit, ok, err := w.getNewestCommit(ctx, args); err != nil { return err } else if ok { diff --git a/cmd/symbols/internal/fetcher/gen.go b/cmd/symbols/internal/fetcher/gen.go deleted file mode 100644 index e1aace05b69..00000000000 --- a/cmd/symbols/internal/fetcher/gen.go +++ /dev/null @@ -1,3 +0,0 @@ -package fetcher - -//go:generate ../../../../dev/mockgen.sh 
diff --git a/cmd/symbols/internal/fetcher/gen.go b/cmd/symbols/internal/fetcher/gen.go
deleted file mode 100644
index e1aace05b69..00000000000
--- a/cmd/symbols/internal/fetcher/gen.go
+++ /dev/null
@@ -1,3 +0,0 @@
-package fetcher
-
-//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go
diff --git a/cmd/symbols/internal/parser/parser_factory_ctags.go b/cmd/symbols/internal/parser/parser_factory_ctags.go
deleted file mode 100644
index 5e089095494..00000000000
--- a/cmd/symbols/internal/parser/parser_factory_ctags.go
+++ /dev/null
@@ -1,23 +0,0 @@
-package parser
-
-import (
-    "log"
-    "os"
-
-    "github.com/sourcegraph/go-ctags"
-)
-
-func NewCtagsParserFactory(ctagsCommand string, patternLengthLimit int, logErrors, debugLogs bool) ParserFactory {
-    options := ctags.Options{
-        Bin:                ctagsCommand,
-        PatternLengthLimit: patternLengthLimit,
-    }
-    if logErrors {
-        options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
-    }
-    if debugLogs {
-        options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
-    }
-
-    return func() (ctags.Parser, error) { return ctags.New(options) }
-}
diff --git a/cmd/symbols/internal/types/search_args.go b/cmd/symbols/internal/types/search_args.go
deleted file mode 100644
index 553f9795eb2..00000000000
--- a/cmd/symbols/internal/types/search_args.go
+++ /dev/null
@@ -1,40 +0,0 @@
-package types
-
-import (
-    "github.com/sourcegraph/sourcegraph/internal/api"
-)
-
-// SearchArgs are the arguments to perform a search on the symbols service.
-type SearchArgs struct {
-    // Repo is the name of the repository to search in.
-    Repo api.RepoName `json:"repo"`
-
-    // CommitID is the commit to search in.
-    CommitID api.CommitID `json:"commitID"`
-
-    // Query is the search query.
-    Query string
-
-    // IsRegExp if true will treat the Pattern as a regular expression.
-    IsRegExp bool
-
-    // IsCaseSensitive if false will ignore the case of query and file pattern
-    // when finding matches.
-    IsCaseSensitive bool
-
-    // IncludePatterns is a list of regexes that symbol's file paths
-    // need to match to get included in the result
-    //
-    // The patterns are ANDed together; a file's path must match all patterns
-    // for it to be kept. That is also why it is a list (unlike the singular
-    // ExcludePattern); it is not possible in general to construct a single
-    // glob or Go regexp that represents multiple such patterns ANDed together.
-    IncludePatterns []string
-
-    // ExcludePattern is an optional regex that symbol's file paths
-    // need to match to get included in the result
-    ExcludePattern string
-
-    // First indicates that only the first n symbols should be returned.
-    First int
-}
diff --git a/cmd/symbols/main.go b/cmd/symbols/main.go
index 64e4b07bc3c..bcdbabff357 100644
--- a/cmd/symbols/main.go
+++ b/cmd/symbols/main.go
@@ -3,128 +3,9 @@
 package main

 import (
-    "context"
-    "fmt"
-    "log"
-    "net/http"
-    "os"
-    "time"
-
-    "github.com/inconshreveable/log15"
-    "github.com/opentracing/opentracing-go"
-    "github.com/prometheus/client_golang/prometheus"
-
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
-    sqlite "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/janitor"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
-    "github.com/sourcegraph/sourcegraph/internal/actor"
-    "github.com/sourcegraph/sourcegraph/internal/conf"
-    "github.com/sourcegraph/sourcegraph/internal/debugserver"
-    "github.com/sourcegraph/sourcegraph/internal/diskcache"
-    "github.com/sourcegraph/sourcegraph/internal/env"
-    "github.com/sourcegraph/sourcegraph/internal/goroutine"
-    "github.com/sourcegraph/sourcegraph/internal/honey"
-    "github.com/sourcegraph/sourcegraph/internal/httpserver"
-    "github.com/sourcegraph/sourcegraph/internal/logging"
-    "github.com/sourcegraph/sourcegraph/internal/observation"
-    "github.com/sourcegraph/sourcegraph/internal/profiler"
-    "github.com/sourcegraph/sourcegraph/internal/sentry"
-    "github.com/sourcegraph/sourcegraph/internal/trace"
-    "github.com/sourcegraph/sourcegraph/internal/trace/ot"
-    "github.com/sourcegraph/sourcegraph/internal/tracer"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/shared"
 )

-const addr = ":3184"
-
 func main() {
-    config.Load()
-
-    // Set up Google Cloud Profiler when running in Cloud
-    if err := profiler.Init(); err != nil {
-        log.Fatalf("Failed to start profiler: %v", err)
-    }
-
-    env.Lock()
-    env.HandleHelpFlag()
-    conf.Init()
-    logging.Init()
-    tracer.Init(conf.DefaultClient())
-    sentry.Init(conf.DefaultClient())
-    trace.Init()
-
-    if err := config.Validate(); err != nil {
-        log.Fatalf("Failed to load configuration: %s", err)
-    }
-
-    // Ensure we register our database driver before calling
-    // anything that tries to open a SQLite database.
-    sqlite.Init()
-
-    if config.sanityCheck {
-        fmt.Print("Running sanity check...")
-        if err := sqlite.SanityCheck(); err != nil {
-            fmt.Println("failed ❌", err)
-            os.Exit(1)
-        }
-
-        fmt.Println("passed ✅")
-        os.Exit(0)
-    }
-
-    // Initialize tracing/metrics
-    observationContext := &observation.Context{
-        Logger:     log15.Root(),
-        Tracer:     &trace.Tracer{Tracer: opentracing.GlobalTracer()},
-        Registerer: prometheus.DefaultRegisterer,
-        HoneyDataset: &honey.Dataset{
-            Name:       "codeintel-symbols",
-            SampleRate: 5,
-        },
-    }
-
-    // Start debug server
-    ready := make(chan struct{})
-    go debugserver.NewServerRoutine(ready).Start()
-
-    ctagsParserFactory := parser.NewCtagsParserFactory(
-        config.ctagsCommand,
-        config.ctagsPatternLengthLimit,
-        config.ctagsLogErrors,
-        config.ctagsDebugLogs,
-    )
-
-    cache := diskcache.NewStore(config.cacheDir, "symbols",
-        diskcache.WithBackgroundTimeout(config.processingTimeout),
-        diskcache.WithObservationContext(observationContext),
-    )
-
-    parserPool, err := parser.NewParserPool(ctagsParserFactory, config.numCtagsProcesses)
-    if err != nil {
-        log.Fatalf("Failed to create parser pool: %s", err)
-    }
-
-    gitserverClient := gitserver.NewClient(observationContext)
-    repositoryFetcher := fetcher.NewRepositoryFetcher(gitserverClient, 15, config.maxTotalPathsLength, observationContext)
-    parser := parser.NewParser(parserPool, repositoryFetcher, config.requestBufferSize, config.numCtagsProcesses, observationContext)
-    databaseWriter := writer.NewDatabaseWriter(config.cacheDir, gitserverClient, parser)
-    cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
-    apiHandler := api.NewHandler(cachedDatabaseWriter, config.ctagsCommand, observationContext)
-
-    server := httpserver.NewFromAddr(addr, &http.Server{
-        ReadTimeout:  75 * time.Second,
-        WriteTimeout: 10 * time.Minute,
-        Handler:      actor.HTTPMiddleware(ot.HTTPMiddleware(trace.HTTPMiddleware(apiHandler, conf.DefaultClient()))),
-    })
-
-    evictionInterval := time.Second * 10
-    cacheSizeBytes := int64(config.cacheSizeMB) * 1000 * 1000
-    cacheEvicter := janitor.NewCacheEvicter(evictionInterval, cache, cacheSizeBytes, janitor.NewMetrics(observationContext))
-
-    // Mark health server as ready and go!
-    close(ready)
-    goroutine.MonitorBackgroundRoutines(context.Background(), server, cacheEvicter)
+    shared.Main(shared.SetupSqlite)
 }
diff --git a/cmd/symbols/internal/api/observability.go b/cmd/symbols/observability/observability.go
similarity index 78%
rename from cmd/symbols/internal/api/observability.go
rename to cmd/symbols/observability/observability.go
index 9c6f663e962..5d6d7cf63a1 100644
--- a/cmd/symbols/internal/api/observability.go
+++ b/cmd/symbols/observability/observability.go
@@ -1,4 +1,4 @@
-package api
+package observability

 import (
     "fmt"
@@ -7,11 +7,11 @@ import (
     "github.com/sourcegraph/sourcegraph/internal/observation"
 )

-type operations struct {
-    search *observation.Operation
+type Operations struct {
+    Search *observation.Operation
 }

-func newOperations(observationContext *observation.Context) *operations {
+func NewOperations(observationContext *observation.Context) *Operations {
     metrics := metrics.NewREDMetrics(
         observationContext.Registerer,
         "codeintel_symbols_api",
@@ -28,7 +28,7 @@ func newOperations(observationContext *observation.Context) *operations {
         })
     }

-    return &operations{
-        search: op("Search"),
+    return &Operations{
+        Search: op("Search"),
     }
 }
diff --git a/cmd/symbols/parser/filtering_parser.go b/cmd/symbols/parser/filtering_parser.go
new file mode 100644
index 00000000000..34e8fadd6aa
--- /dev/null
+++ b/cmd/symbols/parser/filtering_parser.go
@@ -0,0 +1,51 @@
+package parser
+
+import (
+    "bytes"
+
+    "github.com/sourcegraph/go-ctags"
+)
+
+type FilteringParser struct {
+    parser      ctags.Parser
+    maxFileSize int
+    maxSymbols  int
+}
+
+func NewFilteringParser(parser ctags.Parser, maxFileSize int, maxSymbols int) ctags.Parser {
+    return &FilteringParser{
+        parser:      parser,
+        maxFileSize: maxFileSize,
+        maxSymbols:  maxSymbols,
+    }
+}
+
+func (p *FilteringParser) Parse(path string, content []byte) ([]*ctags.Entry, error) {
+    if len(content) > p.maxFileSize {
+        // File is over 512KiB, don't parse it
+        return nil, nil
+    }
+
+    // Check to see if first 256 bytes contain a 0x00. If so, we'll assume that
+    // the file is binary and skip parsing. Otherwise, we'll have some non-zero
+    // contents that passed our filters above to parse.
+    if bytes.IndexByte(content[:min(len(content), 256)], 0x00) >= 0 {
+        return nil, nil
+    }
+
+    entries, err := p.parser.Parse(path, content)
+    if err != nil {
+        return nil, err
+    }
+
+    if len(entries) > p.maxSymbols {
+        // File has too many symbols, don't return any of them
+        return nil, nil
+    }
+
+    return entries, nil
+}
+
+func (p *FilteringParser) Close() {
+    p.parser.Close()
+}
diff --git a/cmd/symbols/internal/parser/observability.go b/cmd/symbols/parser/observability.go
similarity index 100%
rename from cmd/symbols/internal/parser/observability.go
rename to cmd/symbols/parser/observability.go
diff --git a/cmd/symbols/internal/parser/parser.go b/cmd/symbols/parser/parser.go
similarity index 97%
rename from cmd/symbols/internal/parser/parser.go
rename to cmd/symbols/parser/parser.go
index 5d042a66c0a..ef601d1a7a3 100644
--- a/cmd/symbols/internal/parser/parser.go
+++ b/cmd/symbols/parser/parser.go
@@ -10,8 +10,8 @@ import (
     "github.com/opentracing/opentracing-go/log"
     "github.com/sourcegraph/go-ctags"

-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
-    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
     "github.com/sourcegraph/sourcegraph/internal/observation"
     "github.com/sourcegraph/sourcegraph/internal/search/result"
     "github.com/sourcegraph/sourcegraph/lib/errors"
@@ -123,6 +123,13 @@ func (p *parser) Parse(ctx context.Context, args types.SearchArgs, paths []strin
     return symbolOrErrors, nil
 }

+func min(a, b int) int {
+    if a < b {
+        return a
+    }
+    return b
+}
+
 func (p *parser) handleParseRequest(ctx context.Context, symbolOrErrors chan<- SymbolOrError, parseRequest fetcher.ParseRequest, totalSymbols *uint32) (err error) {
     ctx, trace, endObservation := p.operations.handleParseRequest.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
         log.String("path", parseRequest.Path),
diff --git a/cmd/symbols/internal/parser/parser_factory.go b/cmd/symbols/parser/parser_factory.go
similarity index 100%
rename from cmd/symbols/internal/parser/parser_factory.go
rename to cmd/symbols/parser/parser_factory.go
diff --git a/cmd/symbols/parser/parser_factory_ctags.go b/cmd/symbols/parser/parser_factory_ctags.go
new file mode 100644
index 00000000000..b93f68a506f
--- /dev/null
+++ b/cmd/symbols/parser/parser_factory_ctags.go
@@ -0,0 +1,31 @@
+package parser
+
+import (
+    "log"
+    "os"
+
+    "github.com/sourcegraph/go-ctags"
+
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
+)
+
+func NewCtagsParserFactory(config types.CtagsConfig) ParserFactory {
+    options := ctags.Options{
+        Bin:                config.Command,
+        PatternLengthLimit: config.PatternLengthLimit,
+    }
+    if config.LogErrors {
+        options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
+    }
+    if config.DebugLogs {
+        options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
+    }
+
+    return func() (ctags.Parser, error) {
+        parser, err := ctags.New(options)
+        if err != nil {
+            return nil, err
+        }
+        return NewFilteringParser(parser, config.MaxFileSize, config.MaxSymbols), nil
+    }
+}
diff --git a/cmd/symbols/internal/parser/parser_factory_ctags_test.go b/cmd/symbols/parser/parser_factory_ctags_test.go
similarity index 94%
rename from cmd/symbols/internal/parser/parser_factory_ctags_test.go
rename to cmd/symbols/parser/parser_factory_ctags_test.go
index 109aa2189b0..24ba3c22fb7 100644
--- a/cmd/symbols/internal/parser/parser_factory_ctags_test.go
+++ b/cmd/symbols/parser/parser_factory_ctags_test.go
@@ -8,6 +8,8 @@ import (
     "github.com/google/go-cmp/cmp/cmpopts"
     "github.com/sourcegraph/go-ctags"
+
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
 )

 func TestCtagsParser(t *testing.T) {
@@ -16,7 +18,7 @@ func TestCtagsParser(t *testing.T) {
         t.Skip("command not in PATH: universal-ctags")
     }

-    p, err := NewCtagsParserFactory("universal-ctags", 250, false, false)()
+    p, err := NewCtagsParserFactory(types.CtagsConfig{Command: "universal-ctags", PatternLengthLimit: 250})()
     if err != nil {
         t.Fatal(err)
     }
diff --git a/cmd/symbols/internal/parser/parser_pool.go b/cmd/symbols/parser/parser_pool.go
similarity index 100%
rename from cmd/symbols/internal/parser/parser_pool.go
rename to cmd/symbols/parser/parser_pool.go
diff --git a/cmd/symbols/shared/main.go b/cmd/symbols/shared/main.go
new file mode 100644
index 00000000000..ea4328ef00d
--- /dev/null
+++ b/cmd/symbols/shared/main.go
@@ -0,0 +1,87 @@
+package shared
+
+import (
+    "context"
+    "log"
+    "net/http"
+    "time"
+
+    "github.com/inconshreveable/log15"
+    "github.com/opentracing/opentracing-go"
+    "github.com/prometheus/client_golang/prometheus"
+
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
+    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
+    "github.com/sourcegraph/sourcegraph/internal/actor"
+    "github.com/sourcegraph/sourcegraph/internal/conf"
+    "github.com/sourcegraph/sourcegraph/internal/debugserver"
+    "github.com/sourcegraph/sourcegraph/internal/env"
+    "github.com/sourcegraph/sourcegraph/internal/goroutine"
+    "github.com/sourcegraph/sourcegraph/internal/honey"
+    "github.com/sourcegraph/sourcegraph/internal/httpserver"
+    "github.com/sourcegraph/sourcegraph/internal/logging"
+    "github.com/sourcegraph/sourcegraph/internal/observation"
+    "github.com/sourcegraph/sourcegraph/internal/profiler"
+    "github.com/sourcegraph/sourcegraph/internal/sentry"
+    "github.com/sourcegraph/sourcegraph/internal/trace"
+    "github.com/sourcegraph/sourcegraph/internal/trace/ot"
+    "github.com/sourcegraph/sourcegraph/internal/tracer"
+)
+
+const addr = ":3184"
+
+type SetupFunc func(observationContext *observation.Context, gitserverClient gitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error)
+
+func Main(setup SetupFunc) {
+    routines := []goroutine.BackgroundRoutine{}
+
+    // Set up Google Cloud Profiler when running in Cloud
+    if err := profiler.Init(); err != nil {
+        log.Fatalf("Failed to start profiler: %v", err)
+    }
+
+    // Initialize tracing/metrics
+    observationContext := &observation.Context{
+        Logger:     log15.Root(),
+        Tracer:     &trace.Tracer{Tracer: opentracing.GlobalTracer()},
+        Registerer: prometheus.DefaultRegisterer,
+        HoneyDataset: &honey.Dataset{
+            Name:       "codeintel-symbols",
+            SampleRate: 5,
+        },
+    }
+
+    // Run setup
+    gitserverClient := gitserver.NewClient(observationContext)
+    repositoryFetcher := fetcher.NewRepositoryFetcher(gitserverClient, types.LoadRepositoryFetcherConfig(env.BaseConfig{}).MaxTotalPathsLength, observationContext)
+    searchFunc, handleStatus, newRoutines, ctagsBinary, err := setup(observationContext, gitserverClient, repositoryFetcher)
+    if err != nil {
+        log.Fatalf("Failed to setup: %v", err)
+    }
+    routines = append(routines, newRoutines...)
+
+    // Initialization
+    env.HandleHelpFlag()
+    conf.Init()
+    logging.Init()
+    tracer.Init(conf.DefaultClient())
+    sentry.Init(conf.DefaultClient())
+    trace.Init()
+
+    // Start debug server
+    ready := make(chan struct{})
+    go debugserver.NewServerRoutine(ready).Start()
+
+    // Create HTTP server
+    server := httpserver.NewFromAddr(addr, &http.Server{
+        ReadTimeout:  75 * time.Second,
+        WriteTimeout: 10 * time.Minute,
+        Handler:      actor.HTTPMiddleware(ot.HTTPMiddleware(trace.HTTPMiddleware(api.NewHandler(searchFunc, handleStatus, ctagsBinary), conf.DefaultClient()))),
+    })
+    routines = append(routines, server)
+
+    // Mark health server as ready and go!
+    close(ready)
+    goroutine.MonitorBackgroundRoutines(context.Background(), routines...)
+}
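[Editor's note] shared.Main receives the service's entire variable surface through SetupFunc: the OSS binary passes SetupSqlite (next file), while the enterprise binary added by this patch can pass a setup returning a Rockskip-backed search function. A hypothetical minimal setup illustrating the contract (mySetup and its trivial search function are illustrative, not from the patch):

    package main

    import (
        "context"
        "net/http"

        "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
        "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
        "github.com/sourcegraph/sourcegraph/cmd/symbols/shared"
        "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
        "github.com/sourcegraph/sourcegraph/internal/goroutine"
        "github.com/sourcegraph/sourcegraph/internal/observation"
        "github.com/sourcegraph/sourcegraph/internal/search/result"
    )

    // mySetup shows the shape of a shared.SetupFunc: it returns the search
    // function, an optional /status handler, extra background routines, and
    // the ctags binary name.
    func mySetup(
        observationContext *observation.Context,
        gitserverClient gitserver.GitserverClient,
        repositoryFetcher fetcher.RepositoryFetcher,
    ) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
        searchFunc := func(ctx context.Context, args types.SearchArgs) (result.Symbols, error) {
            return nil, nil // a real setup would search SQLite or Rockskip here
        }
        return searchFunc, nil, nil, "universal-ctags", nil
    }

    func main() {
        shared.Main(mySetup) // blocks, monitoring the HTTP server and routines
    }
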
+ sqlite.Init() + + if config.SanityCheck { + fmt.Print("Running sanity check...") + if err := sqlite.SanityCheck(); err != nil { + fmt.Println("failed ❌", err) + os.Exit(1) + } + + fmt.Println("passed ✅") + os.Exit(0) + } + + ctagsParserFactory := parser.NewCtagsParserFactory(config.Ctags) + + parserPool, err := parser.NewParserPool(ctagsParserFactory, config.NumCtagsProcesses) + if err != nil { + log.Fatalf("Failed to create parser pool: %s", err) + } + + cache := diskcache.NewStore(config.CacheDir, "symbols", + diskcache.WithBackgroundTimeout(config.ProcessingTimeout), + diskcache.WithObservationContext(observationContext), + ) + + parser := parser.NewParser(parserPool, repositoryFetcher, config.RequestBufferSize, config.NumCtagsProcesses, observationContext) + databaseWriter := writer.NewDatabaseWriter(config.CacheDir, gitserverClient, parser, semaphore.NewWeighted(int64(config.MaxConcurrentlyIndexing))) + cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache) + searchFunc := api.MakeSqliteSearchFunc(observability.NewOperations(observationContext), cachedDatabaseWriter) + + evictionInterval := time.Second * 10 + cacheSizeBytes := int64(config.CacheSizeMB) * 1000 * 1000 + cacheEvicter := janitor.NewCacheEvicter(evictionInterval, cache, cacheSizeBytes, janitor.NewMetrics(observationContext)) + + return searchFunc, nil, []goroutine.BackgroundRoutine{cacheEvicter}, config.Ctags.Command, nil +} diff --git a/cmd/symbols/types/types.go b/cmd/symbols/types/types.go new file mode 100644 index 00000000000..712d3188f2f --- /dev/null +++ b/cmd/symbols/types/types.go @@ -0,0 +1,122 @@ +package types + +import ( + "context" + "os" + + "runtime" + "strconv" + "time" + + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/internal/search/result" + + "github.com/sourcegraph/sourcegraph/internal/env" +) + +type SqliteConfig struct { + SanityCheck bool + CacheDir string + CacheSizeMB int + NumCtagsProcesses int + RequestBufferSize int + ProcessingTimeout time.Duration + Ctags CtagsConfig + RepositoryFetcher RepositoryFetcherConfig + MaxConcurrentlyIndexing int +} + +func LoadSqliteConfig(baseConfig env.BaseConfig) SqliteConfig { + return SqliteConfig{ + Ctags: LoadCtagsConfig(baseConfig), + RepositoryFetcher: LoadRepositoryFetcherConfig(baseConfig), + SanityCheck: baseConfig.GetBool("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not"), + CacheDir: baseConfig.Get("CACHE_DIR", "/tmp/symbols-cache", "directory in which to store cached symbols"), + CacheSizeMB: baseConfig.GetInt("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache (in megabytes)"), + NumCtagsProcesses: baseConfig.GetInt("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of concurrent parser processes to run"), + RequestBufferSize: baseConfig.GetInt("REQUEST_BUFFER_SIZE", "8192", "maximum size of buffered parser request channel"), + ProcessingTimeout: baseConfig.GetInterval("PROCESSING_TIMEOUT", "2h", "maximum time to spend processing a repository"), + MaxConcurrentlyIndexing: baseConfig.GetInt("MAX_CONCURRENTLY_INDEXING", "10", "maximum number of repositories to index at a time"), + } +} + +type CtagsConfig struct { + Command string + PatternLengthLimit int + LogErrors bool + DebugLogs bool + MaxFileSize int + MaxSymbols int +} + +func LoadCtagsConfig(baseConfig env.BaseConfig) CtagsConfig { + logCtagsErrorsDefault := "false" + if os.Getenv("DEPLOY_TYPE") == "dev" { + logCtagsErrorsDefault = "true" + } + + return 
CtagsConfig{
+		Command:            baseConfig.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)"),
+		PatternLengthLimit: baseConfig.GetInt("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags"),
+		LogErrors:          baseConfig.GetBool("LOG_CTAGS_ERRORS", logCtagsErrorsDefault, "log ctags errors"),
+		DebugLogs:          false,
+		MaxFileSize:        baseConfig.GetInt("CTAGS_MAX_FILE_SIZE", "524288", "skip files larger than this size (in bytes)"),
+		MaxSymbols:         baseConfig.GetInt("CTAGS_MAX_SYMBOLS", "2000", "skip files with more than this many symbols"),
+	}
+}
+
+type RepositoryFetcherConfig struct {
+	// MaxTotalPathsLength is the maximum sum of lengths of all paths in a single call to git archive.
+	// Without this limit, we could hit the error "argument list too long" by exceeding the limit on
+	// the number of arguments to a command enforced by the OS.
+	//
+	// Mac  : getconf ARG_MAX returns 1,048,576
+	// Linux: getconf ARG_MAX returns 2,097,152
+	//
+	// We want to remain well under that limit, so the default of 100,000 seems safe (see the
+	// MAX_TOTAL_PATHS_LENGTH environment variable below).
+	MaxTotalPathsLength int
+}
+
+func LoadRepositoryFetcherConfig(baseConfig env.BaseConfig) RepositoryFetcherConfig {
+	return RepositoryFetcherConfig{
+		MaxTotalPathsLength: baseConfig.GetInt("MAX_TOTAL_PATHS_LENGTH", "100000", "maximum sum of lengths of all paths in a single call to git archive"),
+	}
+}
+
+type SearchFunc func(ctx context.Context, args SearchArgs) (results result.Symbols, err error)
+
+// SearchArgs are the arguments to perform a search on the symbols service.
+type SearchArgs struct {
+	// Repo is the name of the repository to search in.
+	Repo api.RepoName `json:"repo"`
+
+	// CommitID is the commit to search in.
+	CommitID api.CommitID `json:"commitID"`
+
+	// Query is the search query.
+	Query string
+
+	// IsRegExp, if true, treats Query as a regular expression.
+	IsRegExp bool
+
+	// IsCaseSensitive, if false, ignores the case of the query and file patterns
+	// when finding matches.
+	IsCaseSensitive bool
+
+	// IncludePatterns is a list of regexes that a symbol's file path must match
+	// to be included in the results.
+	//
+	// The patterns are ANDed together; a file's path must match all patterns
+	// for it to be kept. That is also why it is a list (unlike the singular
+	// ExcludePattern); it is not possible in general to construct a single
+	// glob or Go regexp that represents multiple such patterns ANDed together.
+	IncludePatterns []string
+
+	// ExcludePattern is an optional regex; symbols whose file paths match it
+	// are excluded from the results.
+	ExcludePattern string
+
+	// First indicates that only the first n symbols should be returned.
+ First int +} diff --git a/dev/check/go-dbconn-import.sh b/dev/check/go-dbconn-import.sh index ba5ca06b86f..9be11317a5e 100755 --- a/dev/check/go-dbconn-import.sh +++ b/dev/check/go-dbconn-import.sh @@ -20,6 +20,7 @@ allowed_prefix=( github.com/sourcegraph/sourcegraph/enterprise/cmd/worker github.com/sourcegraph/sourcegraph/enterprise/cmd/repo-updater github.com/sourcegraph/sourcegraph/enterprise/cmd/precise-code-intel- + github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols # Doesn't connect but uses db internals for use with sqlite github.com/sourcegraph/sourcegraph/cmd/symbols # Transitively depends on zoekt package which imports but does not use DB diff --git a/dev/sg/internal/db/db.go b/dev/sg/internal/db/db.go index 2385d328782..10ba23027ff 100644 --- a/dev/sg/internal/db/db.go +++ b/dev/sg/internal/db/db.go @@ -57,6 +57,7 @@ var ( "lsif_data_apidocs_num_pages", "lsif_data_apidocs_num_search_results_private", "lsif_data_apidocs_num_search_results_public", + "rockskip_ancestry", }, FS: getFSForPath("codeintel"), } diff --git a/doc/dev/background-information/ci/reference.md b/doc/dev/background-information/ci/reference.md index c145463d036..5afe39bb399 100644 --- a/doc/dev/background-information/ci/reference.md +++ b/doc/dev/background-information/ci/reference.md @@ -93,8 +93,8 @@ The run type for tags starting with `v`. Default pipeline: - **Pipeline setup**: Trigger async -- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server -- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server +- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server +- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan 
redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server - **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite - **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint - **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build @@ -102,7 +102,7 @@ Default pipeline: - **CI script tests**: test-trace-command.sh - **Integration tests**: Backend integration tests, Code Intel QA - **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade -- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server +- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server - Upload build trace ### Release branch @@ -112,8 +112,8 @@ The run type for branches matching `^[0-9]+\.[0-9]+$` (regexp match). 
Default pipeline: - **Pipeline setup**: Trigger async -- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image -- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server +- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image +- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server - **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite - **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint - **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test 
(enterprise/cmd/frontend/internal/batches/resolvers), Build @@ -121,7 +121,7 @@ Default pipeline: - **CI script tests**: test-trace-command.sh - **Integration tests**: Backend integration tests, Code Intel QA - **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade -- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image +- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image - Upload build trace ### Browser extension release build @@ -149,8 +149,8 @@ The run type for branches matching `main` (exact match). Default pipeline: - **Pipeline setup**: Trigger async -- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image -- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server +- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image +- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan 
indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server - **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite - **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint - **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build @@ -158,7 +158,7 @@ Default pipeline: - **CI script tests**: test-trace-command.sh - **Integration tests**: Backend integration tests, Code Intel QA - **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade -- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image +- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image - Upload build trace ### Main dry run @@ -173,8 +173,8 @@ sg ci build main-dry-run Default pipeline: - **Pipeline setup**: Trigger async -- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image -- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan 
codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server +- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image +- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server - **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite - **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint - **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build @@ -182,7 +182,7 @@ Default pipeline: - **CI script tests**: test-trace-command.sh - **Integration tests**: Backend integration tests, Code Intel QA - **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade -- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server +- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, 
codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server - Upload build trace ### Patch image @@ -219,6 +219,7 @@ Default pipeline: - Build cadvisor - Build codeinsights-db - Build codeintel-db +- Build enterprise-symbols - Build frontend - Build github-proxy - Build gitserver diff --git a/doc/dev/background-information/sql/index.md b/doc/dev/background-information/sql/index.md index d68627fb058..2aaff59c986 100644 --- a/doc/dev/background-information/sql/index.md +++ b/doc/dev/background-information/sql/index.md @@ -6,3 +6,4 @@ Guidance and documentation about writing database interactions within the Source - High-performance guides - [Batch operations](batch_operations.md) - [Materialized cache](materialized_cache.md) +- [Locking behavior](locking_behavior.md) diff --git a/doc/dev/background-information/sql/locking_behavior.md b/doc/dev/background-information/sql/locking_behavior.md new file mode 100644 index 00000000000..988c9f93eff --- /dev/null +++ b/doc/dev/background-information/sql/locking_behavior.md @@ -0,0 +1,31 @@ +# Locking behavior + +When you're using [advisory locks](https://www.postgresql.org/docs/9.1/functions-admin.html#FUNCTIONS-ADVISORY-LOCKS) in Postgres, lock calls stack when executed on the same connection (A.K.A. session): + +- Connection 1 calls `pg_advisory_lock(42)`, acquires the lock and continues +- Connection 1 calls `pg_advisory_lock(42)`, this lock "stacks" with the previous call and continues +- Connection 2 calls `pg_advisory_lock(42)`, this blocks +- Connection 1 calls `pg_advisory_unlock(42)`, this pops one lock call off the stack and continues +- Connection 1 calls `pg_advisory_unlock(42)`, this pops the last lock call off the stack and continues +- Connection 2 finally acquires the lock and continues + +If you get connections from a pool (e.g. the standard `sql` library in Go maintains an internal pool of connections), you need to be aware of the locking behavior otherwise you might get unpredictable behavior or deadlock. You can get deterministic behavior by explicitly taking a connection from the pool (e.g. with `db.Conn()`). 
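+
+As a minimal sketch of the stacking behavior described above (assuming `conn` was checked out of the pool and `ctx` is some `context.Context`; error handling elided):
+
+```go
+conn.ExecContext(ctx, "SELECT pg_advisory_lock(42)")   // acquires the lock
+conn.ExecContext(ctx, "SELECT pg_advisory_lock(42)")   // stacks; same session, returns immediately
+// A second connection calling pg_advisory_lock(42) would block here.
+conn.ExecContext(ctx, "SELECT pg_advisory_unlock(42)") // pops one lock call off the stack
+conn.ExecContext(ctx, "SELECT pg_advisory_unlock(42)") // releases the lock for real
+```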
+
+Here's an example of bad code that can deadlock if the connection happens to be different across lock calls: ❌
+
+```go
+// Grab a write lock
+db.Exec("SELECT pg_advisory_lock(1)")
+// Grab a read lock
+db.Exec("SELECT pg_advisory_lock_shared(1)") // 💥 Can deadlock
+```
+
+Good code explicitly takes a single connection out of the pool first (note that `db.Conn` requires a context and that `*sql.Conn` only has the `*Context` methods): ✅
+
+```go
+conn, err := db.Conn(ctx)
+if err != nil {
+	return err
+}
+defer conn.Close()
+
+// Grab a write lock
+conn.ExecContext(ctx, "SELECT pg_advisory_lock(1)")
+// Grab a read lock
+conn.ExecContext(ctx, "SELECT pg_advisory_lock_shared(1)") // OK, will not block
+```
diff --git a/enterprise/cmd/symbols/Dockerfile b/enterprise/cmd/symbols/Dockerfile
new file mode 100644
index 00000000000..f5762a5ebfa
--- /dev/null
+++ b/enterprise/cmd/symbols/Dockerfile
@@ -0,0 +1,35 @@
+# NOTE: This layer of the docker image is also used in local development as a wrapper around universal-ctags
+FROM sourcegraph/alpine-3.12:120059_2021-12-09_b34c7b2@sha256:9a1fde12f56fea02027cf4caeebdddfedb7b73bf8db6c16f7907a6e04a29134c AS ctags
+# hadolint ignore=DL3002
+USER root
+
+COPY ctags-install-alpine.sh /ctags-install-alpine.sh
+RUN /ctags-install-alpine.sh
+
+FROM sourcegraph/alpine-3.12:120059_2021-12-09_b34c7b2@sha256:9a1fde12f56fea02027cf4caeebdddfedb7b73bf8db6c16f7907a6e04a29134c AS symbols
+
+# TODO(security): This container should not run as root!
+#
+# See https://github.com/sourcegraph/sourcegraph/issues/13237
+# hadolint ignore=DL3002
+USER root
+
+ARG COMMIT_SHA="unknown"
+ARG DATE="unknown"
+ARG VERSION="unknown"
+
+LABEL org.opencontainers.image.revision=${COMMIT_SHA}
+LABEL org.opencontainers.image.created=${DATE}
+LABEL org.opencontainers.image.version=${VERSION}
+LABEL com.sourcegraph.github.url=https://github.com/sourcegraph/sourcegraph/commit/${COMMIT_SHA}
+
+RUN apk add --no-cache bind-tools ca-certificates mailcap tini
+
+COPY ctags-install-alpine.sh /ctags-install-alpine.sh
+RUN /ctags-install-alpine.sh
+
+ENV CACHE_DIR=/mnt/cache/enterprise-symbols
+RUN mkdir -p ${CACHE_DIR}
+EXPOSE 3184
+ENTRYPOINT ["/sbin/tini", "--", "/usr/local/bin/enterprise-symbols"]
+COPY enterprise-symbols /usr/local/bin/
diff --git a/enterprise/cmd/symbols/build.sh b/enterprise/cmd/symbols/build.sh
new file mode 100755
index 00000000000..740c5d72c00
--- /dev/null
+++ b/enterprise/cmd/symbols/build.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+
+# This script builds the symbols docker image.
+
+cd "$(dirname "${BASH_SOURCE[0]}")/../../.."
+set -eu
+
+OUTPUT=$(mktemp -d -t sgdockerbuild_XXXXXXX)
+cleanup() {
+  rm -rf "$OUTPUT"
+}
+trap cleanup EXIT
+
+cp -a ./cmd/symbols/ctags-install-alpine.sh "$OUTPUT"
+
+# Build go binary into $OUTPUT
+./enterprise/cmd/symbols/go-build.sh "$OUTPUT"
+
+echo "--- docker build"
+docker build -f enterprise/cmd/symbols/Dockerfile -t "$IMAGE" "$OUTPUT" \
+  --progress=plain \
+  --build-arg COMMIT_SHA \
+  --build-arg DATE \
+  --build-arg VERSION
diff --git a/enterprise/cmd/symbols/go-build.sh b/enterprise/cmd/symbols/go-build.sh
new file mode 100755
index 00000000000..531d37ef9fe
--- /dev/null
+++ b/enterprise/cmd/symbols/go-build.sh
@@ -0,0 +1,60 @@
+#!/usr/bin/env bash
+
+# This script builds the symbols go binary.
+# Requires a single argument which is the path to the target bindir.
+
+cd "$(dirname "${BASH_SOURCE[0]}")/../../.."
+set -eu
+
+OUTPUT="${1:?no output path provided}"
+
+# Environment for building linux binaries
+export GO111MODULE=on
+export GOARCH=amd64
+export GOOS=linux
+
+# go-sqlite3 depends on cgo. Without cgo, it will build but it'll throw an error at query time.
+export CGO_ENABLED=1
+
+# Default CC to musl-gcc.
+export CC="${CC:-musl-gcc}" + +if ! command -v "$CC" >/dev/null; then + echo "$CC not found. You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine. Run 'apt-get install -y musl-tools'." + exit 1 +fi + +# Make sure this is a musl compiler. +case "$CC" in + *musl*) + ;; + *) + echo "$CC doesn't look like a musl compiler. You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine. Run 'apt-get install -y musl-tools'." + exit 1 + ;; +esac + +echo "--- go build" +pkg="github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols" +env go build \ + -trimpath \ + -ldflags "-X github.com/sourcegraph/sourcegraph/internal/version.version=$VERSION -X github.com/sourcegraph/sourcegraph/internal/version.timestamp=$(date +%s)" \ + -buildmode exe \ + -tags dist \ + -o "$OUTPUT/enterprise-$(basename $pkg)" \ + "$pkg" + +# We can't use -v because the spawned container might not share +# the same file system (e.g. when we're already inside docker +# and the spawned docker container will be a sibling on the host). +# +# A workaround is to feed the file into the container via stdin: +# +# 'cat FILE | docker run ... -i ... sh -c "cat > FILE && ..."' +echo "--- sanity check" +# shellcheck disable=SC2002 +cat "$OUTPUT/enterprise-$(basename $pkg)" | docker run \ + --rm \ + -i \ + sourcegraph/alpine@sha256:ce099fbcd3cf70b338fc4cb2a4e1fa9ae847de21afdb0a849a393b87d94fb174 \ + sh -c "cat > /enterprise-symbols && chmod a+x /enterprise-symbols && env SANITY_CHECK=true /enterprise-symbols" diff --git a/enterprise/cmd/symbols/main.go b/enterprise/cmd/symbols/main.go new file mode 100644 index 00000000000..e0edc4bf4f4 --- /dev/null +++ b/enterprise/cmd/symbols/main.go @@ -0,0 +1,241 @@ +package main + +import ( + "context" + "database/sql" + "log" + "net/http" + "os" + "strings" + + "github.com/sourcegraph/go-ctags" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher" + symbolsGitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver" + symbolsParser "github.com/sourcegraph/sourcegraph/cmd/symbols/parser" + "github.com/sourcegraph/sourcegraph/cmd/symbols/shared" + "github.com/sourcegraph/sourcegraph/cmd/symbols/types" + "github.com/sourcegraph/sourcegraph/enterprise/internal/rockskip" + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/internal/conf" + "github.com/sourcegraph/sourcegraph/internal/conf/conftypes" + connections "github.com/sourcegraph/sourcegraph/internal/database/connections/live" + "github.com/sourcegraph/sourcegraph/internal/env" + gitserver "github.com/sourcegraph/sourcegraph/internal/gitserver" + "github.com/sourcegraph/sourcegraph/internal/goroutine" + "github.com/sourcegraph/sourcegraph/internal/observation" + "github.com/sourcegraph/sourcegraph/internal/search/result" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +func main() { + reposVar := env.Get("ROCKSKIP_REPOS", "", "comma separated list of repositories to index (e.g. 
`github.com/torvalds/linux,github.com/pallets/flask`)") + repos := strings.Split(reposVar, ",") + + if env.Get("USE_ROCKSKIP", "false", "use Rockskip to index the repos specified in ROCKSKIP_REPOS") == "true" { + shared.Main(func(observationContext *observation.Context, gitserverClient symbolsGitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) { + rockskipSearchFunc, rockskipHandleStatus, rockskipBackgroundRoutines, rockskipCtagsCommand, err := SetupRockskip(observationContext, gitserverClient, repositoryFetcher) + if err != nil { + return nil, nil, nil, "", err + } + + // The blanks are the SQLite status endpoint (it's always nil) and the ctags command (same as + // Rockskip's). + sqliteSearchFunc, _, sqliteBackgroundRoutines, _, err := shared.SetupSqlite(observationContext, gitserverClient, repositoryFetcher) + if err != nil { + return nil, nil, nil, "", err + } + + searchFunc := func(ctx context.Context, args types.SearchArgs) (results result.Symbols, err error) { + if sliceContains(repos, string(args.Repo)) { + return rockskipSearchFunc(ctx, args) + } else { + return sqliteSearchFunc(ctx, args) + } + } + + return searchFunc, rockskipHandleStatus, append(rockskipBackgroundRoutines, sqliteBackgroundRoutines...), rockskipCtagsCommand, nil + }) + } else { + shared.Main(shared.SetupSqlite) + } +} + +func SetupRockskip(observationContext *observation.Context, gitserverClient symbolsGitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) { + baseConfig := env.BaseConfig{} + config := LoadRockskipConfig(baseConfig) + if err := baseConfig.Validate(); err != nil { + log.Fatalf("Failed to load configuration: %s", err) + } + + db := mustInitializeCodeIntelDB() + git := NewGitserver(repositoryFetcher) + createParser := func() rockskip.ParseSymbolsFunc { return createParserWithConfig(config.Ctags) } + server, err := rockskip.NewService(db, git, createParser, config.MaxConcurrentlyIndexing, config.MaxRepos, config.LogQueries, config.IndexRequestsQueueSize, config.SymbolsCacheSize, config.PathSymbolsCacheSize) + if err != nil { + return nil, nil, nil, config.Ctags.Command, err + } + + return server.Search, server.HandleStatus, nil, config.Ctags.Command, nil +} + +type RockskipConfig struct { + Ctags types.CtagsConfig + RepositoryFetcher types.RepositoryFetcherConfig + MaxRepos int + LogQueries bool + IndexRequestsQueueSize int + MaxConcurrentlyIndexing int + SymbolsCacheSize int + PathSymbolsCacheSize int +} + +func LoadRockskipConfig(baseConfig env.BaseConfig) RockskipConfig { + return RockskipConfig{ + Ctags: types.LoadCtagsConfig(baseConfig), + RepositoryFetcher: types.LoadRepositoryFetcherConfig(baseConfig), + MaxRepos: baseConfig.GetInt("MAX_REPOS", "1000", "maximum number of repositories to store in Postgres, with LRU eviction"), + LogQueries: baseConfig.GetBool("LOG_QUERIES", "false", "print search queries to stdout"), + IndexRequestsQueueSize: baseConfig.GetInt("INDEX_REQUESTS_QUEUE_SIZE", "1000", "how many index requests can be queued at once, at which point new requests will be rejected"), + MaxConcurrentlyIndexing: baseConfig.GetInt("MAX_CONCURRENTLY_INDEXING", "4", "maximum number of repositories being indexed at a time (also limits ctags processes)"), + SymbolsCacheSize: baseConfig.GetInt("SYMBOLS_CACHE_SIZE", "1000000", "how many tuples of 
(path, symbol name, int ID) to cache in memory"), + PathSymbolsCacheSize: baseConfig.GetInt("PATH_SYMBOLS_CACHE_SIZE", "100000", "how many sets of symbols for files to cache in memory"), + } +} + +func createParserWithConfig(config types.CtagsConfig) rockskip.ParseSymbolsFunc { + parser := mustCreateCtagsParser(config) + + return func(path string, bytes []byte) (symbols []rockskip.Symbol, err error) { + entries, err := parser.Parse(path, bytes) + if err != nil { + return nil, err + } + + symbols = []rockskip.Symbol{} + for _, entry := range entries { + symbols = append(symbols, rockskip.Symbol{ + Name: entry.Name, + Parent: entry.Parent, + Kind: entry.Kind, + Line: entry.Line, + }) + } + + return symbols, nil + } +} + +func mustCreateCtagsParser(ctagsConfig types.CtagsConfig) ctags.Parser { + options := ctags.Options{ + Bin: ctagsConfig.Command, + PatternLengthLimit: ctagsConfig.PatternLengthLimit, + } + if ctagsConfig.LogErrors { + options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags) + } + if ctagsConfig.DebugLogs { + options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags) + } + + parser, err := ctags.New(options) + if err != nil { + log.Fatalf("Failed to create new ctags parser: %s", err) + } + + return symbolsParser.NewFilteringParser(parser, ctagsConfig.MaxFileSize, ctagsConfig.MaxSymbols) +} + +func mustInitializeCodeIntelDB() *sql.DB { + dsn := conf.GetServiceConnectionValueAndRestartOnChange(func(serviceConnections conftypes.ServiceConnections) string { + return serviceConnections.CodeIntelPostgresDSN + }) + var ( + db *sql.DB + err error + ) + db, err = connections.EnsureNewCodeIntelDB(dsn, "symbols", &observation.TestContext) + if err != nil { + log.Fatalf("Failed to connect to codeintel database: %s", err) + } + + return db +} + +type Gitserver struct { + repositoryFetcher fetcher.RepositoryFetcher +} + +func NewGitserver(repositoryFetcher fetcher.RepositoryFetcher) Gitserver { + return Gitserver{repositoryFetcher: repositoryFetcher} +} + +func (g Gitserver) LogReverseEach(repo string, commit string, n int, onLogEntry func(entry rockskip.LogEntry) error) error { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + command := gitserver.DefaultClient.Command("git", rockskip.LogReverseArgs(n, commit)...) + command.Repo = api.RepoName(repo) + // We run a single `git log` command and stream the output while the repo is being processed, which + // can take much longer than 1 minute (the default timeout). + command.DisableTimeout() + stdout, err := gitserver.StdoutReader(ctx, command) + if err != nil { + return err + } + defer stdout.Close() + + return errors.Wrap(rockskip.ParseLogReverseEach(stdout, onLogEntry), "ParseLogReverseEach") +} + +func (g Gitserver) RevListEach(repo string, commit string, onCommit func(commit string) (shouldContinue bool, err error)) error { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + command := gitserver.DefaultClient.Command("git", rockskip.RevListArgs(commit)...) 
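+	// As with `git log` in LogReverseEach above, the `git rev-list` output is
+	// streamed while the repo is processed, which is why the default command
+	// timeout is disabled below.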
+ command.Repo = api.RepoName(repo) + command.DisableTimeout() + stdout, err := gitserver.StdoutReader(ctx, command) + if err != nil { + return err + } + defer stdout.Close() + + return rockskip.RevListEach(stdout, onCommit) +} + +func (g Gitserver) ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error { + if len(paths) == 0 { + return nil + } + + args := types.SearchArgs{Repo: api.RepoName(repo), CommitID: api.CommitID(commit)} + parseRequestOrErrors := g.repositoryFetcher.FetchRepositoryArchive(context.TODO(), args, paths) + defer func() { + // Ensure the channel is drained + for range parseRequestOrErrors { + } + }() + + for parseRequestOrError := range parseRequestOrErrors { + if parseRequestOrError.Err != nil { + return errors.Wrap(parseRequestOrError.Err, "FetchRepositoryArchive") + } + + err := onFile(parseRequestOrError.ParseRequest.Path, parseRequestOrError.ParseRequest.Data) + if err != nil { + return err + } + } + + return nil +} + +func sliceContains(slice []string, s string) bool { + for _, v := range slice { + if v == s { + return true + } + } + return false +} diff --git a/enterprise/dev/ci/images/images.go b/enterprise/dev/ci/images/images.go index 65af3e4f58b..44bc6c5d316 100644 --- a/enterprise/dev/ci/images/images.go +++ b/enterprise/dev/ci/images/images.go @@ -61,6 +61,7 @@ var DeploySourcegraphDockerImages = []string{ "cadvisor", "codeinsights-db", "codeintel-db", + "enterprise-symbols", "frontend", "github-proxy", "gitserver", diff --git a/enterprise/internal/rockskip/git.go b/enterprise/internal/rockskip/git.go new file mode 100644 index 00000000000..75d0ab36b81 --- /dev/null +++ b/enterprise/internal/rockskip/git.go @@ -0,0 +1,226 @@ +package rockskip + +import ( + "bufio" + "fmt" + "io" + + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +type LogEntry struct { + Commit string + PathStatuses []PathStatus +} + +type PathStatus struct { + Path string + Status StatusAMD +} + +type CommitStatus struct { + Commit string + Status StatusAMD +} + +type StatusAMD int + +const ( + AddedAMD StatusAMD = 0 + ModifiedAMD StatusAMD = 1 + DeletedAMD StatusAMD = 2 +) + +type StatusAD int + +const ( + AddedAD StatusAD = 0 + DeletedAD StatusAD = 1 +) + +type Git interface { + LogReverseEach(repo string, commit string, n int, onLogEntry func(logEntry LogEntry) error) error + RevListEach(repo string, commit string, onCommit func(commit string) (shouldContinue bool, err error)) error + ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error +} + +func LogReverseArgs(n int, givenCommit string) []string { + return []string{ + "log", + "--pretty=%H %P", + "--raw", + "-z", + "-m", + // --no-abbrev speeds up git log a lot + "--no-abbrev", + "--no-renames", + "--first-parent", + "--reverse", + "--ignore-submodules", + fmt.Sprintf("-%d", n), + givenCommit, + } +} + +func ParseLogReverseEach(stdout io.Reader, onLogEntry func(entry LogEntry) error) error { + reader := bufio.NewReader(stdout) + + var buf []byte + + for { + // abc... ... NULL '\n'? 
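+		// That is, the parser below expects each entry emitted by
+		// `git log --pretty=%H %P -z` to look like:
+		//
+		//   <40-char commit hash> <parent hashes...> NUL ['\n' <path statuses...>]
+		//
+		// The commit line is NUL-terminated, and a '\n' after the NUL (if present)
+		// introduces the raw `:<mode> <mode> <hash> <hash> <status>` records that
+		// are handled further down.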
+ + // Read the commit + commitBytes, err := reader.Peek(40) + if err == io.EOF { + break + } else if err != nil { + return err + } + commit := string(commitBytes) + + // Skip past the NULL byte + _, err = reader.ReadBytes(0) + if err != nil { + return err + } + + // A '\n' indicates a list of paths and their statuses is next + buf, err = reader.Peek(1) + if err == io.EOF { + err = onLogEntry(LogEntry{Commit: commit, PathStatuses: []PathStatus{}}) + if err != nil { + return err + } + break + } else if err != nil { + return err + } + if buf[0] == '\n' { + // A list of paths and their statuses is next + + // Skip the '\n' + discarded, err := reader.Discard(1) + if discarded != 1 { + return errors.Newf("discarded %d bytes, expected 1", discarded) + } else if err != nil { + return err + } + + pathStatuses := []PathStatus{} + for { + // :100644 100644 abc... def... M NULL file.txt NULL + // ^ 0 ^ 97 ^ 99 + + // A ':' indicates a path and its status is next + buf, err = reader.Peek(1) + if err == io.EOF { + break + } else if err != nil { + return err + } + if buf[0] != ':' { + break + } + + // Read the status from index 97 and skip to the path at index 99 + buf = make([]byte, 99) + read, err := io.ReadFull(reader, buf) + if read != 99 { + return errors.Newf("read %d bytes, expected 99", read) + } else if err != nil { + return err + } + + // Read the path + path, err := reader.ReadBytes(0) + if err != nil { + return err + } + path = path[:len(path)-1] // Drop the trailing NULL byte + + // Inspect the status + var status StatusAMD + statusByte := buf[97] + switch statusByte { + case 'A': + status = AddedAMD + case 'M': + status = ModifiedAMD + case 'D': + status = DeletedAMD + case 'T': + // Type changed. Check if it changed from a file to a submodule or vice versa, + // treating submodules as empty. + + isSubmodule := func(mode string) bool { + // Submodules are mode "160000". https://stackoverflow.com/questions/737673/how-to-read-the-mode-field-of-git-ls-trees-output#comment3519596_737877 + return mode == "160000" + } + + oldMode := string(buf[1:7]) + newMode := string(buf[8:14]) + + if isSubmodule(oldMode) && !isSubmodule(newMode) { + // It changed from a submodule to a file, so consider it added. + status = AddedAMD + break + } + + if !isSubmodule(oldMode) && isSubmodule(newMode) { + // It changed from a file to a submodule, so consider it deleted. + status = DeletedAMD + break + } + + // Otherwise, it remained the same, so ignore the type change. 
+				continue
+			case 'C':
+				// Copied
+				return errors.Newf("unexpected status 'C' given --no-renames was specified")
+			case 'R':
+				// Renamed
+				return errors.Newf("unexpected status 'R' given --no-renames was specified")
+			case 'X':
+				return errors.Newf("unexpected status 'X' indicates a bug in git")
+			default:
+				fmt.Printf("LogReverse commit %q path %q: unrecognized diff status %q, skipping\n", commit, path, string(statusByte))
+				continue
+			}
+
+			pathStatuses = append(pathStatuses, PathStatus{Path: string(path), Status: status})
+		}
+
+		err = onLogEntry(LogEntry{Commit: commit, PathStatuses: pathStatuses})
+		if err != nil {
+			return err
+		}
+	}
+	}
+
+	return nil
+}
+
+func RevListArgs(givenCommit string) []string {
+	return []string{"rev-list", "--first-parent", givenCommit}
+}
+
+func RevListEach(stdout io.Reader, onCommit func(commit string) (shouldContinue bool, err error)) error {
+	reader := bufio.NewReader(stdout)
+
+	for {
+		commit, err := reader.ReadString('\n')
+		if err == io.EOF {
+			break
+		} else if err != nil {
+			return err
+		}
+		commit = commit[:len(commit)-1] // Drop the trailing newline
+		shouldContinue, err := onCommit(commit)
+		// Surface the error even when onCommit asks to continue, so that a
+		// non-nil error is never silently dropped.
+		if err != nil {
+			return err
+		}
+		if !shouldContinue {
+			return nil
+		}
+	}
+
+	return nil
+}
diff --git a/enterprise/internal/rockskip/index.go b/enterprise/internal/rockskip/index.go
new file mode 100644
index 00000000000..649b43e12e6
--- /dev/null
+++ b/enterprise/internal/rockskip/index.go
@@ -0,0 +1,346 @@
+package rockskip
+
+import (
+	"context"
+	"fmt"
+
+	"k8s.io/utils/lru"
+
+	"github.com/inconshreveable/log15"
+
+	"github.com/sourcegraph/sourcegraph/lib/errors"
+)
+
+func (s *Service) Index(ctx context.Context, repo, givenCommit string) (err error) {
+	threadStatus := s.status.NewThreadStatus(fmt.Sprintf("indexing %s@%s", repo, givenCommit))
+	defer threadStatus.End()
+
+	tasklog := threadStatus.Tasklog
+
+	// Get a fresh connection from the DB pool to get deterministic "lock stacking" behavior.
+	// See doc/dev/background-information/sql/locking_behavior.md for more details.
+	conn, err := s.db.Conn(ctx)
+	if err != nil {
+		return errors.Wrap(err, "failed to get connection for indexing")
+	}
+	defer conn.Close()
+
+	// Acquire the indexing lock on the repo.
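+	// The lock must be taken on the dedicated connection above so that the lock
+	// and unlock calls happen in the same Postgres session. It serializes indexers
+	// of the same repo against each other and against deletion (see postgres.go).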
+ releaseLock, err := iLock(ctx, conn, threadStatus, repo) + if err != nil { + return err + } + defer func() { err = errors.CombineErrors(err, releaseLock()) }() + + tipCommit := NULL + tipCommitHash := "" + tipHeight := 0 + + var repoId int + err = conn.QueryRowContext(ctx, "SELECT id FROM rockskip_repos WHERE repo = $1", repo).Scan(&repoId) + if err != nil { + return errors.Wrapf(err, "failed to get repo id for %s", repo) + } + + missingCount := 0 + tasklog.Start("RevList") + err = s.git.RevListEach(repo, givenCommit, func(commitHash string) (shouldContinue bool, err error) { + defer tasklog.Continue("RevList") + + tasklog.Start("GetCommitByHash") + commit, height, present, err := GetCommitByHash(ctx, conn, repoId, commitHash) + if err != nil { + return false, err + } else if present { + tipCommit = commit + tipCommitHash = commitHash + tipHeight = height + return false, nil + } + missingCount += 1 + return true, nil + }) + if err != nil { + return errors.Wrap(err, "RevList") + } + + threadStatus.SetProgress(0, missingCount) + + if missingCount == 0 { + return nil + } + + parse := s.createParser() + + symbolCache := newSymbolIdCache(s.symbolsCacheSize) + pathSymbolsCache := newPathSymbolsCache(s.pathSymbolsCacheSize) + + tasklog.Start("Log") + entriesIndexed := 0 + err = s.git.LogReverseEach(repo, givenCommit, missingCount, func(entry LogEntry) error { + defer tasklog.Continue("Log") + + threadStatus.SetProgress(entriesIndexed, missingCount) + entriesIndexed++ + + tx, err := conn.BeginTx(ctx, nil) + if err != nil { + return errors.Wrap(err, "begin transaction") + } + defer tx.Rollback() + + hops, err := getHops(ctx, tx, tipCommit, tasklog) + if err != nil { + return errors.Wrap(err, "getHops") + } + + r := ruler(tipHeight + 1) + if r >= len(hops) { + return errors.Newf("ruler(%d) = %d is out of range of len(hops) = %d", tipHeight+1, r, len(hops)) + } + + tasklog.Start("InsertCommit") + commit, err := InsertCommit(ctx, tx, repoId, entry.Commit, tipHeight+1, hops[r]) + if err != nil { + return errors.Wrap(err, "InsertCommit") + } + + tasklog.Start("AppendHop+") + err = AppendHop(ctx, tx, repoId, hops[0:r], AddedAD, commit) + if err != nil { + return errors.Wrap(err, "AppendHop (added)") + } + tasklog.Start("AppendHop-") + err = AppendHop(ctx, tx, repoId, hops[0:r], DeletedAD, commit) + if err != nil { + return errors.Wrap(err, "AppendHop (deleted)") + } + + deletedPaths := []string{} + addedPaths := []string{} + for _, pathStatus := range entry.PathStatuses { + if pathStatus.Status == DeletedAMD || pathStatus.Status == ModifiedAMD { + deletedPaths = append(deletedPaths, pathStatus.Path) + } + if pathStatus.Status == AddedAMD || pathStatus.Status == ModifiedAMD { + addedPaths = append(addedPaths, pathStatus.Path) + } + } + + getSymbols := func(commit string, paths []string) (map[string]map[string]struct{}, error) { + pathToSymbols := map[string]map[string]struct{}{} + pathsToFetchSet := map[string]struct{}{} + for _, path := range paths { + pathsToFetchSet[path] = struct{}{} + } + + // Don't fetch files that are already in the cache. 
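+			// Only files at the current tip commit can be served from the cache:
+			// deleted/modified paths are read at the old tip, while files parsed at
+			// the new commit are cached below and become the tip on the next log entry.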
+ if commit == tipCommitHash { + for _, path := range paths { + if symbols, ok := pathSymbolsCache.get(path); ok { + pathToSymbols[path] = symbols + delete(pathsToFetchSet, path) + } + } + } + + pathsToFetch := []string{} + for path := range pathsToFetchSet { + pathsToFetch = append(pathsToFetch, path) + } + + tasklog.Start("ArchiveEach") + err = s.git.ArchiveEach(repo, commit, pathsToFetch, func(path string, contents []byte) error { + defer tasklog.Continue("ArchiveEach") + + tasklog.Start("parse") + symbols, err := parse(path, contents) + if err != nil { + return errors.Wrap(err, "parse") + } + + pathToSymbols[path] = map[string]struct{}{} + for _, symbol := range symbols { + pathToSymbols[path][symbol.Name] = struct{}{} + } + + return nil + }) + + if err != nil { + return nil, errors.Wrap(err, "while looping ArchiveEach") + } + + // Cache the symbols we just parsed. + if commit != tipCommitHash { + for path, symbols := range pathToSymbols { + pathSymbolsCache.set(path, symbols) + } + } + + return pathToSymbols, nil + } + + symbolsFromDeletedFiles, err := getSymbols(tipCommitHash, deletedPaths) + if err != nil { + return errors.Wrap(err, "getSymbols (deleted)") + } + symbolsFromAddedFiles, err := getSymbols(entry.Commit, addedPaths) + if err != nil { + return errors.Wrap(err, "getSymbols (added)") + } + + // Compute the symmetric difference of symbols between the added and deleted paths. + deletedSymbols := map[string]map[string]struct{}{} + addedSymbols := map[string]map[string]struct{}{} + for _, pathStatus := range entry.PathStatuses { + switch pathStatus.Status { + case DeletedAMD: + deletedSymbols[pathStatus.Path] = symbolsFromDeletedFiles[pathStatus.Path] + case AddedAMD: + addedSymbols[pathStatus.Path] = symbolsFromAddedFiles[pathStatus.Path] + case ModifiedAMD: + deletedSymbols[pathStatus.Path] = map[string]struct{}{} + addedSymbols[pathStatus.Path] = map[string]struct{}{} + for name := range symbolsFromDeletedFiles[pathStatus.Path] { + if _, ok := symbolsFromAddedFiles[pathStatus.Path][name]; !ok { + deletedSymbols[pathStatus.Path][name] = struct{}{} + } + } + for name := range symbolsFromAddedFiles[pathStatus.Path] { + if _, ok := symbolsFromDeletedFiles[pathStatus.Path][name]; !ok { + addedSymbols[pathStatus.Path][name] = struct{}{} + } + } + } + } + + for path, symbols := range deletedSymbols { + for symbol := range symbols { + id := 0 + ok := false + if id, ok = symbolCache.get(path, symbol); !ok { + found := false + for _, hop := range hops { + tasklog.Start("GetSymbol") + id, found, err = GetSymbol(ctx, tx, repoId, path, symbol, hop) + if err != nil { + return err + } + if found { + break + } + } + if !found { + // We did not find the symbol that (supposedly) has been deleted, so ignore the + // deletion. This will probably lead to extra symbols in search results. 
+ // + // The last time this happened, it was caused by impurity in ctags where the + // result of parsing a file was affected by previously parsed files and not fully + // determined by the file itself: + // + // https://github.com/universal-ctags/ctags/pull/3300 + log15.Error("Could not find symbol that was supposedly deleted", "repo", repo, "commit", commit, "path", path, "symbol", symbol) + continue + } + } + + tasklog.Start("UpdateSymbolHops") + err = UpdateSymbolHops(ctx, tx, id, DeletedAD, commit) + if err != nil { + return errors.Wrap(err, "UpdateSymbolHops") + } + } + } + + for path, symbols := range addedSymbols { + for symbol := range symbols { + tasklog.Start("InsertSymbol") + id, err := InsertSymbol(ctx, tx, commit, repoId, path, symbol) + if err != nil { + return errors.Wrap(err, "InsertSymbol") + } + symbolCache.set(path, symbol, id) + } + } + + tasklog.Start("DeleteRedundant") + err = DeleteRedundant(ctx, tx, commit) + if err != nil { + return errors.Wrap(err, "DeleteRedundant") + } + + tasklog.Start("CommitTx") + err = tx.Commit() + if err != nil { + return errors.Wrap(err, "commit transaction") + } + + tipCommit = commit + tipCommitHash = entry.Commit + tipHeight += 1 + + return nil + }) + if err != nil { + return errors.Wrap(err, "LogReverseEach") + } + + threadStatus.SetProgress(entriesIndexed, missingCount) + + return nil +} + +type repoCommit struct { + repo string + commit string +} + +type indexRequest struct { + repoCommit + done chan struct{} +} + +type symbolIdCache struct { + cache *lru.Cache +} + +func newSymbolIdCache(size int) *symbolIdCache { + return &symbolIdCache{cache: lru.New(size)} +} + +func (s *symbolIdCache) get(path, symbol string) (int, bool) { + v, ok := s.cache.Get(symbolIdCacheKey(path, symbol)) + if !ok { + return 0, false + } + return v.(int), true +} + +func (s *symbolIdCache) set(path, symbol string, id int) { + s.cache.Add(symbolIdCacheKey(path, symbol), id) +} + +func symbolIdCacheKey(path, symbol string) string { + return path + ":" + symbol +} + +type pathSymbolsCache struct { + cache *lru.Cache +} + +func newPathSymbolsCache(size int) *pathSymbolsCache { + return &pathSymbolsCache{cache: lru.New(size)} +} + +func (s *pathSymbolsCache) get(path string) (map[string]struct{}, bool) { + v, ok := s.cache.Get(path) + if !ok { + return nil, false + } + return v.(map[string]struct{}), true +} + +func (s *pathSymbolsCache) set(path string, symbols map[string]struct{}) { + s.cache.Add(path, symbols) +} diff --git a/enterprise/internal/rockskip/postgres.go b/enterprise/internal/rockskip/postgres.go new file mode 100644 index 00000000000..e40d38c9c7b --- /dev/null +++ b/enterprise/internal/rockskip/postgres.go @@ -0,0 +1,364 @@ +package rockskip + +import ( + "context" + "database/sql" + "fmt" + + pg "github.com/lib/pq" + "github.com/segmentio/fasthash/fnv1" + + "github.com/sourcegraph/sourcegraph/internal/database/basestore" + "github.com/sourcegraph/sourcegraph/internal/database/dbutil" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +type CommitId = int + +func GetCommitById(ctx context.Context, db dbutil.DB, givenCommit CommitId) (commitHash string, ancestor CommitId, height int, present bool, err error) { + err = db.QueryRowContext(ctx, ` + SELECT commit_id, ancestor, height + FROM rockskip_ancestry + WHERE id = $1 + `, givenCommit).Scan(&commitHash, &ancestor, &height) + if err == sql.ErrNoRows { + return "", 0, 0, false, nil + } else if err != nil { + return "", 0, 0, false, errors.Newf("GetCommitById: %s", err) + } + return 
commitHash, ancestor, height, true, nil +} + +func GetCommitByHash(ctx context.Context, db dbutil.DB, repoId int, commitHash string) (commit CommitId, height int, present bool, err error) { + err = db.QueryRowContext(ctx, ` + SELECT id, height + FROM rockskip_ancestry + WHERE repo_id = $1 AND commit_id = $2 + `, repoId, commitHash).Scan(&commit, &height) + if err == sql.ErrNoRows { + return 0, 0, false, nil + } else if err != nil { + return 0, 0, false, errors.Newf("GetCommitByHash: %s", err) + } + return commit, height, true, nil +} + +func InsertCommit(ctx context.Context, db dbutil.DB, repoId int, commitHash string, height int, ancestor CommitId) (id CommitId, err error) { + err = db.QueryRowContext(ctx, ` + INSERT INTO rockskip_ancestry (commit_id, repo_id, height, ancestor) + VALUES ($1, $2, $3, $4) + RETURNING id + `, commitHash, repoId, height, ancestor).Scan(&id) + return id, errors.Wrap(err, "InsertCommit") +} + +func GetSymbol(ctx context.Context, db dbutil.DB, repoId int, path string, name string, hop CommitId) (id int, found bool, err error) { + err = db.QueryRowContext(ctx, ` + SELECT id + FROM rockskip_symbols + WHERE repo_id = $1 AND path = $2 AND name = $3 AND $4 && added AND NOT $4 && deleted + `, repoId, path, name, pg.Array([]int{hop})).Scan(&id) + if err == sql.ErrNoRows { + return 0, false, nil + } else if err != nil { + return 0, false, errors.Newf("GetSymbol: %s", err) + } + return id, true, nil +} + +func UpdateSymbolHops(ctx context.Context, db dbutil.DB, id int, status StatusAD, hop CommitId) error { + column := statusADToColumn(status) + _, err := db.ExecContext(ctx, fmt.Sprintf(` + UPDATE rockskip_symbols + SET %s = array_append(%s, $1) + WHERE id = $2 + `, column, column), hop, id) + return errors.Wrap(err, "UpdateSymbolHops") +} + +func InsertSymbol(ctx context.Context, db dbutil.DB, hop CommitId, repoId int, path string, name string) (id int, err error) { + err = db.QueryRowContext(ctx, ` + INSERT INTO rockskip_symbols (added, deleted, repo_id, path, name) + VALUES ($1 , $2 , $3 , $4 , $5 ) + RETURNING id + `, pg.Array([]int{hop}), pg.Array([]int{}), repoId, path, name).Scan(&id) + return id, errors.Wrap(err, "InsertSymbol") +} + +func AppendHop(ctx context.Context, db dbutil.DB, repoId int, hops []CommitId, givenStatus StatusAD, newHop CommitId) error { + column := statusADToColumn(givenStatus) + _, err := db.ExecContext(ctx, fmt.Sprintf(` + UPDATE rockskip_symbols + SET %s = array_append(%s, $1) + WHERE $2 && singleton_integer(repo_id) AND $3 && %s + `, column, column, column), newHop, pg.Array([]int{repoId}), pg.Array(hops)) + return errors.Wrap(err, "AppendHop") +} + +func DeleteRedundant(ctx context.Context, db dbutil.DB, hop CommitId) error { + _, err := db.ExecContext(ctx, ` + UPDATE rockskip_symbols + SET added = array_remove(added, $1), deleted = array_remove(deleted, $1) + WHERE $2 && added AND $2 && deleted + `, hop, pg.Array([]int{hop})) + return errors.Wrap(err, "DeleteRedundant") +} + +func tryDeleteOldestRepo(ctx context.Context, db *sql.Conn, maxRepos int, threadStatus *ThreadStatus) (more bool, err error) { + defer threadStatus.Tasklog.Continue("idle") + + // Select a candidate repo to delete. 
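+	// Repos are ranked by how recently they were accessed; any repo ranked beyond
+	// maxRepos is eligible for eviction, oldest first (LRU).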
+ threadStatus.Tasklog.Start("select repo to delete") + var repoId int + var repo string + var repoRank int + err = db.QueryRowContext(ctx, ` + SELECT id, repo, repo_rank + FROM ( + SELECT *, RANK() OVER (ORDER BY last_accessed_at DESC) repo_rank + FROM rockskip_repos + ) sub + WHERE repo_rank > $1 + ORDER BY last_accessed_at ASC + LIMIT 1;`, maxRepos, + ).Scan(&repoId, &repo, &repoRank) + if err == sql.ErrNoRows { + // No more repos to delete. + return false, nil + } + if err != nil { + return false, errors.Wrap(err, "selecting repo to delete") + } + + // Note: a search request or deletion could have intervened here. + + // Acquire the write lock on the repo. + releaseWLock, err := wLock(ctx, db, threadStatus, repo) + defer func() { err = errors.CombineErrors(err, releaseWLock()) }() + if err != nil { + return false, errors.Wrap(err, "acquiring write lock on repo") + } + + // Make sure the repo is still old. See note above. + var rank int + threadStatus.Tasklog.Start("recheck repo rank") + err = db.QueryRowContext(ctx, ` + SELECT repo_rank + FROM ( + SELECT id, RANK() OVER (ORDER BY last_accessed_at DESC) repo_rank + FROM rockskip_repos + ) sub + WHERE id = $1;`, repoId, + ).Scan(&rank) + if err == sql.ErrNoRows { + // The repo was deleted in the meantime, so retry. + return true, nil + } + if err != nil { + return false, errors.Wrap(err, "selecting repo rank") + } + if rank <= maxRepos { + // An intervening search request must have refreshed the repo, so retry. + return true, nil + } + + // Acquire the indexing lock on the repo. + releaseILock, err := iLock(ctx, db, threadStatus, repo) + defer func() { err = errors.CombineErrors(err, releaseILock()) }() + if err != nil { + return false, errors.Wrap(err, "acquiring indexing lock on repo") + } + + // Delete the repo. 
+ threadStatus.Tasklog.Start("delete repo") + tx, err := db.BeginTx(ctx, nil) + defer tx.Rollback() + if err != nil { + return false, err + } + _, err = tx.ExecContext(ctx, "DELETE FROM rockskip_ancestry WHERE repo_id = $1;", repoId) + if err != nil { + return false, err + } + _, err = tx.ExecContext(ctx, "DELETE FROM rockskip_symbols WHERE repo_id = $1;", pg.Array([]int{repoId})) + if err != nil { + return false, err + } + _, err = tx.ExecContext(ctx, "DELETE FROM rockskip_repos WHERE id = $1;", repoId) + if err != nil { + return false, err + } + err = tx.Commit() + if err != nil { + return false, err + } + + return true, nil +} + +func PrintInternals(ctx context.Context, db dbutil.DB) error { + fmt.Println("Commit ancestry:") + fmt.Println() + + // print all rows in the rockskip_ancestry table + rows, err := db.QueryContext(ctx, ` + SELECT a1.commit_id, a1.height, a2.commit_id + FROM rockskip_ancestry a1 + JOIN rockskip_ancestry a2 ON a1.ancestor = a2.id + ORDER BY height ASC + `) + if err != nil { + return errors.Wrap(err, "PrintInternals") + } + defer rows.Close() + + for rows.Next() { + var commit, ancestor string + var height int + err = rows.Scan(&commit, &height, &ancestor) + if err != nil { + return errors.Wrap(err, "PrintInternals: Scan") + } + fmt.Printf("height %3d commit %s ancestor %s\n", height, commit, ancestor) + } + + fmt.Println() + fmt.Println("Symbols:") + fmt.Println() + + rows, err = db.QueryContext(ctx, ` + SELECT id, path, name, added, deleted + FROM rockskip_symbols + ORDER BY id ASC + `) + if err != nil { + return errors.Wrap(err, "PrintInternals") + } + + for rows.Next() { + var id int + var path string + var name string + var added, deleted []int64 + err = rows.Scan(&id, &path, &name, pg.Array(&added), pg.Array(&deleted)) + if err != nil { + return errors.Wrap(err, "PrintInternals: Scan") + } + fmt.Printf(" id %d path %-10s symbol %s\n", id, path, name) + for _, a := range added { + hash, _, _, _, err := GetCommitById(ctx, db, int(a)) + if err != nil { + return err + } + fmt.Printf(" + %-40s\n", hash) + } + fmt.Println() + for _, d := range deleted { + hash, _, _, _, err := GetCommitById(ctx, db, int(d)) + if err != nil { + return err + } + fmt.Printf(" - %-40s\n", hash) + } + fmt.Println() + + } + + fmt.Println() + return nil +} + +func updateLastAccessedAt(ctx context.Context, db dbutil.DB, repo string) (id int, err error) { + err = db.QueryRowContext(ctx, ` + INSERT INTO rockskip_repos (repo, last_accessed_at) + VALUES ($1, now()) + ON CONFLICT (repo) + DO UPDATE SET last_accessed_at = now() + RETURNING id + `, repo).Scan(&id) + if err != nil { + return 0, err + } + + return id, nil +} + +func statusADToColumn(status StatusAD) string { + switch status { + case AddedAD: + return "added" + case DeletedAD: + return "deleted" + default: + fmt.Println("unexpected status StatusAD: ", status) + return "unknown_status" + } +} + +var RW_LOCKS_NAMESPACE = int32(fnv1.HashString32("symbols-rw")) +var INDEXING_LOCKS_NAMESPACE = int32(fnv1.HashString32("symbols-indexing")) + +func lock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, namespace int32, name, repo, lockFn, unlockFn string) (func() error, error) { + key := int32(fnv1.HashString32(repo)) + + threadStatus.Tasklog.Start(name) + _, err := db.ExecContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, lockFn), namespace, key) + if err != nil { + return nil, errors.Newf("acquire %s: %s", name, err) + } + threadStatus.HoldLock(name) + + release := func() error { + _, err := db.ExecContext(ctx, 
fmt.Sprintf(`SELECT %s($1, $2)`, unlockFn), namespace, key) + if err != nil { + return errors.Newf("release %s: %s", name, err) + } + threadStatus.ReleaseLock(name) + return nil + } + + return release, nil +} + +func tryLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, namespace int32, name, repo, lockFn, unlockFn string) (bool, func() error, error) { + key := int32(fnv1.HashString32(repo)) + + threadStatus.Tasklog.Start(name) + locked, _, err := basestore.ScanFirstBool(db.QueryContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, lockFn), namespace, key)) + if err != nil { + return false, nil, errors.Newf("try acquire %s: %s", name, err) + } + + if !locked { + return false, nil, nil + } + + threadStatus.HoldLock(name) + + release := func() error { + _, err := db.ExecContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, unlockFn), namespace, key) + if err != nil { + return errors.Newf("release %s: %s", name, err) + } + threadStatus.ReleaseLock(name) + return nil + } + + return true, release, nil +} + +// tryRLock attempts to acquire a read lock on the repo. +func tryRLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (bool, func() error, error) { + return tryLock(ctx, db, threadStatus, RW_LOCKS_NAMESPACE, "rLock", repo, "pg_try_advisory_lock_shared", "pg_advisory_unlock_shared") +} + +// wLock acquires the write lock on the repo. It blocks only when another connection holds a read or the +// write lock. That means a single connection can acquire the write lock while holding a read lock. +func wLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (func() error, error) { + return lock(ctx, db, threadStatus, RW_LOCKS_NAMESPACE, "wLock", repo, "pg_advisory_lock", "pg_advisory_unlock") +} + +// iLock acquires the indexing lock on the repo. +func iLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (func() error, error) { + return lock(ctx, db, threadStatus, INDEXING_LOCKS_NAMESPACE, "iLock", repo, "pg_advisory_lock", "pg_advisory_unlock") +} diff --git a/enterprise/internal/rockskip/search.go b/enterprise/internal/rockskip/search.go new file mode 100644 index 00000000000..54beb8b6519 --- /dev/null +++ b/enterprise/internal/rockskip/search.go @@ -0,0 +1,469 @@ +package rockskip + +import ( + "context" + "database/sql" + "database/sql/driver" + "fmt" + "strings" + "time" + + "github.com/grafana/regexp" + "github.com/grafana/regexp/syntax" + "github.com/keegancsmith/sqlf" + pg "github.com/lib/pq" + "github.com/segmentio/fasthash/fnv1" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/types" + "github.com/sourcegraph/sourcegraph/internal/database/dbutil" + "github.com/sourcegraph/sourcegraph/internal/search/result" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +func (s *Service) Search(ctx context.Context, args types.SearchArgs) (symbols []result.Symbol, err error) { + repo := string(args.Repo) + commitHash := string(args.CommitID) + + threadStatus := s.status.NewThreadStatus(fmt.Sprintf("searching %+v", args)) + if s.logQueries { + defer threadStatus.Tasklog.Print() + } + defer threadStatus.End() + + // Acquire a read lock on the repo. + locked, releaseRLock, err := tryRLock(ctx, s.db, threadStatus, repo) + if err != nil { + return nil, err + } + defer func() { err = errors.CombineErrors(err, releaseRLock()) }() + if !locked { + return nil, errors.Newf("deletion in progress", repo) + } + + // Insert or set the last_accessed_at column for this repo to now() in the rockskip_repos table. 
+ threadStatus.Tasklog.Start("update last_accessed_at") + repoId, err := updateLastAccessedAt(ctx, s.db, repo) + if err != nil { + return nil, err + } + + // Non-blocking send on repoUpdates to notify the background deletion goroutine. + select { + case s.repoUpdates <- struct{}{}: + default: + } + + // Check if the commit has already been indexed, and if not then index it. + threadStatus.Tasklog.Start("check commit presence") + commit, _, present, err := GetCommitByHash(ctx, s.db, repoId, commitHash) + if err != nil { + return nil, err + } else if !present { + + // Try to send an index request. + done, err := s.emitIndexRequest(repoCommit{repo: repo, commit: commitHash}) + if err != nil { + return nil, err + } + + // Wait for indexing to complete or the request to be canceled. + threadStatus.Tasklog.Start("awaiting indexing completion") + select { + case <-done: + threadStatus.Tasklog.Start("recheck commit presence") + commit, _, present, err = GetCommitByHash(ctx, s.db, repoId, commitHash) + if err != nil { + return nil, err + } + if !present { + return nil, errors.Newf("indexing failed, check server logs") + } + case <-ctx.Done(): + return nil, ctx.Err() + } + + } + + // Finally search. + symbols, err = s.querySymbols(ctx, args, repoId, commit, threadStatus) + if err != nil { + return nil, err + } + + return symbols, nil +} + +func mkIsMatch(args types.SearchArgs) (func(string) bool, error) { + if !args.IsRegExp { + if args.IsCaseSensitive { + return func(symbol string) bool { return strings.Contains(symbol, args.Query) }, nil + } else { + return func(symbol string) bool { + return strings.Contains(strings.ToLower(symbol), strings.ToLower(args.Query)) + }, nil + } + } + + expr := args.Query + if !args.IsCaseSensitive { + expr = "(?i)" + expr + } + + regex, err := regexp.Compile(expr) + if err != nil { + return nil, err + } + + if args.IsCaseSensitive { + return func(symbol string) bool { return regex.MatchString(symbol) }, nil + } else { + return func(symbol string) bool { return regex.MatchString(strings.ToLower(symbol)) }, nil + } +} + +func (s *Service) emitIndexRequest(rc repoCommit) (chan struct{}, error) { + key := fmt.Sprintf("%s@%s", rc.repo, rc.commit) + + s.repoCommitToDoneMu.Lock() + + if done, ok := s.repoCommitToDone[key]; ok { + s.repoCommitToDoneMu.Unlock() + return done, nil + } + + done := make(chan struct{}) + + s.repoCommitToDone[key] = done + s.repoCommitToDoneMu.Unlock() + go func() { + <-done + s.repoCommitToDoneMu.Lock() + delete(s.repoCommitToDone, key) + s.repoCommitToDoneMu.Unlock() + }() + + request := indexRequest{ + repoCommit: repoCommit{ + repo: rc.repo, + commit: rc.commit, + }, + done: done} + + // Route the index request to the indexer associated with the repo. + ix := int(fnv1.HashString32(rc.repo)) % len(s.indexRequestQueues) + + select { + case s.indexRequestQueues[ix] <- request: + default: + return nil, errors.Newf("the indexing queue is full") + } + + return done, nil +} + +const DEFAULT_LIMIT = 100 + +func (s *Service) querySymbols(ctx context.Context, args types.SearchArgs, repoId int, commit int, threadStatus *ThreadStatus) ([]result.Symbol, error) { + hops, err := getHops(ctx, s.db, commit, threadStatus.Tasklog) + if err != nil { + return nil, err + } + // Drop the null commit. 
+ hops = hops[:len(hops)-1] + + limit := DEFAULT_LIMIT + if args.First > 0 { + limit = args.First + } + + threadStatus.Tasklog.Start("run query") + q := sqlf.Sprintf(` + SELECT DISTINCT path + FROM rockskip_symbols + WHERE + %s && singleton_integer(repo_id) + AND %s && added + AND NOT %s && deleted + AND %s + LIMIT %s;`, + pg.Array([]int{repoId}), + pg.Array(hops), + pg.Array(hops), + convertSearchArgsToSqlQuery(args), + limit, + ) + + start := time.Now() + var rows *sql.Rows + rows, err = s.db.QueryContext(ctx, q.Query(sqlf.PostgresBindVar), q.Args()...) + duration := time.Since(start) + if err != nil { + return nil, errors.Wrap(err, "Search") + } + defer rows.Close() + + isMatch, err := mkIsMatch(args) + if err != nil { + return nil, err + } + + paths := []string{} + for rows.Next() { + var path string + err = rows.Scan(&path) + if err != nil { + return nil, errors.Wrap(err, "Search: Scan") + } + paths = append(paths, path) + } + + stopErr := errors.New("stop iterating") + + symbols := []result.Symbol{} + + parse := s.createParser() + + threadStatus.Tasklog.Start("ArchiveEach") + err = s.git.ArchiveEach(string(args.Repo), string(args.CommitID), paths, func(path string, contents []byte) error { + defer threadStatus.Tasklog.Continue("ArchiveEach") + + threadStatus.Tasklog.Start("parse") + allSymbols, err := parse(path, contents) + if err != nil { + return err + } + + for _, symbol := range allSymbols { + if isMatch(symbol.Name) { + symbols = append(symbols, result.Symbol{ + Name: symbol.Name, + Path: path, + Line: symbol.Line, + Kind: symbol.Kind, + Parent: symbol.Parent, + }) + + if len(symbols) >= limit { + return stopErr + } + } + } + + return nil + }) + + if err != nil && err != stopErr { + return nil, err + } + + if s.logQueries { + err = logQuery(ctx, s.db, args, q, duration, len(symbols)) + if err != nil { + return nil, errors.Wrap(err, "logQuery") + } + } + + return symbols, nil +} + +func logQuery(ctx context.Context, db dbutil.DB, args types.SearchArgs, q *sqlf.Query, duration time.Duration, symbols int) error { + sb := &strings.Builder{} + + fmt.Fprintf(sb, "Search args: %+v\n", args) + + fmt.Fprintln(sb, "Query:") + query, err := sqlfToString(q) + if err != nil { + return errors.Wrap(err, "sqlfToString") + } + fmt.Fprintln(sb, query) + + fmt.Fprintln(sb, "EXPLAIN:") + explain, err := db.QueryContext(ctx, sqlf.Sprintf("EXPLAIN %s", q).Query(sqlf.PostgresBindVar), q.Args()...) 
+ if err != nil { + return errors.Wrap(err, "EXPLAIN") + } + defer explain.Close() + for explain.Next() { + var plan string + err = explain.Scan(&plan) + if err != nil { + return errors.Wrap(err, "EXPLAIN Scan") + } + fmt.Fprintln(sb, plan) + } + + fmt.Fprintf(sb, "%.2fms, %d symbols", float64(duration.Microseconds())/1000, symbols) + + fmt.Println(" ") + fmt.Println(bracket(sb.String())) + fmt.Println(" ") + + return nil +} + +func bracket(text string) string { + lines := strings.Split(strings.TrimSpace(text), "\n") + for i, line := range lines { + if i == 0 { + lines[i] = "┌ " + line + } else if i == len(lines)-1 { + lines[i] = "└ " + line + } else { + lines[i] = "│ " + line + } + } + return strings.Join(lines, "\n") +} + +func sqlfToString(q *sqlf.Query) (string, error) { + s := q.Query(sqlf.PostgresBindVar) + for i, arg := range q.Args() { + argString, err := argToString(arg) + if err != nil { + return "", err + } + s = strings.ReplaceAll(s, fmt.Sprintf("$%d", i+1), argString) + } + return s, nil +} + +func argToString(arg interface{}) (string, error) { + switch arg := arg.(type) { + case string: + return fmt.Sprintf("'%s'", sqlEscapeQuotes(arg)), nil + case driver.Valuer: + value, err := arg.Value() + if err != nil { + return "", err + } + switch value := value.(type) { + case string: + return fmt.Sprintf("'%s'", sqlEscapeQuotes(value)), nil + case int: + return fmt.Sprintf("'%d'", value), nil + default: + return "", errors.Newf("unrecognized array type %T", value) + } + case int: + return fmt.Sprintf("%d", arg), nil + default: + return "", errors.Newf("unrecognized type %T", arg) + } +} + +func sqlEscapeQuotes(s string) string { + return strings.ReplaceAll(s, "'", "''") +} + +func convertSearchArgsToSqlQuery(args types.SearchArgs) *sqlf.Query { + // TODO support non regexp queries once the frontend supports it. + + conjunctOrNils := []*sqlf.Query{} + + // Query + conjunctOrNils = append(conjunctOrNils, regexMatch("name", "", args.Query, args.IsCaseSensitive)) + + // IncludePatterns + for _, includePattern := range args.IncludePatterns { + conjunctOrNils = append(conjunctOrNils, regexMatch("path", "path_prefixes(path)", includePattern, args.IsCaseSensitive)) + } + + // ExcludePattern + conjunctOrNils = append(conjunctOrNils, negate(regexMatch("path", "path_prefixes(path)", args.ExcludePattern, args.IsCaseSensitive))) + + // Drop nils + conjuncts := []*sqlf.Query{} + for _, condition := range conjunctOrNils { + if condition != nil { + conjuncts = append(conjuncts, condition) + } + } + + if len(conjuncts) == 0 { + return sqlf.Sprintf("TRUE") + } + + return sqlf.Join(conjuncts, "AND") +} + +func regexMatch(column, columnForLiteralPrefix, regex string, isCaseSensitive bool) *sqlf.Query { + if regex == "" || regex == "^" { + return nil + } + + // Exact match optimization + if literal, ok, err := isLiteralEquality(regex); err == nil && ok && isCaseSensitive { + return sqlf.Sprintf(fmt.Sprintf("%%s = %s", column), literal) + } + + // Prefix match optimization + if literal, ok, err := isLiteralPrefix(regex); err == nil && ok && isCaseSensitive && columnForLiteralPrefix != "" { + return sqlf.Sprintf(fmt.Sprintf("%%s && %s", columnForLiteralPrefix), pg.Array([]string{literal})) + } + + // Regex match + operator := "~" + if !isCaseSensitive { + operator = "~*" + } + + return sqlf.Sprintf(fmt.Sprintf("%s %s %%s", column, operator), regex) +} + +// isLiteralEquality returns true if the given regex matches literal strings exactly. 
+// If so, this function returns true along with the literal search query. If not, this +// function returns false. +func isLiteralEquality(expr string) (string, bool, error) { + regexp, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + return "", false, errors.Wrap(err, "regexp/syntax.Parse") + } + + // want a concat of size 3 which is [begin, literal, end] + if regexp.Op == syntax.OpConcat && len(regexp.Sub) == 3 { + // starts with ^ + if regexp.Sub[0].Op == syntax.OpBeginLine || regexp.Sub[0].Op == syntax.OpBeginText { + // is a literal + if regexp.Sub[1].Op == syntax.OpLiteral { + // ends with $ + if regexp.Sub[2].Op == syntax.OpEndLine || regexp.Sub[2].Op == syntax.OpEndText { + return string(regexp.Sub[1].Rune), true, nil + } + } + } + } + + return "", false, nil +} + +// isLiteralPrefix returns true if the given regex matches literal strings by prefix. +// If so, this function returns true along with the literal search query. If not, this +// function returns false. +func isLiteralPrefix(expr string) (string, bool, error) { + regexp, err := syntax.Parse(expr, syntax.Perl) + if err != nil { + return "", false, errors.Wrap(err, "regexp/syntax.Parse") + } + + // want a concat of size 2 which is [begin, literal] + if regexp.Op == syntax.OpConcat && len(regexp.Sub) == 2 { + // starts with ^ + if regexp.Sub[0].Op == syntax.OpBeginLine || regexp.Sub[0].Op == syntax.OpBeginText { + // is a literal + if regexp.Sub[1].Op == syntax.OpLiteral { + return string(regexp.Sub[1].Rune), true, nil + } + } + } + + return "", false, nil +} + +func negate(query *sqlf.Query) *sqlf.Query { + if query == nil { + return nil + } + + return sqlf.Sprintf("NOT %s", query) +} diff --git a/enterprise/internal/rockskip/server.go b/enterprise/internal/rockskip/server.go new file mode 100644 index 00000000000..be900d1cda1 --- /dev/null +++ b/enterprise/internal/rockskip/server.go @@ -0,0 +1,160 @@ +package rockskip + +import ( + "context" + "database/sql" + "sync" + + "github.com/inconshreveable/log15" + + "github.com/sourcegraph/sourcegraph/internal/database/dbutil" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +type Symbol struct { + Name string `json:"name"` + Parent string `json:"parent"` + Kind string `json:"kind"` + Line int `json:"line"` +} + +type ParseSymbolsFunc func(path string, bytes []byte) (symbols []Symbol, err error) + +const NULL CommitId = 0 + +type Service struct { + db *sql.DB + git Git + createParser func() ParseSymbolsFunc + status *ServiceStatus + repoUpdates chan struct{} + maxRepos int + logQueries bool + repoCommitToDone map[string]chan struct{} + repoCommitToDoneMu sync.Mutex + indexRequestQueues []chan indexRequest + symbolsCacheSize int + pathSymbolsCacheSize int +} + +func NewService( + db *sql.DB, + git Git, + createParser func() ParseSymbolsFunc, + maxConcurrentlyIndexing int, + maxRepos int, + logQueries bool, + indexRequestsQueueSize int, + symbolsCacheSize int, + pathSymbolsCacheSize int, +) (*Service, error) { + indexRequestQueues := make([]chan indexRequest, maxConcurrentlyIndexing) + for i := 0; i < maxConcurrentlyIndexing; i++ { + indexRequestQueues[i] = make(chan indexRequest, indexRequestsQueueSize) + } + + service := &Service{ + db: db, + git: git, + createParser: createParser, + status: NewStatus(), + repoUpdates: make(chan struct{}, 1), + maxRepos: maxRepos, + logQueries: logQueries, + repoCommitToDone: map[string]chan struct{}{}, + repoCommitToDoneMu: sync.Mutex{}, + indexRequestQueues: indexRequestQueues, + symbolsCacheSize: symbolsCacheSize, + 
pathSymbolsCacheSize: pathSymbolsCacheSize, + } + + go service.startCleanupLoop() + + for i := 0; i < maxConcurrentlyIndexing; i++ { + go service.startIndexingLoop(service.indexRequestQueues[i]) + } + + return service, nil +} + +func (s *Service) startIndexingLoop(indexRequestQueue chan indexRequest) { + for indexRequest := range indexRequestQueue { + err := s.Index(context.Background(), indexRequest.repo, indexRequest.commit) + close(indexRequest.done) + if err != nil { + log15.Error("indexing error", "repo", indexRequest.repo, "commit", indexRequest.commit, "err", err) + } + } +} + +func (s *Service) startCleanupLoop() { + for range s.repoUpdates { + threadStatus := s.status.NewThreadStatus("cleanup") + err := DeleteOldRepos(context.Background(), s.db, s.maxRepos, threadStatus) + threadStatus.End() + if err != nil { + log15.Error("Failed to delete old repos", "error", err) + } + } +} + +func getHops(ctx context.Context, tx dbutil.DB, commit int, tasklog *TaskLog) ([]int, error) { + tasklog.Start("get hops") + + current := commit + spine := []int{current} + + for { + _, ancestor, _, present, err := GetCommitById(ctx, tx, current) + if err != nil { + return nil, errors.Wrap(err, "GetCommitById") + } else if !present { + break + } else { + if current == NULL { + break + } + current = ancestor + spine = append(spine, current) + } + } + + return spine, nil +} + +func DeleteOldRepos(ctx context.Context, db *sql.DB, maxRepos int, threadStatus *ThreadStatus) error { + // Get a fresh connection from the DB pool to get deterministic "lock stacking" behavior. + // See doc/dev/background-information/sql/locking_behavior.md for more details. + conn, err := db.Conn(context.Background()) + if err != nil { + return errors.Wrap(err, "failed to get connection for deleting old repos") + } + defer conn.Close() + + // Keep deleting repos until we're back to at most maxRepos. + for { + more, err := tryDeleteOldestRepo(ctx, conn, maxRepos, threadStatus) + if err != nil { + return err + } + if !more { + return nil + } + } +} + +// Ruler sequence +// +// input : 0, 1, 2, 3, 4, 5, 6, 7, 8, ... +// output: 0, 0, 1, 0, 2, 0, 1, 0, 3, ... +// +// https://oeis.org/A007814 +func ruler(n int) int { + if n == 0 { + return 0 + } + if n%2 != 0 { + return 0 + } + return 1 + ruler(n/2) +} diff --git a/enterprise/internal/rockskip/server_test.go b/enterprise/internal/rockskip/server_test.go new file mode 100644 index 00000000000..5377bc48969 --- /dev/null +++ b/enterprise/internal/rockskip/server_test.go @@ -0,0 +1,309 @@ +package rockskip + +import ( + "bufio" + "context" + "fmt" + "io" + "os" + "os/exec" + "path" + "sort" + "strconv" + "strings" + "testing" + + "github.com/google/go-cmp/cmp" + + "github.com/sourcegraph/sourcegraph/cmd/symbols/types" + "github.com/sourcegraph/sourcegraph/internal/api" + "github.com/sourcegraph/sourcegraph/internal/database/dbtest" + "github.com/sourcegraph/sourcegraph/lib/errors" +) + +// simpleParse converts each line into a symbol. 
+func simpleParse(path string, bytes []byte) ([]Symbol, error) { + symbols := []Symbol{} + + for _, line := range strings.Split(string(bytes), "\n") { + if line == "" { + continue + } + + symbols = append(symbols, Symbol{Name: line}) + } + + return symbols, nil +} + +func TestIndex(t *testing.T) { + fatalIfError := func(err error, message string) { + if err != nil { + t.Fatal(errors.Wrap(err, message)) + } + } + + gitDir, err := os.MkdirTemp("", "rockskip-test-index") + fatalIfError(err, "faiMkdirTemp") + + t.Cleanup(func() { + if t.Failed() { + t.Logf("git dir %s left intact for inspection", gitDir) + } else { + os.RemoveAll(gitDir) + } + }) + + gitCmd := func(args ...string) *exec.Cmd { + cmd := exec.Command("git", args...) + cmd.Dir = gitDir + return cmd + } + + gitRun := func(args ...string) { + fatalIfError(gitCmd(args...).Run(), "git "+strings.Join(args, " ")) + } + + gitStdout := func(args ...string) string { + stdout, err := gitCmd(args...).Output() + fatalIfError(err, "git "+strings.Join(args, " ")) + return string(stdout) + } + + getHead := func() string { + return strings.TrimSpace(gitStdout("rev-parse", "HEAD")) + } + + state := map[string][]string{} + + add := func(filename string, contents string) { + fatalIfError(os.WriteFile(path.Join(gitDir, filename), []byte(contents), 0644), "os.WriteFile") + gitRun("add", filename) + symbols, err := simpleParse(filename, []byte(contents)) + fatalIfError(err, "simpleParse") + state[filename] = []string{} + for _, symbol := range symbols { + state[filename] = append(state[filename], symbol.Name) + } + } + + rm := func(filename string) { + gitRun("rm", filename) + delete(state, filename) + } + + gitRun("init") + + git, err := NewSubprocessGit(gitDir) + fatalIfError(err, "NewSubprocessGit") + defer git.Close() + + db := dbtest.NewDB(t) + defer db.Close() + + createParser := func() ParseSymbolsFunc { return simpleParse } + + service, err := NewService(db, git, createParser, 1, 1, false, 1, 1, 1) + fatalIfError(err, "NewService") + + verifyBlobs := func() { + repo := "somerepo" + commit := getHead() + args := types.SearchArgs{Repo: api.RepoName(repo), CommitID: api.CommitID(commit), Query: ""} + symbols, err := service.Search(context.Background(), args) + fatalIfError(err, "Search") + + // Make sure the paths match. + gotPathSet := map[string]struct{}{} + for _, blob := range symbols { + gotPathSet[blob.Path] = struct{}{} + } + gotPaths := []string{} + for path := range gotPathSet { + gotPaths = append(gotPaths, path) + } + wantPaths := []string{} + for path := range state { + wantPaths = append(wantPaths, path) + } + sort.Strings(gotPaths) + sort.Strings(wantPaths) + if diff := cmp.Diff(gotPaths, wantPaths); diff != "" { + fmt.Println("unexpected paths (-got +want)") + fmt.Println(diff) + err = PrintInternals(context.Background(), db) + fatalIfError(err, "PrintInternals") + t.FailNow() + } + + gotPathToSymbols := map[string][]string{} + for _, blob := range symbols { + gotPathToSymbols[blob.Path] = append(gotPathToSymbols[blob.Path], blob.Name) + } + + // Make sure the symbols match. 
+ for path, gotSymbols := range gotPathToSymbols { + wantSymbols := state[path] + sort.Strings(gotSymbols) + sort.Strings(wantSymbols) + if diff := cmp.Diff(gotSymbols, wantSymbols); diff != "" { + fmt.Println("unexpected symbols (-got +want)") + fmt.Println(diff) + err = PrintInternals(context.Background(), db) + fatalIfError(err, "PrintInternals") + t.FailNow() + } + } + } + + commit := func(message string) { + gitRun("commit", "--allow-empty", "-m", message) + verifyBlobs() + } + + add("a.txt", "sym1\n") + commit("add a file with 1 symbol") + + add("b.txt", "sym1\n") + commit("add another file with 1 symbol") + + add("c.txt", "sym1\nsym2") + commit("add another file with 2 symbols") + + add("a.txt", "sym1\nsym2") + commit("add a symbol to a.txt") + + commit("empty") + + rm("a.txt") + commit("rm a.txt") +} + +type SubprocessGit struct { + gitDir string + catFileCmd *exec.Cmd + catFileStdin io.WriteCloser + catFileStdout bufio.Reader +} + +func NewSubprocessGit(gitDir string) (*SubprocessGit, error) { + cmd := exec.Command("git", "cat-file", "--batch") + cmd.Dir = gitDir + + stdin, err := cmd.StdinPipe() + if err != nil { + return nil, err + } + + stdout, err := cmd.StdoutPipe() + if err != nil { + return nil, err + } + + err = cmd.Start() + if err != nil { + return nil, err + } + + return &SubprocessGit{ + gitDir: gitDir, + catFileCmd: cmd, + catFileStdin: stdin, + catFileStdout: *bufio.NewReader(stdout), + }, nil +} + +func (git SubprocessGit) Close() error { + err := git.catFileStdin.Close() + if err != nil { + return err + } + return git.catFileCmd.Wait() +} + +func (git SubprocessGit) LogReverseEach(repo string, givenCommit string, n int, onLogEntry func(entry LogEntry) error) (returnError error) { + log := exec.Command("git", LogReverseArgs(n, givenCommit)...) + log.Dir = git.gitDir + output, err := log.StdoutPipe() + if err != nil { + return err + } + + err = log.Start() + if err != nil { + return err + } + defer func() { + err = log.Wait() + if err != nil { + returnError = err + } + }() + + return ParseLogReverseEach(output, onLogEntry) +} + +func (git SubprocessGit) RevListEach(repo string, givenCommit string, onCommit func(commit string) (shouldContinue bool, err error)) (returnError error) { + revList := exec.Command("git", RevListArgs(givenCommit)...) 
+ revList.Dir = git.gitDir + output, err := revList.StdoutPipe() + if err != nil { + return err + } + + err = revList.Start() + if err != nil { + return err + } + defer func() { + err = revList.Wait() + if err != nil { + returnError = err + } + }() + + return RevListEach(output, onCommit) +} + +func (git SubprocessGit) ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error { + for _, path := range paths { + _, err := git.catFileStdin.Write([]byte(fmt.Sprintf("%s:%s\n", commit, path))) + if err != nil { + return errors.Wrap(err, "writing to cat-file stdin") + } + + line, err := git.catFileStdout.ReadString('\n') + if err != nil { + return errors.Wrap(err, "read newline") + } + line = line[:len(line)-1] // Drop the trailing newline + parts := strings.Split(line, " ") + if len(parts) != 3 { + return errors.Newf("unexpected cat-file output: %q", line) + } + size, err := strconv.ParseInt(parts[2], 10, 64) + if err != nil { + return errors.Wrap(err, "parse size") + } + + fileContents, err := io.ReadAll(io.LimitReader(&git.catFileStdout, size)) + if err != nil { + return errors.Wrap(err, "read contents") + } + + discarded, err := git.catFileStdout.Discard(1) // Discard the trailing newline + if err != nil { + return errors.Wrap(err, "discard newline") + } + if discarded != 1 { + return errors.Newf("expected to discard 1 byte, but discarded %d", discarded) + } + + err = onFile(path, fileContents) + if err != nil { + return errors.Wrap(err, "onFile") + } + } + + return nil +} diff --git a/enterprise/internal/rockskip/status.go b/enterprise/internal/rockskip/status.go new file mode 100644 index 00000000000..309d9e1615a --- /dev/null +++ b/enterprise/internal/rockskip/status.go @@ -0,0 +1,312 @@ +package rockskip + +import ( + "fmt" + "net/http" + "sort" + "strings" + "sync" + "time" + + "github.com/inconshreveable/log15" + + "github.com/sourcegraph/sourcegraph/internal/database/basestore" +) + +// RequestId is a unique int for each HTTP request. +type RequestId = int + +// ServiceStatus contains the status of all requests. 
+type ServiceStatus struct { + threadIdToThreadStatus map[RequestId]*ThreadStatus + nextThreadId RequestId + mu sync.Mutex +} + +func NewStatus() *ServiceStatus { + return &ServiceStatus{ + threadIdToThreadStatus: map[int]*ThreadStatus{}, + nextThreadId: 0, + mu: sync.Mutex{}, + } +} + +func (s *ServiceStatus) NewThreadStatus(name string) *ThreadStatus { + s.mu.Lock() + defer s.mu.Unlock() + + threadId := s.nextThreadId + s.nextThreadId++ + + threadStatus := NewThreadStatus(name, func() { + s.mu.Lock() + defer s.mu.Unlock() + delete(s.threadIdToThreadStatus, threadId) + }) + + s.threadIdToThreadStatus[threadId] = threadStatus + + return threadStatus +} + +func (s *Service) HandleStatus(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + repositoryCount, _, err := basestore.ScanFirstInt(s.db.QueryContext(ctx, "SELECT COUNT(*) FROM rockskip_repos")) + if err != nil { + log15.Error("Failed to count repos", "error", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + + type repoRow struct { + repo string + lastAccessedAt time.Time + } + + repoRows := []repoRow{} + repoSqlRows, err := s.db.QueryContext(ctx, "SELECT repo, last_accessed_at FROM rockskip_repos ORDER BY last_accessed_at DESC LIMIT 5") + if err != nil { + log15.Error("Failed to list repoRows", "error", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + defer repoSqlRows.Close() + for repoSqlRows.Next() { + var repo string + var lastAccessedAt time.Time + if err := repoSqlRows.Scan(&repo, &lastAccessedAt); err != nil { + log15.Error("Failed to scan repo", "error", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + repoRows = append(repoRows, repoRow{repo: repo, lastAccessedAt: lastAccessedAt}) + } + + symbolsSize, _, err := basestore.ScanFirstString(s.db.QueryContext(ctx, "SELECT pg_size_pretty(pg_total_relation_size('rockskip_symbols'))")) + if err != nil { + log15.Error("Failed to get size of symbols table", "error", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + + w.WriteHeader(http.StatusOK) + fmt.Fprintln(w, "This is the symbols service status page.") + fmt.Fprintln(w, "") + + fmt.Fprintf(w, "Number of repositories: %d\n", repositoryCount) + fmt.Fprintf(w, "Size of symbols table: %s\n", symbolsSize) + fmt.Fprintln(w, "") + + if repositoryCount > 0 { + fmt.Fprintf(w, "Most recently searched repositories (at most 5 shown)\n") + for _, repo := range repoRows { + fmt.Fprintf(w, " %s %s\n", repo.lastAccessedAt, repo.repo) + } + fmt.Fprintln(w, "") + } + + s.status.mu.Lock() + defer s.status.mu.Unlock() + + if len(s.status.threadIdToThreadStatus) == 0 { + fmt.Fprintln(w, "No requests in flight.") + return + } + fmt.Fprintln(w, "Here are all in-flight requests:") + fmt.Fprintln(w, "") + + ids := []int{} + for id := range s.status.threadIdToThreadStatus { + ids = append(ids, id) + } + sort.Ints(ids) + + for _, id := range ids { + status := s.status.threadIdToThreadStatus[id] + status.WithLock(func() { + fmt.Fprintf(w, "%s\n", status.Name) + if status.Total > 0 { + progress := float64(status.Indexed) / float64(status.Total) + remaining := "unknown" + if progress != 0 { + total := status.Tasklog.TotalDuration() + remaining = fmt.Sprint(time.Duration(total.Seconds()/progress)*time.Second - total) + } + fmt.Fprintf(w, " progress %.2f%% (indexed %d of %d commits), %s remaining\n", progress*100, status.Indexed, status.Total, remaining) + } + fmt.Fprintf(w, " %s\n", status.Tasklog) + locks := []string{} + for lock := range status.HeldLocks { + locks = 
append(locks, lock) + } + sort.Strings(locks) + for _, lock := range locks { + fmt.Fprintf(w, " holding %s\n", lock) + } + fmt.Fprintln(w) + }) + } +} + +type ThreadStatus struct { + Tasklog *TaskLog + Name string + HeldLocks map[string]struct{} + Indexed int + Total int + mu sync.Mutex + onEnd func() +} + +func NewThreadStatus(name string, onEnd func()) *ThreadStatus { + return &ThreadStatus{ + Tasklog: NewTaskLog(), + Name: name, + HeldLocks: map[string]struct{}{}, + Indexed: -1, + Total: -1, + mu: sync.Mutex{}, + onEnd: onEnd, + } +} + +func (s *ThreadStatus) WithLock(f func()) { + s.mu.Lock() + defer s.mu.Unlock() + f() +} + +func (s *ThreadStatus) SetProgress(indexed, total int) { + s.WithLock(func() { s.Indexed = indexed; s.Total = total }) +} +func (s *ThreadStatus) HoldLock(name string) { s.WithLock(func() { s.HeldLocks[name] = struct{}{} }) } +func (s *ThreadStatus) ReleaseLock(name string) { s.WithLock(func() { delete(s.HeldLocks, name) }) } + +func (s *ThreadStatus) End() { + if s.onEnd != nil { + s.mu.Lock() + defer s.mu.Unlock() + s.onEnd() + } +} + +type TaskLog struct { + currentName string + currentStart time.Time + nameToTask map[string]*Task + // This mutex is only necessary to synchronize with the status page handler. + mu sync.Mutex +} + +type Task struct { + Duration time.Duration + Count int +} + +func NewTaskLog() *TaskLog { + return &TaskLog{ + currentName: "idle", + currentStart: time.Now(), + nameToTask: map[string]*Task{"idle": {Duration: 0, Count: 1}}, + mu: sync.Mutex{}, + } +} + +func (t *TaskLog) Start(name string) { + t.mu.Lock() + defer t.mu.Unlock() + + now := time.Now() + + if _, ok := t.nameToTask[t.currentName]; !ok { + t.nameToTask[t.currentName] = &Task{Duration: 0, Count: 0} + } + t.nameToTask[t.currentName].Duration += now.Sub(t.currentStart) + + if _, ok := t.nameToTask[name]; !ok { + t.nameToTask[name] = &Task{Duration: 0, Count: 0} + } + t.nameToTask[name].Count += 1 + + t.currentName = name + t.currentStart = now +} + +func (t *TaskLog) Continue(name string) { + t.mu.Lock() + defer t.mu.Unlock() + + now := time.Now() + + if _, ok := t.nameToTask[t.currentName]; !ok { + t.nameToTask[t.currentName] = &Task{Duration: 0, Count: 0} + } + t.nameToTask[t.currentName].Duration += now.Sub(t.currentStart) + + if _, ok := t.nameToTask[name]; !ok { + t.nameToTask[name] = &Task{Duration: 0, Count: 0} + } + + t.currentName = name + t.currentStart = now +} + +func (t *TaskLog) Reset() { + t.mu.Lock() + defer t.mu.Unlock() + + t.currentName = "idle" + t.currentStart = time.Now() + t.nameToTask = map[string]*Task{"idle": {Duration: 0, Count: 1}} +} + +func (t *TaskLog) Print() { + fmt.Println(t) +} + +func (t *TaskLog) String() string { + var s strings.Builder + + t.Continue(t.currentName) + + t.mu.Lock() + defer t.mu.Unlock() + + var total time.Duration = 0 + totalCount := 0 + for _, task := range t.nameToTask { + total += task.Duration + totalCount += task.Count + } + fmt.Fprintf(&s, "Tasks (%.2fs total, current %s): ", total.Seconds(), t.currentName) + + type kv struct { + Key string + Value *Task + } + + var kvs []kv + for k, v := range t.nameToTask { + kvs = append(kvs, kv{k, v}) + } + + sort.Slice(kvs, func(i, j int) bool { + return kvs[i].Value.Duration > kvs[j].Value.Duration + }) + + for _, kv := range kvs { + fmt.Fprintf(&s, "%s %.2f%% %dx, ", kv.Key, kv.Value.Duration.Seconds()*100/total.Seconds(), kv.Value.Count) + } + + return s.String() +} + +func (t *TaskLog) TotalDuration() time.Duration { + t.Continue(t.currentName) + var total time.Duration = 0 
+ for _, task := range t.nameToTask { + total += task.Duration + } + return total +} diff --git a/go.mod b/go.mod index 09cd3d72c9d..624d32b7c68 100644 --- a/go.mod +++ b/go.mod @@ -366,7 +366,7 @@ require ( gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b k8s.io/kube-openapi v0.0.0-20220124234850-424119656bbf // indirect - k8s.io/utils v0.0.0-20220127004650-9b3446523e65 // indirect + k8s.io/utils v0.0.0-20220127004650-9b3446523e65 mvdan.cc/gofumpt v0.2.1 // indirect sigs.k8s.io/yaml v1.3.0 ) diff --git a/internal/database/schema.codeintel.md b/internal/database/schema.codeintel.md index 1c98303b845..4b666f1b9cb 100755 --- a/internal/database/schema.codeintel.md +++ b/internal/database/schema.codeintel.md @@ -742,3 +742,51 @@ Indexes: "migration_logs_pkey" PRIMARY KEY, btree (id) ``` + +# Table "public.rockskip_ancestry" +``` + Column | Type | Collation | Nullable | Default +-----------+-----------------------+-----------+----------+----------------------------------------------- + id | integer | | not null | nextval('rockskip_ancestry_id_seq'::regclass) + repo_id | integer | | not null | + commit_id | character varying(40) | | not null | + height | integer | | not null | + ancestor | integer | | not null | +Indexes: + "rockskip_ancestry_pkey" PRIMARY KEY, btree (id) + "rockskip_ancestry_repo_id_commit_id_key" UNIQUE CONSTRAINT, btree (repo_id, commit_id) + "rockskip_ancestry_repo_commit_id" btree (repo_id, commit_id) + +``` + +# Table "public.rockskip_repos" +``` + Column | Type | Collation | Nullable | Default +------------------+--------------------------+-----------+----------+-------------------------------------------- + id | integer | | not null | nextval('rockskip_repos_id_seq'::regclass) + repo | text | | not null | + last_accessed_at | timestamp with time zone | | not null | +Indexes: + "rockskip_repos_pkey" PRIMARY KEY, btree (id) + "rockskip_repos_repo_key" UNIQUE CONSTRAINT, btree (repo) + "rockskip_repos_last_accessed_at" btree (last_accessed_at) + "rockskip_repos_repo" btree (repo) + +``` + +# Table "public.rockskip_symbols" +``` + Column | Type | Collation | Nullable | Default +---------+-----------+-----------+----------+---------------------------------------------- + id | integer | | not null | nextval('rockskip_symbols_id_seq'::regclass) + added | integer[] | | not null | + deleted | integer[] | | not null | + repo_id | integer | | not null | + path | text | | not null | + name | text | | not null | +Indexes: + "rockskip_symbols_pkey" PRIMARY KEY, btree (id) + "rockskip_symbols_gin" gin (singleton_integer(repo_id) gin__int_ops, added gin__int_ops, deleted gin__int_ops, singleton(path), path_prefixes(path), singleton(name), name gin_trgm_ops) + "rockskip_symbols_repo_id_path_name" btree (repo_id, path, name) + +``` diff --git a/migrations/codeintel/1000000032/down.sql b/migrations/codeintel/1000000032/down.sql new file mode 100644 index 00000000000..0bf541b08df --- /dev/null +++ b/migrations/codeintel/1000000032/down.sql @@ -0,0 +1,3 @@ +DROP TABLE IF EXISTS rockskip_ancestry; +DROP TABLE IF EXISTS rockskip_symbols; +DROP TABLE IF EXISTS rockskip_repos; diff --git a/migrations/codeintel/1000000032/metadata.yaml b/migrations/codeintel/1000000032/metadata.yaml new file mode 100644 index 00000000000..24027f1c4d0 --- /dev/null +++ b/migrations/codeintel/1000000032/metadata.yaml @@ -0,0 +1,2 @@ +name: 'rockskip' +parent: 1000000031 diff --git a/migrations/codeintel/1000000032/up.sql b/migrations/codeintel/1000000032/up.sql new file 
mode 100644 index 00000000000..1057ba84eba --- /dev/null +++ b/migrations/codeintel/1000000032/up.sql @@ -0,0 +1,71 @@ +CREATE TABLE IF NOT EXISTS rockskip_repos ( + id SERIAL PRIMARY KEY, + repo TEXT NOT NULL, + last_accessed_at TIMESTAMP WITH TIME ZONE NOT NULL, + UNIQUE (repo) +); + +CREATE TABLE IF NOT EXISTS rockskip_ancestry ( + id SERIAL PRIMARY KEY, + repo_id INTEGER NOT NULL, + commit_id VARCHAR(40) NOT NULL, + height INTEGER NOT NULL, + ancestor INTEGER NOT NULL, + UNIQUE (repo_id, commit_id) +); + +-- Insert the null commit. repo_id 0 will not conflict with other repos because SERIAL's MINVALUE +-- defaults to 1. +INSERT INTO rockskip_ancestry + (id, commit_id , repo_id , height, ancestor) +VALUES (0 , '0000000000000000000000000000000000000000', 0 , 0 , 0 ) +ON CONFLICT DO NOTHING; + +CREATE TABLE IF NOT EXISTS rockskip_symbols ( + -- Globally unique ID of this instance of the symbol. + id SERIAL PRIMARY KEY, + added INTEGER[] NOT NULL, + deleted INTEGER[] NOT NULL, + + -- Since we only support searching by symbol name and we re-parse the file at query time, symbols + -- with the same name in the same file only need to be stored once. Upon re-parsing the file at query + -- time we will discover all symbols that match. + repo_id INTEGER NOT NULL, + path TEXT NOT NULL, + name TEXT NOT NULL +); + +CREATE OR REPLACE FUNCTION singleton(value TEXT) RETURNS TEXT[] AS $$ BEGIN + RETURN ARRAY[value]; +END; $$ IMMUTABLE language plpgsql; + +CREATE OR REPLACE FUNCTION singleton_integer(value INTEGER) RETURNS INTEGER[] AS $$ BEGIN + RETURN ARRAY[value]; +END; $$ IMMUTABLE language plpgsql; + +CREATE OR REPLACE FUNCTION path_prefixes(path TEXT) RETURNS TEXT[] AS $$ BEGIN + RETURN ( + SELECT array_agg(array_to_string(components[:len], '/')) prefixes + FROM + (SELECT regexp_split_to_array(path, E'/') components) t, + generate_series(1, array_length(components, 1)) AS len + ); +END; $$ IMMUTABLE language plpgsql; + +CREATE INDEX IF NOT EXISTS rockskip_repos_repo ON rockskip_repos(repo); + +CREATE INDEX IF NOT EXISTS rockskip_repos_last_accessed_at ON rockskip_repos(last_accessed_at); + +CREATE INDEX IF NOT EXISTS rockskip_ancestry_repo_commit_id ON rockskip_ancestry(repo_id, commit_id); + +CREATE INDEX IF NOT EXISTS rockskip_symbols_repo_id_path_name ON rockskip_symbols(repo_id, path, name); + +CREATE INDEX IF NOT EXISTS rockskip_symbols_gin ON rockskip_symbols USING GIN ( + singleton_integer(repo_id) gin__int_ops, + added gin__int_ops, + deleted gin__int_ops, + singleton(path), + path_prefixes(path), + singleton(name), + name gin_trgm_ops +); diff --git a/sg.config.yaml b/sg.config.yaml index 968ca30b64d..d4bf09ed494 100644 --- a/sg.config.yaml +++ b/sg.config.yaml @@ -245,11 +245,6 @@ commands: symbols: cmd: .bin/symbols install: | - # Remove old pcre libs that might still be lying around. - # TODO delete these two lines after 2021-10-24 (1 month after removal of pcre). 
- rm -f libsqlite3-pcre.dylib || true - rm -f libsqlite3-pcre.so || true - if [ -n "$DELVE" ]; then export GCFLAGS='all=-N -l' fi @@ -265,6 +260,27 @@ commands: - internal - cmd/symbols + enterprise-symbols: + cmd: .bin/enterprise-symbols + install: | + if [ -n "$DELVE" ]; then + export GCFLAGS='all=-N -l' + fi + + ./cmd/symbols/build-ctags.sh && + go build -gcflags="$GCFLAGS" -o .bin/enterprise-symbols github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols + checkBinary: .bin/enterprise-symbols + env: + CTAGS_COMMAND: cmd/symbols/universal-ctags-dev + CTAGS_PROCESSES: 2 + USE_ROCKSKIP: 'false' + watch: + - lib + - internal + - cmd/symbols + - enterprise/cmd/symbols + - enterprise/internal/rockskip + searcher: cmd: .bin/searcher install: | @@ -425,8 +441,8 @@ commands: - enterprise/internal - lib/codeintel - executor-template: &executor_template - # TMPDIR is set here so it's not set in the `install` process, which would trip up `go build`. + executor-template: + &executor_template # TMPDIR is set here so it's not set in the `install` process, which would trip up `go build`. cmd: | env TMPDIR="$HOME/.sourcegraph/executor-temp" .bin/executor install: | @@ -450,7 +466,7 @@ commands: env TMPDIR="$HOME/.sourcegraph/indexer-temp" .bin/executor env: EXECUTOR_QUEUE_NAME: codeintel - SRC_PROF_HTTP: ":6092" + SRC_PROF_HTTP: ':6092' batches-executor: <<: *executor_template @@ -459,7 +475,7 @@ commands: env: EXECUTOR_QUEUE_NAME: batches EXECUTOR_MAXIMUM_NUM_JOBS: 8 - SRC_PROF_HTTP: ":6093" + SRC_PROF_HTTP: ':6093' # If you want to use this, either start it with `sg run batches-executor-firecracker` or # modify the `commandsets.batches` in your local `sg.config.overwrite.yaml` @@ -472,7 +488,7 @@ commands: env: EXECUTOR_USE_FIRECRACKER: true EXECUTOR_QUEUE_NAME: batches - SRC_PROF_HTTP: ":6093" + SRC_PROF_HTTP: ':6093' minio: cmd: | @@ -604,8 +620,8 @@ commands: CONTAINER: grafana PORT: 3370 # docker containers must access things via docker host on non-linux platforms - DOCKER_USER: "" - ADD_HOST_FLAG: "" + DOCKER_USER: '' + ADD_HOST_FLAG: '' CACHE: false watch: - monitoring @@ -655,11 +671,11 @@ commands: CONTAINER: prometheus PORT: 9090 CONFIG_DIR: docker-images/prometheus/config - DOCKER_USER: "" - DOCKER_NET: "" + DOCKER_USER: '' + DOCKER_NET: '' PROM_TARGETS: dev/prometheus/all/prometheus_targets.yml SRC_FRONTEND_INTERNAL: host.docker.internal:3090 - ADD_HOST_FLAG: "" + ADD_HOST_FLAG: '' DISABLE_SOURCEGRAPH_CONFIG: false postgres_exporter: @@ -686,7 +702,7 @@ commands: docker pull index.docker.io/grafana/loki:$LOKI_VERSION env: LOKI_DISK: $HOME/.sourcegraph-dev/data/loki - LOKI_VERSION: "2.3.0" + LOKI_VERSION: '2.3.0' LOKI_LOG_FILE: $HOME/.sourcegraph-dev/logs/loki/loki.log storybook: @@ -772,7 +788,7 @@ commandsets: - enterprise-web - gitserver - searcher - - symbols + - enterprise-symbols - caddy - docsite - syntax-highlighter