codeintel: Rockskip for symbols (#28719)

Chris Wendt 2022-03-02 21:13:28 -07:00 committed by GitHub
parent 103572c4d8
commit 7a6a2a062b
61 changed files with 3319 additions and 547 deletions

View File

@@ -1,59 +0,0 @@
package main
import (
"os"
"runtime"
"strconv"
"time"
"github.com/sourcegraph/sourcegraph/internal/env"
)
type Config struct {
env.BaseConfig
ctagsCommand string
ctagsPatternLengthLimit int
ctagsLogErrors bool
ctagsDebugLogs bool
sanityCheck bool
cacheDir string
cacheSizeMB int
numCtagsProcesses int
requestBufferSize int
processingTimeout time.Duration
// The maximum sum of lengths of all paths in a single call to git archive. Without this limit, we
// could hit the error "argument list too long" by exceeding the limit on the number of arguments to
// a command enforced by the OS.
//
// Mac : getconf ARG_MAX returns 1,048,576
// Linux: getconf ARG_MAX returns 2,097,152
//
// We want to remain well under that limit, so defaulting to 100,000 seems safe (see the
// MAX_TOTAL_PATHS_LENGTH environment variable below).
maxTotalPathsLength int
}
var config = &Config{}
// Load reads from the environment and stores the transformed data on the config object for later retrieval.
func (c *Config) Load() {
c.ctagsCommand = c.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)")
c.ctagsPatternLengthLimit = c.GetInt("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags")
logCtagsErrorsDefault := "false"
if os.Getenv("DEPLOY_TYPE") == "dev" {
logCtagsErrorsDefault = "true"
}
c.ctagsLogErrors = c.GetBool("LOG_CTAGS_ERRORS", logCtagsErrorsDefault, "log ctags errors")
c.ctagsDebugLogs = false
c.sanityCheck = c.GetBool("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not")
c.cacheDir = c.Get("CACHE_DIR", "/tmp/symbols-cache", "directory in which to store cached symbols")
c.cacheSizeMB = c.GetInt("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache (in megabytes)")
c.numCtagsProcesses = c.GetInt("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of concurrent parser processes to run")
c.requestBufferSize = c.GetInt("REQUEST_BUFFER_SIZE", "8192", "maximum size of buffered parser request channel")
c.processingTimeout = c.GetInterval("PROCESSING_TIMEOUT", "2h", "maximum time to spend processing a repository")
c.maxTotalPathsLength = c.GetInt("MAX_TOTAL_PATHS_LENGTH", "100000", "maximum sum of lengths of all paths in a single call to git archive")
}
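
For context, the limit above exists so that file paths can be split across multiple `git archive` invocations instead of one oversized argument list. A self-contained sketch of that batching, patterned on the `batchByTotalLength` helper that appears later in this diff (a reconstruction under that assumption, not the commit's exact code):

```go
// batchByTotalLength splits paths into batches whose summed lengths stay
// under maxTotalLength, so each batch is safe to pass to git archive.
func batchByTotalLength(paths []string, maxTotalLength int) [][]string {
	var batches [][]string
	var batch []string
	length := 0
	for _, p := range paths {
		// Start a new batch once adding p would exceed the limit
		// (a single oversized path still gets its own batch).
		if len(batch) > 0 && length+len(p) > maxTotalLength {
			batches = append(batches, batch)
			batch, length = nil, 0
		}
		batch = append(batch, p)
		length += len(p)
	}
	if len(batch) > 0 {
		batches = append(batches, batch)
	}
	return batches
}
```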

View File

@@ -3,10 +3,8 @@
# This script installs ctags within an alpine container.
# Commit hash of github.com/universal-ctags/ctags.
# Last bumped 2022-02-28
# This version includes a fix that hasn't landed on master yet:
# https://github.com/universal-ctags/ctags/pull/3300
CTAGS_VERSION=90a16c009c52a35578140c6c731bcd5faa104f11
# Last bumped 2022-02-10
CTAGS_VERSION=37a4b3601288bcdc02a387197ff8d9b971f7ab34
cleanup() {
apk --no-cache --purge del ctags-build-deps || true

View File

@@ -0,0 +1,3 @@
package fetcher
//go:generate ../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver -i GitServerClient -o mock_iface_test.go

View File

@@ -7,14 +7,14 @@ import (
"io"
"sync"
gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
api "github.com/sourcegraph/sourcegraph/internal/api"
)
// MockGitserverClient is a mock implementation of the GitserverClient
// interface (from the package
// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used
// for unit testing.
// github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver) used for unit
// testing.
type MockGitserverClient struct {
// FetchTarFunc is an instance of a mock function object controlling the
// behavior of the method FetchTar.

View File

@@ -2,16 +2,14 @@ package fetcher
import (
"archive/tar"
"bytes"
"context"
"io"
"path"
"strings"
"github.com/opentracing/opentracing-go/log"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
@@ -22,7 +20,6 @@ type RepositoryFetcher interface {
type repositoryFetcher struct {
gitserverClient gitserver.GitserverClient
fetchSem chan int
operations *operations
maxTotalPathsLength int
}
@@ -37,10 +34,9 @@ type parseRequestOrError struct {
Err error
}
func NewRepositoryFetcher(gitserverClient gitserver.GitserverClient, maximumConcurrentFetches int, maxTotalPathsLength int, observationContext *observation.Context) RepositoryFetcher {
func NewRepositoryFetcher(gitserverClient gitserver.GitserverClient, maxTotalPathsLength int, observationContext *observation.Context) RepositoryFetcher {
return &repositoryFetcher{
gitserverClient: gitserverClient,
fetchSem: make(chan int, maximumConcurrentFetches),
operations: newOperations(observationContext),
maxTotalPathsLength: maxTotalPathsLength,
}
@@ -71,13 +67,6 @@ func (f *repositoryFetcher) fetchRepositoryArchive(ctx context.Context, args typ
}})
defer endObservation(1, observation.Args{})
onDefer, err := f.limitConcurrentFetches(ctx)
if err != nil {
return err
}
defer onDefer()
trace.Log(log.Event("acquired fetch semaphore"))
f.operations.fetching.Inc()
defer f.operations.fetching.Dec()
@@ -136,19 +125,6 @@ func batchByTotalLength(paths []string, maxTotalLength int) [][]string {
return batches
}
func (f *repositoryFetcher) limitConcurrentFetches(ctx context.Context) (func(), error) {
f.operations.fetchQueueSize.Inc()
defer f.operations.fetchQueueSize.Dec()
select {
case f.fetchSem <- 1:
return func() { <-f.fetchSem }, nil
case <-ctx.Done():
return func() {}, ctx.Err()
}
}
func readTar(ctx context.Context, tarReader *tar.Reader, callback func(request ParseRequest), traceLog observation.TraceLogger) error {
for {
if ctx.Err() != nil {
@@ -156,89 +132,22 @@ func readTar(ctx context.Context, tarReader *tar.Reader, callback func(request P
}
tarHeader, err := tarReader.Next()
if err != nil {
if err == io.EOF {
return nil
}
if err == io.EOF {
return nil
} else if err != nil {
return err
}
readTarHeader(tarReader, tarHeader, callback, traceLog)
}
}
if tarHeader.FileInfo().IsDir() || tarHeader.Typeflag == tar.TypeXGlobalHeader {
continue
}
func readTarHeader(tarReader *tar.Reader, tarHeader *tar.Header, callback func(request ParseRequest), trace observation.TraceLogger) error {
if !shouldParse(tarHeader) {
return nil
}
// 32KB is the same buffer size used by io.Copy
buffer := make([]byte, 32*1024)
trace.Log(log.Event("reading tar header prefix"))
// Read first chunk of tar header contents
n, err := tarReader.Read(buffer)
if err != nil && err != io.EOF {
return err
}
trace.Log(log.Int("n", n))
if n == 0 {
// Empty file, nothing to parse
return nil
}
// Check to see if first 256 bytes contain a 0x00. If so, we'll assume that
// the file is binary and skip parsing. Otherwise, we'll have some non-zero
// contents that passed our filters above to parse.
m := 256
if n < m {
m = n
}
if bytes.IndexByte(buffer[:m], 0x00) >= 0 {
return nil
}
// Copy buffer into appropriately-sized slice for return
data := make([]byte, int(tarHeader.Size))
copy(data, buffer[:n])
if n < int(tarHeader.Size) {
trace.Log(log.Event("reading remaining tar header content"))
// Read the remaining contents
if _, err := io.ReadFull(tarReader, data[n:]); err != nil {
data := make([]byte, int(tarHeader.Size))
traceLog.Log(log.Event("reading tar file contents"))
if _, err := io.ReadFull(tarReader, data); err != nil {
return err
}
trace.Log(log.Int("n", int(tarHeader.Size)-n))
traceLog.Log(log.Int("n", int(tarHeader.Size)))
callback(ParseRequest{Path: tarHeader.Name, Data: data})
}
request := ParseRequest{Path: tarHeader.Name, Data: data}
callback(request)
return nil
}
// maxFileSize (512KB) is the maximum size of files we attempt to parse.
const maxFileSize = 1 << 19
func shouldParse(tarHeader *tar.Header) bool {
// We do not search large files
if tarHeader.Size > maxFileSize {
return false
}
// We only care about files
if tarHeader.Typeflag != tar.TypeReg && tarHeader.Typeflag != tar.TypeRegA {
return false
}
// JSON files are symbol-less
if path.Ext(tarHeader.Name) == ".json" {
return false
}
return true
}

View File

@@ -7,8 +7,8 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
@@ -29,16 +29,10 @@ func TestRepositoryFetcher(t *testing.T) {
tarContents[name] = content
}
// JSON is ignored
tarContents["ignored.json"] = "{}"
// Large files are ignored
tarContents["payloads.txt"] = strings.Repeat("oversized load", maxFileSize)
gitserverClient := NewMockGitserverClient()
gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(tarContents))
repositoryFetcher := NewRepositoryFetcher(gitserverClient, 15, 1000, &observation.TestContext)
repositoryFetcher := NewRepositoryFetcher(gitserverClient, 1000, &observation.TestContext)
args := types.SearchArgs{Repo: api.RepoName("foo"), CommitID: api.CommitID("deadbeef")}
t.Run("all paths", func(t *testing.T) {

View File

@@ -4,7 +4,6 @@ import (
"bytes"
"context"
"io"
"strings"
"github.com/opentracing/opentracing-go/log"
@@ -47,7 +46,6 @@ func (c *gitserverClient) FetchTar(ctx context.Context, repo api.RepoName, commi
log.String("repo", string(repo)),
log.String("commit", string(commit)),
log.Int("paths", len(paths)),
log.String("paths", strings.Join(paths, ":")),
}})
defer endObservation(1, observation.Args{})

View File

@@ -16,15 +16,36 @@ export GOOS=linux
# go-sqlite3 depends on cgo. Without cgo, it will build but it'll throw an error at query time.
export CGO_ENABLED=1
# Ensure musl-gcc is available since we're building to run on Alpine, which uses musl.
if ! command -v musl-gcc >/dev/null; then
echo "musl-gcc not found, which is needed for cgo for go-sqlite3. Run 'apt-get install -y musl-tools'."
# Default CC to musl-gcc.
export CC="${CC:-musl-gcc}"
help() {
echo "You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine."
echo
echo " Linux: run 'apt-get install -y musl-tools'"
echo " macOS: download https://github.com/FiloSottile/homebrew-musl-cross/blob/6ee3329ee41231fe693306490f8e4d127c70e618/musl-cross.rb and run 'brew install ~/Downloads/musl-cross.rb'"
}
if ! command -v "$CC" >/dev/null; then
echo "$CC not found."
help
exit 1
fi
# Make sure this is a musl compiler.
case "$CC" in
*musl*)
;;
*)
echo "$CC doesn't look like a musl compiler."
help
exit 1
;;
esac
echo "--- go build"
pkg="github.com/sourcegraph/sourcegraph/cmd/symbols"
env CC=musl-gcc go build \
env go build \
-trimpath \
-ldflags "-X github.com/sourcegraph/sourcegraph/internal/version.version=$VERSION -X github.com/sourcegraph/sourcegraph/internal/version.timestamp=$(date +%s)" \
-buildmode exe \

View File

@@ -1,3 +1,3 @@
package api
//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go
//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver -i GitServerClient -o mock_iface_test.go

View File

@@ -9,86 +9,71 @@ import (
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
type apiHandler struct {
cachedDatabaseWriter writer.CachedDatabaseWriter
ctagsBinary string
operations *operations
}
func NewHandler(
cachedDatabaseWriter writer.CachedDatabaseWriter,
searchFunc types.SearchFunc,
handleStatus func(http.ResponseWriter, *http.Request),
ctagsBinary string,
observationContext *observation.Context,
) http.Handler {
h := newAPIHandler(cachedDatabaseWriter, ctagsBinary, observationContext)
mux := http.NewServeMux()
mux.HandleFunc("/search", h.handleSearch)
mux.HandleFunc("/healthz", h.handleHealthCheck)
mux.HandleFunc("/list-languages", h.handleListLanguages)
return mux
}
func newAPIHandler(
cachedDatabaseWriter writer.CachedDatabaseWriter,
ctagsBinary string,
observationContext *observation.Context,
) *apiHandler {
return &apiHandler{
cachedDatabaseWriter: cachedDatabaseWriter,
ctagsBinary: ctagsBinary,
operations: newOperations(observationContext),
mux.HandleFunc("/search", handleSearchWith(searchFunc))
mux.HandleFunc("/healthz", handleHealthCheck)
mux.HandleFunc("/list-languages", handleListLanguages(ctagsBinary))
if handleStatus != nil {
mux.HandleFunc("/status", handleStatus)
}
return mux
}
const maxNumSymbolResults = 500
func (h *apiHandler) handleSearch(w http.ResponseWriter, r *http.Request) {
var args types.SearchArgs
if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
if args.First < 0 || args.First > maxNumSymbolResults {
args.First = maxNumSymbolResults
}
result, err := h.handleSearchInternal(r.Context(), args)
if err != nil {
// Ignore reporting errors where client disconnected
if r.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) {
func handleSearchWith(searchFunc types.SearchFunc) func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
var args types.SearchArgs
if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
log15.Error("Symbol search failed", "args", args, "error", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if args.First < 0 || args.First > maxNumSymbolResults {
args.First = maxNumSymbolResults
}
if err := json.NewEncoder(w).Encode(result); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
result, err := searchFunc(r.Context(), args)
if err != nil {
// Ignore reporting errors where client disconnected
if r.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) {
return
}
log15.Error("Symbol search failed", "args", args, "error", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := json.NewEncoder(w).Encode(result); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
}
func (h *apiHandler) handleListLanguages(w http.ResponseWriter, r *http.Request) {
mapping, err := ctags.ListLanguageMappings(r.Context(), h.ctagsBinary)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := json.NewEncoder(w).Encode(mapping); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
func handleListLanguages(ctagsBinary string) func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
mapping, err := ctags.ListLanguageMappings(r.Context(), ctagsBinary)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := json.NewEncoder(w).Encode(mapping); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
}
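
With search logic injected as a `types.SearchFunc`, the handler no longer needs the SQLite writer at construction time. A minimal sketch of wiring a stub search function, assuming the file lives under `cmd/symbols` so the `internal/api` import is legal (the stub, address, and `main` wrapper are illustrative):

```go
package main

import (
	"context"
	"log"
	"net/http"

	"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
	"github.com/sourcegraph/sourcegraph/internal/search/result"
)

func main() {
	// Stub SearchFunc: returns no symbols for every query.
	searchFunc := types.SearchFunc(func(ctx context.Context, args types.SearchArgs) (result.Symbols, error) {
		return nil, nil
	})
	// A nil status handler is allowed; the ctags binary name is only
	// consulted by the /list-languages endpoint.
	handler := api.NewHandler(searchFunc, nil, "universal-ctags")
	log.Fatal(http.ListenAndServe(":3184", handler))
}
```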
func (h *apiHandler) handleHealthCheck(w http.ResponseWriter, r *http.Request) {
func handleHealthCheck(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
if _, err := w.Write([]byte("OK")); err != nil {

View File

@@ -9,12 +9,14 @@ import (
"time"
"github.com/sourcegraph/go-ctags"
"golang.org/x/sync/semaphore"
"github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
sharedobservability "github.com/sourcegraph/sourcegraph/cmd/symbols/observability"
"github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
"github.com/sourcegraph/sourcegraph/internal/observation"
@@ -50,10 +52,10 @@ func TestHandler(t *testing.T) {
gitserverClient := NewMockGitserverClient()
gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(files))
parser := parser.NewParser(parserPool, fetcher.NewRepositoryFetcher(gitserverClient, 15, 1000, &observation.TestContext), 0, 10, &observation.TestContext)
databaseWriter := writer.NewDatabaseWriter(tmpDir, gitserverClient, parser)
parser := parser.NewParser(parserPool, fetcher.NewRepositoryFetcher(gitserverClient, 1000, &observation.TestContext), 0, 10, &observation.TestContext)
databaseWriter := writer.NewDatabaseWriter(tmpDir, gitserverClient, parser, semaphore.NewWeighted(1))
cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
handler := NewHandler(cachedDatabaseWriter, "", &observation.TestContext)
handler := NewHandler(MakeSqliteSearchFunc(sharedobservability.NewOperations(&observation.TestContext), cachedDatabaseWriter), nil, "")
server := httptest.NewServer(handler)
defer server.Close()

View File

@@ -7,14 +7,14 @@ import (
"io"
"sync"
gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
api "github.com/sourcegraph/sourcegraph/internal/api"
)
// MockGitserverClient is a mock implementation of the GitserverClient
// interface (from the package
// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used
// for unit testing.
// github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver) used for unit
// testing.
type MockGitserverClient struct {
// FetchTarFunc is an instance of a mock function object controlling the
// behavior of the method FetchTar.

View File

@@ -1,59 +0,0 @@
package api
import (
"context"
"strings"
"time"
"github.com/opentracing/opentracing-go/log"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
const searchTimeout = 60 * time.Second
func (h *apiHandler) handleSearchInternal(ctx context.Context, args types.SearchArgs) (_ *result.Symbols, err error) {
ctx, trace, endObservation := h.operations.search.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
log.String("repo", string(args.Repo)),
log.String("commitID", string(args.CommitID)),
log.String("query", args.Query),
log.Bool("isRegExp", args.IsRegExp),
log.Bool("isCaseSensitive", args.IsCaseSensitive),
log.Int("numIncludePatterns", len(args.IncludePatterns)),
log.String("includePatterns", strings.Join(args.IncludePatterns, ":")),
log.String("excludePattern", args.ExcludePattern),
log.Int("first", args.First),
}})
defer func() {
endObservation(1, observation.Args{
MetricLabelValues: []string{observability.GetParseAmount(ctx)},
LogFields: []log.Field{log.String("parseAmount", observability.GetParseAmount(ctx))},
})
}()
ctx = observability.SeedParseAmount(ctx)
ctx, cancel := context.WithTimeout(ctx, searchTimeout)
defer cancel()
dbFile, err := h.cachedDatabaseWriter.GetOrCreateDatabaseFile(ctx, args)
if err != nil {
return nil, errors.Wrap(err, "databaseWriter.GetOrCreateDatabaseFile")
}
trace.Log(log.String("dbFile", dbFile))
var results result.Symbols
err = store.WithSQLiteStore(dbFile, func(db store.Store) (err error) {
if results, err = db.Search(ctx, args); err != nil {
return errors.Wrap(err, "store.Search")
}
return nil
})
return &results, err
}

View File

@@ -0,0 +1,64 @@
package api
import (
"context"
"strings"
"time"
"github.com/opentracing/opentracing-go/log"
"github.com/sourcegraph/sourcegraph/lib/errors"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
sharedobservability "github.com/sourcegraph/sourcegraph/cmd/symbols/observability"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)
const searchTimeout = 60 * time.Second
func MakeSqliteSearchFunc(operations *sharedobservability.Operations, cachedDatabaseWriter writer.CachedDatabaseWriter) types.SearchFunc {
return func(ctx context.Context, args types.SearchArgs) (results []result.Symbol, err error) {
ctx, trace, endObservation := operations.Search.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
log.String("repo", string(args.Repo)),
log.String("commitID", string(args.CommitID)),
log.String("query", args.Query),
log.Bool("isRegExp", args.IsRegExp),
log.Bool("isCaseSensitive", args.IsCaseSensitive),
log.Int("numIncludePatterns", len(args.IncludePatterns)),
log.String("includePatterns", strings.Join(args.IncludePatterns, ":")),
log.String("excludePattern", args.ExcludePattern),
log.Int("first", args.First),
}})
defer func() {
endObservation(1, observation.Args{
MetricLabelValues: []string{observability.GetParseAmount(ctx)},
LogFields: []log.Field{log.String("parseAmount", observability.GetParseAmount(ctx))},
})
}()
ctx = observability.SeedParseAmount(ctx)
ctx, cancel := context.WithTimeout(ctx, searchTimeout)
defer cancel()
dbFile, err := cachedDatabaseWriter.GetOrCreateDatabaseFile(ctx, args)
if err != nil {
return nil, errors.Wrap(err, "databaseWriter.GetOrCreateDatabaseFile")
}
trace.Log(log.String("dbFile", dbFile))
var res result.Symbols
err = store.WithSQLiteStore(dbFile, func(db store.Store) (err error) {
if res, err = db.Search(ctx, args); err != nil {
return errors.Wrap(err, "store.Search")
}
return nil
})
return res, err
}
}

View File

@@ -9,7 +9,7 @@ import (
"github.com/grafana/regexp/syntax"
"github.com/keegancsmith/sqlf"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/lib/errors"

View File

@@ -7,8 +7,8 @@ import (
"github.com/inconshreveable/log15"
"github.com/jmoiron/sqlx"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)

View File

@@ -7,7 +7,7 @@ import (
"github.com/keegancsmith/sqlf"
"golang.org/x/sync/errgroup"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
"github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
"github.com/sourcegraph/sourcegraph/internal/database/batch"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)

View File

@@ -5,7 +5,7 @@ import (
"fmt"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/lib/errors"
)

View File

@@ -4,11 +4,13 @@ import (
"context"
"path/filepath"
"golang.org/x/sync/semaphore"
"github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/lib/errors"
@@ -22,21 +24,27 @@ type databaseWriter struct {
path string
gitserverClient gitserver.GitserverClient
parser parser.Parser
sem *semaphore.Weighted
}
func NewDatabaseWriter(
path string,
gitserverClient gitserver.GitserverClient,
parser parser.Parser,
sem *semaphore.Weighted,
) DatabaseWriter {
return &databaseWriter{
path: path,
gitserverClient: gitserverClient,
parser: parser,
sem: sem,
}
}
func (w *databaseWriter) WriteDBFile(ctx context.Context, args types.SearchArgs, dbFile string) error {
if err := w.sem.Acquire(ctx, 1); err != nil {
return err
}
defer w.sem.Release(1)
if newestDBFile, oldCommit, ok, err := w.getNewestCommit(ctx, args); err != nil {
return err
} else if ok {

View File

@@ -1,3 +0,0 @@
package fetcher
//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go

View File

@@ -1,23 +0,0 @@
package parser
import (
"log"
"os"
"github.com/sourcegraph/go-ctags"
)
func NewCtagsParserFactory(ctagsCommand string, patternLengthLimit int, logErrors, debugLogs bool) ParserFactory {
options := ctags.Options{
Bin: ctagsCommand,
PatternLengthLimit: patternLengthLimit,
}
if logErrors {
options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
}
if debugLogs {
options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
}
return func() (ctags.Parser, error) { return ctags.New(options) }
}

View File

@@ -1,40 +0,0 @@
package types
import (
"github.com/sourcegraph/sourcegraph/internal/api"
)
// SearchArgs are the arguments to perform a search on the symbols service.
type SearchArgs struct {
// Repo is the name of the repository to search in.
Repo api.RepoName `json:"repo"`
// CommitID is the commit to search in.
CommitID api.CommitID `json:"commitID"`
// Query is the search query.
Query string
// IsRegExp, if true, treats the Query as a regular expression.
IsRegExp bool
// IsCaseSensitive, if false, ignores the case of the query and file
// patterns when finding matches.
IsCaseSensitive bool
// IncludePatterns is a list of regexes that symbols' file paths must
// match to be included in the results.
//
// The patterns are ANDed together; a file's path must match all patterns
// for it to be kept. That is also why it is a list (unlike the singular
// ExcludePattern); it is not possible in general to construct a single
// glob or Go regexp that represents multiple such patterns ANDed together.
IncludePatterns []string
// ExcludePattern is an optional regex; symbols whose file paths match it
// are excluded from the results.
ExcludePattern string
// First indicates that only the first n symbols should be returned.
First int
}

View File

@@ -3,128 +3,9 @@
package main
import (
"context"
"fmt"
"log"
"net/http"
"os"
"time"
"github.com/inconshreveable/log15"
"github.com/opentracing/opentracing-go"
"github.com/prometheus/client_golang/prometheus"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
sqlite "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/janitor"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
"github.com/sourcegraph/sourcegraph/internal/actor"
"github.com/sourcegraph/sourcegraph/internal/conf"
"github.com/sourcegraph/sourcegraph/internal/debugserver"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/internal/env"
"github.com/sourcegraph/sourcegraph/internal/goroutine"
"github.com/sourcegraph/sourcegraph/internal/honey"
"github.com/sourcegraph/sourcegraph/internal/httpserver"
"github.com/sourcegraph/sourcegraph/internal/logging"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/profiler"
"github.com/sourcegraph/sourcegraph/internal/sentry"
"github.com/sourcegraph/sourcegraph/internal/trace"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
"github.com/sourcegraph/sourcegraph/internal/tracer"
"github.com/sourcegraph/sourcegraph/cmd/symbols/shared"
)
const addr = ":3184"
func main() {
config.Load()
// Set up Google Cloud Profiler when running in Cloud
if err := profiler.Init(); err != nil {
log.Fatalf("Failed to start profiler: %v", err)
}
env.Lock()
env.HandleHelpFlag()
conf.Init()
logging.Init()
tracer.Init(conf.DefaultClient())
sentry.Init(conf.DefaultClient())
trace.Init()
if err := config.Validate(); err != nil {
log.Fatalf("Failed to load configuration: %s", err)
}
// Ensure we register our database driver before calling
// anything that tries to open a SQLite database.
sqlite.Init()
if config.sanityCheck {
fmt.Print("Running sanity check...")
if err := sqlite.SanityCheck(); err != nil {
fmt.Println("failed ❌", err)
os.Exit(1)
}
fmt.Println("passed ✅")
os.Exit(0)
}
// Initialize tracing/metrics
observationContext := &observation.Context{
Logger: log15.Root(),
Tracer: &trace.Tracer{Tracer: opentracing.GlobalTracer()},
Registerer: prometheus.DefaultRegisterer,
HoneyDataset: &honey.Dataset{
Name: "codeintel-symbols",
SampleRate: 5,
},
}
// Start debug server
ready := make(chan struct{})
go debugserver.NewServerRoutine(ready).Start()
ctagsParserFactory := parser.NewCtagsParserFactory(
config.ctagsCommand,
config.ctagsPatternLengthLimit,
config.ctagsLogErrors,
config.ctagsDebugLogs,
)
cache := diskcache.NewStore(config.cacheDir, "symbols",
diskcache.WithBackgroundTimeout(config.processingTimeout),
diskcache.WithObservationContext(observationContext),
)
parserPool, err := parser.NewParserPool(ctagsParserFactory, config.numCtagsProcesses)
if err != nil {
log.Fatalf("Failed to create parser pool: %s", err)
}
gitserverClient := gitserver.NewClient(observationContext)
repositoryFetcher := fetcher.NewRepositoryFetcher(gitserverClient, 15, config.maxTotalPathsLength, observationContext)
parser := parser.NewParser(parserPool, repositoryFetcher, config.requestBufferSize, config.numCtagsProcesses, observationContext)
databaseWriter := writer.NewDatabaseWriter(config.cacheDir, gitserverClient, parser)
cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
apiHandler := api.NewHandler(cachedDatabaseWriter, config.ctagsCommand, observationContext)
server := httpserver.NewFromAddr(addr, &http.Server{
ReadTimeout: 75 * time.Second,
WriteTimeout: 10 * time.Minute,
Handler: actor.HTTPMiddleware(ot.HTTPMiddleware(trace.HTTPMiddleware(apiHandler, conf.DefaultClient()))),
})
evictionInterval := time.Second * 10
cacheSizeBytes := int64(config.cacheSizeMB) * 1000 * 1000
cacheEvicter := janitor.NewCacheEvicter(evictionInterval, cache, cacheSizeBytes, janitor.NewMetrics(observationContext))
// Mark health server as ready and go!
close(ready)
goroutine.MonitorBackgroundRoutines(context.Background(), server, cacheEvicter)
shared.Main(shared.SetupSqlite)
}

View File

@@ -1,4 +1,4 @@
package api
package observability
import (
"fmt"
@@ -7,11 +7,11 @@ import (
"github.com/sourcegraph/sourcegraph/internal/observation"
)
type operations struct {
search *observation.Operation
type Operations struct {
Search *observation.Operation
}
func newOperations(observationContext *observation.Context) *operations {
func NewOperations(observationContext *observation.Context) *Operations {
metrics := metrics.NewREDMetrics(
observationContext.Registerer,
"codeintel_symbols_api",
@@ -28,7 +28,7 @@ func newOperations(observationContext *observation.Context) *operations {
})
}
return &operations{
search: op("Search"),
return &Operations{
Search: op("Search"),
}
}

View File

@@ -0,0 +1,51 @@
package parser
import (
"bytes"
"github.com/sourcegraph/go-ctags"
)
type FilteringParser struct {
parser ctags.Parser
maxFileSize int
maxSymbols int
}
func NewFilteringParser(parser ctags.Parser, maxFileSize int, maxSymbols int) ctags.Parser {
return &FilteringParser{
parser: parser,
maxFileSize: maxFileSize,
maxSymbols: maxSymbols,
}
}
func (p *FilteringParser) Parse(path string, content []byte) ([]*ctags.Entry, error) {
if len(content) > p.maxFileSize {
// File is larger than maxFileSize, don't parse it
return nil, nil
}
// Check to see if first 256 bytes contain a 0x00. If so, we'll assume that
// the file is binary and skip parsing. Otherwise, we'll have some non-zero
// contents that passed our filters above to parse.
if bytes.IndexByte(content[:min(len(content), 256)], 0x00) >= 0 {
return nil, nil
}
entries, err := p.parser.Parse(path, content)
if err != nil {
return nil, err
}
if len(entries) > p.maxSymbols {
// File has too many symbols, don't return any of them
return nil, nil
}
return entries, nil
}
func (p *FilteringParser) Close() {
p.parser.Close()
}
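
FilteringParser centralizes the size, binary-content, and symbol-count filters that previously lived in the fetcher's tar reader. A usage sketch, assuming universal-ctags is on PATH; 524288 and 2000 are the CTAGS_MAX_FILE_SIZE and CTAGS_MAX_SYMBOLS defaults from types.go below:

```go
package main

import (
	"fmt"
	"log"

	"github.com/sourcegraph/go-ctags"

	"github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
)

func main() {
	raw, err := ctags.New(ctags.Options{Bin: "universal-ctags"})
	if err != nil {
		log.Fatal(err)
	}
	// Wrap the raw parser with the defaults from types.go.
	p := parser.NewFilteringParser(raw, 524288, 2000)
	defer p.Close()

	entries, err := p.Parse("main.go", []byte("package main\nfunc main() {}\n"))
	if err != nil {
		log.Fatal(err)
	}
	for _, e := range entries {
		fmt.Printf("%s:%d\n", e.Name, e.Line)
	}
}
```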

View File

@@ -10,8 +10,8 @@ import (
"github.com/opentracing/opentracing-go/log"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/lib/errors"
@@ -123,6 +123,13 @@ func (p *parser) Parse(ctx context.Context, args types.SearchArgs, paths []strin
return symbolOrErrors, nil
}
func min(a, b int) int {
if a < b {
return a
}
return b
}
func (p *parser) handleParseRequest(ctx context.Context, symbolOrErrors chan<- SymbolOrError, parseRequest fetcher.ParseRequest, totalSymbols *uint32) (err error) {
ctx, trace, endObservation := p.operations.handleParseRequest.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
log.String("path", parseRequest.Path),

View File

@@ -0,0 +1,31 @@
package parser
import (
"log"
"os"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
)
func NewCtagsParserFactory(config types.CtagsConfig) ParserFactory {
options := ctags.Options{
Bin: config.Command,
PatternLengthLimit: config.PatternLengthLimit,
}
if config.LogErrors {
options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
}
if config.DebugLogs {
options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
}
return func() (ctags.Parser, error) {
parser, err := ctags.New(options)
if err != nil {
return nil, err
}
return NewFilteringParser(parser, config.MaxFileSize, config.MaxSymbols), nil
}
}

View File

@@ -8,6 +8,8 @@ import (
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
)
func TestCtagsParser(t *testing.T) {
@@ -16,7 +18,7 @@ func TestCtagsParser(t *testing.T) {
t.Skip("command not in PATH: universal-ctags")
}
p, err := NewCtagsParserFactory("universal-ctags", 250, false, false)()
p, err := NewCtagsParserFactory(types.CtagsConfig{Command: "universal-ctags", PatternLengthLimit: 250})()
if err != nil {
t.Fatal(err)
}

View File

@@ -0,0 +1,87 @@
package shared
import (
"context"
"log"
"net/http"
"time"
"github.com/inconshreveable/log15"
"github.com/opentracing/opentracing-go"
"github.com/prometheus/client_golang/prometheus"
"github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/actor"
"github.com/sourcegraph/sourcegraph/internal/conf"
"github.com/sourcegraph/sourcegraph/internal/debugserver"
"github.com/sourcegraph/sourcegraph/internal/env"
"github.com/sourcegraph/sourcegraph/internal/goroutine"
"github.com/sourcegraph/sourcegraph/internal/honey"
"github.com/sourcegraph/sourcegraph/internal/httpserver"
"github.com/sourcegraph/sourcegraph/internal/logging"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/profiler"
"github.com/sourcegraph/sourcegraph/internal/sentry"
"github.com/sourcegraph/sourcegraph/internal/trace"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
"github.com/sourcegraph/sourcegraph/internal/tracer"
)
const addr = ":3184"
type SetupFunc func(observationContext *observation.Context, gitserverClient gitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error)
func Main(setup SetupFunc) {
routines := []goroutine.BackgroundRoutine{}
// Set up Google Cloud Profiler when running in Cloud
if err := profiler.Init(); err != nil {
log.Fatalf("Failed to start profiler: %v", err)
}
// Initialize tracing/metrics
observationContext := &observation.Context{
Logger: log15.Root(),
Tracer: &trace.Tracer{Tracer: opentracing.GlobalTracer()},
Registerer: prometheus.DefaultRegisterer,
HoneyDataset: &honey.Dataset{
Name: "codeintel-symbols",
SampleRate: 5,
},
}
// Run setup
gitserverClient := gitserver.NewClient(observationContext)
repositoryFetcher := fetcher.NewRepositoryFetcher(gitserverClient, types.LoadRepositoryFetcherConfig(env.BaseConfig{}).MaxTotalPathsLength, observationContext)
searchFunc, handleStatus, newRoutines, ctagsBinary, err := setup(observationContext, gitserverClient, repositoryFetcher)
if err != nil {
log.Fatalf("Failed to setup: %v", err)
}
routines = append(routines, newRoutines...)
// Initialization
env.HandleHelpFlag()
conf.Init()
logging.Init()
tracer.Init(conf.DefaultClient())
sentry.Init(conf.DefaultClient())
trace.Init()
// Start debug server
ready := make(chan struct{})
go debugserver.NewServerRoutine(ready).Start()
// Create HTTP server
server := httpserver.NewFromAddr(addr, &http.Server{
ReadTimeout: 75 * time.Second,
WriteTimeout: 10 * time.Minute,
Handler: actor.HTTPMiddleware(ot.HTTPMiddleware(trace.HTTPMiddleware(api.NewHandler(searchFunc, handleStatus, ctagsBinary), conf.DefaultClient()))),
})
routines = append(routines, server)
// Mark health server as ready and go!
close(ready)
goroutine.MonitorBackgroundRoutines(context.Background(), routines...)
}
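
Main is parameterized by a SetupFunc, which is what lets the enterprise build substitute the Rockskip backend while the OSS build passes SetupSqlite (shown in the next file). A minimal stub that satisfies the signature (behavior and names illustrative):

```go
package main

import (
	"context"
	"net/http"

	"github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/shared"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
	"github.com/sourcegraph/sourcegraph/internal/goroutine"
	"github.com/sourcegraph/sourcegraph/internal/observation"
	"github.com/sourcegraph/sourcegraph/internal/search/result"
)

// setupNoop is an illustrative SetupFunc that serves an empty index.
func setupNoop(
	observationContext *observation.Context,
	gitserverClient gitserver.GitserverClient,
	repositoryFetcher fetcher.RepositoryFetcher,
) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
	searchFunc := func(ctx context.Context, args types.SearchArgs) (result.Symbols, error) {
		return nil, nil // no status handler, no background routines
	}
	return searchFunc, nil, nil, "universal-ctags", nil
}

func main() {
	shared.Main(setupNoop)
}
```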

View File

@@ -0,0 +1,71 @@
package shared
import (
"fmt"
"log"
"net/http"
"os"
"time"
"golang.org/x/sync/semaphore"
"github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
sqlite "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/janitor"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
"github.com/sourcegraph/sourcegraph/cmd/symbols/observability"
"github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/internal/env"
"github.com/sourcegraph/sourcegraph/internal/goroutine"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
func SetupSqlite(observationContext *observation.Context, gitserverClient gitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
baseConfig := env.BaseConfig{}
config := types.LoadSqliteConfig(baseConfig)
if err := baseConfig.Validate(); err != nil {
log.Fatalf("Failed to load configuration: %s", err)
}
// Ensure we register our database driver before calling
// anything that tries to open a SQLite database.
sqlite.Init()
if config.SanityCheck {
fmt.Print("Running sanity check...")
if err := sqlite.SanityCheck(); err != nil {
fmt.Println("failed ❌", err)
os.Exit(1)
}
fmt.Println("passed ✅")
os.Exit(0)
}
ctagsParserFactory := parser.NewCtagsParserFactory(config.Ctags)
parserPool, err := parser.NewParserPool(ctagsParserFactory, config.NumCtagsProcesses)
if err != nil {
log.Fatalf("Failed to create parser pool: %s", err)
}
cache := diskcache.NewStore(config.CacheDir, "symbols",
diskcache.WithBackgroundTimeout(config.ProcessingTimeout),
diskcache.WithObservationContext(observationContext),
)
parser := parser.NewParser(parserPool, repositoryFetcher, config.RequestBufferSize, config.NumCtagsProcesses, observationContext)
databaseWriter := writer.NewDatabaseWriter(config.CacheDir, gitserverClient, parser, semaphore.NewWeighted(int64(config.MaxConcurrentlyIndexing)))
cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
searchFunc := api.MakeSqliteSearchFunc(observability.NewOperations(observationContext), cachedDatabaseWriter)
evictionInterval := time.Second * 10
cacheSizeBytes := int64(config.CacheSizeMB) * 1000 * 1000
cacheEvicter := janitor.NewCacheEvicter(evictionInterval, cache, cacheSizeBytes, janitor.NewMetrics(observationContext))
return searchFunc, nil, []goroutine.BackgroundRoutine{cacheEvicter}, config.Ctags.Command, nil
}

cmd/symbols/types/types.go (new file, 122 lines)
View File

@@ -0,0 +1,122 @@
package types
import (
"context"
"os"
"runtime"
"strconv"
"time"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/internal/env"
)
type SqliteConfig struct {
SanityCheck bool
CacheDir string
CacheSizeMB int
NumCtagsProcesses int
RequestBufferSize int
ProcessingTimeout time.Duration
Ctags CtagsConfig
RepositoryFetcher RepositoryFetcherConfig
MaxConcurrentlyIndexing int
}
func LoadSqliteConfig(baseConfig env.BaseConfig) SqliteConfig {
return SqliteConfig{
Ctags: LoadCtagsConfig(baseConfig),
RepositoryFetcher: LoadRepositoryFetcherConfig(baseConfig),
SanityCheck: baseConfig.GetBool("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not"),
CacheDir: baseConfig.Get("CACHE_DIR", "/tmp/symbols-cache", "directory in which to store cached symbols"),
CacheSizeMB: baseConfig.GetInt("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache (in megabytes)"),
NumCtagsProcesses: baseConfig.GetInt("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of concurrent parser processes to run"),
RequestBufferSize: baseConfig.GetInt("REQUEST_BUFFER_SIZE", "8192", "maximum size of buffered parser request channel"),
ProcessingTimeout: baseConfig.GetInterval("PROCESSING_TIMEOUT", "2h", "maximum time to spend processing a repository"),
MaxConcurrentlyIndexing: baseConfig.GetInt("MAX_CONCURRENTLY_INDEXING", "10", "maximum number of repositories to index at a time"),
}
}
type CtagsConfig struct {
Command string
PatternLengthLimit int
LogErrors bool
DebugLogs bool
MaxFileSize int
MaxSymbols int
}
func LoadCtagsConfig(baseConfig env.BaseConfig) CtagsConfig {
logCtagsErrorsDefault := "false"
if os.Getenv("DEPLOY_TYPE") == "dev" {
logCtagsErrorsDefault = "true"
}
return CtagsConfig{
Command: baseConfig.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)"),
PatternLengthLimit: baseConfig.GetInt("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags"),
LogErrors: baseConfig.GetBool("LOG_CTAGS_ERRORS", logCtagsErrorsDefault, "log ctags errors"),
DebugLogs: false,
MaxFileSize: baseConfig.GetInt("CTAGS_MAX_FILE_SIZE", "524288", "skip files larger than this size (in bytes)"),
MaxSymbols: baseConfig.GetInt("CTAGS_MAX_SYMBOLS", "2000", "skip files with more than this many symbols"),
}
}
type RepositoryFetcherConfig struct {
// The maximum sum of lengths of all paths in a single call to git archive. Without this limit, we
// could hit the error "argument list too long" by exceeding the limit on the number of arguments to
// a command enforced by the OS.
//
// Mac : getconf ARG_MAX returns 1,048,576
// Linux: getconf ARG_MAX returns 2,097,152
//
// We want to remain well under that limit, so defaulting to 100,000 seems safe (see the
// MAX_TOTAL_PATHS_LENGTH environment variable below).
MaxTotalPathsLength int
}
func LoadRepositoryFetcherConfig(baseConfig env.BaseConfig) RepositoryFetcherConfig {
return RepositoryFetcherConfig{
MaxTotalPathsLength: baseConfig.GetInt("MAX_TOTAL_PATHS_LENGTH", "100000", "maximum sum of lengths of all paths in a single call to git archive"),
}
}
type SearchFunc func(ctx context.Context, args SearchArgs) (results result.Symbols, err error)
// SearchArgs are the arguments to perform a search on the symbols service.
type SearchArgs struct {
// Repo is the name of the repository to search in.
Repo api.RepoName `json:"repo"`
// CommitID is the commit to search in.
CommitID api.CommitID `json:"commitID"`
// Query is the search query.
Query string
// IsRegExp, if true, treats the Query as a regular expression.
IsRegExp bool
// IsCaseSensitive, if false, ignores the case of the query and file
// patterns when finding matches.
IsCaseSensitive bool
// IncludePatterns is a list of regexes that symbols' file paths must
// match to be included in the results.
//
// The patterns are ANDed together; a file's path must match all patterns
// for it to be kept. That is also why it is a list (unlike the singular
// ExcludePattern); it is not possible in general to construct a single
// glob or Go regexp that represents multiple such patterns ANDed together.
IncludePatterns []string
// ExcludePattern is an optional regex; symbols whose file paths match it
// are excluded from the results.
ExcludePattern string
// First indicates that only the first n symbols should be returned.
First int
}
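
Since the /search handler decodes SearchArgs directly from the request body, a client call is a small JSON POST. A hedged sketch against a locally running symbols service (the URL, repo, and query values are illustrative):

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"

	"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
	"github.com/sourcegraph/sourcegraph/internal/api"
)

func main() {
	args := types.SearchArgs{
		Repo:     api.RepoName("github.com/example/repo"),
		CommitID: api.CommitID("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"),
		Query:    "NewRepositoryFetcher",
		First:    10,
	}
	body, err := json.Marshal(args)
	if err != nil {
		log.Fatal(err)
	}
	// POST to the symbols service's /search endpoint (default addr :3184).
	resp, err := http.Post("http://localhost:3184/search", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status)
}
```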

View File

@@ -20,6 +20,7 @@ allowed_prefix=(
github.com/sourcegraph/sourcegraph/enterprise/cmd/worker
github.com/sourcegraph/sourcegraph/enterprise/cmd/repo-updater
github.com/sourcegraph/sourcegraph/enterprise/cmd/precise-code-intel-
github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols
# Doesn't connect but uses db internals for use with sqlite
github.com/sourcegraph/sourcegraph/cmd/symbols
# Transitively depends on zoekt package which imports but does not use DB

View File

@@ -57,6 +57,7 @@ var (
"lsif_data_apidocs_num_pages",
"lsif_data_apidocs_num_search_results_private",
"lsif_data_apidocs_num_search_results_public",
"rockskip_ancestry",
},
FS: getFSForPath("codeintel"),
}

View File

@@ -93,8 +93,8 @@ The run type for tags starting with `v`.
Default pipeline:
- **Pipeline setup**: Trigger async
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite
- **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint
- **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build
@@ -102,7 +102,7 @@ Default pipeline:
- **CI script tests**: test-trace-command.sh
- **Integration tests**: Backend integration tests, Code Intel QA
- **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server
- Upload build trace
### Release branch
@@ -112,8 +112,8 @@ The run type for branches matching `^[0-9]+\.[0-9]+$` (regexp match).
Default pipeline:
- **Pipeline setup**: Trigger async
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite
- **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint
- **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build
@@ -121,7 +121,7 @@ Default pipeline:
- **CI script tests**: test-trace-command.sh
- **Integration tests**: Backend integration tests, Code Intel QA
- **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image
- Upload build trace
### Browser extension release build
@@ -149,8 +149,8 @@ The run type for branches matching `main` (exact match).
Default pipeline:
- **Pipeline setup**: Trigger async
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite
- **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint
- **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build
@ -158,7 +158,7 @@ Default pipeline:
- **CI script tests**: test-trace-command.sh
- **Integration tests**: Backend integration tests, Code Intel QA
- **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image
- Upload build trace
### Main dry run
@ -173,8 +173,8 @@ sg ci build main-dry-run
Default pipeline:
- **Pipeline setup**: Trigger async
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite
- **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint
- **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build
@ -182,7 +182,7 @@ Default pipeline:
- **CI script tests**: test-trace-command.sh
- **Integration tests**: Backend integration tests, Code Intel QA
- **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server
- Upload build trace
### Patch image
@ -219,6 +219,7 @@ Default pipeline:
- Build cadvisor
- Build codeinsights-db
- Build codeintel-db
- Build enterprise-symbols
- Build frontend
- Build github-proxy
- Build gitserver

View File

@ -6,3 +6,4 @@ Guidance and documentation about writing database interactions within the Source
- High-performance guides
- [Batch operations](batch_operations.md)
- [Materialized cache](materialized_cache.md)
- [Locking behavior](locking_behavior.md)

View File

@ -0,0 +1,31 @@
# Locking behavior
When you use [advisory locks](https://www.postgresql.org/docs/9.1/functions-admin.html#FUNCTIONS-ADVISORY-LOCKS) in Postgres, lock acquisitions stack (they are reentrant) when executed on the same connection (a.k.a. session):
- Connection 1 calls `pg_advisory_lock(42)`, acquires the lock and continues
- Connection 1 calls `pg_advisory_lock(42)`, this lock "stacks" with the previous call and continues
- Connection 2 calls `pg_advisory_lock(42)`, this blocks
- Connection 1 calls `pg_advisory_unlock(42)`, this pops one lock call off the stack and continues
- Connection 1 calls `pg_advisory_unlock(42)`, this pops the last lock call off the stack and continues
- Connection 2 finally acquires the lock and continues
If you get connections from a pool (e.g. the standard `sql` library in Go maintains an internal pool of connections), you need to be aware of this locking behavior; otherwise you might get unpredictable behavior or deadlocks. You can get deterministic behavior by explicitly taking a single connection out of the pool (e.g. with `db.Conn()`).
Here's an example of bad code that can deadlock if the connection happens to be different across lock calls: ❌
```go
// Grab a write lock
db.Exec("SELECT pg_advisory_lock(1)")
// Grab a read lock
db.Exec("SELECT pg_advisory_lock_shared(1)") // 💥 Can deadlock
```
Good code explicitly takes a connection out of the pool first ✅
```go
conn, err := db.Conn(ctx)
if err != nil {
	// handle error
}
defer conn.Close()
// Grab a write lock
conn.ExecContext(ctx, "SELECT pg_advisory_lock(1)")
// Grab a read lock
conn.ExecContext(ctx, "SELECT pg_advisory_lock_shared(1)") // OK, will not block
```
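Note that returning a connection to the pool does **not** end the underlying session, so any advisory locks still held on it leak to the next user of that connection. Here's a minimal sketch (the function name `withLocks` is just for illustration; it assumes a `ctx` and an open `*sql.DB`) that pairs each lock with an explicit unlock:
```go
func withLocks(ctx context.Context, db *sql.DB) error {
	conn, err := db.Conn(ctx)
	if err != nil {
		return err
	}
	// Close returns the connection to the pool; it does NOT end the session,
	// so each lock below is paired with an explicit unlock.
	defer conn.Close()

	if _, err := conn.ExecContext(ctx, "SELECT pg_advisory_lock(1)"); err != nil {
		return err
	}
	defer conn.ExecContext(ctx, "SELECT pg_advisory_unlock(1)")

	// This stacks with the write lock because it runs on the same connection.
	if _, err := conn.ExecContext(ctx, "SELECT pg_advisory_lock_shared(1)"); err != nil {
		return err
	}
	defer conn.ExecContext(ctx, "SELECT pg_advisory_unlock_shared(1)")

	// ... do work while holding the locks ...
	return nil
}
```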

View File

@ -0,0 +1,35 @@
# NOTE: This layer of the docker image is also used in local development as a wrapper around universal-ctags
FROM sourcegraph/alpine-3.12:120059_2021-12-09_b34c7b2@sha256:9a1fde12f56fea02027cf4caeebdddfedb7b73bf8db6c16f7907a6e04a29134c AS ctags
# hadolint ignore=DL3002
USER root
COPY ctags-install-alpine.sh /ctags-install-alpine.sh
RUN /ctags-install-alpine.sh
FROM sourcegraph/alpine-3.12:120059_2021-12-09_b34c7b2@sha256:9a1fde12f56fea02027cf4caeebdddfedb7b73bf8db6c16f7907a6e04a29134c AS symbols
# TODO(security): This container should not run as root!
#
# See https://github.com/sourcegraph/sourcegraph/issues/13237
# hadolint ignore=DL3002
USER root
ARG COMMIT_SHA="unknown"
ARG DATE="unknown"
ARG VERSION="unknown"
LABEL org.opencontainers.image.revision=${COMMIT_SHA}
LABEL org.opencontainers.image.created=${DATE}
LABEL org.opencontainers.image.version=${VERSION}
LABEL com.sourcegraph.github.url=https://github.com/sourcegraph/sourcegraph/commit/${COMMIT_SHA}
RUN apk add --no-cache bind-tools ca-certificates mailcap tini
COPY ctags-install-alpine.sh /ctags-install-alpine.sh
RUN /ctags-install-alpine.sh
ENV CACHE_DIR=/mnt/cache/enterprise-symbols
RUN mkdir -p ${CACHE_DIR}
EXPOSE 3184
ENTRYPOINT ["/sbin/tini", "--", "/usr/local/bin/enterprise-symbols"]
COPY enterprise-symbols /usr/local/bin/

enterprise/cmd/symbols/build.sh Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env bash
# This script builds the symbols docker image.
cd "$(dirname "${BASH_SOURCE[0]}")/../../.."
set -eu
OUTPUT=$(mktemp -d -t sgdockerbuild_XXXXXXX)
cleanup() {
rm -rf "$OUTPUT"
}
trap cleanup EXIT
cp -a ./cmd/symbols/ctags-install-alpine.sh "$OUTPUT"
# Build go binary into $OUTPUT
./enterprise/cmd/symbols/go-build.sh "$OUTPUT"
echo "--- docker build"
docker build -f enterprise/cmd/symbols/Dockerfile -t "$IMAGE" "$OUTPUT" \
--progress=plain \
--build-arg COMMIT_SHA \
--build-arg DATE \
--build-arg VERSION

View File

@ -0,0 +1,60 @@
#!/usr/bin/env bash
# This script builds the symbols go binary.
# Requires a single argument which is the path to the target bindir.
cd "$(dirname "${BASH_SOURCE[0]}")/../../.."
set -eu
OUTPUT="${1:?no output path provided}"
# Environment for building linux binaries
export GO111MODULE=on
export GOARCH=amd64
export GOOS=linux
# go-sqlite3 depends on cgo. Without cgo, it will build but it'll throw an error at query time.
export CGO_ENABLED=1
# Default CC to musl-gcc.
export CC="${CC:-musl-gcc}"
if ! command -v "$CC" >/dev/null; then
echo "$CC not found. You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine. Run 'apt-get install -y musl-tools'."
exit 1
fi
# Make sure this is a musl compiler.
case "$CC" in
*musl*)
;;
*)
echo "$CC doesn't look like a musl compiler. You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine. Run 'apt-get install -y musl-tools'."
exit 1
;;
esac
echo "--- go build"
pkg="github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols"
env go build \
-trimpath \
-ldflags "-X github.com/sourcegraph/sourcegraph/internal/version.version=$VERSION -X github.com/sourcegraph/sourcegraph/internal/version.timestamp=$(date +%s)" \
-buildmode exe \
-tags dist \
-o "$OUTPUT/enterprise-$(basename $pkg)" \
"$pkg"
# We can't use -v because the spawned container might not share
# the same file system (e.g. when we're already inside docker
# and the spawned docker container will be a sibling on the host).
#
# A workaround is to feed the file into the container via stdin:
#
# 'cat FILE | docker run ... -i ... sh -c "cat > FILE && ..."'
echo "--- sanity check"
# shellcheck disable=SC2002
cat "$OUTPUT/enterprise-$(basename $pkg)" | docker run \
--rm \
-i \
sourcegraph/alpine@sha256:ce099fbcd3cf70b338fc4cb2a4e1fa9ae847de21afdb0a849a393b87d94fb174 \
sh -c "cat > /enterprise-symbols && chmod a+x /enterprise-symbols && env SANITY_CHECK=true /enterprise-symbols"

View File

@ -0,0 +1,241 @@
package main
import (
"context"
"database/sql"
"log"
"net/http"
"os"
"strings"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
symbolsGitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
symbolsParser "github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
"github.com/sourcegraph/sourcegraph/cmd/symbols/shared"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/enterprise/internal/rockskip"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/conf"
"github.com/sourcegraph/sourcegraph/internal/conf/conftypes"
connections "github.com/sourcegraph/sourcegraph/internal/database/connections/live"
"github.com/sourcegraph/sourcegraph/internal/env"
gitserver "github.com/sourcegraph/sourcegraph/internal/gitserver"
"github.com/sourcegraph/sourcegraph/internal/goroutine"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
func main() {
reposVar := env.Get("ROCKSKIP_REPOS", "", "comma separated list of repositories to index (e.g. `github.com/torvalds/linux,github.com/pallets/flask`)")
repos := strings.Split(reposVar, ",")
if env.Get("USE_ROCKSKIP", "false", "use Rockskip to index the repos specified in ROCKSKIP_REPOS") == "true" {
shared.Main(func(observationContext *observation.Context, gitserverClient symbolsGitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
rockskipSearchFunc, rockskipHandleStatus, rockskipBackgroundRoutines, rockskipCtagsCommand, err := SetupRockskip(observationContext, gitserverClient, repositoryFetcher)
if err != nil {
return nil, nil, nil, "", err
}
// The blanks are the SQLite status endpoint (it's always nil) and the ctags command (same as
// Rockskip's).
sqliteSearchFunc, _, sqliteBackgroundRoutines, _, err := shared.SetupSqlite(observationContext, gitserverClient, repositoryFetcher)
if err != nil {
return nil, nil, nil, "", err
}
searchFunc := func(ctx context.Context, args types.SearchArgs) (results result.Symbols, err error) {
if sliceContains(repos, string(args.Repo)) {
return rockskipSearchFunc(ctx, args)
} else {
return sqliteSearchFunc(ctx, args)
}
}
return searchFunc, rockskipHandleStatus, append(rockskipBackgroundRoutines, sqliteBackgroundRoutines...), rockskipCtagsCommand, nil
})
} else {
shared.Main(shared.SetupSqlite)
}
}
func SetupRockskip(observationContext *observation.Context, gitserverClient symbolsGitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
baseConfig := env.BaseConfig{}
config := LoadRockskipConfig(baseConfig)
if err := baseConfig.Validate(); err != nil {
log.Fatalf("Failed to load configuration: %s", err)
}
db := mustInitializeCodeIntelDB()
git := NewGitserver(repositoryFetcher)
createParser := func() rockskip.ParseSymbolsFunc { return createParserWithConfig(config.Ctags) }
server, err := rockskip.NewService(db, git, createParser, config.MaxConcurrentlyIndexing, config.MaxRepos, config.LogQueries, config.IndexRequestsQueueSize, config.SymbolsCacheSize, config.PathSymbolsCacheSize)
if err != nil {
return nil, nil, nil, config.Ctags.Command, err
}
return server.Search, server.HandleStatus, nil, config.Ctags.Command, nil
}
type RockskipConfig struct {
Ctags types.CtagsConfig
RepositoryFetcher types.RepositoryFetcherConfig
MaxRepos int
LogQueries bool
IndexRequestsQueueSize int
MaxConcurrentlyIndexing int
SymbolsCacheSize int
PathSymbolsCacheSize int
}
func LoadRockskipConfig(baseConfig env.BaseConfig) RockskipConfig {
return RockskipConfig{
Ctags: types.LoadCtagsConfig(baseConfig),
RepositoryFetcher: types.LoadRepositoryFetcherConfig(baseConfig),
MaxRepos: baseConfig.GetInt("MAX_REPOS", "1000", "maximum number of repositories to store in Postgres, with LRU eviction"),
LogQueries: baseConfig.GetBool("LOG_QUERIES", "false", "print search queries to stdout"),
IndexRequestsQueueSize: baseConfig.GetInt("INDEX_REQUESTS_QUEUE_SIZE", "1000", "how many index requests can be queued at once, at which point new requests will be rejected"),
MaxConcurrentlyIndexing: baseConfig.GetInt("MAX_CONCURRENTLY_INDEXING", "4", "maximum number of repositories being indexed at a time (also limits ctags processes)"),
SymbolsCacheSize: baseConfig.GetInt("SYMBOLS_CACHE_SIZE", "1000000", "how many tuples of (path, symbol name, int ID) to cache in memory"),
PathSymbolsCacheSize: baseConfig.GetInt("PATH_SYMBOLS_CACHE_SIZE", "100000", "how many sets of symbols for files to cache in memory"),
}
}
func createParserWithConfig(config types.CtagsConfig) rockskip.ParseSymbolsFunc {
parser := mustCreateCtagsParser(config)
return func(path string, bytes []byte) (symbols []rockskip.Symbol, err error) {
entries, err := parser.Parse(path, bytes)
if err != nil {
return nil, err
}
symbols = []rockskip.Symbol{}
for _, entry := range entries {
symbols = append(symbols, rockskip.Symbol{
Name: entry.Name,
Parent: entry.Parent,
Kind: entry.Kind,
Line: entry.Line,
})
}
return symbols, nil
}
}
func mustCreateCtagsParser(ctagsConfig types.CtagsConfig) ctags.Parser {
options := ctags.Options{
Bin: ctagsConfig.Command,
PatternLengthLimit: ctagsConfig.PatternLengthLimit,
}
if ctagsConfig.LogErrors {
options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
}
if ctagsConfig.DebugLogs {
options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
}
parser, err := ctags.New(options)
if err != nil {
log.Fatalf("Failed to create new ctags parser: %s", err)
}
return symbolsParser.NewFilteringParser(parser, ctagsConfig.MaxFileSize, ctagsConfig.MaxSymbols)
}
func mustInitializeCodeIntelDB() *sql.DB {
dsn := conf.GetServiceConnectionValueAndRestartOnChange(func(serviceConnections conftypes.ServiceConnections) string {
return serviceConnections.CodeIntelPostgresDSN
})
var (
db *sql.DB
err error
)
db, err = connections.EnsureNewCodeIntelDB(dsn, "symbols", &observation.TestContext)
if err != nil {
log.Fatalf("Failed to connect to codeintel database: %s", err)
}
return db
}
type Gitserver struct {
repositoryFetcher fetcher.RepositoryFetcher
}
func NewGitserver(repositoryFetcher fetcher.RepositoryFetcher) Gitserver {
return Gitserver{repositoryFetcher: repositoryFetcher}
}
func (g Gitserver) LogReverseEach(repo string, commit string, n int, onLogEntry func(entry rockskip.LogEntry) error) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
command := gitserver.DefaultClient.Command("git", rockskip.LogReverseArgs(n, commit)...)
command.Repo = api.RepoName(repo)
// We run a single `git log` command and stream the output while the repo is being processed, which
// can take much longer than 1 minute (the default timeout).
command.DisableTimeout()
stdout, err := gitserver.StdoutReader(ctx, command)
if err != nil {
return err
}
defer stdout.Close()
return errors.Wrap(rockskip.ParseLogReverseEach(stdout, onLogEntry), "ParseLogReverseEach")
}
func (g Gitserver) RevListEach(repo string, commit string, onCommit func(commit string) (shouldContinue bool, err error)) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
command := gitserver.DefaultClient.Command("git", rockskip.RevListArgs(commit)...)
command.Repo = api.RepoName(repo)
command.DisableTimeout()
stdout, err := gitserver.StdoutReader(ctx, command)
if err != nil {
return err
}
defer stdout.Close()
return rockskip.RevListEach(stdout, onCommit)
}
func (g Gitserver) ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error {
if len(paths) == 0 {
return nil
}
args := types.SearchArgs{Repo: api.RepoName(repo), CommitID: api.CommitID(commit)}
parseRequestOrErrors := g.repositoryFetcher.FetchRepositoryArchive(context.TODO(), args, paths)
defer func() {
// Ensure the channel is drained
for range parseRequestOrErrors {
}
}()
for parseRequestOrError := range parseRequestOrErrors {
if parseRequestOrError.Err != nil {
return errors.Wrap(parseRequestOrError.Err, "FetchRepositoryArchive")
}
err := onFile(parseRequestOrError.ParseRequest.Path, parseRequestOrError.ParseRequest.Data)
if err != nil {
return err
}
}
return nil
}
func sliceContains(slice []string, s string) bool {
for _, v := range slice {
if v == s {
return true
}
}
return false
}

View File

@ -61,6 +61,7 @@ var DeploySourcegraphDockerImages = []string{
"cadvisor",
"codeinsights-db",
"codeintel-db",
"enterprise-symbols",
"frontend",
"github-proxy",
"gitserver",

View File

@ -0,0 +1,226 @@
package rockskip
import (
"bufio"
"fmt"
"io"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
type LogEntry struct {
Commit string
PathStatuses []PathStatus
}
type PathStatus struct {
Path string
Status StatusAMD
}
type CommitStatus struct {
Commit string
Status StatusAMD
}
type StatusAMD int
const (
AddedAMD StatusAMD = 0
ModifiedAMD StatusAMD = 1
DeletedAMD StatusAMD = 2
)
type StatusAD int
const (
AddedAD StatusAD = 0
DeletedAD StatusAD = 1
)
type Git interface {
LogReverseEach(repo string, commit string, n int, onLogEntry func(logEntry LogEntry) error) error
RevListEach(repo string, commit string, onCommit func(commit string) (shouldContinue bool, err error)) error
ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error
}
func LogReverseArgs(n int, givenCommit string) []string {
return []string{
"log",
"--pretty=%H %P",
"--raw",
"-z",
"-m",
// --no-abbrev speeds up git log a lot
"--no-abbrev",
"--no-renames",
"--first-parent",
"--reverse",
"--ignore-submodules",
fmt.Sprintf("-%d", n),
givenCommit,
}
}
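// ParseLogReverseEach parses the output of the `git log` invocation built by
// LogReverseArgs and calls onLogEntry once per commit. Each entry is roughly
// "<40-char commit hash> <parent hashes...>" terminated by a NUL byte, optionally
// followed by '\n' and a NUL-delimited list of raw diff path statuses.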
func ParseLogReverseEach(stdout io.Reader, onLogEntry func(entry LogEntry) error) error {
reader := bufio.NewReader(stdout)
var buf []byte
for {
// Each entry: "<40-char commit hash> <parent hashes...>" NUL, then optionally '\n' followed by path statuses.
// Read the commit
commitBytes, err := reader.Peek(40)
if err == io.EOF {
break
} else if err != nil {
return err
}
commit := string(commitBytes)
// Skip past the NULL byte
_, err = reader.ReadBytes(0)
if err != nil {
return err
}
// A '\n' indicates a list of paths and their statuses is next
buf, err = reader.Peek(1)
if err == io.EOF {
err = onLogEntry(LogEntry{Commit: commit, PathStatuses: []PathStatus{}})
if err != nil {
return err
}
break
} else if err != nil {
return err
}
if buf[0] == '\n' {
// A list of paths and their statuses is next
// Skip the '\n'
discarded, err := reader.Discard(1)
if discarded != 1 {
return errors.Newf("discarded %d bytes, expected 1", discarded)
} else if err != nil {
return err
}
pathStatuses := []PathStatus{}
for {
// Each path status entry looks like ":100644 100644 abc... def... M NULL file.txt NULL"
// where the ':' is at byte offset 0, the status byte ('M') is at offset 97, and the path starts at offset 99.
// A ':' indicates a path and its status is next
buf, err = reader.Peek(1)
if err == io.EOF {
break
} else if err != nil {
return err
}
if buf[0] != ':' {
break
}
// Read the status from index 97 and skip to the path at index 99
buf = make([]byte, 99)
read, err := io.ReadFull(reader, buf)
if read != 99 {
return errors.Newf("read %d bytes, expected 99", read)
} else if err != nil {
return err
}
// Read the path
path, err := reader.ReadBytes(0)
if err != nil {
return err
}
path = path[:len(path)-1] // Drop the trailing NULL byte
// Inspect the status
var status StatusAMD
statusByte := buf[97]
switch statusByte {
case 'A':
status = AddedAMD
case 'M':
status = ModifiedAMD
case 'D':
status = DeletedAMD
case 'T':
// Type changed. Check if it changed from a file to a submodule or vice versa,
// treating submodules as empty.
isSubmodule := func(mode string) bool {
// Submodules are mode "160000". https://stackoverflow.com/questions/737673/how-to-read-the-mode-field-of-git-ls-trees-output#comment3519596_737877
return mode == "160000"
}
oldMode := string(buf[1:7])
newMode := string(buf[8:14])
if isSubmodule(oldMode) && !isSubmodule(newMode) {
// It changed from a submodule to a file, so consider it added.
status = AddedAMD
break
}
if !isSubmodule(oldMode) && isSubmodule(newMode) {
// It changed from a file to a submodule, so consider it deleted.
status = DeletedAMD
break
}
// Otherwise, it remained the same, so ignore the type change.
continue
case 'C':
// Copied
return errors.Newf("unexpected status 'C' given --no-renames was specified")
case 'R':
// Renamed
return errors.Newf("unexpected status 'R' given --no-renames was specified")
case 'X':
return errors.Newf("unexpected status 'X' indicates a bug in git")
default:
fmt.Printf("LogReverse commit %q path %q: unrecognized diff status %q, skipping\n", commit, path, string(statusByte))
continue
}
pathStatuses = append(pathStatuses, PathStatus{Path: string(path), Status: status})
}
err = onLogEntry(LogEntry{Commit: commit, PathStatuses: pathStatuses})
if err != nil {
return err
}
}
}
return nil
}
func RevListArgs(givenCommit string) []string {
return []string{"rev-list", "--first-parent", givenCommit}
}
func RevListEach(stdout io.Reader, onCommit func(commit string) (shouldContinue bool, err error)) error {
reader := bufio.NewReader(stdout)
for {
commit, err := reader.ReadString('\n')
if err == io.EOF {
break
} else if err != nil {
return err
}
commit = commit[:len(commit)-1] // Drop the trailing newline
shouldContinue, err := onCommit(commit)
if err != nil {
return err
}
if !shouldContinue {
return nil
}
}
return nil
}

View File

@ -0,0 +1,346 @@
package rockskip
import (
"context"
"fmt"
"k8s.io/utils/lru"
"github.com/inconshreveable/log15"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
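// Index incrementally indexes repo at givenCommit: it walks the not-yet-indexed
// commits oldest-first and, for each one, records which symbols were added or
// deleted relative to the previously indexed tip.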
func (s *Service) Index(ctx context.Context, repo, givenCommit string) (err error) {
threadStatus := s.status.NewThreadStatus(fmt.Sprintf("indexing %s@%s", repo, givenCommit))
defer threadStatus.End()
tasklog := threadStatus.Tasklog
// Get a fresh connection from the DB pool to get deterministic "lock stacking" behavior.
// See doc/dev/background-information/sql/locking_behavior.md for more details.
conn, err := s.db.Conn(ctx)
if err != nil {
return errors.Wrap(err, "failed to get connection for indexing")
}
defer conn.Close()
// Acquire the indexing lock on the repo.
releaseLock, err := iLock(ctx, conn, threadStatus, repo)
if err != nil {
return err
}
defer func() { err = errors.CombineErrors(err, releaseLock()) }()
tipCommit := NULL
tipCommitHash := ""
tipHeight := 0
var repoId int
err = conn.QueryRowContext(ctx, "SELECT id FROM rockskip_repos WHERE repo = $1", repo).Scan(&repoId)
if err != nil {
return errors.Wrapf(err, "failed to get repo id for %s", repo)
}
missingCount := 0
tasklog.Start("RevList")
err = s.git.RevListEach(repo, givenCommit, func(commitHash string) (shouldContinue bool, err error) {
defer tasklog.Continue("RevList")
tasklog.Start("GetCommitByHash")
commit, height, present, err := GetCommitByHash(ctx, conn, repoId, commitHash)
if err != nil {
return false, err
} else if present {
tipCommit = commit
tipCommitHash = commitHash
tipHeight = height
return false, nil
}
missingCount += 1
return true, nil
})
if err != nil {
return errors.Wrap(err, "RevList")
}
threadStatus.SetProgress(0, missingCount)
if missingCount == 0 {
return nil
}
parse := s.createParser()
symbolCache := newSymbolIdCache(s.symbolsCacheSize)
pathSymbolsCache := newPathSymbolsCache(s.pathSymbolsCacheSize)
tasklog.Start("Log")
entriesIndexed := 0
err = s.git.LogReverseEach(repo, givenCommit, missingCount, func(entry LogEntry) error {
defer tasklog.Continue("Log")
threadStatus.SetProgress(entriesIndexed, missingCount)
entriesIndexed++
tx, err := conn.BeginTx(ctx, nil)
if err != nil {
return errors.Wrap(err, "begin transaction")
}
defer tx.Rollback()
hops, err := getHops(ctx, tx, tipCommit, tasklog)
if err != nil {
return errors.Wrap(err, "getHops")
}
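// getHops and ruler are defined elsewhere in this package: the hops form a
// skip-list-like chain over the commit ancestry, and ruler picks how far up the
// chain the new commit's ancestor pointer should reach.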
r := ruler(tipHeight + 1)
if r >= len(hops) {
return errors.Newf("ruler(%d) = %d is out of range of len(hops) = %d", tipHeight+1, r, len(hops))
}
tasklog.Start("InsertCommit")
commit, err := InsertCommit(ctx, tx, repoId, entry.Commit, tipHeight+1, hops[r])
if err != nil {
return errors.Wrap(err, "InsertCommit")
}
tasklog.Start("AppendHop+")
err = AppendHop(ctx, tx, repoId, hops[0:r], AddedAD, commit)
if err != nil {
return errors.Wrap(err, "AppendHop (added)")
}
tasklog.Start("AppendHop-")
err = AppendHop(ctx, tx, repoId, hops[0:r], DeletedAD, commit)
if err != nil {
return errors.Wrap(err, "AppendHop (deleted)")
}
deletedPaths := []string{}
addedPaths := []string{}
for _, pathStatus := range entry.PathStatuses {
if pathStatus.Status == DeletedAMD || pathStatus.Status == ModifiedAMD {
deletedPaths = append(deletedPaths, pathStatus.Path)
}
if pathStatus.Status == AddedAMD || pathStatus.Status == ModifiedAMD {
addedPaths = append(addedPaths, pathStatus.Path)
}
}
getSymbols := func(commit string, paths []string) (map[string]map[string]struct{}, error) {
pathToSymbols := map[string]map[string]struct{}{}
pathsToFetchSet := map[string]struct{}{}
for _, path := range paths {
pathsToFetchSet[path] = struct{}{}
}
// Don't fetch files that are already in the cache.
if commit == tipCommitHash {
for _, path := range paths {
if symbols, ok := pathSymbolsCache.get(path); ok {
pathToSymbols[path] = symbols
delete(pathsToFetchSet, path)
}
}
}
pathsToFetch := []string{}
for path := range pathsToFetchSet {
pathsToFetch = append(pathsToFetch, path)
}
tasklog.Start("ArchiveEach")
err = s.git.ArchiveEach(repo, commit, pathsToFetch, func(path string, contents []byte) error {
defer tasklog.Continue("ArchiveEach")
tasklog.Start("parse")
symbols, err := parse(path, contents)
if err != nil {
return errors.Wrap(err, "parse")
}
pathToSymbols[path] = map[string]struct{}{}
for _, symbol := range symbols {
pathToSymbols[path][symbol.Name] = struct{}{}
}
return nil
})
if err != nil {
return nil, errors.Wrap(err, "while looping ArchiveEach")
}
// Cache the symbols we just parsed.
if commit != tipCommitHash {
for path, symbols := range pathToSymbols {
pathSymbolsCache.set(path, symbols)
}
}
return pathToSymbols, nil
}
symbolsFromDeletedFiles, err := getSymbols(tipCommitHash, deletedPaths)
if err != nil {
return errors.Wrap(err, "getSymbols (deleted)")
}
symbolsFromAddedFiles, err := getSymbols(entry.Commit, addedPaths)
if err != nil {
return errors.Wrap(err, "getSymbols (added)")
}
// Compute the symmetric difference of symbols between the added and deleted paths.
deletedSymbols := map[string]map[string]struct{}{}
addedSymbols := map[string]map[string]struct{}{}
for _, pathStatus := range entry.PathStatuses {
switch pathStatus.Status {
case DeletedAMD:
deletedSymbols[pathStatus.Path] = symbolsFromDeletedFiles[pathStatus.Path]
case AddedAMD:
addedSymbols[pathStatus.Path] = symbolsFromAddedFiles[pathStatus.Path]
case ModifiedAMD:
deletedSymbols[pathStatus.Path] = map[string]struct{}{}
addedSymbols[pathStatus.Path] = map[string]struct{}{}
for name := range symbolsFromDeletedFiles[pathStatus.Path] {
if _, ok := symbolsFromAddedFiles[pathStatus.Path][name]; !ok {
deletedSymbols[pathStatus.Path][name] = struct{}{}
}
}
for name := range symbolsFromAddedFiles[pathStatus.Path] {
if _, ok := symbolsFromDeletedFiles[pathStatus.Path][name]; !ok {
addedSymbols[pathStatus.Path][name] = struct{}{}
}
}
}
}
for path, symbols := range deletedSymbols {
for symbol := range symbols {
id := 0
ok := false
if id, ok = symbolCache.get(path, symbol); !ok {
found := false
for _, hop := range hops {
tasklog.Start("GetSymbol")
id, found, err = GetSymbol(ctx, tx, repoId, path, symbol, hop)
if err != nil {
return err
}
if found {
break
}
}
if !found {
// We did not find the symbol that (supposedly) has been deleted, so ignore the
// deletion. This will probably lead to extra symbols in search results.
//
// The last time this happened, it was caused by impurity in ctags where the
// result of parsing a file was affected by previously parsed files and not fully
// determined by the file itself:
//
// https://github.com/universal-ctags/ctags/pull/3300
log15.Error("Could not find symbol that was supposedly deleted", "repo", repo, "commit", commit, "path", path, "symbol", symbol)
continue
}
}
tasklog.Start("UpdateSymbolHops")
err = UpdateSymbolHops(ctx, tx, id, DeletedAD, commit)
if err != nil {
return errors.Wrap(err, "UpdateSymbolHops")
}
}
}
for path, symbols := range addedSymbols {
for symbol := range symbols {
tasklog.Start("InsertSymbol")
id, err := InsertSymbol(ctx, tx, commit, repoId, path, symbol)
if err != nil {
return errors.Wrap(err, "InsertSymbol")
}
symbolCache.set(path, symbol, id)
}
}
tasklog.Start("DeleteRedundant")
err = DeleteRedundant(ctx, tx, commit)
if err != nil {
return errors.Wrap(err, "DeleteRedundant")
}
tasklog.Start("CommitTx")
err = tx.Commit()
if err != nil {
return errors.Wrap(err, "commit transaction")
}
tipCommit = commit
tipCommitHash = entry.Commit
tipHeight += 1
return nil
})
if err != nil {
return errors.Wrap(err, "LogReverseEach")
}
threadStatus.SetProgress(entriesIndexed, missingCount)
return nil
}
type repoCommit struct {
repo string
commit string
}
type indexRequest struct {
repoCommit
done chan struct{}
}
type symbolIdCache struct {
cache *lru.Cache
}
func newSymbolIdCache(size int) *symbolIdCache {
return &symbolIdCache{cache: lru.New(size)}
}
func (s *symbolIdCache) get(path, symbol string) (int, bool) {
v, ok := s.cache.Get(symbolIdCacheKey(path, symbol))
if !ok {
return 0, false
}
return v.(int), true
}
func (s *symbolIdCache) set(path, symbol string, id int) {
s.cache.Add(symbolIdCacheKey(path, symbol), id)
}
func symbolIdCacheKey(path, symbol string) string {
return path + ":" + symbol
}
type pathSymbolsCache struct {
cache *lru.Cache
}
func newPathSymbolsCache(size int) *pathSymbolsCache {
return &pathSymbolsCache{cache: lru.New(size)}
}
func (s *pathSymbolsCache) get(path string) (map[string]struct{}, bool) {
v, ok := s.cache.Get(path)
if !ok {
return nil, false
}
return v.(map[string]struct{}), true
}
func (s *pathSymbolsCache) set(path string, symbols map[string]struct{}) {
s.cache.Add(path, symbols)
}

View File

@ -0,0 +1,364 @@
package rockskip
import (
"context"
"database/sql"
"fmt"
pg "github.com/lib/pq"
"github.com/segmentio/fasthash/fnv1"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
type CommitId = int
func GetCommitById(ctx context.Context, db dbutil.DB, givenCommit CommitId) (commitHash string, ancestor CommitId, height int, present bool, err error) {
err = db.QueryRowContext(ctx, `
SELECT commit_id, ancestor, height
FROM rockskip_ancestry
WHERE id = $1
`, givenCommit).Scan(&commitHash, &ancestor, &height)
if err == sql.ErrNoRows {
return "", 0, 0, false, nil
} else if err != nil {
return "", 0, 0, false, errors.Newf("GetCommitById: %s", err)
}
return commitHash, ancestor, height, true, nil
}
func GetCommitByHash(ctx context.Context, db dbutil.DB, repoId int, commitHash string) (commit CommitId, height int, present bool, err error) {
err = db.QueryRowContext(ctx, `
SELECT id, height
FROM rockskip_ancestry
WHERE repo_id = $1 AND commit_id = $2
`, repoId, commitHash).Scan(&commit, &height)
if err == sql.ErrNoRows {
return 0, 0, false, nil
} else if err != nil {
return 0, 0, false, errors.Newf("GetCommitByHash: %s", err)
}
return commit, height, true, nil
}
func InsertCommit(ctx context.Context, db dbutil.DB, repoId int, commitHash string, height int, ancestor CommitId) (id CommitId, err error) {
err = db.QueryRowContext(ctx, `
INSERT INTO rockskip_ancestry (commit_id, repo_id, height, ancestor)
VALUES ($1, $2, $3, $4)
RETURNING id
`, commitHash, repoId, height, ancestor).Scan(&id)
return id, errors.Wrap(err, "InsertCommit")
}
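// Note on the queries below: `&&` is the Postgres array overlap operator, so e.g.
// `$4 && added` holds when the given hop commit is a member of the added array.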
func GetSymbol(ctx context.Context, db dbutil.DB, repoId int, path string, name string, hop CommitId) (id int, found bool, err error) {
err = db.QueryRowContext(ctx, `
SELECT id
FROM rockskip_symbols
WHERE repo_id = $1 AND path = $2 AND name = $3 AND $4 && added AND NOT $4 && deleted
`, repoId, path, name, pg.Array([]int{hop})).Scan(&id)
if err == sql.ErrNoRows {
return 0, false, nil
} else if err != nil {
return 0, false, errors.Newf("GetSymbol: %s", err)
}
return id, true, nil
}
func UpdateSymbolHops(ctx context.Context, db dbutil.DB, id int, status StatusAD, hop CommitId) error {
column := statusADToColumn(status)
_, err := db.ExecContext(ctx, fmt.Sprintf(`
UPDATE rockskip_symbols
SET %s = array_append(%s, $1)
WHERE id = $2
`, column, column), hop, id)
return errors.Wrap(err, "UpdateSymbolHops")
}
func InsertSymbol(ctx context.Context, db dbutil.DB, hop CommitId, repoId int, path string, name string) (id int, err error) {
err = db.QueryRowContext(ctx, `
INSERT INTO rockskip_symbols (added, deleted, repo_id, path, name)
VALUES ($1 , $2 , $3 , $4 , $5 )
RETURNING id
`, pg.Array([]int{hop}), pg.Array([]int{}), repoId, path, name).Scan(&id)
return id, errors.Wrap(err, "InsertSymbol")
}
func AppendHop(ctx context.Context, db dbutil.DB, repoId int, hops []CommitId, givenStatus StatusAD, newHop CommitId) error {
column := statusADToColumn(givenStatus)
_, err := db.ExecContext(ctx, fmt.Sprintf(`
UPDATE rockskip_symbols
SET %s = array_append(%s, $1)
WHERE $2 && singleton_integer(repo_id) AND $3 && %s
`, column, column, column), newHop, pg.Array([]int{repoId}), pg.Array(hops))
return errors.Wrap(err, "AppendHop")
}
func DeleteRedundant(ctx context.Context, db dbutil.DB, hop CommitId) error {
_, err := db.ExecContext(ctx, `
UPDATE rockskip_symbols
SET added = array_remove(added, $1), deleted = array_remove(deleted, $1)
WHERE $2 && added AND $2 && deleted
`, hop, pg.Array([]int{hop}))
return errors.Wrap(err, "DeleteRedundant")
}
func tryDeleteOldestRepo(ctx context.Context, db *sql.Conn, maxRepos int, threadStatus *ThreadStatus) (more bool, err error) {
defer threadStatus.Tasklog.Continue("idle")
// Select a candidate repo to delete.
threadStatus.Tasklog.Start("select repo to delete")
var repoId int
var repo string
var repoRank int
err = db.QueryRowContext(ctx, `
SELECT id, repo, repo_rank
FROM (
SELECT *, RANK() OVER (ORDER BY last_accessed_at DESC) repo_rank
FROM rockskip_repos
) sub
WHERE repo_rank > $1
ORDER BY last_accessed_at ASC
LIMIT 1;`, maxRepos,
).Scan(&repoId, &repo, &repoRank)
if err == sql.ErrNoRows {
// No more repos to delete.
return false, nil
}
if err != nil {
return false, errors.Wrap(err, "selecting repo to delete")
}
// Note: a search request or deletion could have intervened here.
// Acquire the write lock on the repo.
releaseWLock, err := wLock(ctx, db, threadStatus, repo)
if err != nil {
return false, errors.Wrap(err, "acquiring write lock on repo")
}
defer func() { err = errors.CombineErrors(err, releaseWLock()) }()
// Make sure the repo is still old. See note above.
var rank int
threadStatus.Tasklog.Start("recheck repo rank")
err = db.QueryRowContext(ctx, `
SELECT repo_rank
FROM (
SELECT id, RANK() OVER (ORDER BY last_accessed_at DESC) repo_rank
FROM rockskip_repos
) sub
WHERE id = $1;`, repoId,
).Scan(&rank)
if err == sql.ErrNoRows {
// The repo was deleted in the meantime, so retry.
return true, nil
}
if err != nil {
return false, errors.Wrap(err, "selecting repo rank")
}
if rank <= maxRepos {
// An intervening search request must have refreshed the repo, so retry.
return true, nil
}
// Acquire the indexing lock on the repo.
releaseILock, err := iLock(ctx, db, threadStatus, repo)
if err != nil {
return false, errors.Wrap(err, "acquiring indexing lock on repo")
}
defer func() { err = errors.CombineErrors(err, releaseILock()) }()
// Delete the repo.
threadStatus.Tasklog.Start("delete repo")
tx, err := db.BeginTx(ctx, nil)
if err != nil {
return false, err
}
defer tx.Rollback()
_, err = tx.ExecContext(ctx, "DELETE FROM rockskip_ancestry WHERE repo_id = $1;", repoId)
if err != nil {
return false, err
}
_, err = tx.ExecContext(ctx, "DELETE FROM rockskip_symbols WHERE repo_id = $1;", pg.Array([]int{repoId}))
if err != nil {
return false, err
}
_, err = tx.ExecContext(ctx, "DELETE FROM rockskip_repos WHERE id = $1;", repoId)
if err != nil {
return false, err
}
err = tx.Commit()
if err != nil {
return false, err
}
return true, nil
}
func PrintInternals(ctx context.Context, db dbutil.DB) error {
fmt.Println("Commit ancestry:")
fmt.Println()
// print all rows in the rockskip_ancestry table
rows, err := db.QueryContext(ctx, `
SELECT a1.commit_id, a1.height, a2.commit_id
FROM rockskip_ancestry a1
JOIN rockskip_ancestry a2 ON a1.ancestor = a2.id
ORDER BY height ASC
`)
if err != nil {
return errors.Wrap(err, "PrintInternals")
}
defer rows.Close()
for rows.Next() {
var commit, ancestor string
var height int
err = rows.Scan(&commit, &height, &ancestor)
if err != nil {
return errors.Wrap(err, "PrintInternals: Scan")
}
fmt.Printf("height %3d commit %s ancestor %s\n", height, commit, ancestor)
}
fmt.Println()
fmt.Println("Symbols:")
fmt.Println()
rows, err = db.QueryContext(ctx, `
SELECT id, path, name, added, deleted
FROM rockskip_symbols
ORDER BY id ASC
`)
if err != nil {
return errors.Wrap(err, "PrintInternals")
}
for rows.Next() {
var id int
var path string
var name string
var added, deleted []int64
err = rows.Scan(&id, &path, &name, pg.Array(&added), pg.Array(&deleted))
if err != nil {
return errors.Wrap(err, "PrintInternals: Scan")
}
fmt.Printf(" id %d path %-10s symbol %s\n", id, path, name)
for _, a := range added {
hash, _, _, _, err := GetCommitById(ctx, db, int(a))
if err != nil {
return err
}
fmt.Printf(" + %-40s\n", hash)
}
fmt.Println()
for _, d := range deleted {
hash, _, _, _, err := GetCommitById(ctx, db, int(d))
if err != nil {
return err
}
fmt.Printf(" - %-40s\n", hash)
}
fmt.Println()
}
fmt.Println()
return nil
}
func updateLastAccessedAt(ctx context.Context, db dbutil.DB, repo string) (id int, err error) {
err = db.QueryRowContext(ctx, `
INSERT INTO rockskip_repos (repo, last_accessed_at)
VALUES ($1, now())
ON CONFLICT (repo)
DO UPDATE SET last_accessed_at = now()
RETURNING id
`, repo).Scan(&id)
if err != nil {
return 0, err
}
return id, nil
}
func statusADToColumn(status StatusAD) string {
switch status {
case AddedAD:
return "added"
case DeletedAD:
return "deleted"
default:
fmt.Println("unexpected status StatusAD: ", status)
return "unknown_status"
}
}
var RW_LOCKS_NAMESPACE = int32(fnv1.HashString32("symbols-rw"))
var INDEXING_LOCKS_NAMESPACE = int32(fnv1.HashString32("symbols-indexing"))
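// Postgres advisory locks are keyed by a pair of int32s. We use one hash for the
// lock namespace (read/write vs. indexing) and one for the repo name, so different
// kinds of locks on the same repo do not collide.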
func lock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, namespace int32, name, repo, lockFn, unlockFn string) (func() error, error) {
key := int32(fnv1.HashString32(repo))
threadStatus.Tasklog.Start(name)
_, err := db.ExecContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, lockFn), namespace, key)
if err != nil {
return nil, errors.Newf("acquire %s: %s", name, err)
}
threadStatus.HoldLock(name)
release := func() error {
_, err := db.ExecContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, unlockFn), namespace, key)
if err != nil {
return errors.Newf("release %s: %s", name, err)
}
threadStatus.ReleaseLock(name)
return nil
}
return release, nil
}
func tryLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, namespace int32, name, repo, lockFn, unlockFn string) (bool, func() error, error) {
key := int32(fnv1.HashString32(repo))
threadStatus.Tasklog.Start(name)
locked, _, err := basestore.ScanFirstBool(db.QueryContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, lockFn), namespace, key))
if err != nil {
return false, nil, errors.Newf("try acquire %s: %s", name, err)
}
if !locked {
return false, nil, nil
}
threadStatus.HoldLock(name)
release := func() error {
_, err := db.ExecContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, unlockFn), namespace, key)
if err != nil {
return errors.Newf("release %s: %s", name, err)
}
threadStatus.ReleaseLock(name)
return nil
}
return true, release, nil
}
// tryRLock attempts to acquire a read lock on the repo.
func tryRLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (bool, func() error, error) {
return tryLock(ctx, db, threadStatus, RW_LOCKS_NAMESPACE, "rLock", repo, "pg_try_advisory_lock_shared", "pg_advisory_unlock_shared")
}
// wLock acquires the write lock on the repo. It blocks only when another connection holds a read or the
// write lock. That means a single connection can acquire the write lock while holding a read lock.
func wLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (func() error, error) {
return lock(ctx, db, threadStatus, RW_LOCKS_NAMESPACE, "wLock", repo, "pg_advisory_lock", "pg_advisory_unlock")
}
// iLock acquires the indexing lock on the repo.
func iLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (func() error, error) {
return lock(ctx, db, threadStatus, INDEXING_LOCKS_NAMESPACE, "iLock", repo, "pg_advisory_lock", "pg_advisory_unlock")
}

View File

@ -0,0 +1,469 @@
package rockskip
import (
"context"
"database/sql"
"database/sql/driver"
"fmt"
"strings"
"time"
"github.com/grafana/regexp"
"github.com/grafana/regexp/syntax"
"github.com/keegancsmith/sqlf"
pg "github.com/lib/pq"
"github.com/segmentio/fasthash/fnv1"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
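// Search returns the symbols matching args at the requested commit, first
// indexing that commit on demand if it has not been indexed yet.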
func (s *Service) Search(ctx context.Context, args types.SearchArgs) (symbols []result.Symbol, err error) {
repo := string(args.Repo)
commitHash := string(args.CommitID)
threadStatus := s.status.NewThreadStatus(fmt.Sprintf("searching %+v", args))
if s.logQueries {
defer threadStatus.Tasklog.Print()
}
defer threadStatus.End()
// Acquire a read lock on the repo.
locked, releaseRLock, err := tryRLock(ctx, s.db, threadStatus, repo)
if err != nil {
return nil, err
}
defer func() { err = errors.CombineErrors(err, releaseRLock()) }()
if !locked {
return nil, errors.Newf("deletion in progress", repo)
}
// Insert or set the last_accessed_at column for this repo to now() in the rockskip_repos table.
threadStatus.Tasklog.Start("update last_accessed_at")
repoId, err := updateLastAccessedAt(ctx, s.db, repo)
if err != nil {
return nil, err
}
// Non-blocking send on repoUpdates to notify the background deletion goroutine.
select {
case s.repoUpdates <- struct{}{}:
default:
}
// Check if the commit has already been indexed, and if not then index it.
threadStatus.Tasklog.Start("check commit presence")
commit, _, present, err := GetCommitByHash(ctx, s.db, repoId, commitHash)
if err != nil {
return nil, err
} else if !present {
// Try to send an index request.
done, err := s.emitIndexRequest(repoCommit{repo: repo, commit: commitHash})
if err != nil {
return nil, err
}
// Wait for indexing to complete or the request to be canceled.
threadStatus.Tasklog.Start("awaiting indexing completion")
select {
case <-done:
threadStatus.Tasklog.Start("recheck commit presence")
commit, _, present, err = GetCommitByHash(ctx, s.db, repoId, commitHash)
if err != nil {
return nil, err
}
if !present {
return nil, errors.Newf("indexing failed, check server logs")
}
case <-ctx.Done():
return nil, ctx.Err()
}
}
// Finally search.
symbols, err = s.querySymbols(ctx, args, repoId, commit, threadStatus)
if err != nil {
return nil, err
}
return symbols, nil
}
func mkIsMatch(args types.SearchArgs) (func(string) bool, error) {
if !args.IsRegExp {
if args.IsCaseSensitive {
return func(symbol string) bool { return strings.Contains(symbol, args.Query) }, nil
} else {
return func(symbol string) bool {
return strings.Contains(strings.ToLower(symbol), strings.ToLower(args.Query))
}, nil
}
}
expr := args.Query
if !args.IsCaseSensitive {
expr = "(?i)" + expr
}
regex, err := regexp.Compile(expr)
if err != nil {
return nil, err
}
if args.IsCaseSensitive {
return func(symbol string) bool { return regex.MatchString(symbol) }, nil
} else {
return func(symbol string) bool { return regex.MatchString(strings.ToLower(symbol)) }, nil
}
}
func (s *Service) emitIndexRequest(rc repoCommit) (chan struct{}, error) {
key := fmt.Sprintf("%s@%s", rc.repo, rc.commit)
s.repoCommitToDoneMu.Lock()
if done, ok := s.repoCommitToDone[key]; ok {
s.repoCommitToDoneMu.Unlock()
return done, nil
}
done := make(chan struct{})
s.repoCommitToDone[key] = done
s.repoCommitToDoneMu.Unlock()
go func() {
<-done
s.repoCommitToDoneMu.Lock()
delete(s.repoCommitToDone, key)
s.repoCommitToDoneMu.Unlock()
}()
request := indexRequest{
repoCommit: repoCommit{
repo: rc.repo,
commit: rc.commit,
},
done: done}
// Route the index request to the indexer associated with the repo.
ix := int(fnv1.HashString32(rc.repo)) % len(s.indexRequestQueues)
select {
case s.indexRequestQueues[ix] <- request:
default:
return nil, errors.Newf("the indexing queue is full")
}
return done, nil
}
const DEFAULT_LIMIT = 100
func (s *Service) querySymbols(ctx context.Context, args types.SearchArgs, repoId int, commit int, threadStatus *ThreadStatus) ([]result.Symbol, error) {
hops, err := getHops(ctx, s.db, commit, threadStatus.Tasklog)
if err != nil {
return nil, err
}
// Drop the null commit.
hops = hops[:len(hops)-1]
limit := DEFAULT_LIMIT
if args.First > 0 {
limit = args.First
}
threadStatus.Tasklog.Start("run query")
q := sqlf.Sprintf(`
SELECT DISTINCT path
FROM rockskip_symbols
WHERE
%s && singleton_integer(repo_id)
AND %s && added
AND NOT %s && deleted
AND %s
LIMIT %s;`,
pg.Array([]int{repoId}),
pg.Array(hops),
pg.Array(hops),
convertSearchArgsToSqlQuery(args),
limit,
)
start := time.Now()
var rows *sql.Rows
rows, err = s.db.QueryContext(ctx, q.Query(sqlf.PostgresBindVar), q.Args()...)
duration := time.Since(start)
if err != nil {
return nil, errors.Wrap(err, "Search")
}
defer rows.Close()
isMatch, err := mkIsMatch(args)
if err != nil {
return nil, err
}
paths := []string{}
for rows.Next() {
var path string
err = rows.Scan(&path)
if err != nil {
return nil, errors.Wrap(err, "Search: Scan")
}
paths = append(paths, path)
}
stopErr := errors.New("stop iterating")
symbols := []result.Symbol{}
parse := s.createParser()
threadStatus.Tasklog.Start("ArchiveEach")
err = s.git.ArchiveEach(string(args.Repo), string(args.CommitID), paths, func(path string, contents []byte) error {
defer threadStatus.Tasklog.Continue("ArchiveEach")
threadStatus.Tasklog.Start("parse")
allSymbols, err := parse(path, contents)
if err != nil {
return err
}
for _, symbol := range allSymbols {
if isMatch(symbol.Name) {
symbols = append(symbols, result.Symbol{
Name: symbol.Name,
Path: path,
Line: symbol.Line,
Kind: symbol.Kind,
Parent: symbol.Parent,
})
if len(symbols) >= limit {
return stopErr
}
}
}
return nil
})
if err != nil && err != stopErr {
return nil, err
}
if s.logQueries {
err = logQuery(ctx, s.db, args, q, duration, len(symbols))
if err != nil {
return nil, errors.Wrap(err, "logQuery")
}
}
return symbols, nil
}
func logQuery(ctx context.Context, db dbutil.DB, args types.SearchArgs, q *sqlf.Query, duration time.Duration, symbols int) error {
sb := &strings.Builder{}
fmt.Fprintf(sb, "Search args: %+v\n", args)
fmt.Fprintln(sb, "Query:")
query, err := sqlfToString(q)
if err != nil {
return errors.Wrap(err, "sqlfToString")
}
fmt.Fprintln(sb, query)
fmt.Fprintln(sb, "EXPLAIN:")
explain, err := db.QueryContext(ctx, sqlf.Sprintf("EXPLAIN %s", q).Query(sqlf.PostgresBindVar), q.Args()...)
if err != nil {
return errors.Wrap(err, "EXPLAIN")
}
defer explain.Close()
for explain.Next() {
var plan string
err = explain.Scan(&plan)
if err != nil {
return errors.Wrap(err, "EXPLAIN Scan")
}
fmt.Fprintln(sb, plan)
}
fmt.Fprintf(sb, "%.2fms, %d symbols", float64(duration.Microseconds())/1000, symbols)
fmt.Println(" ")
fmt.Println(bracket(sb.String()))
fmt.Println(" ")
return nil
}
func bracket(text string) string {
lines := strings.Split(strings.TrimSpace(text), "\n")
for i, line := range lines {
if i == 0 {
lines[i] = "┌ " + line
} else if i == len(lines)-1 {
lines[i] = "└ " + line
} else {
lines[i] = "│ " + line
}
}
return strings.Join(lines, "\n")
}
func sqlfToString(q *sqlf.Query) (string, error) {
s := q.Query(sqlf.PostgresBindVar)
for i, arg := range q.Args() {
argString, err := argToString(arg)
if err != nil {
return "", err
}
s = strings.ReplaceAll(s, fmt.Sprintf("$%d", i+1), argString)
}
return s, nil
}
func argToString(arg interface{}) (string, error) {
switch arg := arg.(type) {
case string:
return fmt.Sprintf("'%s'", sqlEscapeQuotes(arg)), nil
case driver.Valuer:
value, err := arg.Value()
if err != nil {
return "", err
}
switch value := value.(type) {
case string:
return fmt.Sprintf("'%s'", sqlEscapeQuotes(value)), nil
case int:
return fmt.Sprintf("'%d'", value), nil
default:
return "", errors.Newf("unrecognized array type %T", value)
}
case int:
return fmt.Sprintf("%d", arg), nil
default:
return "", errors.Newf("unrecognized type %T", arg)
}
}
func sqlEscapeQuotes(s string) string {
return strings.ReplaceAll(s, "'", "''")
}
func convertSearchArgsToSqlQuery(args types.SearchArgs) *sqlf.Query {
// TODO support non regexp queries once the frontend supports it.
conjunctOrNils := []*sqlf.Query{}
// Query
conjunctOrNils = append(conjunctOrNils, regexMatch("name", "", args.Query, args.IsCaseSensitive))
// IncludePatterns
for _, includePattern := range args.IncludePatterns {
conjunctOrNils = append(conjunctOrNils, regexMatch("path", "path_prefixes(path)", includePattern, args.IsCaseSensitive))
}
// ExcludePattern
conjunctOrNils = append(conjunctOrNils, negate(regexMatch("path", "path_prefixes(path)", args.ExcludePattern, args.IsCaseSensitive)))
// Drop nils
conjuncts := []*sqlf.Query{}
for _, condition := range conjunctOrNils {
if condition != nil {
conjuncts = append(conjuncts, condition)
}
}
if len(conjuncts) == 0 {
return sqlf.Sprintf("TRUE")
}
return sqlf.Join(conjuncts, "AND")
}
func regexMatch(column, columnForLiteralPrefix, regex string, isCaseSensitive bool) *sqlf.Query {
if regex == "" || regex == "^" {
return nil
}
// Exact match optimization
if literal, ok, err := isLiteralEquality(regex); err == nil && ok && isCaseSensitive {
return sqlf.Sprintf(fmt.Sprintf("%%s = %s", column), literal)
}
// Prefix match optimization
if literal, ok, err := isLiteralPrefix(regex); err == nil && ok && isCaseSensitive && columnForLiteralPrefix != "" {
return sqlf.Sprintf(fmt.Sprintf("%%s && %s", columnForLiteralPrefix), pg.Array([]string{literal}))
}
// Regex match
operator := "~"
if !isCaseSensitive {
operator = "~*"
}
return sqlf.Sprintf(fmt.Sprintf("%s %s %%s", column, operator), regex)
}
// isLiteralEquality reports whether the given regex matches exactly one
// literal string, i.e. it has the form ^literal$. If so, it also returns the
// literal to search for; for example, "^foo$" yields "foo", true.
func isLiteralEquality(expr string) (string, bool, error) {
regexp, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
return "", false, errors.Wrap(err, "regexp/syntax.Parse")
}
// want a concat of size 3 which is [begin, literal, end]
if regexp.Op == syntax.OpConcat && len(regexp.Sub) == 3 {
// starts with ^
if regexp.Sub[0].Op == syntax.OpBeginLine || regexp.Sub[0].Op == syntax.OpBeginText {
// is a literal
if regexp.Sub[1].Op == syntax.OpLiteral {
// ends with $
if regexp.Sub[2].Op == syntax.OpEndLine || regexp.Sub[2].Op == syntax.OpEndText {
return string(regexp.Sub[1].Rune), true, nil
}
}
}
}
return "", false, nil
}
// isLiteralPrefix reports whether the given regex matches strings by literal
// prefix, i.e. it has the form ^literal. If so, it also returns the literal
// prefix to search for; for example, "^cmd/" yields "cmd/", true.
func isLiteralPrefix(expr string) (string, bool, error) {
regexp, err := syntax.Parse(expr, syntax.Perl)
if err != nil {
return "", false, errors.Wrap(err, "regexp/syntax.Parse")
}
// want a concat of size 2 which is [begin, literal]
if regexp.Op == syntax.OpConcat && len(regexp.Sub) == 2 {
// starts with ^
if regexp.Sub[0].Op == syntax.OpBeginLine || regexp.Sub[0].Op == syntax.OpBeginText {
// is a literal
if regexp.Sub[1].Op == syntax.OpLiteral {
return string(regexp.Sub[1].Rune), true, nil
}
}
}
return "", false, nil
}
func negate(query *sqlf.Query) *sqlf.Query {
if query == nil {
return nil
}
return sqlf.Sprintf("NOT %s", query)
}

View File

@ -0,0 +1,160 @@
package rockskip
import (
"context"
"database/sql"
"sync"
"github.com/inconshreveable/log15"
"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
type Symbol struct {
Name string `json:"name"`
Parent string `json:"parent"`
Kind string `json:"kind"`
Line int `json:"line"`
}
type ParseSymbolsFunc func(path string, bytes []byte) (symbols []Symbol, err error)
const NULL CommitId = 0
type Service struct {
db *sql.DB
git Git
createParser func() ParseSymbolsFunc
status *ServiceStatus
repoUpdates chan struct{}
maxRepos int
logQueries bool
repoCommitToDone map[string]chan struct{}
repoCommitToDoneMu sync.Mutex
indexRequestQueues []chan indexRequest
symbolsCacheSize int
pathSymbolsCacheSize int
}
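// NewService starts maxConcurrentlyIndexing indexing goroutines, each draining
// its own buffered queue of index requests, plus a cleanup goroutine that
// deletes the oldest repos whenever a send on repoUpdates signals that a repo
// changed.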
func NewService(
db *sql.DB,
git Git,
createParser func() ParseSymbolsFunc,
maxConcurrentlyIndexing int,
maxRepos int,
logQueries bool,
indexRequestsQueueSize int,
symbolsCacheSize int,
pathSymbolsCacheSize int,
) (*Service, error) {
indexRequestQueues := make([]chan indexRequest, maxConcurrentlyIndexing)
for i := 0; i < maxConcurrentlyIndexing; i++ {
indexRequestQueues[i] = make(chan indexRequest, indexRequestsQueueSize)
}
service := &Service{
db: db,
git: git,
createParser: createParser,
status: NewStatus(),
repoUpdates: make(chan struct{}, 1),
maxRepos: maxRepos,
logQueries: logQueries,
repoCommitToDone: map[string]chan struct{}{},
repoCommitToDoneMu: sync.Mutex{},
indexRequestQueues: indexRequestQueues,
symbolsCacheSize: symbolsCacheSize,
pathSymbolsCacheSize: pathSymbolsCacheSize,
}
go service.startCleanupLoop()
for i := 0; i < maxConcurrentlyIndexing; i++ {
go service.startIndexingLoop(service.indexRequestQueues[i])
}
return service, nil
}
func (s *Service) startIndexingLoop(indexRequestQueue chan indexRequest) {
for indexRequest := range indexRequestQueue {
err := s.Index(context.Background(), indexRequest.repo, indexRequest.commit)
close(indexRequest.done)
if err != nil {
log15.Error("indexing error", "repo", indexRequest.repo, "commit", indexRequest.commit, "err", err)
}
}
}
func (s *Service) startCleanupLoop() {
for range s.repoUpdates {
threadStatus := s.status.NewThreadStatus("cleanup")
err := DeleteOldRepos(context.Background(), s.db, s.maxRepos, threadStatus)
threadStatus.End()
if err != nil {
log15.Error("Failed to delete old repos", "error", err)
}
}
}
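// getHops returns the ancestry "spine" of the given commit: the chain of
// commit IDs reached by repeatedly following ancestor pointers, stopping at
// the null commit or at the first commit missing from the database.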
func getHops(ctx context.Context, tx dbutil.DB, commit int, tasklog *TaskLog) ([]int, error) {
tasklog.Start("get hops")
current := commit
spine := []int{current}
for {
_, ancestor, _, present, err := GetCommitById(ctx, tx, current)
if err != nil {
return nil, errors.Wrap(err, "GetCommitById")
} else if !present {
break
} else {
if current == NULL {
break
}
current = ancestor
spine = append(spine, current)
}
}
return spine, nil
}
func DeleteOldRepos(ctx context.Context, db *sql.DB, maxRepos int, threadStatus *ThreadStatus) error {
// Get a fresh connection from the DB pool to get deterministic "lock stacking" behavior.
// See doc/dev/background-information/sql/locking_behavior.md for more details.
conn, err := db.Conn(context.Background())
if err != nil {
return errors.Wrap(err, "failed to get connection for deleting old repos")
}
defer conn.Close()
// Keep deleting repos until we're back to at most maxRepos.
for {
more, err := tryDeleteOldestRepo(ctx, conn, maxRepos, threadStatus)
if err != nil {
return err
}
if !more {
return nil
}
}
}
// Ruler sequence
//
// input : 0, 1, 2, 3, 4, 5, 6, 7, 8, ...
// output: 0, 0, 1, 0, 2, 0, 1, 0, 3, ...
//
// https://oeis.org/A007814
func ruler(n int) int {
if n == 0 {
return 0
}
if n%2 != 0 {
return 0
}
return 1 + ruler(n/2)
}
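// A quick check of the first few values against the sequence above, as a
// minimal sketch assuming it lives in a _test.go file in this package:
//
//	func TestRuler(t *testing.T) {
//		want := []int{0, 0, 1, 0, 2, 0, 1, 0, 3}
//		for n, w := range want {
//			if got := ruler(n); got != w {
//				t.Errorf("ruler(%d) = %d, want %d", n, got, w)
//			}
//		}
//	}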

View File

@ -0,0 +1,309 @@
package rockskip
import (
"bufio"
"context"
"fmt"
"io"
"os"
"os/exec"
"path"
"sort"
"strconv"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/database/dbtest"
"github.com/sourcegraph/sourcegraph/lib/errors"
)
// simpleParse converts each line into a symbol.
func simpleParse(path string, bytes []byte) ([]Symbol, error) {
symbols := []Symbol{}
for _, line := range strings.Split(string(bytes), "\n") {
if line == "" {
continue
}
symbols = append(symbols, Symbol{Name: line})
}
return symbols, nil
}
func TestIndex(t *testing.T) {
fatalIfError := func(err error, message string) {
if err != nil {
t.Fatal(errors.Wrap(err, message))
}
}
gitDir, err := os.MkdirTemp("", "rockskip-test-index")
fatalIfError(err, "faiMkdirTemp")
t.Cleanup(func() {
if t.Failed() {
t.Logf("git dir %s left intact for inspection", gitDir)
} else {
os.RemoveAll(gitDir)
}
})
gitCmd := func(args ...string) *exec.Cmd {
cmd := exec.Command("git", args...)
cmd.Dir = gitDir
return cmd
}
gitRun := func(args ...string) {
fatalIfError(gitCmd(args...).Run(), "git "+strings.Join(args, " "))
}
gitStdout := func(args ...string) string {
stdout, err := gitCmd(args...).Output()
fatalIfError(err, "git "+strings.Join(args, " "))
return string(stdout)
}
getHead := func() string {
return strings.TrimSpace(gitStdout("rev-parse", "HEAD"))
}
state := map[string][]string{}
add := func(filename string, contents string) {
fatalIfError(os.WriteFile(path.Join(gitDir, filename), []byte(contents), 0644), "os.WriteFile")
gitRun("add", filename)
symbols, err := simpleParse(filename, []byte(contents))
fatalIfError(err, "simpleParse")
state[filename] = []string{}
for _, symbol := range symbols {
state[filename] = append(state[filename], symbol.Name)
}
}
rm := func(filename string) {
gitRun("rm", filename)
delete(state, filename)
}
gitRun("init")
git, err := NewSubprocessGit(gitDir)
fatalIfError(err, "NewSubprocessGit")
defer git.Close()
db := dbtest.NewDB(t)
defer db.Close()
createParser := func() ParseSymbolsFunc { return simpleParse }
service, err := NewService(db, git, createParser, 1, 1, false, 1, 1, 1)
fatalIfError(err, "NewService")
verifyBlobs := func() {
repo := "somerepo"
commit := getHead()
args := types.SearchArgs{Repo: api.RepoName(repo), CommitID: api.CommitID(commit), Query: ""}
symbols, err := service.Search(context.Background(), args)
fatalIfError(err, "Search")
// Make sure the paths match.
gotPathSet := map[string]struct{}{}
for _, blob := range symbols {
gotPathSet[blob.Path] = struct{}{}
}
gotPaths := []string{}
for path := range gotPathSet {
gotPaths = append(gotPaths, path)
}
wantPaths := []string{}
for path := range state {
wantPaths = append(wantPaths, path)
}
sort.Strings(gotPaths)
sort.Strings(wantPaths)
if diff := cmp.Diff(gotPaths, wantPaths); diff != "" {
fmt.Println("unexpected paths (-got +want)")
fmt.Println(diff)
err = PrintInternals(context.Background(), db)
fatalIfError(err, "PrintInternals")
t.FailNow()
}
gotPathToSymbols := map[string][]string{}
for _, blob := range symbols {
gotPathToSymbols[blob.Path] = append(gotPathToSymbols[blob.Path], blob.Name)
}
// Make sure the symbols match.
for path, gotSymbols := range gotPathToSymbols {
wantSymbols := state[path]
sort.Strings(gotSymbols)
sort.Strings(wantSymbols)
if diff := cmp.Diff(gotSymbols, wantSymbols); diff != "" {
fmt.Println("unexpected symbols (-got +want)")
fmt.Println(diff)
err = PrintInternals(context.Background(), db)
fatalIfError(err, "PrintInternals")
t.FailNow()
}
}
}
commit := func(message string) {
gitRun("commit", "--allow-empty", "-m", message)
verifyBlobs()
}
add("a.txt", "sym1\n")
commit("add a file with 1 symbol")
add("b.txt", "sym1\n")
commit("add another file with 1 symbol")
add("c.txt", "sym1\nsym2")
commit("add another file with 2 symbols")
add("a.txt", "sym1\nsym2")
commit("add a symbol to a.txt")
commit("empty")
rm("a.txt")
commit("rm a.txt")
}
type SubprocessGit struct {
gitDir string
catFileCmd *exec.Cmd
catFileStdin io.WriteCloser
catFileStdout bufio.Reader
}
func NewSubprocessGit(gitDir string) (*SubprocessGit, error) {
cmd := exec.Command("git", "cat-file", "--batch")
cmd.Dir = gitDir
stdin, err := cmd.StdinPipe()
if err != nil {
return nil, err
}
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, err
}
err = cmd.Start()
if err != nil {
return nil, err
}
return &SubprocessGit{
gitDir: gitDir,
catFileCmd: cmd,
catFileStdin: stdin,
catFileStdout: *bufio.NewReader(stdout),
}, nil
}
func (git SubprocessGit) Close() error {
err := git.catFileStdin.Close()
if err != nil {
return err
}
return git.catFileCmd.Wait()
}
func (git SubprocessGit) LogReverseEach(repo string, givenCommit string, n int, onLogEntry func(entry LogEntry) error) (returnError error) {
log := exec.Command("git", LogReverseArgs(n, givenCommit)...)
log.Dir = git.gitDir
output, err := log.StdoutPipe()
if err != nil {
return err
}
err = log.Start()
if err != nil {
return err
}
defer func() {
err = log.Wait()
if err != nil {
returnError = err
}
}()
return ParseLogReverseEach(output, onLogEntry)
}
func (git SubprocessGit) RevListEach(repo string, givenCommit string, onCommit func(commit string) (shouldContinue bool, err error)) (returnError error) {
revList := exec.Command("git", RevListArgs(givenCommit)...)
revList.Dir = git.gitDir
output, err := revList.StdoutPipe()
if err != nil {
return err
}
err = revList.Start()
if err != nil {
return err
}
defer func() {
err = revList.Wait()
if err != nil {
returnError = err
}
}()
return RevListEach(output, onCommit)
}
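// ArchiveEach fetches file contents by writing "<commit>:<path>" requests to a
// long-lived `git cat-file --batch` subprocess. For each request, cat-file
// replies with a header line "<sha> <type> <size>" followed by <size> bytes of
// content and a trailing newline, which is the framing parsed below.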
func (git SubprocessGit) ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error {
for _, path := range paths {
_, err := git.catFileStdin.Write([]byte(fmt.Sprintf("%s:%s\n", commit, path)))
if err != nil {
return errors.Wrap(err, "writing to cat-file stdin")
}
line, err := git.catFileStdout.ReadString('\n')
if err != nil {
return errors.Wrap(err, "read newline")
}
line = line[:len(line)-1] // Drop the trailing newline
parts := strings.Split(line, " ")
if len(parts) != 3 {
return errors.Newf("unexpected cat-file output: %q", line)
}
size, err := strconv.ParseInt(parts[2], 10, 64)
if err != nil {
return errors.Wrap(err, "parse size")
}
fileContents, err := io.ReadAll(io.LimitReader(&git.catFileStdout, size))
if err != nil {
return errors.Wrap(err, "read contents")
}
discarded, err := git.catFileStdout.Discard(1) // Discard the trailing newline
if err != nil {
return errors.Wrap(err, "discard newline")
}
if discarded != 1 {
return errors.Newf("expected to discard 1 byte, but discarded %d", discarded)
}
err = onFile(path, fileContents)
if err != nil {
return errors.Wrap(err, "onFile")
}
}
return nil
}

View File

@ -0,0 +1,312 @@
package rockskip
import (
"fmt"
"net/http"
"sort"
"strings"
"sync"
"time"
"github.com/inconshreveable/log15"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
)
// RequestId is a unique int for each HTTP request.
type RequestId = int
// ServiceStatus contains the status of all requests.
type ServiceStatus struct {
threadIdToThreadStatus map[RequestId]*ThreadStatus
nextThreadId RequestId
mu sync.Mutex
}
func NewStatus() *ServiceStatus {
return &ServiceStatus{
threadIdToThreadStatus: map[int]*ThreadStatus{},
nextThreadId: 0,
mu: sync.Mutex{},
}
}
func (s *ServiceStatus) NewThreadStatus(name string) *ThreadStatus {
s.mu.Lock()
defer s.mu.Unlock()
threadId := s.nextThreadId
s.nextThreadId++
threadStatus := NewThreadStatus(name, func() {
s.mu.Lock()
defer s.mu.Unlock()
delete(s.threadIdToThreadStatus, threadId)
})
s.threadIdToThreadStatus[threadId] = threadStatus
return threadStatus
}
func (s *Service) HandleStatus(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
repositoryCount, _, err := basestore.ScanFirstInt(s.db.QueryContext(ctx, "SELECT COUNT(*) FROM rockskip_repos"))
if err != nil {
log15.Error("Failed to count repos", "error", err)
w.WriteHeader(http.StatusInternalServerError)
return
}
type repoRow struct {
repo string
lastAccessedAt time.Time
}
repoRows := []repoRow{}
repoSqlRows, err := s.db.QueryContext(ctx, "SELECT repo, last_accessed_at FROM rockskip_repos ORDER BY last_accessed_at DESC LIMIT 5")
if err != nil {
log15.Error("Failed to list repoRows", "error", err)
w.WriteHeader(http.StatusInternalServerError)
return
}
defer repoSqlRows.Close()
for repoSqlRows.Next() {
var repo string
var lastAccessedAt time.Time
if err := repoSqlRows.Scan(&repo, &lastAccessedAt); err != nil {
log15.Error("Failed to scan repo", "error", err)
w.WriteHeader(http.StatusInternalServerError)
return
}
repoRows = append(repoRows, repoRow{repo: repo, lastAccessedAt: lastAccessedAt})
}
symbolsSize, _, err := basestore.ScanFirstString(s.db.QueryContext(ctx, "SELECT pg_size_pretty(pg_total_relation_size('rockskip_symbols'))"))
if err != nil {
log15.Error("Failed to get size of symbols table", "error", err)
w.WriteHeader(http.StatusInternalServerError)
return
}
w.WriteHeader(http.StatusOK)
fmt.Fprintln(w, "This is the symbols service status page.")
fmt.Fprintln(w, "")
fmt.Fprintf(w, "Number of repositories: %d\n", repositoryCount)
fmt.Fprintf(w, "Size of symbols table: %s\n", symbolsSize)
fmt.Fprintln(w, "")
if repositoryCount > 0 {
fmt.Fprintf(w, "Most recently searched repositories (at most 5 shown)\n")
for _, repo := range repoRows {
fmt.Fprintf(w, " %s %s\n", repo.lastAccessedAt, repo.repo)
}
fmt.Fprintln(w, "")
}
s.status.mu.Lock()
defer s.status.mu.Unlock()
if len(s.status.threadIdToThreadStatus) == 0 {
fmt.Fprintln(w, "No requests in flight.")
return
}
fmt.Fprintln(w, "Here are all in-flight requests:")
fmt.Fprintln(w, "")
ids := []int{}
for id := range s.status.threadIdToThreadStatus {
ids = append(ids, id)
}
sort.Ints(ids)
for _, id := range ids {
status := s.status.threadIdToThreadStatus[id]
status.WithLock(func() {
fmt.Fprintf(w, "%s\n", status.Name)
if status.Total > 0 {
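// Estimate time remaining by extrapolating throughput so far: with
// fraction p indexed after duration d, the estimate is d/p - d.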
progress := float64(status.Indexed) / float64(status.Total)
remaining := "unknown"
if progress != 0 {
total := status.Tasklog.TotalDuration()
remaining = fmt.Sprint(time.Duration(total.Seconds()/progress)*time.Second - total)
}
fmt.Fprintf(w, " progress %.2f%% (indexed %d of %d commits), %s remaining\n", progress*100, status.Indexed, status.Total, remaining)
}
fmt.Fprintf(w, " %s\n", status.Tasklog)
locks := []string{}
for lock := range status.HeldLocks {
locks = append(locks, lock)
}
sort.Strings(locks)
for _, lock := range locks {
fmt.Fprintf(w, " holding %s\n", lock)
}
fmt.Fprintln(w)
})
}
}
type ThreadStatus struct {
Tasklog *TaskLog
Name string
HeldLocks map[string]struct{}
Indexed int
Total int
mu sync.Mutex
onEnd func()
}
func NewThreadStatus(name string, onEnd func()) *ThreadStatus {
return &ThreadStatus{
Tasklog: NewTaskLog(),
Name: name,
HeldLocks: map[string]struct{}{},
Indexed: -1,
Total: -1,
mu: sync.Mutex{},
onEnd: onEnd,
}
}
func (s *ThreadStatus) WithLock(f func()) {
s.mu.Lock()
defer s.mu.Unlock()
f()
}
func (s *ThreadStatus) SetProgress(indexed, total int) {
s.WithLock(func() { s.Indexed = indexed; s.Total = total })
}
func (s *ThreadStatus) HoldLock(name string) { s.WithLock(func() { s.HeldLocks[name] = struct{}{} }) }
func (s *ThreadStatus) ReleaseLock(name string) { s.WithLock(func() { delete(s.HeldLocks, name) }) }
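// End deregisters this thread's status from the ServiceStatus that created it.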
func (s *ThreadStatus) End() {
if s.onEnd != nil {
s.mu.Lock()
defer s.mu.Unlock()
s.onEnd()
}
}
type TaskLog struct {
currentName string
currentStart time.Time
nameToTask map[string]*Task
// This mutex is only necessary to synchronize with the status page handler.
mu sync.Mutex
}
type Task struct {
Duration time.Duration
Count int
}
func NewTaskLog() *TaskLog {
return &TaskLog{
currentName: "idle",
currentStart: time.Now(),
nameToTask: map[string]*Task{"idle": {Duration: 0, Count: 1}},
mu: sync.Mutex{},
}
}
func (t *TaskLog) Start(name string) {
t.mu.Lock()
defer t.mu.Unlock()
now := time.Now()
if _, ok := t.nameToTask[t.currentName]; !ok {
t.nameToTask[t.currentName] = &Task{Duration: 0, Count: 0}
}
t.nameToTask[t.currentName].Duration += now.Sub(t.currentStart)
if _, ok := t.nameToTask[name]; !ok {
t.nameToTask[name] = &Task{Duration: 0, Count: 0}
}
t.nameToTask[name].Count += 1
t.currentName = name
t.currentStart = now
}
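// Continue is like Start but does not increment the task's count, for use when
// resuming a task that has already been counted.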
func (t *TaskLog) Continue(name string) {
t.mu.Lock()
defer t.mu.Unlock()
now := time.Now()
if _, ok := t.nameToTask[t.currentName]; !ok {
t.nameToTask[t.currentName] = &Task{Duration: 0, Count: 0}
}
t.nameToTask[t.currentName].Duration += now.Sub(t.currentStart)
if _, ok := t.nameToTask[name]; !ok {
t.nameToTask[name] = &Task{Duration: 0, Count: 0}
}
t.currentName = name
t.currentStart = now
}
func (t *TaskLog) Reset() {
t.mu.Lock()
defer t.mu.Unlock()
t.currentName = "idle"
t.currentStart = time.Now()
t.nameToTask = map[string]*Task{"idle": {Duration: 0, Count: 1}}
}
func (t *TaskLog) Print() {
fmt.Println(t)
}
func (t *TaskLog) String() string {
var s strings.Builder
t.Continue(t.currentName)
t.mu.Lock()
defer t.mu.Unlock()
var total time.Duration = 0
totalCount := 0
for _, task := range t.nameToTask {
total += task.Duration
totalCount += task.Count
}
fmt.Fprintf(&s, "Tasks (%.2fs total, current %s): ", total.Seconds(), t.currentName)
type kv struct {
Key string
Value *Task
}
var kvs []kv
for k, v := range t.nameToTask {
kvs = append(kvs, kv{k, v})
}
sort.Slice(kvs, func(i, j int) bool {
return kvs[i].Value.Duration > kvs[j].Value.Duration
})
for _, kv := range kvs {
fmt.Fprintf(&s, "%s %.2f%% %dx, ", kv.Key, kv.Value.Duration.Seconds()*100/total.Seconds(), kv.Value.Count)
}
return s.String()
}
func (t *TaskLog) TotalDuration() time.Duration {
t.Continue(t.currentName)
var total time.Duration = 0
for _, task := range t.nameToTask {
total += task.Duration
}
return total
}

go.mod
View File

@ -366,7 +366,7 @@ require (
gopkg.in/warnings.v0 v0.1.2 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
k8s.io/kube-openapi v0.0.0-20220124234850-424119656bbf // indirect
k8s.io/utils v0.0.0-20220127004650-9b3446523e65 // indirect
k8s.io/utils v0.0.0-20220127004650-9b3446523e65
mvdan.cc/gofumpt v0.2.1 // indirect
sigs.k8s.io/yaml v1.3.0
)

View File

@ -742,3 +742,51 @@ Indexes:
"migration_logs_pkey" PRIMARY KEY, btree (id)
```
# Table "public.rockskip_ancestry"
```
Column | Type | Collation | Nullable | Default
-----------+-----------------------+-----------+----------+-----------------------------------------------
id | integer | | not null | nextval('rockskip_ancestry_id_seq'::regclass)
repo_id | integer | | not null |
commit_id | character varying(40) | | not null |
height | integer | | not null |
ancestor | integer | | not null |
Indexes:
"rockskip_ancestry_pkey" PRIMARY KEY, btree (id)
"rockskip_ancestry_repo_id_commit_id_key" UNIQUE CONSTRAINT, btree (repo_id, commit_id)
"rockskip_ancestry_repo_commit_id" btree (repo_id, commit_id)
```
# Table "public.rockskip_repos"
```
Column | Type | Collation | Nullable | Default
------------------+--------------------------+-----------+----------+--------------------------------------------
id | integer | | not null | nextval('rockskip_repos_id_seq'::regclass)
repo | text | | not null |
last_accessed_at | timestamp with time zone | | not null |
Indexes:
"rockskip_repos_pkey" PRIMARY KEY, btree (id)
"rockskip_repos_repo_key" UNIQUE CONSTRAINT, btree (repo)
"rockskip_repos_last_accessed_at" btree (last_accessed_at)
"rockskip_repos_repo" btree (repo)
```
# Table "public.rockskip_symbols"
```
Column | Type | Collation | Nullable | Default
---------+-----------+-----------+----------+----------------------------------------------
id | integer | | not null | nextval('rockskip_symbols_id_seq'::regclass)
added | integer[] | | not null |
deleted | integer[] | | not null |
repo_id | integer | | not null |
path | text | | not null |
name | text | | not null |
Indexes:
"rockskip_symbols_pkey" PRIMARY KEY, btree (id)
"rockskip_symbols_gin" gin (singleton_integer(repo_id) gin__int_ops, added gin__int_ops, deleted gin__int_ops, singleton(path), path_prefixes(path), singleton(name), name gin_trgm_ops)
"rockskip_symbols_repo_id_path_name" btree (repo_id, path, name)
```

View File

@ -0,0 +1,3 @@
DROP TABLE IF EXISTS rockskip_ancestry;
DROP TABLE IF EXISTS rockskip_symbols;
DROP TABLE IF EXISTS rockskip_repos;

View File

@ -0,0 +1,2 @@
name: 'rockskip'
parent: 1000000031

View File

@ -0,0 +1,71 @@
CREATE TABLE IF NOT EXISTS rockskip_repos (
id SERIAL PRIMARY KEY,
repo TEXT NOT NULL,
last_accessed_at TIMESTAMP WITH TIME ZONE NOT NULL,
UNIQUE (repo)
);
CREATE TABLE IF NOT EXISTS rockskip_ancestry (
id SERIAL PRIMARY KEY,
repo_id INTEGER NOT NULL,
commit_id VARCHAR(40) NOT NULL,
height INTEGER NOT NULL,
ancestor INTEGER NOT NULL,
UNIQUE (repo_id, commit_id)
);
-- Insert the null commit. repo_id 0 will not conflict with other repos because SERIAL's MINVALUE
-- defaults to 1.
INSERT INTO rockskip_ancestry
(id, commit_id , repo_id , height, ancestor)
VALUES (0 , '0000000000000000000000000000000000000000', 0 , 0 , 0 )
ON CONFLICT DO NOTHING;
CREATE TABLE IF NOT EXISTS rockskip_symbols (
-- Globally unique ID of this instance of the symbol.
id SERIAL PRIMARY KEY,
added INTEGER[] NOT NULL,
deleted INTEGER[] NOT NULL,
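-- added and deleted are arrays of commit IDs marking where along the commit
-- spine this symbol appeared and disappeared. Roughly: a symbol is considered
-- present at a commit when added intersects the commit's hops and deleted
-- does not.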
-- Since we only support searching by symbol name and we re-parse the file at query time, symbols
-- with the same name in the same file only need to be stored once. Upon re-parsing the file at query
-- time we will discover all symbols that match.
repo_id INTEGER NOT NULL,
path TEXT NOT NULL,
name TEXT NOT NULL
);
CREATE OR REPLACE FUNCTION singleton(value TEXT) RETURNS TEXT[] AS $$ BEGIN
RETURN ARRAY[value];
END; $$ IMMUTABLE language plpgsql;
CREATE OR REPLACE FUNCTION singleton_integer(value INTEGER) RETURNS INTEGER[] AS $$ BEGIN
RETURN ARRAY[value];
END; $$ IMMUTABLE language plpgsql;
CREATE OR REPLACE FUNCTION path_prefixes(path TEXT) RETURNS TEXT[] AS $$ BEGIN
RETURN (
SELECT array_agg(array_to_string(components[:len], '/')) prefixes
FROM
(SELECT regexp_split_to_array(path, E'/') components) t,
generate_series(1, array_length(components, 1)) AS len
);
END; $$ IMMUTABLE language plpgsql;
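-- For example, path_prefixes('a/b/c.txt') = {a, a/b, a/b/c.txt}.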
CREATE INDEX IF NOT EXISTS rockskip_repos_repo ON rockskip_repos(repo);
CREATE INDEX IF NOT EXISTS rockskip_repos_last_accessed_at ON rockskip_repos(last_accessed_at);
CREATE INDEX IF NOT EXISTS rockskip_ancestry_repo_commit_id ON rockskip_ancestry(repo_id, commit_id);
CREATE INDEX IF NOT EXISTS rockskip_symbols_repo_id_path_name ON rockskip_symbols(repo_id, path, name);
CREATE INDEX IF NOT EXISTS rockskip_symbols_gin ON rockskip_symbols USING GIN (
singleton_integer(repo_id) gin__int_ops,
added gin__int_ops,
deleted gin__int_ops,
singleton(path),
path_prefixes(path),
singleton(name),
name gin_trgm_ops
);

View File

@ -245,11 +245,6 @@ commands:
symbols:
cmd: .bin/symbols
install: |
# Remove old pcre libs that might still be lying around.
# TODO delete these two lines after 2021-10-24 (1 month after removal of pcre).
rm -f libsqlite3-pcre.dylib || true
rm -f libsqlite3-pcre.so || true
if [ -n "$DELVE" ]; then
export GCFLAGS='all=-N -l'
fi
@ -265,6 +260,27 @@ commands:
- internal
- cmd/symbols
enterprise-symbols:
cmd: .bin/enterprise-symbols
install: |
if [ -n "$DELVE" ]; then
export GCFLAGS='all=-N -l'
fi
./cmd/symbols/build-ctags.sh &&
go build -gcflags="$GCFLAGS" -o .bin/enterprise-symbols github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols
checkBinary: .bin/enterprise-symbols
env:
CTAGS_COMMAND: cmd/symbols/universal-ctags-dev
CTAGS_PROCESSES: 2
USE_ROCKSKIP: 'false'
watch:
- lib
- internal
- cmd/symbols
- enterprise/cmd/symbols
- enterprise/internal/rockskip
searcher:
cmd: .bin/searcher
install: |
@ -425,8 +441,8 @@ commands:
- enterprise/internal
- lib/codeintel
executor-template: &executor_template
# TMPDIR is set here so it's not set in the `install` process, which would trip up `go build`.
executor-template:
&executor_template # TMPDIR is set here so it's not set in the `install` process, which would trip up `go build`.
cmd: |
env TMPDIR="$HOME/.sourcegraph/executor-temp" .bin/executor
install: |
@ -450,7 +466,7 @@ commands:
env TMPDIR="$HOME/.sourcegraph/indexer-temp" .bin/executor
env:
EXECUTOR_QUEUE_NAME: codeintel
SRC_PROF_HTTP: ":6092"
SRC_PROF_HTTP: ':6092'
batches-executor:
<<: *executor_template
@ -459,7 +475,7 @@ commands:
env:
EXECUTOR_QUEUE_NAME: batches
EXECUTOR_MAXIMUM_NUM_JOBS: 8
SRC_PROF_HTTP: ":6093"
SRC_PROF_HTTP: ':6093'
# If you want to use this, either start it with `sg run batches-executor-firecracker` or
# modify the `commandsets.batches` in your local `sg.config.overwrite.yaml`
@ -472,7 +488,7 @@ commands:
env:
EXECUTOR_USE_FIRECRACKER: true
EXECUTOR_QUEUE_NAME: batches
SRC_PROF_HTTP: ":6093"
SRC_PROF_HTTP: ':6093'
minio:
cmd: |
@ -604,8 +620,8 @@ commands:
CONTAINER: grafana
PORT: 3370
# docker containers must access things via docker host on non-linux platforms
DOCKER_USER: ""
ADD_HOST_FLAG: ""
DOCKER_USER: ''
ADD_HOST_FLAG: ''
CACHE: false
watch:
- monitoring
@ -655,11 +671,11 @@ commands:
CONTAINER: prometheus
PORT: 9090
CONFIG_DIR: docker-images/prometheus/config
DOCKER_USER: ""
DOCKER_NET: ""
DOCKER_USER: ''
DOCKER_NET: ''
PROM_TARGETS: dev/prometheus/all/prometheus_targets.yml
SRC_FRONTEND_INTERNAL: host.docker.internal:3090
ADD_HOST_FLAG: ""
ADD_HOST_FLAG: ''
DISABLE_SOURCEGRAPH_CONFIG: false
postgres_exporter:
@ -686,7 +702,7 @@ commands:
docker pull index.docker.io/grafana/loki:$LOKI_VERSION
env:
LOKI_DISK: $HOME/.sourcegraph-dev/data/loki
LOKI_VERSION: "2.3.0"
LOKI_VERSION: '2.3.0'
LOKI_LOG_FILE: $HOME/.sourcegraph-dev/logs/loki/loki.log
storybook:
@ -772,7 +788,7 @@ commandsets:
- enterprise-web
- gitserver
- searcher
- symbols
- enterprise-symbols
- caddy
- docsite
- syntax-highlighter