codeintel: Rockskip for symbols (#28719)
parent 103572c4d8
commit 7a6a2a062b
@ -1,59 +0,0 @@
package main

import (
    "os"
    "runtime"
    "strconv"
    "time"

    "github.com/sourcegraph/sourcegraph/internal/env"
)

type Config struct {
    env.BaseConfig

    ctagsCommand            string
    ctagsPatternLengthLimit int
    ctagsLogErrors          bool
    ctagsDebugLogs          bool

    sanityCheck       bool
    cacheDir          string
    cacheSizeMB       int
    numCtagsProcesses int
    requestBufferSize int
    processingTimeout time.Duration

    // The maximum sum of lengths of all paths in a single call to git archive. Without this limit, we
    // could hit the error "argument list too long" by exceeding the limit on the number of arguments to
    // a command enforced by the OS.
    //
    // Mac  : getconf ARG_MAX returns 1,048,576
    // Linux: getconf ARG_MAX returns 2,097,152
    //
    // We want to remain well under that limit, so defaulting to 100,000 seems safe (see the
    // MAX_TOTAL_PATHS_LENGTH environment variable below).
    maxTotalPathsLength int
}

var config = &Config{}

// Load reads from the environment and stores the transformed data on the config object for later retrieval.
func (c *Config) Load() {
    c.ctagsCommand = c.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)")
    c.ctagsPatternLengthLimit = c.GetInt("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags")
    logCtagsErrorsDefault := "false"
    if os.Getenv("DEPLOY_TYPE") == "dev" {
        logCtagsErrorsDefault = "true"
    }
    c.ctagsLogErrors = c.GetBool("LOG_CTAGS_ERRORS", logCtagsErrorsDefault, "log ctags errors")
    c.ctagsDebugLogs = false

    c.sanityCheck = c.GetBool("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not")
    c.cacheDir = c.Get("CACHE_DIR", "/tmp/symbols-cache", "directory in which to store cached symbols")
    c.cacheSizeMB = c.GetInt("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache (in megabytes)")
    c.numCtagsProcesses = c.GetInt("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of concurrent parser processes to run")
    c.requestBufferSize = c.GetInt("REQUEST_BUFFER_SIZE", "8192", "maximum size of buffered parser request channel")
    c.processingTimeout = c.GetInterval("PROCESSING_TIMEOUT", "2h", "maximum time to spend processing a repository")
    c.maxTotalPathsLength = c.GetInt("MAX_TOTAL_PATHS_LENGTH", "100000", "maximum sum of lengths of all paths in a single call to git archive")
}
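The ARG_MAX comment above is the rationale for the fetcher's batchByTotalLength, whose tail appears in a later hunk. A minimal, self-contained sketch of that batching strategy (my own illustration, not necessarily the repository's exact implementation):

```go
package main

import "fmt"

// batchByTotalLength greedily packs paths into batches whose summed lengths
// stay under maxTotalLength, so a single `git archive` invocation never
// exceeds the OS argument-list limit.
func batchByTotalLength(paths []string, maxTotalLength int) [][]string {
	var batches [][]string
	var batch []string
	total := 0

	for _, path := range paths {
		// Start a new batch when adding this path would exceed the limit.
		if len(batch) > 0 && total+len(path) > maxTotalLength {
			batches = append(batches, batch)
			batch, total = nil, 0
		}
		batch = append(batch, path)
		total += len(path)
	}
	if len(batch) > 0 {
		batches = append(batches, batch)
	}
	return batches
}

func main() {
	fmt.Println(batchByTotalLength([]string{"a.go", "bb.go", "ccc.go"}, 9))
	// Output: [[a.go bb.go] [ccc.go]]
}
```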
@ -3,10 +3,8 @@
# This script installs ctags within an alpine container.

# Commit hash of github.com/universal-ctags/ctags.
# Last bumped 2022-02-28
# This version includes a fix that hasn't landed on master yet:
# https://github.com/universal-ctags/ctags/pull/3300
CTAGS_VERSION=90a16c009c52a35578140c6c731bcd5faa104f11
# Last bumped 2022-02-10
CTAGS_VERSION=37a4b3601288bcdc02a387197ff8d9b971f7ab34

cleanup() {
  apk --no-cache --purge del ctags-build-deps || true
cmd/symbols/fetcher/gen.go (new file, 3 lines)
@ -0,0 +1,3 @@
package fetcher

//go:generate ../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver -i GitServerClient -o mock_iface_test.go
@ -7,14 +7,14 @@ import (
    "io"
    "sync"

    gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
    gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
    api "github.com/sourcegraph/sourcegraph/internal/api"
)

// MockGitserverClient is a mock implementation of the GitserverClient
// interface (from the package
// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used
// for unit testing.
// github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver) used for unit
// testing.
type MockGitserverClient struct {
    // FetchTarFunc is an instance of a mock function object controlling the
    // behavior of the method FetchTar.
@ -2,16 +2,14 @@ package fetcher

import (
    "archive/tar"
    "bytes"
    "context"
    "io"
    "path"
    "strings"

    "github.com/opentracing/opentracing-go/log"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/lib/errors"
)
@ -22,7 +20,6 @@ type RepositoryFetcher interface {

type repositoryFetcher struct {
    gitserverClient     gitserver.GitserverClient
    fetchSem            chan int
    operations          *operations
    maxTotalPathsLength int
}
@ -37,10 +34,9 @@ type parseRequestOrError struct {
    Err error
}

func NewRepositoryFetcher(gitserverClient gitserver.GitserverClient, maximumConcurrentFetches int, maxTotalPathsLength int, observationContext *observation.Context) RepositoryFetcher {
func NewRepositoryFetcher(gitserverClient gitserver.GitserverClient, maxTotalPathsLength int, observationContext *observation.Context) RepositoryFetcher {
    return &repositoryFetcher{
        gitserverClient:     gitserverClient,
        fetchSem:            make(chan int, maximumConcurrentFetches),
        operations:          newOperations(observationContext),
        maxTotalPathsLength: maxTotalPathsLength,
    }
@ -71,13 +67,6 @@ func (f *repositoryFetcher) fetchRepositoryArchive(ctx context.Context, args typ
    }})
    defer endObservation(1, observation.Args{})

    onDefer, err := f.limitConcurrentFetches(ctx)
    if err != nil {
        return err
    }
    defer onDefer()
    trace.Log(log.Event("acquired fetch semaphore"))

    f.operations.fetching.Inc()
    defer f.operations.fetching.Dec()

@ -136,19 +125,6 @@ func batchByTotalLength(paths []string, maxTotalLength int) [][]string {
    return batches
}

func (f *repositoryFetcher) limitConcurrentFetches(ctx context.Context) (func(), error) {
    f.operations.fetchQueueSize.Inc()
    defer f.operations.fetchQueueSize.Dec()

    select {
    case f.fetchSem <- 1:
        return func() { <-f.fetchSem }, nil

    case <-ctx.Done():
        return func() {}, ctx.Err()
    }
}

func readTar(ctx context.Context, tarReader *tar.Reader, callback func(request ParseRequest), traceLog observation.TraceLogger) error {
    for {
        if ctx.Err() != nil {
@ -156,89 +132,22 @@ func readTar(ctx context.Context, tarReader *tar.Reader, callback func(request P
        }

        tarHeader, err := tarReader.Next()
        if err != nil {
            if err == io.EOF {
                return nil
            }

        if err == io.EOF {
            return nil
        } else if err != nil {
            return err
        }

            readTarHeader(tarReader, tarHeader, callback, traceLog)
    }
}
        if tarHeader.FileInfo().IsDir() || tarHeader.Typeflag == tar.TypeXGlobalHeader {
            continue
        }

func readTarHeader(tarReader *tar.Reader, tarHeader *tar.Header, callback func(request ParseRequest), trace observation.TraceLogger) error {
    if !shouldParse(tarHeader) {
        return nil
    }

    // 32KB is the same size used by io.Copy
    buffer := make([]byte, 32*1024)

    trace.Log(log.Event("reading tar header prefix"))

    // Read first chunk of tar header contents
    n, err := tarReader.Read(buffer)
    if err != nil && err != io.EOF {
        return err
    }
    trace.Log(log.Int("n", n))

    if n == 0 {
        // Empty file, nothing to parse
        return nil
    }

    // Check to see if first 256 bytes contain a 0x00. If so, we'll assume that
    // the file is binary and skip parsing. Otherwise, we'll have some non-zero
    // contents that passed our filters above to parse.

    m := 256
    if n < m {
        m = n
    }
    if bytes.IndexByte(buffer[:m], 0x00) >= 0 {
        return nil
    }

    // Copy buffer into appropriately-sized slice for return
    data := make([]byte, int(tarHeader.Size))
    copy(data, buffer[:n])

    if n < int(tarHeader.Size) {
        trace.Log(log.Event("reading remaining tar header content"))

        // Read the remaining contents
        if _, err := io.ReadFull(tarReader, data[n:]); err != nil {
        data := make([]byte, int(tarHeader.Size))
        traceLog.Log(log.Event("reading tar file contents"))
        if _, err := io.ReadFull(tarReader, data); err != nil {
            return err
        }
        trace.Log(log.Int("n", int(tarHeader.Size)-n))
        traceLog.Log(log.Int("n", int(tarHeader.Size)))
        callback(ParseRequest{Path: tarHeader.Name, Data: data})
    }

    request := ParseRequest{Path: tarHeader.Name, Data: data}
    callback(request)
    return nil
}

// maxFileSize (512KB) is the maximum size of files we attempt to parse.
const maxFileSize = 1 << 19

func shouldParse(tarHeader *tar.Header) bool {
    // We do not search large files
    if tarHeader.Size > maxFileSize {
        return false
    }

    // We only care about files
    if tarHeader.Typeflag != tar.TypeReg && tarHeader.Typeflag != tar.TypeRegA {
        return false
    }

    // JSON files are symbol-less
    if path.Ext(tarHeader.Name) == ".json" {
        return false
    }

    return true
}
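The side-by-side extraction interleaves the removed readTarHeader/shouldParse code with the new loop above. For readability, here is a contiguous reading of the new readTar assembled from the added lines, with trace logging elided and ParseRequest stubbed locally; the ctx.Err() return body is assumed from context:

```go
package fetcher

import (
	"archive/tar"
	"context"
	"io"
)

// ParseRequest mirrors the fetcher's request type: a path plus the full
// file contents.
type ParseRequest struct {
	Path string
	Data []byte
}

// readTar streams every regular file out of a tar archive. Per-file
// filtering (size, binary, symbol-count limits) now lives in the parser
// layer, so the loop only skips directories and pax global headers.
func readTar(ctx context.Context, tarReader *tar.Reader, callback func(ParseRequest)) error {
	for {
		if ctx.Err() != nil {
			return ctx.Err()
		}

		tarHeader, err := tarReader.Next()
		if err == io.EOF {
			return nil
		} else if err != nil {
			return err
		}

		if tarHeader.FileInfo().IsDir() || tarHeader.Typeflag == tar.TypeXGlobalHeader {
			continue
		}

		// Read the whole entry; entries are bounded upstream.
		data := make([]byte, int(tarHeader.Size))
		if _, err := io.ReadFull(tarReader, data); err != nil {
			return err
		}
		callback(ParseRequest{Path: tarHeader.Name, Data: data})
	}
}
```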
@ -7,8 +7,8 @@ import (

    "github.com/google/go-cmp/cmp"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/api"
    "github.com/sourcegraph/sourcegraph/internal/observation"
)
@ -29,16 +29,10 @@ func TestRepositoryFetcher(t *testing.T) {
        tarContents[name] = content
    }

    // JSON is ignored
    tarContents["ignored.json"] = "{}"

    // Large files are ignored
    tarContents["payloads.txt"] = strings.Repeat("oversized load", maxFileSize)

    gitserverClient := NewMockGitserverClient()
    gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(tarContents))

    repositoryFetcher := NewRepositoryFetcher(gitserverClient, 15, 1000, &observation.TestContext)
    repositoryFetcher := NewRepositoryFetcher(gitserverClient, 1000, &observation.TestContext)
    args := types.SearchArgs{Repo: api.RepoName("foo"), CommitID: api.CommitID("deadbeef")}

    t.Run("all paths", func(t *testing.T) {
@ -4,7 +4,6 @@ import (
    "bytes"
    "context"
    "io"
    "strings"

    "github.com/opentracing/opentracing-go/log"

@ -47,7 +46,6 @@ func (c *gitserverClient) FetchTar(ctx context.Context, repo api.RepoName, commi
        log.String("repo", string(repo)),
        log.String("commit", string(commit)),
        log.Int("paths", len(paths)),
        log.String("paths", strings.Join(paths, ":")),
    }})
    defer endObservation(1, observation.Args{})

@ -16,15 +16,36 @@ export GOOS=linux
# go-sqlite3 depends on cgo. Without cgo, it will build but it'll throw an error at query time.
export CGO_ENABLED=1

# Ensure musl-gcc is available since we're building to run on Alpine, which uses musl.
if ! command -v musl-gcc >/dev/null; then
  echo "musl-gcc not found, which is needed for cgo for go-sqlite3. Run 'apt-get install -y musl-tools'."
# Default CC to musl-gcc.
export CC="${CC:-musl-gcc}"

help() {
  echo "You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine."
  echo
  echo "  Linux: run 'apt-get install -y musl-tools'"
  echo "  macOS: download https://github.com/FiloSottile/homebrew-musl-cross/blob/6ee3329ee41231fe693306490f8e4d127c70e618/musl-cross.rb and run 'brew install ~/Downloads/musl-cross.rb'"
}

if ! command -v "$CC" >/dev/null; then
  echo "$CC not found."
  help
  exit 1
fi

# Make sure this is a musl compiler.
case "$CC" in
*musl*)
  ;;
*)
  echo "$CC doesn't look like a musl compiler."
  help
  exit 1
  ;;
esac

echo "--- go build"
pkg="github.com/sourcegraph/sourcegraph/cmd/symbols"
env CC=musl-gcc go build \
env go build \
  -trimpath \
  -ldflags "-X github.com/sourcegraph/sourcegraph/internal/version.version=$VERSION -X github.com/sourcegraph/sourcegraph/internal/version.timestamp=$(date +%s)" \
  -buildmode exe \
@ -1,3 +1,3 @@
package api

//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go
//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver -i GitServerClient -o mock_iface_test.go
@ -9,86 +9,71 @@ import (

    "github.com/sourcegraph/go-ctags"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/lib/errors"
)

type apiHandler struct {
    cachedDatabaseWriter writer.CachedDatabaseWriter
    ctagsBinary          string
    operations           *operations
}

func NewHandler(
    cachedDatabaseWriter writer.CachedDatabaseWriter,
    searchFunc types.SearchFunc,
    handleStatus func(http.ResponseWriter, *http.Request),
    ctagsBinary string,
    observationContext *observation.Context,
) http.Handler {
    h := newAPIHandler(cachedDatabaseWriter, ctagsBinary, observationContext)

    mux := http.NewServeMux()
    mux.HandleFunc("/search", h.handleSearch)
    mux.HandleFunc("/healthz", h.handleHealthCheck)
    mux.HandleFunc("/list-languages", h.handleListLanguages)
    return mux
}

func newAPIHandler(
    cachedDatabaseWriter writer.CachedDatabaseWriter,
    ctagsBinary string,
    observationContext *observation.Context,
) *apiHandler {
    return &apiHandler{
        cachedDatabaseWriter: cachedDatabaseWriter,
        ctagsBinary:          ctagsBinary,
        operations:           newOperations(observationContext),
    mux.HandleFunc("/search", handleSearchWith(searchFunc))
    mux.HandleFunc("/healthz", handleHealthCheck)
    mux.HandleFunc("/list-languages", handleListLanguages(ctagsBinary))
    if handleStatus != nil {
        mux.HandleFunc("/status", handleStatus)
    }
    return mux
}

const maxNumSymbolResults = 500

func (h *apiHandler) handleSearch(w http.ResponseWriter, r *http.Request) {
    var args types.SearchArgs
    if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
        http.Error(w, err.Error(), http.StatusBadRequest)
        return
    }

    if args.First < 0 || args.First > maxNumSymbolResults {
        args.First = maxNumSymbolResults
    }

    result, err := h.handleSearchInternal(r.Context(), args)
    if err != nil {
        // Ignore reporting errors where client disconnected
        if r.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) {
func handleSearchWith(searchFunc types.SearchFunc) func(w http.ResponseWriter, r *http.Request) {
    return func(w http.ResponseWriter, r *http.Request) {
        var args types.SearchArgs
        if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
            http.Error(w, err.Error(), http.StatusBadRequest)
            return
        }

        log15.Error("Symbol search failed", "args", args, "error", err)
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }
        if args.First < 0 || args.First > maxNumSymbolResults {
            args.First = maxNumSymbolResults
        }

    if err := json.NewEncoder(w).Encode(result); err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        result, err := searchFunc(r.Context(), args)
        if err != nil {
            // Ignore reporting errors where client disconnected
            if r.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) {
                return
            }

            log15.Error("Symbol search failed", "args", args, "error", err)
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }

        if err := json.NewEncoder(w).Encode(result); err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
        }
    }
}

func (h *apiHandler) handleListLanguages(w http.ResponseWriter, r *http.Request) {
    mapping, err := ctags.ListLanguageMappings(r.Context(), h.ctagsBinary)
    if err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }
    if err := json.NewEncoder(w).Encode(mapping); err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
func handleListLanguages(ctagsBinary string) func(w http.ResponseWriter, r *http.Request) {
    return func(w http.ResponseWriter, r *http.Request) {
        mapping, err := ctags.ListLanguageMappings(r.Context(), ctagsBinary)
        if err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }
        if err := json.NewEncoder(w).Encode(mapping); err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
        }
    }
}

func (h *apiHandler) handleHealthCheck(w http.ResponseWriter, r *http.Request) {
func handleHealthCheck(w http.ResponseWriter, r *http.Request) {
    w.WriteHeader(http.StatusOK)

    if _, err := w.Write([]byte("OK")); err != nil {
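The handler refactor above swaps methods on *apiHandler for factory functions that close over their dependencies, so NewHandler can accept any types.SearchFunc (SQLite-backed today, other backends later). A distilled, self-contained sketch of the pattern, with hypothetical local stand-ins for the real types:

```go
package main

import (
	"context"
	"encoding/json"
	"net/http"
)

// searchArgs and searchFunc stand in for types.SearchArgs and
// types.SearchFunc to keep the sketch self-contained.
type searchArgs struct {
	Repo  string `json:"repo"`
	Query string `json:"query"`
}
type searchFunc func(ctx context.Context, args searchArgs) ([]string, error)

// handleSearchWith returns a handler that closes over the injected search
// implementation, mirroring the diff's move away from handler methods.
func handleSearchWith(search searchFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		var args searchArgs
		if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
			http.Error(w, err.Error(), http.StatusBadRequest)
			return
		}
		results, err := search(r.Context(), args)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		if err := json.NewEncoder(w).Encode(results); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	}
}

func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("/search", handleSearchWith(func(ctx context.Context, args searchArgs) ([]string, error) {
		return []string{args.Query}, nil // echo stub
	}))
	_ = http.ListenAndServe(":3184", mux)
}
```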
@ -9,12 +9,14 @@ import (
    "time"

    "github.com/sourcegraph/go-ctags"
    "golang.org/x/sync/semaphore"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
    sharedobservability "github.com/sourcegraph/sourcegraph/cmd/symbols/observability"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
    "github.com/sourcegraph/sourcegraph/internal/diskcache"
    "github.com/sourcegraph/sourcegraph/internal/httpcli"
    "github.com/sourcegraph/sourcegraph/internal/observation"
@ -50,10 +52,10 @@ func TestHandler(t *testing.T) {
    gitserverClient := NewMockGitserverClient()
    gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(files))

    parser := parser.NewParser(parserPool, fetcher.NewRepositoryFetcher(gitserverClient, 15, 1000, &observation.TestContext), 0, 10, &observation.TestContext)
    databaseWriter := writer.NewDatabaseWriter(tmpDir, gitserverClient, parser)
    parser := parser.NewParser(parserPool, fetcher.NewRepositoryFetcher(gitserverClient, 1000, &observation.TestContext), 0, 10, &observation.TestContext)
    databaseWriter := writer.NewDatabaseWriter(tmpDir, gitserverClient, parser, semaphore.NewWeighted(1))
    cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
    handler := NewHandler(cachedDatabaseWriter, "", &observation.TestContext)
    handler := NewHandler(MakeSqliteSearchFunc(sharedobservability.NewOperations(&observation.TestContext), cachedDatabaseWriter), nil, "")

    server := httptest.NewServer(handler)
    defer server.Close()
cmd/symbols/internal/api/mock_iface_test.go (generated)
@ -7,14 +7,14 @@ import (
    "io"
    "sync"

    gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
    gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
    api "github.com/sourcegraph/sourcegraph/internal/api"
)

// MockGitserverClient is a mock implementation of the GitserverClient
// interface (from the package
// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used
// for unit testing.
// github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver) used for unit
// testing.
type MockGitserverClient struct {
    // FetchTarFunc is an instance of a mock function object controlling the
    // behavior of the method FetchTar.
@ -1,59 +0,0 @@
package api

import (
    "context"
    "strings"
    "time"

    "github.com/opentracing/opentracing-go/log"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/internal/search/result"
    "github.com/sourcegraph/sourcegraph/lib/errors"
)

const searchTimeout = 60 * time.Second

func (h *apiHandler) handleSearchInternal(ctx context.Context, args types.SearchArgs) (_ *result.Symbols, err error) {
    ctx, trace, endObservation := h.operations.search.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
        log.String("repo", string(args.Repo)),
        log.String("commitID", string(args.CommitID)),
        log.String("query", args.Query),
        log.Bool("isRegExp", args.IsRegExp),
        log.Bool("isCaseSensitive", args.IsCaseSensitive),
        log.Int("numIncludePatterns", len(args.IncludePatterns)),
        log.String("includePatterns", strings.Join(args.IncludePatterns, ":")),
        log.String("excludePattern", args.ExcludePattern),
        log.Int("first", args.First),
    }})
    defer func() {
        endObservation(1, observation.Args{
            MetricLabelValues: []string{observability.GetParseAmount(ctx)},
            LogFields:         []log.Field{log.String("parseAmount", observability.GetParseAmount(ctx))},
        })
    }()
    ctx = observability.SeedParseAmount(ctx)

    ctx, cancel := context.WithTimeout(ctx, searchTimeout)
    defer cancel()

    dbFile, err := h.cachedDatabaseWriter.GetOrCreateDatabaseFile(ctx, args)
    if err != nil {
        return nil, errors.Wrap(err, "databaseWriter.GetOrCreateDatabaseFile")
    }
    trace.Log(log.String("dbFile", dbFile))

    var results result.Symbols
    err = store.WithSQLiteStore(dbFile, func(db store.Store) (err error) {
        if results, err = db.Search(ctx, args); err != nil {
            return errors.Wrap(err, "store.Search")
        }

        return nil
    })

    return &results, err
}
cmd/symbols/internal/api/search_sqlite.go (new file, 64 lines)
@ -0,0 +1,64 @@
package api

import (
    "context"
    "strings"
    "time"

    "github.com/opentracing/opentracing-go/log"

    "github.com/sourcegraph/sourcegraph/lib/errors"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
    sharedobservability "github.com/sourcegraph/sourcegraph/cmd/symbols/observability"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/internal/search/result"
)

const searchTimeout = 60 * time.Second

func MakeSqliteSearchFunc(operations *sharedobservability.Operations, cachedDatabaseWriter writer.CachedDatabaseWriter) types.SearchFunc {
    return func(ctx context.Context, args types.SearchArgs) (results []result.Symbol, err error) {
        ctx, trace, endObservation := operations.Search.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
            log.String("repo", string(args.Repo)),
            log.String("commitID", string(args.CommitID)),
            log.String("query", args.Query),
            log.Bool("isRegExp", args.IsRegExp),
            log.Bool("isCaseSensitive", args.IsCaseSensitive),
            log.Int("numIncludePatterns", len(args.IncludePatterns)),
            log.String("includePatterns", strings.Join(args.IncludePatterns, ":")),
            log.String("excludePattern", args.ExcludePattern),
            log.Int("first", args.First),
        }})
        defer func() {
            endObservation(1, observation.Args{
                MetricLabelValues: []string{observability.GetParseAmount(ctx)},
                LogFields:         []log.Field{log.String("parseAmount", observability.GetParseAmount(ctx))},
            })
        }()
        ctx = observability.SeedParseAmount(ctx)

        ctx, cancel := context.WithTimeout(ctx, searchTimeout)
        defer cancel()

        dbFile, err := cachedDatabaseWriter.GetOrCreateDatabaseFile(ctx, args)
        if err != nil {
            return nil, errors.Wrap(err, "databaseWriter.GetOrCreateDatabaseFile")
        }
        trace.Log(log.String("dbFile", dbFile))

        var res result.Symbols
        err = store.WithSQLiteStore(dbFile, func(db store.Store) (err error) {
            if res, err = db.Search(ctx, args); err != nil {
                return errors.Wrap(err, "store.Search")
            }

            return nil
        })

        return res, err
    }
}
@ -9,7 +9,7 @@ import (
    "github.com/grafana/regexp/syntax"
    "github.com/keegancsmith/sqlf"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/database/basestore"
    "github.com/sourcegraph/sourcegraph/internal/search/result"
    "github.com/sourcegraph/sourcegraph/lib/errors"
@ -7,8 +7,8 @@ import (
    "github.com/inconshreveable/log15"
    "github.com/jmoiron/sqlx"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/database/basestore"
    "github.com/sourcegraph/sourcegraph/internal/search/result"
)
@ -7,7 +7,7 @@ import (
    "github.com/keegancsmith/sqlf"
    "golang.org/x/sync/errgroup"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
    "github.com/sourcegraph/sourcegraph/internal/database/batch"
    "github.com/sourcegraph/sourcegraph/internal/search/result"
)
@ -5,7 +5,7 @@ import (
    "fmt"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/diskcache"
    "github.com/sourcegraph/sourcegraph/lib/errors"
)
@ -4,11 +4,13 @@ import (
    "context"
    "path/filepath"

    "golang.org/x/sync/semaphore"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api/observability"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/api"
    "github.com/sourcegraph/sourcegraph/internal/diskcache"
    "github.com/sourcegraph/sourcegraph/lib/errors"
@ -22,21 +24,27 @@ type databaseWriter struct {
    path            string
    gitserverClient gitserver.GitserverClient
    parser          parser.Parser
    sem             *semaphore.Weighted
}

func NewDatabaseWriter(
    path string,
    gitserverClient gitserver.GitserverClient,
    parser parser.Parser,
    sem *semaphore.Weighted,
) DatabaseWriter {
    return &databaseWriter{
        path:            path,
        gitserverClient: gitserverClient,
        parser:          parser,
        sem:             sem,
    }
}

func (w *databaseWriter) WriteDBFile(ctx context.Context, args types.SearchArgs, dbFile string) error {
    w.sem.Acquire(ctx, 1)
    defer w.sem.Release(1)

    if newestDBFile, oldCommit, ok, err := w.getNewestCommit(ctx, args); err != nil {
        return err
    } else if ok {
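Concurrency limiting moves here: the fetcher's channel-based fetchSem (removed earlier in this diff) is replaced by a semaphore.Weighted in the database writer, sized from MAX_CONCURRENTLY_INDEXING. Note that the diff drops the error returned by Acquire; the usual pattern checks it, since Acquire fails when the context is canceled. A small sketch with hypothetical names:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/sync/semaphore"
)

// indexer sketches the writer's pattern: one semaphore.Weighted shared
// across calls caps how many repositories are indexed at once.
type indexer struct {
	sem *semaphore.Weighted
}

func (ix *indexer) writeDBFile(ctx context.Context, repo string) error {
	// Acquire blocks until a slot frees up or ctx is canceled; checking the
	// error is the idiomatic form (the diff above ignores it).
	if err := ix.sem.Acquire(ctx, 1); err != nil {
		return err
	}
	defer ix.sem.Release(1)

	time.Sleep(10 * time.Millisecond) // stand-in for the real indexing work
	fmt.Println("indexed", repo)
	return nil
}

func main() {
	ix := &indexer{sem: semaphore.NewWeighted(10)} // MAX_CONCURRENTLY_INDEXING defaults to 10
	_ = ix.writeDBFile(context.Background(), "github.com/foo/bar")
}
```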
@ -1,3 +0,0 @@
package fetcher

//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go
@ -1,23 +0,0 @@
package parser

import (
    "log"
    "os"

    "github.com/sourcegraph/go-ctags"
)

func NewCtagsParserFactory(ctagsCommand string, patternLengthLimit int, logErrors, debugLogs bool) ParserFactory {
    options := ctags.Options{
        Bin:                ctagsCommand,
        PatternLengthLimit: patternLengthLimit,
    }
    if logErrors {
        options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
    }
    if debugLogs {
        options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
    }

    return func() (ctags.Parser, error) { return ctags.New(options) }
}
@ -1,40 +0,0 @@
package types

import (
    "github.com/sourcegraph/sourcegraph/internal/api"
)

// SearchArgs are the arguments to perform a search on the symbols service.
type SearchArgs struct {
    // Repo is the name of the repository to search in.
    Repo api.RepoName `json:"repo"`

    // CommitID is the commit to search in.
    CommitID api.CommitID `json:"commitID"`

    // Query is the search query.
    Query string

    // IsRegExp if true will treat the Pattern as a regular expression.
    IsRegExp bool

    // IsCaseSensitive if false will ignore the case of query and file pattern
    // when finding matches.
    IsCaseSensitive bool

    // IncludePatterns is a list of regexes that symbol's file paths
    // need to match to get included in the result
    //
    // The patterns are ANDed together; a file's path must match all patterns
    // for it to be kept. That is also why it is a list (unlike the singular
    // ExcludePattern); it is not possible in general to construct a single
    // glob or Go regexp that represents multiple such patterns ANDed together.
    IncludePatterns []string

    // ExcludePattern is an optional regex that symbol's file paths
    // need to match to get included in the result
    ExcludePattern string

    // First indicates that only the first n symbols should be returned.
    First int
}
@ -3,128 +3,9 @@
package main

import (
    "context"
    "fmt"
    "log"
    "net/http"
    "os"
    "time"

    "github.com/inconshreveable/log15"
    "github.com/opentracing/opentracing-go"
    "github.com/prometheus/client_golang/prometheus"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
    sqlite "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/janitor"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
    "github.com/sourcegraph/sourcegraph/internal/actor"
    "github.com/sourcegraph/sourcegraph/internal/conf"
    "github.com/sourcegraph/sourcegraph/internal/debugserver"
    "github.com/sourcegraph/sourcegraph/internal/diskcache"
    "github.com/sourcegraph/sourcegraph/internal/env"
    "github.com/sourcegraph/sourcegraph/internal/goroutine"
    "github.com/sourcegraph/sourcegraph/internal/honey"
    "github.com/sourcegraph/sourcegraph/internal/httpserver"
    "github.com/sourcegraph/sourcegraph/internal/logging"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/internal/profiler"
    "github.com/sourcegraph/sourcegraph/internal/sentry"
    "github.com/sourcegraph/sourcegraph/internal/trace"
    "github.com/sourcegraph/sourcegraph/internal/trace/ot"
    "github.com/sourcegraph/sourcegraph/internal/tracer"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/shared"
)

const addr = ":3184"

func main() {
    config.Load()

    // Set up Google Cloud Profiler when running in Cloud
    if err := profiler.Init(); err != nil {
        log.Fatalf("Failed to start profiler: %v", err)
    }

    env.Lock()
    env.HandleHelpFlag()
    conf.Init()
    logging.Init()
    tracer.Init(conf.DefaultClient())
    sentry.Init(conf.DefaultClient())
    trace.Init()

    if err := config.Validate(); err != nil {
        log.Fatalf("Failed to load configuration: %s", err)
    }

    // Ensure we register our database driver before calling
    // anything that tries to open a SQLite database.
    sqlite.Init()

    if config.sanityCheck {
        fmt.Print("Running sanity check...")
        if err := sqlite.SanityCheck(); err != nil {
            fmt.Println("failed ❌", err)
            os.Exit(1)
        }

        fmt.Println("passed ✅")
        os.Exit(0)
    }

    // Initialize tracing/metrics
    observationContext := &observation.Context{
        Logger:     log15.Root(),
        Tracer:     &trace.Tracer{Tracer: opentracing.GlobalTracer()},
        Registerer: prometheus.DefaultRegisterer,
        HoneyDataset: &honey.Dataset{
            Name:       "codeintel-symbols",
            SampleRate: 5,
        },
    }

    // Start debug server
    ready := make(chan struct{})
    go debugserver.NewServerRoutine(ready).Start()

    ctagsParserFactory := parser.NewCtagsParserFactory(
        config.ctagsCommand,
        config.ctagsPatternLengthLimit,
        config.ctagsLogErrors,
        config.ctagsDebugLogs,
    )

    cache := diskcache.NewStore(config.cacheDir, "symbols",
        diskcache.WithBackgroundTimeout(config.processingTimeout),
        diskcache.WithObservationContext(observationContext),
    )

    parserPool, err := parser.NewParserPool(ctagsParserFactory, config.numCtagsProcesses)
    if err != nil {
        log.Fatalf("Failed to create parser pool: %s", err)
    }

    gitserverClient := gitserver.NewClient(observationContext)
    repositoryFetcher := fetcher.NewRepositoryFetcher(gitserverClient, 15, config.maxTotalPathsLength, observationContext)
    parser := parser.NewParser(parserPool, repositoryFetcher, config.requestBufferSize, config.numCtagsProcesses, observationContext)
    databaseWriter := writer.NewDatabaseWriter(config.cacheDir, gitserverClient, parser)
    cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
    apiHandler := api.NewHandler(cachedDatabaseWriter, config.ctagsCommand, observationContext)

    server := httpserver.NewFromAddr(addr, &http.Server{
        ReadTimeout:  75 * time.Second,
        WriteTimeout: 10 * time.Minute,
        Handler:      actor.HTTPMiddleware(ot.HTTPMiddleware(trace.HTTPMiddleware(apiHandler, conf.DefaultClient()))),
    })

    evictionInterval := time.Second * 10
    cacheSizeBytes := int64(config.cacheSizeMB) * 1000 * 1000
    cacheEvicter := janitor.NewCacheEvicter(evictionInterval, cache, cacheSizeBytes, janitor.NewMetrics(observationContext))

    // Mark health server as ready and go!
    close(ready)
    goroutine.MonitorBackgroundRoutines(context.Background(), server, cacheEvicter)
    shared.Main(shared.SetupSqlite)
}
@ -1,4 +1,4 @@
package api
package observability

import (
    "fmt"
@ -7,11 +7,11 @@ import (
    "github.com/sourcegraph/sourcegraph/internal/observation"
)

type operations struct {
    search *observation.Operation
type Operations struct {
    Search *observation.Operation
}

func newOperations(observationContext *observation.Context) *operations {
func NewOperations(observationContext *observation.Context) *Operations {
    metrics := metrics.NewREDMetrics(
        observationContext.Registerer,
        "codeintel_symbols_api",
@ -28,7 +28,7 @@ func newOperations(observationContext *observation.Context) *operations {
    })
}

    return &operations{
        search: op("Search"),
    return &Operations{
        Search: op("Search"),
    }
}
cmd/symbols/parser/filtering_parser.go (new file, 51 lines)
@ -0,0 +1,51 @@
package parser

import (
    "bytes"

    "github.com/sourcegraph/go-ctags"
)

type FilteringParser struct {
    parser      ctags.Parser
    maxFileSize int
    maxSymbols  int
}

func NewFilteringParser(parser ctags.Parser, maxFileSize int, maxSymbols int) ctags.Parser {
    return &FilteringParser{
        parser:      parser,
        maxFileSize: maxFileSize,
        maxSymbols:  maxSymbols,
    }
}

func (p *FilteringParser) Parse(path string, content []byte) ([]*ctags.Entry, error) {
    if len(content) > p.maxFileSize {
        // File is over 512KiB, don't parse it
        return nil, nil
    }

    // Check to see if first 256 bytes contain a 0x00. If so, we'll assume that
    // the file is binary and skip parsing. Otherwise, we'll have some non-zero
    // contents that passed our filters above to parse.
    if bytes.IndexByte(content[:min(len(content), 256)], 0x00) >= 0 {
        return nil, nil
    }

    entries, err := p.parser.Parse(path, content)
    if err != nil {
        return nil, err
    }

    if len(entries) > p.maxSymbols {
        // File has too many symbols, don't return any of them
        return nil, nil
    }

    return entries, nil
}

func (p *FilteringParser) Close() {
    p.parser.Close()
}
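parser_factory_ctags.go (two hunks below) wraps the real ctags parser in this filter. A sketch with a hypothetical fake inner parser makes the short-circuiting visible: binary-looking content never reaches the wrapped Parse. The Entry fields used here (Name, Path) are the ones go-ctags exposes:

```go
package parser

import (
	"github.com/sourcegraph/go-ctags"
)

// fakeParser is a hypothetical stand-in for a real ctags process, used to
// observe whether FilteringParser delegated to it.
type fakeParser struct{ called bool }

func (p *fakeParser) Parse(path string, content []byte) ([]*ctags.Entry, error) {
	p.called = true
	return []*ctags.Entry{{Name: "main", Path: path}}, nil
}

func (p *fakeParser) Close() {}

// exampleFilter mirrors the wrapping done by NewCtagsParserFactory: files
// with a NUL byte in the first 256 bytes are dropped before parsing.
func exampleFilter() {
	inner := &fakeParser{}
	p := NewFilteringParser(inner, 1<<19, 2000) // 512KB limit, 2000 symbols

	entries, _ := p.Parse("a.bin", []byte{0x00, 0x01}) // sniffed as binary
	_ = entries                                        // nil, and inner.called is still false
}
```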
@ -10,8 +10,8 @@ import (
    "github.com/opentracing/opentracing-go/log"
    "github.com/sourcegraph/go-ctags"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/internal/search/result"
    "github.com/sourcegraph/sourcegraph/lib/errors"
@ -123,6 +123,13 @@ func (p *parser) Parse(ctx context.Context, args types.SearchArgs, paths []strin
    return symbolOrErrors, nil
}

func min(a, b int) int {
    if a < b {
        return a
    }
    return b
}

func (p *parser) handleParseRequest(ctx context.Context, symbolOrErrors chan<- SymbolOrError, parseRequest fetcher.ParseRequest, totalSymbols *uint32) (err error) {
    ctx, trace, endObservation := p.operations.handleParseRequest.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
        log.String("path", parseRequest.Path),
cmd/symbols/parser/parser_factory_ctags.go (new file, 31 lines)
@ -0,0 +1,31 @@
package parser

import (
    "log"
    "os"

    "github.com/sourcegraph/go-ctags"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
)

func NewCtagsParserFactory(config types.CtagsConfig) ParserFactory {
    options := ctags.Options{
        Bin:                config.Command,
        PatternLengthLimit: config.PatternLengthLimit,
    }
    if config.LogErrors {
        options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
    }
    if config.DebugLogs {
        options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
    }

    return func() (ctags.Parser, error) {
        parser, err := ctags.New(options)
        if err != nil {
            return nil, err
        }
        return NewFilteringParser(parser, config.MaxFileSize, config.MaxSymbols), nil
    }
}
@ -8,6 +8,8 @@ import (
    "github.com/google/go-cmp/cmp/cmpopts"

    "github.com/sourcegraph/go-ctags"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
)

func TestCtagsParser(t *testing.T) {
@ -16,7 +18,7 @@ func TestCtagsParser(t *testing.T) {
        t.Skip("command not in PATH: universal-ctags")
    }

    p, err := NewCtagsParserFactory("universal-ctags", 250, false, false)()
    p, err := NewCtagsParserFactory(types.CtagsConfig{Command: "universal-ctags", PatternLengthLimit: 250})()
    if err != nil {
        t.Fatal(err)
    }
cmd/symbols/shared/main.go (new file, 87 lines)
@ -0,0 +1,87 @@
package shared

import (
    "context"
    "log"
    "net/http"
    "time"

    "github.com/inconshreveable/log15"
    "github.com/opentracing/opentracing-go"
    "github.com/prometheus/client_golang/prometheus"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/actor"
    "github.com/sourcegraph/sourcegraph/internal/conf"
    "github.com/sourcegraph/sourcegraph/internal/debugserver"
    "github.com/sourcegraph/sourcegraph/internal/env"
    "github.com/sourcegraph/sourcegraph/internal/goroutine"
    "github.com/sourcegraph/sourcegraph/internal/honey"
    "github.com/sourcegraph/sourcegraph/internal/httpserver"
    "github.com/sourcegraph/sourcegraph/internal/logging"
    "github.com/sourcegraph/sourcegraph/internal/observation"
    "github.com/sourcegraph/sourcegraph/internal/profiler"
    "github.com/sourcegraph/sourcegraph/internal/sentry"
    "github.com/sourcegraph/sourcegraph/internal/trace"
    "github.com/sourcegraph/sourcegraph/internal/trace/ot"
    "github.com/sourcegraph/sourcegraph/internal/tracer"
)

const addr = ":3184"

type SetupFunc func(observationContext *observation.Context, gitserverClient gitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error)

func Main(setup SetupFunc) {
    routines := []goroutine.BackgroundRoutine{}

    // Set up Google Cloud Profiler when running in Cloud
    if err := profiler.Init(); err != nil {
        log.Fatalf("Failed to start profiler: %v", err)
    }

    // Initialize tracing/metrics
    observationContext := &observation.Context{
        Logger:     log15.Root(),
        Tracer:     &trace.Tracer{Tracer: opentracing.GlobalTracer()},
        Registerer: prometheus.DefaultRegisterer,
        HoneyDataset: &honey.Dataset{
            Name:       "codeintel-symbols",
            SampleRate: 5,
        },
    }
    // Run setup
    gitserverClient := gitserver.NewClient(observationContext)
    repositoryFetcher := fetcher.NewRepositoryFetcher(gitserverClient, types.LoadRepositoryFetcherConfig(env.BaseConfig{}).MaxTotalPathsLength, observationContext)
    searchFunc, handleStatus, newRoutines, ctagsBinary, err := setup(observationContext, gitserverClient, repositoryFetcher)
    if err != nil {
        log.Fatalf("Failed to setup: %v", err)
    }
    routines = append(routines, newRoutines...)

    // Initialization
    env.HandleHelpFlag()
    conf.Init()
    logging.Init()
    tracer.Init(conf.DefaultClient())
    sentry.Init(conf.DefaultClient())
    trace.Init()

    // Start debug server
    ready := make(chan struct{})
    go debugserver.NewServerRoutine(ready).Start()

    // Create HTTP server
    server := httpserver.NewFromAddr(addr, &http.Server{
        ReadTimeout:  75 * time.Second,
        WriteTimeout: 10 * time.Minute,
        Handler:      actor.HTTPMiddleware(ot.HTTPMiddleware(trace.HTTPMiddleware(api.NewHandler(searchFunc, handleStatus, ctagsBinary), conf.DefaultClient()))),
    })
    routines = append(routines, server)

    // Mark health server as ready and go!
    close(ready)
    goroutine.MonitorBackgroundRoutines(context.Background(), routines...)
}
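Main now receives all backend-specific wiring through a SetupFunc (the SQLite backend follows; the Rockskip backend presumably supplies its own in the enterprise build). A hypothetical no-op setup shows the five values a backend must return, using only types shown in this diff:

```go
package main

import (
	"context"
	"net/http"

	"github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/shared"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
	"github.com/sourcegraph/sourcegraph/internal/goroutine"
	"github.com/sourcegraph/sourcegraph/internal/observation"
	"github.com/sourcegraph/sourcegraph/internal/search/result"
)

// setupNoop is a hypothetical SetupFunc that serves no symbols. The five
// return values are: the search implementation, an optional /status handler
// (nil omits the route), extra background routines, the ctags binary name
// reported by /list-languages, and an error.
func setupNoop(
	observationContext *observation.Context,
	gitserverClient gitserver.GitserverClient,
	repositoryFetcher fetcher.RepositoryFetcher,
) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
	searchFunc := func(ctx context.Context, args types.SearchArgs) (result.Symbols, error) {
		return nil, nil // always empty
	}
	return searchFunc, nil, nil, "universal-ctags", nil
}

func main() {
	shared.Main(setupNoop)
}
```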
cmd/symbols/shared/sqlite.go (new file, 71 lines)
@ -0,0 +1,71 @@
package shared

import (
    "fmt"
    "log"
    "net/http"
    "os"
    "time"

    "golang.org/x/sync/semaphore"

    "github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
    sqlite "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/janitor"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/observability"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
    "github.com/sourcegraph/sourcegraph/cmd/symbols/types"
    "github.com/sourcegraph/sourcegraph/internal/diskcache"
    "github.com/sourcegraph/sourcegraph/internal/env"
    "github.com/sourcegraph/sourcegraph/internal/goroutine"
    "github.com/sourcegraph/sourcegraph/internal/observation"
)

func SetupSqlite(observationContext *observation.Context, gitserverClient gitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
    baseConfig := env.BaseConfig{}
    config := types.LoadSqliteConfig(baseConfig)
    if err := baseConfig.Validate(); err != nil {
        log.Fatalf("Failed to load configuration: %s", err)
    }

    // Ensure we register our database driver before calling
    // anything that tries to open a SQLite database.
    sqlite.Init()

    if config.SanityCheck {
        fmt.Print("Running sanity check...")
        if err := sqlite.SanityCheck(); err != nil {
            fmt.Println("failed ❌", err)
            os.Exit(1)
        }

        fmt.Println("passed ✅")
        os.Exit(0)
    }

    ctagsParserFactory := parser.NewCtagsParserFactory(config.Ctags)

    parserPool, err := parser.NewParserPool(ctagsParserFactory, config.NumCtagsProcesses)
    if err != nil {
        log.Fatalf("Failed to create parser pool: %s", err)
    }

    cache := diskcache.NewStore(config.CacheDir, "symbols",
        diskcache.WithBackgroundTimeout(config.ProcessingTimeout),
        diskcache.WithObservationContext(observationContext),
    )

    parser := parser.NewParser(parserPool, repositoryFetcher, config.RequestBufferSize, config.NumCtagsProcesses, observationContext)
    databaseWriter := writer.NewDatabaseWriter(config.CacheDir, gitserverClient, parser, semaphore.NewWeighted(int64(config.MaxConcurrentlyIndexing)))
    cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
    searchFunc := api.MakeSqliteSearchFunc(observability.NewOperations(observationContext), cachedDatabaseWriter)

    evictionInterval := time.Second * 10
    cacheSizeBytes := int64(config.CacheSizeMB) * 1000 * 1000
    cacheEvicter := janitor.NewCacheEvicter(evictionInterval, cache, cacheSizeBytes, janitor.NewMetrics(observationContext))

    return searchFunc, nil, []goroutine.BackgroundRoutine{cacheEvicter}, config.Ctags.Command, nil
}
cmd/symbols/types/types.go (new file, 122 lines)
@ -0,0 +1,122 @@
package types

import (
    "context"
    "os"

    "runtime"
    "strconv"
    "time"

    "github.com/sourcegraph/sourcegraph/internal/api"
    "github.com/sourcegraph/sourcegraph/internal/search/result"

    "github.com/sourcegraph/sourcegraph/internal/env"
)

type SqliteConfig struct {
    SanityCheck             bool
    CacheDir                string
    CacheSizeMB             int
    NumCtagsProcesses       int
    RequestBufferSize       int
    ProcessingTimeout       time.Duration
    Ctags                   CtagsConfig
    RepositoryFetcher       RepositoryFetcherConfig
    MaxConcurrentlyIndexing int
}

func LoadSqliteConfig(baseConfig env.BaseConfig) SqliteConfig {
    return SqliteConfig{
        Ctags:                   LoadCtagsConfig(baseConfig),
        RepositoryFetcher:       LoadRepositoryFetcherConfig(baseConfig),
        SanityCheck:             baseConfig.GetBool("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not"),
        CacheDir:                baseConfig.Get("CACHE_DIR", "/tmp/symbols-cache", "directory in which to store cached symbols"),
        CacheSizeMB:             baseConfig.GetInt("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache (in megabytes)"),
        NumCtagsProcesses:       baseConfig.GetInt("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of concurrent parser processes to run"),
        RequestBufferSize:       baseConfig.GetInt("REQUEST_BUFFER_SIZE", "8192", "maximum size of buffered parser request channel"),
        ProcessingTimeout:       baseConfig.GetInterval("PROCESSING_TIMEOUT", "2h", "maximum time to spend processing a repository"),
        MaxConcurrentlyIndexing: baseConfig.GetInt("MAX_CONCURRENTLY_INDEXING", "10", "maximum number of repositories to index at a time"),
    }
}

type CtagsConfig struct {
    Command            string
    PatternLengthLimit int
    LogErrors          bool
    DebugLogs          bool
    MaxFileSize        int
    MaxSymbols         int
}

func LoadCtagsConfig(baseConfig env.BaseConfig) CtagsConfig {
    logCtagsErrorsDefault := "false"
    if os.Getenv("DEPLOY_TYPE") == "dev" {
        logCtagsErrorsDefault = "true"
    }

    return CtagsConfig{
        Command:            baseConfig.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)"),
        PatternLengthLimit: baseConfig.GetInt("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags"),
        LogErrors:          baseConfig.GetBool("LOG_CTAGS_ERRORS", logCtagsErrorsDefault, "log ctags errors"),
        DebugLogs:          false,
        MaxFileSize:        baseConfig.GetInt("CTAGS_MAX_FILE_SIZE", "524288", "skip files larger than this size (in bytes)"),
        MaxSymbols:         baseConfig.GetInt("CTAGS_MAX_SYMBOLS", "2000", "skip files with more than this many symbols"),
    }
}

type RepositoryFetcherConfig struct {
    // The maximum sum of lengths of all paths in a single call to git archive. Without this limit, we
    // could hit the error "argument list too long" by exceeding the limit on the number of arguments to
    // a command enforced by the OS.
    //
    // Mac  : getconf ARG_MAX returns 1,048,576
    // Linux: getconf ARG_MAX returns 2,097,152
    //
    // We want to remain well under that limit, so defaulting to 100,000 seems safe (see the
    // MAX_TOTAL_PATHS_LENGTH environment variable below).
    MaxTotalPathsLength int
}

func LoadRepositoryFetcherConfig(baseConfig env.BaseConfig) RepositoryFetcherConfig {
    return RepositoryFetcherConfig{
        MaxTotalPathsLength: baseConfig.GetInt("MAX_TOTAL_PATHS_LENGTH", "100000", "maximum sum of lengths of all paths in a single call to git archive"),
    }
}

type SearchFunc func(ctx context.Context, args SearchArgs) (results result.Symbols, err error)

// SearchArgs are the arguments to perform a search on the symbols service.
type SearchArgs struct {
    // Repo is the name of the repository to search in.
    Repo api.RepoName `json:"repo"`

    // CommitID is the commit to search in.
    CommitID api.CommitID `json:"commitID"`

    // Query is the search query.
    Query string

    // IsRegExp if true will treat the Pattern as a regular expression.
    IsRegExp bool

    // IsCaseSensitive if false will ignore the case of query and file pattern
    // when finding matches.
    IsCaseSensitive bool

    // IncludePatterns is a list of regexes that symbol's file paths
    // need to match to get included in the result
    //
    // The patterns are ANDed together; a file's path must match all patterns
    // for it to be kept. That is also why it is a list (unlike the singular
    // ExcludePattern); it is not possible in general to construct a single
    // glob or Go regexp that represents multiple such patterns ANDed together.
    IncludePatterns []string

    // ExcludePattern is an optional regex that symbol's file paths
    // need to match to get included in the result
    ExcludePattern string

    // First indicates that only the first n symbols should be returned.
    First int
}
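Given the JSON tags above and the /search route registered on port 3184, a client request is a POST of SearchArgs. A hypothetical example; the untagged fields (Query, First) decode under their Go names, and Go's JSON decoding matches field names case-insensitively:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	body, _ := json.Marshal(map[string]any{
		"repo":     "github.com/foo/bar",
		"commitID": "deadbeef",
		"Query":    "NewRepositoryFetcher",
		"First":    10,
	})
	resp, err := http.Post("http://localhost:3184/search", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // the body is the JSON-encoded symbol results
}
```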
@ -20,6 +20,7 @@ allowed_prefix=(
  github.com/sourcegraph/sourcegraph/enterprise/cmd/worker
  github.com/sourcegraph/sourcegraph/enterprise/cmd/repo-updater
  github.com/sourcegraph/sourcegraph/enterprise/cmd/precise-code-intel-
  github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols
  # Doesn't connect but uses db internals for use with sqlite
  github.com/sourcegraph/sourcegraph/cmd/symbols
  # Transitively depends on zoekt package which imports but does not use DB
@ -57,6 +57,7 @@ var (
		"lsif_data_apidocs_num_pages",
		"lsif_data_apidocs_num_search_results_private",
		"lsif_data_apidocs_num_search_results_public",
		"rockskip_ancestry",
	},
	FS: getFSForPath("codeintel"),
}

@ -93,8 +93,8 @@ The run type for tags starting with `v`.
Default pipeline:

- **Pipeline setup**: Trigger async
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite
- **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint
- **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build
@ -102,7 +102,7 @@ Default pipeline:
- **CI script tests**: test-trace-command.sh
- **Integration tests**: Backend integration tests, Code Intel QA
- **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server
- Upload build trace

### Release branch
@ -112,8 +112,8 @@ The run type for branches matching `^[0-9]+\.[0-9]+$` (regexp match).
Default pipeline:

- **Pipeline setup**: Trigger async
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite
- **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint
- **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build
@ -121,7 +121,7 @@ Default pipeline:
- **CI script tests**: test-trace-command.sh
- **Integration tests**: Backend integration tests, Code Intel QA
- **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image
- Upload build trace

### Browser extension release build
@ -149,8 +149,8 @@ The run type for branches matching `main` (exact match).
Default pipeline:

- **Pipeline setup**: Trigger async
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite
- **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint
- **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build
@ -158,7 +158,7 @@ Default pipeline:
- **CI script tests**: test-trace-command.sh
- **Integration tests**: Backend integration tests, Code Intel QA
- **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server, Publish executor image, Publish docker registry mirror image
- Upload build trace

### Main dry run
@ -173,8 +173,8 @@ sg ci build main-dry-run
Default pipeline:

- **Pipeline setup**: Trigger async
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Image builds**: Build alpine-3.12, Build alpine-3.14, Build cadvisor, Build codeinsights-db, Build codeintel-db, Build enterprise-symbols, Build frontend, Build github-proxy, Build gitserver, Build grafana, Build indexed-searcher, Build jaeger-agent, Build jaeger-all-in-one, Build minio, Build postgres-12.6-alpine, Build postgres_exporter, Build precise-code-intel-worker, Build prometheus, Build redis-cache, Build redis-store, Build redis_exporter, Build repo-updater, Build search-indexer, Build searcher, Build symbols, Build syntax-highlighter, Build worker, Build migrator, Build server, Build executor image, Build docker registry mirror image
- **Image security scans**: Scan alpine-3.12, Scan alpine-3.14, Scan cadvisor, Scan codeinsights-db, Scan codeintel-db, Scan enterprise-symbols, Scan frontend, Scan github-proxy, Scan gitserver, Scan grafana, Scan indexed-searcher, Scan jaeger-agent, Scan jaeger-all-in-one, Scan minio, Scan postgres-12.6-alpine, Scan postgres_exporter, Scan precise-code-intel-worker, Scan prometheus, Scan redis-cache, Scan redis-store, Scan redis_exporter, Scan repo-updater, Scan search-indexer, Scan searcher, Scan symbols, Scan syntax-highlighter, Scan worker, Scan migrator, Scan server
- **Linters and static analysis**: Prettier, Misc linters, GraphQL lint, SVG lint, Yarn deduplicate lint, Docker linters, Checkov Terraform scanning, Check and build docsite
- **Client checks**: Puppeteer tests prep, Puppeteer tests chunk #1, Puppeteer tests chunk #2, Puppeteer tests chunk #3, Puppeteer tests chunk #4, Puppeteer tests chunk #5, Puppeteer tests chunk #6, Puppeteer tests chunk #7, Puppeteer tests chunk #8, Puppeteer tests chunk #9, Puppeteer tests finalize, Upload Storybook to Chromatic, Test shared client code, Test wildcard client code, Build, Enterprise build, Test, Puppeteer tests for chrome extension, Test browser extension, Test branded client code, Typescript eslint, Stylelint
- **Go checks**: Test (all), Test (enterprise/internal/codeintel/stores/dbstore), Test (enterprise/internal/codeintel/stores/lsifstore), Test (enterprise/internal/insights), Test (internal/database), Test (internal/repos), Test (enterprise/internal/batches), Test (cmd/frontend), Test (enterprise/internal/database), Test (enterprise/cmd/frontend/internal/batches/resolvers), Build
@ -182,7 +182,7 @@ Default pipeline:
- **CI script tests**: test-trace-command.sh
- **Integration tests**: Backend integration tests, Code Intel QA
- **End-to-end tests**: Sourcegraph E2E, Sourcegraph QA, Sourcegraph Cluster (deploy-sourcegraph) QA, Sourcegraph Upgrade
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server
- **Publish images**: alpine-3.12, alpine-3.14, cadvisor, codeinsights-db, codeintel-db, enterprise-symbols, frontend, github-proxy, gitserver, grafana, indexed-searcher, jaeger-agent, jaeger-all-in-one, minio, postgres-12.6-alpine, postgres_exporter, precise-code-intel-worker, prometheus, redis-cache, redis-store, redis_exporter, repo-updater, search-indexer, searcher, symbols, syntax-highlighter, worker, migrator, server
- Upload build trace

### Patch image
@ -219,6 +219,7 @@ Default pipeline:
- Build cadvisor
- Build codeinsights-db
- Build codeintel-db
- Build enterprise-symbols
- Build frontend
- Build github-proxy
- Build gitserver

@ -6,3 +6,4 @@ Guidance and documentation about writing database interactions within the Source
- High-performance guides
  - [Batch operations](batch_operations.md)
  - [Materialized cache](materialized_cache.md)
  - [Locking behavior](locking_behavior.md)

31
doc/dev/background-information/sql/locking_behavior.md
Normal file
@ -0,0 +1,31 @@
# Locking behavior

When you're using [advisory locks](https://www.postgresql.org/docs/9.1/functions-admin.html#FUNCTIONS-ADVISORY-LOCKS) in Postgres, lock calls stack when executed on the same connection (a.k.a. session):

- Connection 1 calls `pg_advisory_lock(42)`, acquires the lock and continues
- Connection 1 calls `pg_advisory_lock(42)`, this lock "stacks" with the previous call and continues
- Connection 2 calls `pg_advisory_lock(42)`, this blocks
- Connection 1 calls `pg_advisory_unlock(42)`, this pops one lock call off the stack and continues
- Connection 1 calls `pg_advisory_unlock(42)`, this pops the last lock call off the stack and continues
- Connection 2 finally acquires the lock and continues

If you get connections from a pool (e.g. the standard `sql` library in Go maintains an internal pool of connections), you need to be aware of this locking behavior; otherwise, you might get unpredictable behavior or deadlocks. You can get deterministic behavior by explicitly taking a single connection out of the pool (e.g. with `db.Conn(ctx)`).

Here's an example of bad code that can deadlock if the connection happens to be different across lock calls: ❌

```go
// Grab a write lock
db.Exec("SELECT pg_advisory_lock(1)")
// Grab a read lock
db.Exec("SELECT pg_advisory_lock_shared(1)") // 💥 Can deadlock
```

Good code explicitly takes a connection out of the pool first: ✅

```go
conn, err := db.Conn(ctx)
if err != nil {
	return err
}
defer conn.Close()
// Grab a write lock
conn.ExecContext(ctx, "SELECT pg_advisory_lock(1)")
// Grab a read lock
conn.ExecContext(ctx, "SELECT pg_advisory_lock_shared(1)") // OK, will not block
```
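To make the safe pattern concrete end to end, here is a minimal, self-contained sketch. It assumes the `github.com/lib/pq` driver and a reachable database; the DSN and the lock key `1` are illustrative, not part of this change:

```go
package main

import (
	"context"
	"database/sql"
	"log"

	_ "github.com/lib/pq" // Postgres driver; any database/sql driver works
)

func main() {
	db, err := sql.Open("postgres", "postgres://localhost/mydb?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	ctx := context.Background()

	// Pin a single connection so that all advisory-lock calls share one session.
	conn, err := db.Conn(ctx)
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	if _, err := conn.ExecContext(ctx, "SELECT pg_advisory_lock(1)"); err != nil {
		log.Fatal(err)
	}
	// Release on the same connection; advisory locks are session-scoped.
	defer conn.ExecContext(ctx, "SELECT pg_advisory_unlock(1)")

	// ... do work while holding the lock ...
}
```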
35
enterprise/cmd/symbols/Dockerfile
Normal file
@ -0,0 +1,35 @@
# NOTE: This layer of the docker image is also used in local development as a wrapper around universal-ctags
FROM sourcegraph/alpine-3.12:120059_2021-12-09_b34c7b2@sha256:9a1fde12f56fea02027cf4caeebdddfedb7b73bf8db6c16f7907a6e04a29134c AS ctags
# hadolint ignore=DL3002
USER root

COPY ctags-install-alpine.sh /ctags-install-alpine.sh
RUN /ctags-install-alpine.sh

FROM sourcegraph/alpine-3.12:120059_2021-12-09_b34c7b2@sha256:9a1fde12f56fea02027cf4caeebdddfedb7b73bf8db6c16f7907a6e04a29134c AS symbols

# TODO(security): This container should not run as root!
#
# See https://github.com/sourcegraph/sourcegraph/issues/13237
# hadolint ignore=DL3002
USER root

ARG COMMIT_SHA="unknown"
ARG DATE="unknown"
ARG VERSION="unknown"

LABEL org.opencontainers.image.revision=${COMMIT_SHA}
LABEL org.opencontainers.image.created=${DATE}
LABEL org.opencontainers.image.version=${VERSION}
LABEL com.sourcegraph.github.url=https://github.com/sourcegraph/sourcegraph/commit/${COMMIT_SHA}

RUN apk add --no-cache bind-tools ca-certificates mailcap tini

COPY ctags-install-alpine.sh /ctags-install-alpine.sh
RUN /ctags-install-alpine.sh

ENV CACHE_DIR=/mnt/cache/enterprise-symbols
RUN mkdir -p ${CACHE_DIR}
EXPOSE 3184
ENTRYPOINT ["/sbin/tini", "--", "/usr/local/bin/enterprise-symbols"]
COPY enterprise-symbols /usr/local/bin/
24
enterprise/cmd/symbols/build.sh
Executable file
@ -0,0 +1,24 @@
#!/usr/bin/env bash

# This script builds the symbols docker image.

cd "$(dirname "${BASH_SOURCE[0]}")/../../.."
set -eu

OUTPUT=$(mktemp -d -t sgdockerbuild_XXXXXXX)
cleanup() {
  rm -rf "$OUTPUT"
}
trap cleanup EXIT

cp -a ./cmd/symbols/ctags-install-alpine.sh "$OUTPUT"

# Build go binary into $OUTPUT
./enterprise/cmd/symbols/go-build.sh "$OUTPUT"

echo "--- docker build"
docker build -f enterprise/cmd/symbols/Dockerfile -t "$IMAGE" "$OUTPUT" \
  --progress=plain \
  --build-arg COMMIT_SHA \
  --build-arg DATE \
  --build-arg VERSION
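Note that the script expects the target image name in the `IMAGE` environment variable; with `set -eu`, leaving it unset aborts the build. A local invocation would look something like `IMAGE=enterprise-symbols:dev ./enterprise/cmd/symbols/build.sh` (the tag here is illustrative; CI supplies its own value).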
60
enterprise/cmd/symbols/go-build.sh
Executable file
@ -0,0 +1,60 @@
#!/usr/bin/env bash

# This script builds the symbols go binary.
# Requires a single argument which is the path to the target bindir.

cd "$(dirname "${BASH_SOURCE[0]}")/../../.."
set -eu

OUTPUT="${1:?no output path provided}"

# Environment for building linux binaries
export GO111MODULE=on
export GOARCH=amd64
export GOOS=linux

# go-sqlite3 depends on cgo. Without cgo, it will build but it'll throw an error at query time.
export CGO_ENABLED=1

# Default CC to musl-gcc.
export CC="${CC:-musl-gcc}"

if ! command -v "$CC" >/dev/null; then
  echo "$CC not found. You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine. Run 'apt-get install -y musl-tools'."
  exit 1
fi

# Make sure this is a musl compiler.
case "$CC" in
  *musl*)
    ;;
  *)
    echo "$CC doesn't look like a musl compiler. You need to set CC to a musl compiler in order to compile go-sqlite3 for Alpine. Run 'apt-get install -y musl-tools'."
    exit 1
    ;;
esac

echo "--- go build"
pkg="github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols"
env go build \
  -trimpath \
  -ldflags "-X github.com/sourcegraph/sourcegraph/internal/version.version=$VERSION -X github.com/sourcegraph/sourcegraph/internal/version.timestamp=$(date +%s)" \
  -buildmode exe \
  -tags dist \
  -o "$OUTPUT/enterprise-$(basename $pkg)" \
  "$pkg"

# We can't use -v because the spawned container might not share
# the same file system (e.g. when we're already inside docker
# and the spawned docker container will be a sibling on the host).
#
# A workaround is to feed the file into the container via stdin:
#
#   'cat FILE | docker run ... -i ... sh -c "cat > FILE && ..."'
echo "--- sanity check"
# shellcheck disable=SC2002
cat "$OUTPUT/enterprise-$(basename $pkg)" | docker run \
  --rm \
  -i \
  sourcegraph/alpine@sha256:ce099fbcd3cf70b338fc4cb2a4e1fa9ae847de21afdb0a849a393b87d94fb174 \
  sh -c "cat > /enterprise-symbols && chmod a+x /enterprise-symbols && env SANITY_CHECK=true /enterprise-symbols"
241
enterprise/cmd/symbols/main.go
Normal file
@ -0,0 +1,241 @@
package main

import (
	"context"
	"database/sql"
	"log"
	"net/http"
	"os"
	"strings"

	"github.com/sourcegraph/go-ctags"

	"github.com/sourcegraph/sourcegraph/cmd/symbols/fetcher"
	symbolsGitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/gitserver"
	symbolsParser "github.com/sourcegraph/sourcegraph/cmd/symbols/parser"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/shared"
	"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
	"github.com/sourcegraph/sourcegraph/enterprise/internal/rockskip"
	"github.com/sourcegraph/sourcegraph/internal/api"
	"github.com/sourcegraph/sourcegraph/internal/conf"
	"github.com/sourcegraph/sourcegraph/internal/conf/conftypes"
	connections "github.com/sourcegraph/sourcegraph/internal/database/connections/live"
	"github.com/sourcegraph/sourcegraph/internal/env"
	gitserver "github.com/sourcegraph/sourcegraph/internal/gitserver"
	"github.com/sourcegraph/sourcegraph/internal/goroutine"
	"github.com/sourcegraph/sourcegraph/internal/observation"
	"github.com/sourcegraph/sourcegraph/internal/search/result"
	"github.com/sourcegraph/sourcegraph/lib/errors"
)

func main() {
	reposVar := env.Get("ROCKSKIP_REPOS", "", "comma separated list of repositories to index (e.g. `github.com/torvalds/linux,github.com/pallets/flask`)")
	repos := strings.Split(reposVar, ",")

	if env.Get("USE_ROCKSKIP", "false", "use Rockskip to index the repos specified in ROCKSKIP_REPOS") == "true" {
		shared.Main(func(observationContext *observation.Context, gitserverClient symbolsGitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
			rockskipSearchFunc, rockskipHandleStatus, rockskipBackgroundRoutines, rockskipCtagsCommand, err := SetupRockskip(observationContext, gitserverClient, repositoryFetcher)
			if err != nil {
				return nil, nil, nil, "", err
			}

			// The blanks are the SQLite status endpoint (it's always nil) and the ctags command (same as
			// Rockskip's).
			sqliteSearchFunc, _, sqliteBackgroundRoutines, _, err := shared.SetupSqlite(observationContext, gitserverClient, repositoryFetcher)
			if err != nil {
				return nil, nil, nil, "", err
			}

			searchFunc := func(ctx context.Context, args types.SearchArgs) (results result.Symbols, err error) {
				if sliceContains(repos, string(args.Repo)) {
					return rockskipSearchFunc(ctx, args)
				} else {
					return sqliteSearchFunc(ctx, args)
				}
			}

			return searchFunc, rockskipHandleStatus, append(rockskipBackgroundRoutines, sqliteBackgroundRoutines...), rockskipCtagsCommand, nil
		})
	} else {
		shared.Main(shared.SetupSqlite)
	}
}

func SetupRockskip(observationContext *observation.Context, gitserverClient symbolsGitserver.GitserverClient, repositoryFetcher fetcher.RepositoryFetcher) (types.SearchFunc, func(http.ResponseWriter, *http.Request), []goroutine.BackgroundRoutine, string, error) {
	baseConfig := env.BaseConfig{}
	config := LoadRockskipConfig(baseConfig)
	if err := baseConfig.Validate(); err != nil {
		log.Fatalf("Failed to load configuration: %s", err)
	}

	db := mustInitializeCodeIntelDB()
	git := NewGitserver(repositoryFetcher)
	createParser := func() rockskip.ParseSymbolsFunc { return createParserWithConfig(config.Ctags) }
	server, err := rockskip.NewService(db, git, createParser, config.MaxConcurrentlyIndexing, config.MaxRepos, config.LogQueries, config.IndexRequestsQueueSize, config.SymbolsCacheSize, config.PathSymbolsCacheSize)
	if err != nil {
		return nil, nil, nil, config.Ctags.Command, err
	}

	return server.Search, server.HandleStatus, nil, config.Ctags.Command, nil
}

type RockskipConfig struct {
	Ctags                   types.CtagsConfig
	RepositoryFetcher       types.RepositoryFetcherConfig
	MaxRepos                int
	LogQueries              bool
	IndexRequestsQueueSize  int
	MaxConcurrentlyIndexing int
	SymbolsCacheSize        int
	PathSymbolsCacheSize    int
}

func LoadRockskipConfig(baseConfig env.BaseConfig) RockskipConfig {
	return RockskipConfig{
		Ctags:                   types.LoadCtagsConfig(baseConfig),
		RepositoryFetcher:       types.LoadRepositoryFetcherConfig(baseConfig),
		MaxRepos:                baseConfig.GetInt("MAX_REPOS", "1000", "maximum number of repositories to store in Postgres, with LRU eviction"),
		LogQueries:              baseConfig.GetBool("LOG_QUERIES", "false", "print search queries to stdout"),
		IndexRequestsQueueSize:  baseConfig.GetInt("INDEX_REQUESTS_QUEUE_SIZE", "1000", "how many index requests can be queued at once, at which point new requests will be rejected"),
		MaxConcurrentlyIndexing: baseConfig.GetInt("MAX_CONCURRENTLY_INDEXING", "4", "maximum number of repositories being indexed at a time (also limits ctags processes)"),
		SymbolsCacheSize:        baseConfig.GetInt("SYMBOLS_CACHE_SIZE", "1000000", "how many tuples of (path, symbol name, int ID) to cache in memory"),
		PathSymbolsCacheSize:    baseConfig.GetInt("PATH_SYMBOLS_CACHE_SIZE", "100000", "how many sets of symbols for files to cache in memory"),
	}
}

func createParserWithConfig(config types.CtagsConfig) rockskip.ParseSymbolsFunc {
	parser := mustCreateCtagsParser(config)

	return func(path string, bytes []byte) (symbols []rockskip.Symbol, err error) {
		entries, err := parser.Parse(path, bytes)
		if err != nil {
			return nil, err
		}

		symbols = []rockskip.Symbol{}
		for _, entry := range entries {
			symbols = append(symbols, rockskip.Symbol{
				Name:   entry.Name,
				Parent: entry.Parent,
				Kind:   entry.Kind,
				Line:   entry.Line,
			})
		}

		return symbols, nil
	}
}

func mustCreateCtagsParser(ctagsConfig types.CtagsConfig) ctags.Parser {
	options := ctags.Options{
		Bin:                ctagsConfig.Command,
		PatternLengthLimit: ctagsConfig.PatternLengthLimit,
	}
	if ctagsConfig.LogErrors {
		options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
	}
	if ctagsConfig.DebugLogs {
		options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
	}

	parser, err := ctags.New(options)
	if err != nil {
		log.Fatalf("Failed to create new ctags parser: %s", err)
	}

	return symbolsParser.NewFilteringParser(parser, ctagsConfig.MaxFileSize, ctagsConfig.MaxSymbols)
}

func mustInitializeCodeIntelDB() *sql.DB {
	dsn := conf.GetServiceConnectionValueAndRestartOnChange(func(serviceConnections conftypes.ServiceConnections) string {
		return serviceConnections.CodeIntelPostgresDSN
	})
	var (
		db  *sql.DB
		err error
	)
	db, err = connections.EnsureNewCodeIntelDB(dsn, "symbols", &observation.TestContext)
	if err != nil {
		log.Fatalf("Failed to connect to codeintel database: %s", err)
	}

	return db
}

type Gitserver struct {
	repositoryFetcher fetcher.RepositoryFetcher
}

func NewGitserver(repositoryFetcher fetcher.RepositoryFetcher) Gitserver {
	return Gitserver{repositoryFetcher: repositoryFetcher}
}

func (g Gitserver) LogReverseEach(repo string, commit string, n int, onLogEntry func(entry rockskip.LogEntry) error) error {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	command := gitserver.DefaultClient.Command("git", rockskip.LogReverseArgs(n, commit)...)
	command.Repo = api.RepoName(repo)
	// We run a single `git log` command and stream the output while the repo is being processed, which
	// can take much longer than 1 minute (the default timeout).
	command.DisableTimeout()
	stdout, err := gitserver.StdoutReader(ctx, command)
	if err != nil {
		return err
	}
	defer stdout.Close()

	return errors.Wrap(rockskip.ParseLogReverseEach(stdout, onLogEntry), "ParseLogReverseEach")
}

func (g Gitserver) RevListEach(repo string, commit string, onCommit func(commit string) (shouldContinue bool, err error)) error {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	command := gitserver.DefaultClient.Command("git", rockskip.RevListArgs(commit)...)
	command.Repo = api.RepoName(repo)
	command.DisableTimeout()
	stdout, err := gitserver.StdoutReader(ctx, command)
	if err != nil {
		return err
	}
	defer stdout.Close()

	return rockskip.RevListEach(stdout, onCommit)
}

func (g Gitserver) ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error {
	if len(paths) == 0 {
		return nil
	}

	args := types.SearchArgs{Repo: api.RepoName(repo), CommitID: api.CommitID(commit)}
	parseRequestOrErrors := g.repositoryFetcher.FetchRepositoryArchive(context.TODO(), args, paths)
	defer func() {
		// Ensure the channel is drained
		for range parseRequestOrErrors {
		}
	}()

	for parseRequestOrError := range parseRequestOrErrors {
		if parseRequestOrError.Err != nil {
			return errors.Wrap(parseRequestOrError.Err, "FetchRepositoryArchive")
		}

		err := onFile(parseRequestOrError.ParseRequest.Path, parseRequestOrError.ParseRequest.Data)
		if err != nil {
			return err
		}
	}

	return nil
}

func sliceContains(slice []string, s string) bool {
	for _, v := range slice {
		if v == s {
			return true
		}
	}
	return false
}
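For a concrete sense of the routing logic above: setting `USE_ROCKSKIP=true` and `ROCKSKIP_REPOS=github.com/torvalds/linux,github.com/pallets/flask` (the example list from the flag description) sends symbol searches for those two repositories through Rockskip, while every other repository falls back to the SQLite-backed search path.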
@ -61,6 +61,7 @@ var DeploySourcegraphDockerImages = []string{
	"cadvisor",
	"codeinsights-db",
	"codeintel-db",
	"enterprise-symbols",
	"frontend",
	"github-proxy",
	"gitserver",

226
enterprise/internal/rockskip/git.go
Normal file
@ -0,0 +1,226 @@
package rockskip

import (
	"bufio"
	"fmt"
	"io"

	"github.com/sourcegraph/sourcegraph/lib/errors"
)

type LogEntry struct {
	Commit       string
	PathStatuses []PathStatus
}

type PathStatus struct {
	Path   string
	Status StatusAMD
}

type CommitStatus struct {
	Commit string
	Status StatusAMD
}

type StatusAMD int

const (
	AddedAMD    StatusAMD = 0
	ModifiedAMD StatusAMD = 1
	DeletedAMD  StatusAMD = 2
)

type StatusAD int

const (
	AddedAD   StatusAD = 0
	DeletedAD StatusAD = 1
)

type Git interface {
	LogReverseEach(repo string, commit string, n int, onLogEntry func(logEntry LogEntry) error) error
	RevListEach(repo string, commit string, onCommit func(commit string) (shouldContinue bool, err error)) error
	ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error
}

func LogReverseArgs(n int, givenCommit string) []string {
	return []string{
		"log",
		"--pretty=%H %P",
		"--raw",
		"-z",
		"-m",
		// --no-abbrev speeds up git log a lot
		"--no-abbrev",
		"--no-renames",
		"--first-parent",
		"--reverse",
		"--ignore-submodules",
		fmt.Sprintf("-%d", n),
		givenCommit,
	}
}

func ParseLogReverseEach(stdout io.Reader, onLogEntry func(entry LogEntry) error) error {
	reader := bufio.NewReader(stdout)

	var buf []byte

	for {
		// abc... ... NULL '\n'?

		// Read the commit
		commitBytes, err := reader.Peek(40)
		if err == io.EOF {
			break
		} else if err != nil {
			return err
		}
		commit := string(commitBytes)

		// Skip past the NULL byte
		_, err = reader.ReadBytes(0)
		if err != nil {
			return err
		}

		// A '\n' indicates a list of paths and their statuses is next
		buf, err = reader.Peek(1)
		if err == io.EOF {
			err = onLogEntry(LogEntry{Commit: commit, PathStatuses: []PathStatus{}})
			if err != nil {
				return err
			}
			break
		} else if err != nil {
			return err
		}
		if buf[0] == '\n' {
			// A list of paths and their statuses is next

			// Skip the '\n'
			discarded, err := reader.Discard(1)
			if discarded != 1 {
				return errors.Newf("discarded %d bytes, expected 1", discarded)
			} else if err != nil {
				return err
			}

			pathStatuses := []PathStatus{}
			for {
				// :100644 100644 abc... def... M NULL file.txt NULL
				// ^ 0            ^ 97           ^ 99

				// A ':' indicates a path and its status is next
				buf, err = reader.Peek(1)
				if err == io.EOF {
					break
				} else if err != nil {
					return err
				}
				if buf[0] != ':' {
					break
				}

				// Read the status from index 97 and skip to the path at index 99
				buf = make([]byte, 99)
				read, err := io.ReadFull(reader, buf)
				if read != 99 {
					return errors.Newf("read %d bytes, expected 99", read)
				} else if err != nil {
					return err
				}

				// Read the path
				path, err := reader.ReadBytes(0)
				if err != nil {
					return err
				}
				path = path[:len(path)-1] // Drop the trailing NULL byte

				// Inspect the status
				var status StatusAMD
				statusByte := buf[97]
				switch statusByte {
				case 'A':
					status = AddedAMD
				case 'M':
					status = ModifiedAMD
				case 'D':
					status = DeletedAMD
				case 'T':
					// Type changed. Check if it changed from a file to a submodule or vice versa,
					// treating submodules as empty.

					isSubmodule := func(mode string) bool {
						// Submodules are mode "160000". https://stackoverflow.com/questions/737673/how-to-read-the-mode-field-of-git-ls-trees-output#comment3519596_737877
						return mode == "160000"
					}

					oldMode := string(buf[1:7])
					newMode := string(buf[8:14])

					if isSubmodule(oldMode) && !isSubmodule(newMode) {
						// It changed from a submodule to a file, so consider it added.
						status = AddedAMD
						break
					}

					if !isSubmodule(oldMode) && isSubmodule(newMode) {
						// It changed from a file to a submodule, so consider it deleted.
						status = DeletedAMD
						break
					}

					// Otherwise, it remained the same, so ignore the type change.
					continue
				case 'C':
					// Copied
					return errors.Newf("unexpected status 'C' given --no-renames was specified")
				case 'R':
					// Renamed
					return errors.Newf("unexpected status 'R' given --no-renames was specified")
				case 'X':
					return errors.Newf("unexpected status 'X' indicates a bug in git")
				default:
					fmt.Printf("LogReverse commit %q path %q: unrecognized diff status %q, skipping\n", commit, path, string(statusByte))
					continue
				}

				pathStatuses = append(pathStatuses, PathStatus{Path: string(path), Status: status})
			}

			err = onLogEntry(LogEntry{Commit: commit, PathStatuses: pathStatuses})
			if err != nil {
				return err
			}
		}
	}

	return nil
}

func RevListArgs(givenCommit string) []string {
	return []string{"rev-list", "--first-parent", givenCommit}
}

func RevListEach(stdout io.Reader, onCommit func(commit string) (shouldContinue bool, err error)) error {
	reader := bufio.NewReader(stdout)

	for {
		commit, err := reader.ReadString('\n')
		if err == io.EOF {
			break
		} else if err != nil {
			return err
		}
		commit = commit[:len(commit)-1] // Drop the trailing newline
		shouldContinue, err := onCommit(commit)
		if !shouldContinue {
			return err
		}
	}

	return nil
}
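To see the parser's expected input concretely, the following sketch feeds `ParseLogReverseEach` a hand-built sample of `git log --pretty='%H %P' --raw -z` output for one commit that modifies one file. The hashes and file name are fabricated, and this is an illustration derived from the parsing code above, not a test from this change:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/sourcegraph/sourcegraph/enterprise/internal/rockskip"
)

func main() {
	// One commit line (hash + parent hash, NUL-terminated), then a '\n' signaling
	// a raw diff entry: modes, old/new blob hashes, status 'M' at byte 97, and a
	// NUL-terminated path starting at byte 99.
	input := strings.Repeat("a", 40) + " " + strings.Repeat("b", 40) + "\x00\n" +
		":100644 100644 " + strings.Repeat("0", 40) + " " + strings.Repeat("1", 40) +
		" M\x00README.md\x00"

	err := rockskip.ParseLogReverseEach(strings.NewReader(input), func(entry rockskip.LogEntry) error {
		fmt.Printf("commit %s: %d path(s), first %q\n",
			entry.Commit[:7], len(entry.PathStatuses), entry.PathStatuses[0].Path)
		return nil
	})
	if err != nil {
		panic(err)
	}
}
```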
346
enterprise/internal/rockskip/index.go
Normal file
@ -0,0 +1,346 @@
package rockskip

import (
	"context"
	"fmt"

	"k8s.io/utils/lru"

	"github.com/inconshreveable/log15"

	"github.com/sourcegraph/sourcegraph/lib/errors"
)

func (s *Service) Index(ctx context.Context, repo, givenCommit string) (err error) {
	threadStatus := s.status.NewThreadStatus(fmt.Sprintf("indexing %s@%s", repo, givenCommit))
	defer threadStatus.End()

	tasklog := threadStatus.Tasklog

	// Get a fresh connection from the DB pool to get deterministic "lock stacking" behavior.
	// See doc/dev/background-information/sql/locking_behavior.md for more details.
	conn, err := s.db.Conn(ctx)
	if err != nil {
		return errors.Wrap(err, "failed to get connection for indexing")
	}
	defer conn.Close()

	// Acquire the indexing lock on the repo.
	releaseLock, err := iLock(ctx, conn, threadStatus, repo)
	if err != nil {
		return err
	}
	defer func() { err = errors.CombineErrors(err, releaseLock()) }()

	tipCommit := NULL
	tipCommitHash := ""
	tipHeight := 0

	var repoId int
	err = conn.QueryRowContext(ctx, "SELECT id FROM rockskip_repos WHERE repo = $1", repo).Scan(&repoId)
	if err != nil {
		return errors.Wrapf(err, "failed to get repo id for %s", repo)
	}

	missingCount := 0
	tasklog.Start("RevList")
	err = s.git.RevListEach(repo, givenCommit, func(commitHash string) (shouldContinue bool, err error) {
		defer tasklog.Continue("RevList")

		tasklog.Start("GetCommitByHash")
		commit, height, present, err := GetCommitByHash(ctx, conn, repoId, commitHash)
		if err != nil {
			return false, err
		} else if present {
			tipCommit = commit
			tipCommitHash = commitHash
			tipHeight = height
			return false, nil
		}
		missingCount += 1
		return true, nil
	})
	if err != nil {
		return errors.Wrap(err, "RevList")
	}

	threadStatus.SetProgress(0, missingCount)

	if missingCount == 0 {
		return nil
	}

	parse := s.createParser()

	symbolCache := newSymbolIdCache(s.symbolsCacheSize)
	pathSymbolsCache := newPathSymbolsCache(s.pathSymbolsCacheSize)

	tasklog.Start("Log")
	entriesIndexed := 0
	err = s.git.LogReverseEach(repo, givenCommit, missingCount, func(entry LogEntry) error {
		defer tasklog.Continue("Log")

		threadStatus.SetProgress(entriesIndexed, missingCount)
		entriesIndexed++

		tx, err := conn.BeginTx(ctx, nil)
		if err != nil {
			return errors.Wrap(err, "begin transaction")
		}
		defer tx.Rollback()

		hops, err := getHops(ctx, tx, tipCommit, tasklog)
		if err != nil {
			return errors.Wrap(err, "getHops")
		}

		r := ruler(tipHeight + 1)
		if r >= len(hops) {
			return errors.Newf("ruler(%d) = %d is out of range of len(hops) = %d", tipHeight+1, r, len(hops))
		}

		tasklog.Start("InsertCommit")
		commit, err := InsertCommit(ctx, tx, repoId, entry.Commit, tipHeight+1, hops[r])
		if err != nil {
			return errors.Wrap(err, "InsertCommit")
		}

		tasklog.Start("AppendHop+")
		err = AppendHop(ctx, tx, repoId, hops[0:r], AddedAD, commit)
		if err != nil {
			return errors.Wrap(err, "AppendHop (added)")
		}
		tasklog.Start("AppendHop-")
		err = AppendHop(ctx, tx, repoId, hops[0:r], DeletedAD, commit)
		if err != nil {
			return errors.Wrap(err, "AppendHop (deleted)")
		}

		deletedPaths := []string{}
		addedPaths := []string{}
		for _, pathStatus := range entry.PathStatuses {
			if pathStatus.Status == DeletedAMD || pathStatus.Status == ModifiedAMD {
				deletedPaths = append(deletedPaths, pathStatus.Path)
			}
			if pathStatus.Status == AddedAMD || pathStatus.Status == ModifiedAMD {
				addedPaths = append(addedPaths, pathStatus.Path)
			}
		}

		getSymbols := func(commit string, paths []string) (map[string]map[string]struct{}, error) {
			pathToSymbols := map[string]map[string]struct{}{}
			pathsToFetchSet := map[string]struct{}{}
			for _, path := range paths {
				pathsToFetchSet[path] = struct{}{}
			}

			// Don't fetch files that are already in the cache.
			if commit == tipCommitHash {
				for _, path := range paths {
					if symbols, ok := pathSymbolsCache.get(path); ok {
						pathToSymbols[path] = symbols
						delete(pathsToFetchSet, path)
					}
				}
			}

			pathsToFetch := []string{}
			for path := range pathsToFetchSet {
				pathsToFetch = append(pathsToFetch, path)
			}

			tasklog.Start("ArchiveEach")
			err = s.git.ArchiveEach(repo, commit, pathsToFetch, func(path string, contents []byte) error {
				defer tasklog.Continue("ArchiveEach")

				tasklog.Start("parse")
				symbols, err := parse(path, contents)
				if err != nil {
					return errors.Wrap(err, "parse")
				}

				pathToSymbols[path] = map[string]struct{}{}
				for _, symbol := range symbols {
					pathToSymbols[path][symbol.Name] = struct{}{}
				}

				return nil
			})

			if err != nil {
				return nil, errors.Wrap(err, "while looping ArchiveEach")
			}

			// Cache the symbols we just parsed.
			if commit != tipCommitHash {
				for path, symbols := range pathToSymbols {
					pathSymbolsCache.set(path, symbols)
				}
			}

			return pathToSymbols, nil
		}

		symbolsFromDeletedFiles, err := getSymbols(tipCommitHash, deletedPaths)
		if err != nil {
			return errors.Wrap(err, "getSymbols (deleted)")
		}
		symbolsFromAddedFiles, err := getSymbols(entry.Commit, addedPaths)
		if err != nil {
			return errors.Wrap(err, "getSymbols (added)")
		}

		// Compute the symmetric difference of symbols between the added and deleted paths.
		deletedSymbols := map[string]map[string]struct{}{}
		addedSymbols := map[string]map[string]struct{}{}
		for _, pathStatus := range entry.PathStatuses {
			switch pathStatus.Status {
			case DeletedAMD:
				deletedSymbols[pathStatus.Path] = symbolsFromDeletedFiles[pathStatus.Path]
			case AddedAMD:
				addedSymbols[pathStatus.Path] = symbolsFromAddedFiles[pathStatus.Path]
			case ModifiedAMD:
				deletedSymbols[pathStatus.Path] = map[string]struct{}{}
				addedSymbols[pathStatus.Path] = map[string]struct{}{}
				for name := range symbolsFromDeletedFiles[pathStatus.Path] {
					if _, ok := symbolsFromAddedFiles[pathStatus.Path][name]; !ok {
						deletedSymbols[pathStatus.Path][name] = struct{}{}
					}
				}
				for name := range symbolsFromAddedFiles[pathStatus.Path] {
					if _, ok := symbolsFromDeletedFiles[pathStatus.Path][name]; !ok {
						addedSymbols[pathStatus.Path][name] = struct{}{}
					}
				}
			}
		}

		for path, symbols := range deletedSymbols {
			for symbol := range symbols {
				id := 0
				ok := false
				if id, ok = symbolCache.get(path, symbol); !ok {
					found := false
					for _, hop := range hops {
						tasklog.Start("GetSymbol")
						id, found, err = GetSymbol(ctx, tx, repoId, path, symbol, hop)
						if err != nil {
							return err
						}
						if found {
							break
						}
					}
					if !found {
						// We did not find the symbol that (supposedly) has been deleted, so ignore the
						// deletion. This will probably lead to extra symbols in search results.
						//
						// The last time this happened, it was caused by impurity in ctags where the
						// result of parsing a file was affected by previously parsed files and not fully
						// determined by the file itself:
						//
						// https://github.com/universal-ctags/ctags/pull/3300
						log15.Error("Could not find symbol that was supposedly deleted", "repo", repo, "commit", commit, "path", path, "symbol", symbol)
						continue
					}
				}

				tasklog.Start("UpdateSymbolHops")
				err = UpdateSymbolHops(ctx, tx, id, DeletedAD, commit)
				if err != nil {
					return errors.Wrap(err, "UpdateSymbolHops")
				}
			}
		}

		for path, symbols := range addedSymbols {
			for symbol := range symbols {
				tasklog.Start("InsertSymbol")
				id, err := InsertSymbol(ctx, tx, commit, repoId, path, symbol)
				if err != nil {
					return errors.Wrap(err, "InsertSymbol")
				}
				symbolCache.set(path, symbol, id)
			}
		}

		tasklog.Start("DeleteRedundant")
		err = DeleteRedundant(ctx, tx, commit)
		if err != nil {
			return errors.Wrap(err, "DeleteRedundant")
		}

		tasklog.Start("CommitTx")
		err = tx.Commit()
		if err != nil {
			return errors.Wrap(err, "commit transaction")
		}

		tipCommit = commit
		tipCommitHash = entry.Commit
		tipHeight += 1

		return nil
	})
	if err != nil {
		return errors.Wrap(err, "LogReverseEach")
	}

	threadStatus.SetProgress(entriesIndexed, missingCount)

	return nil
}

type repoCommit struct {
	repo   string
	commit string
}

type indexRequest struct {
	repoCommit
	done chan struct{}
}

type symbolIdCache struct {
	cache *lru.Cache
}

func newSymbolIdCache(size int) *symbolIdCache {
	return &symbolIdCache{cache: lru.New(size)}
}

func (s *symbolIdCache) get(path, symbol string) (int, bool) {
	v, ok := s.cache.Get(symbolIdCacheKey(path, symbol))
	if !ok {
		return 0, false
	}
	return v.(int), true
}

func (s *symbolIdCache) set(path, symbol string, id int) {
	s.cache.Add(symbolIdCacheKey(path, symbol), id)
}

func symbolIdCacheKey(path, symbol string) string {
	return path + ":" + symbol
}

type pathSymbolsCache struct {
	cache *lru.Cache
}

func newPathSymbolsCache(size int) *pathSymbolsCache {
	return &pathSymbolsCache{cache: lru.New(size)}
}

func (s *pathSymbolsCache) get(path string) (map[string]struct{}, bool) {
	v, ok := s.cache.Get(path)
	if !ok {
		return nil, false
	}
	return v.(map[string]struct{}), true
}

func (s *pathSymbolsCache) set(path string, symbols map[string]struct{}) {
	s.cache.Add(path, symbols)
}
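The per-path symmetric difference in `Index` above is how Rockskip turns two parses of a modified file into minimal added/deleted updates. Distilled into a standalone helper for illustration only (the real loop operates on maps keyed by path, not on a single file's sets):

```go
package main

import "fmt"

// symmetricDiff returns the names present only in before (deleted) and the
// names present only in after (added). Names in both sets are unchanged.
func symmetricDiff(before, after map[string]struct{}) (deleted, added []string) {
	for name := range before {
		if _, ok := after[name]; !ok {
			deleted = append(deleted, name)
		}
	}
	for name := range after {
		if _, ok := before[name]; !ok {
			added = append(added, name)
		}
	}
	return deleted, added
}

func main() {
	before := map[string]struct{}{"Foo": {}, "Bar": {}}
	after := map[string]struct{}{"Bar": {}, "Baz": {}}
	deleted, added := symmetricDiff(before, after)
	fmt.Println(deleted, added) // [Foo] [Baz]
}
```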
|
||||
364
enterprise/internal/rockskip/postgres.go
Normal file
364
enterprise/internal/rockskip/postgres.go
Normal file
@ -0,0 +1,364 @@
package rockskip

import (
	"context"
	"database/sql"
	"fmt"

	pg "github.com/lib/pq"
	"github.com/segmentio/fasthash/fnv1"

	"github.com/sourcegraph/sourcegraph/internal/database/basestore"
	"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
	"github.com/sourcegraph/sourcegraph/lib/errors"
)

type CommitId = int

func GetCommitById(ctx context.Context, db dbutil.DB, givenCommit CommitId) (commitHash string, ancestor CommitId, height int, present bool, err error) {
	err = db.QueryRowContext(ctx, `
		SELECT commit_id, ancestor, height
		FROM rockskip_ancestry
		WHERE id = $1
	`, givenCommit).Scan(&commitHash, &ancestor, &height)
	if err == sql.ErrNoRows {
		return "", 0, 0, false, nil
	} else if err != nil {
		return "", 0, 0, false, errors.Newf("GetCommitById: %s", err)
	}
	return commitHash, ancestor, height, true, nil
}

func GetCommitByHash(ctx context.Context, db dbutil.DB, repoId int, commitHash string) (commit CommitId, height int, present bool, err error) {
	err = db.QueryRowContext(ctx, `
		SELECT id, height
		FROM rockskip_ancestry
		WHERE repo_id = $1 AND commit_id = $2
	`, repoId, commitHash).Scan(&commit, &height)
	if err == sql.ErrNoRows {
		return 0, 0, false, nil
	} else if err != nil {
		return 0, 0, false, errors.Newf("GetCommitByHash: %s", err)
	}
	return commit, height, true, nil
}

func InsertCommit(ctx context.Context, db dbutil.DB, repoId int, commitHash string, height int, ancestor CommitId) (id CommitId, err error) {
	err = db.QueryRowContext(ctx, `
		INSERT INTO rockskip_ancestry (commit_id, repo_id, height, ancestor)
		VALUES ($1, $2, $3, $4)
		RETURNING id
	`, commitHash, repoId, height, ancestor).Scan(&id)
	return id, errors.Wrap(err, "InsertCommit")
}

func GetSymbol(ctx context.Context, db dbutil.DB, repoId int, path string, name string, hop CommitId) (id int, found bool, err error) {
	err = db.QueryRowContext(ctx, `
		SELECT id
		FROM rockskip_symbols
		WHERE repo_id = $1 AND path = $2 AND name = $3 AND $4 && added AND NOT $4 && deleted
	`, repoId, path, name, pg.Array([]int{hop})).Scan(&id)
	if err == sql.ErrNoRows {
		return 0, false, nil
	} else if err != nil {
		return 0, false, errors.Newf("GetSymbol: %s", err)
	}
	return id, true, nil
}

func UpdateSymbolHops(ctx context.Context, db dbutil.DB, id int, status StatusAD, hop CommitId) error {
	column := statusADToColumn(status)
	_, err := db.ExecContext(ctx, fmt.Sprintf(`
		UPDATE rockskip_symbols
		SET %s = array_append(%s, $1)
		WHERE id = $2
	`, column, column), hop, id)
	return errors.Wrap(err, "UpdateSymbolHops")
}

func InsertSymbol(ctx context.Context, db dbutil.DB, hop CommitId, repoId int, path string, name string) (id int, err error) {
	err = db.QueryRowContext(ctx, `
		INSERT INTO rockskip_symbols (added, deleted, repo_id, path, name)
		VALUES      ($1   , $2     , $3     , $4  , $5  )
		RETURNING id
	`, pg.Array([]int{hop}), pg.Array([]int{}), repoId, path, name).Scan(&id)
	return id, errors.Wrap(err, "InsertSymbol")
}

func AppendHop(ctx context.Context, db dbutil.DB, repoId int, hops []CommitId, givenStatus StatusAD, newHop CommitId) error {
	column := statusADToColumn(givenStatus)
	_, err := db.ExecContext(ctx, fmt.Sprintf(`
		UPDATE rockskip_symbols
		SET %s = array_append(%s, $1)
		WHERE $2 && singleton_integer(repo_id) AND $3 && %s
	`, column, column, column), newHop, pg.Array([]int{repoId}), pg.Array(hops))
	return errors.Wrap(err, "AppendHop")
}

func DeleteRedundant(ctx context.Context, db dbutil.DB, hop CommitId) error {
	_, err := db.ExecContext(ctx, `
		UPDATE rockskip_symbols
		SET added = array_remove(added, $1), deleted = array_remove(deleted, $1)
		WHERE $2 && added AND $2 && deleted
	`, hop, pg.Array([]int{hop}))
	return errors.Wrap(err, "DeleteRedundant")
}

func tryDeleteOldestRepo(ctx context.Context, db *sql.Conn, maxRepos int, threadStatus *ThreadStatus) (more bool, err error) {
	defer threadStatus.Tasklog.Continue("idle")

	// Select a candidate repo to delete.
	threadStatus.Tasklog.Start("select repo to delete")
	var repoId int
	var repo string
	var repoRank int
	err = db.QueryRowContext(ctx, `
		SELECT id, repo, repo_rank
		FROM (
			SELECT *, RANK() OVER (ORDER BY last_accessed_at DESC) repo_rank
			FROM rockskip_repos
		) sub
		WHERE repo_rank > $1
		ORDER BY last_accessed_at ASC
		LIMIT 1;`, maxRepos,
	).Scan(&repoId, &repo, &repoRank)
	if err == sql.ErrNoRows {
		// No more repos to delete.
		return false, nil
	}
	if err != nil {
		return false, errors.Wrap(err, "selecting repo to delete")
	}

	// Note: a search request or deletion could have intervened here.

	// Acquire the write lock on the repo. Check the error before deferring the
	// release, otherwise releaseWLock would be nil in the deferred call.
	releaseWLock, err := wLock(ctx, db, threadStatus, repo)
	if err != nil {
		return false, errors.Wrap(err, "acquiring write lock on repo")
	}
	defer func() { err = errors.CombineErrors(err, releaseWLock()) }()

	// Make sure the repo is still old. See note above.
	var rank int
	threadStatus.Tasklog.Start("recheck repo rank")
	err = db.QueryRowContext(ctx, `
		SELECT repo_rank
		FROM (
			SELECT id, RANK() OVER (ORDER BY last_accessed_at DESC) repo_rank
			FROM rockskip_repos
		) sub
		WHERE id = $1;`, repoId,
	).Scan(&rank)
	if err == sql.ErrNoRows {
		// The repo was deleted in the meantime, so retry.
		return true, nil
	}
	if err != nil {
		return false, errors.Wrap(err, "selecting repo rank")
	}
	if rank <= maxRepos {
		// An intervening search request must have refreshed the repo, so retry.
		return true, nil
	}

	// Acquire the indexing lock on the repo.
	releaseILock, err := iLock(ctx, db, threadStatus, repo)
	if err != nil {
		return false, errors.Wrap(err, "acquiring indexing lock on repo")
	}
	defer func() { err = errors.CombineErrors(err, releaseILock()) }()

	// Delete the repo.
	threadStatus.Tasklog.Start("delete repo")
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return false, err
	}
	defer tx.Rollback()
	_, err = tx.ExecContext(ctx, "DELETE FROM rockskip_ancestry WHERE repo_id = $1;", repoId)
	if err != nil {
		return false, err
	}
	// repoId is passed as a plain integer; an array parameter would not match
	// the integer repo_id column.
	_, err = tx.ExecContext(ctx, "DELETE FROM rockskip_symbols WHERE repo_id = $1;", repoId)
	if err != nil {
		return false, err
	}
	_, err = tx.ExecContext(ctx, "DELETE FROM rockskip_repos WHERE id = $1;", repoId)
	if err != nil {
		return false, err
	}
	err = tx.Commit()
	if err != nil {
		return false, err
	}

	return true, nil
}

func PrintInternals(ctx context.Context, db dbutil.DB) error {
	fmt.Println("Commit ancestry:")
	fmt.Println()

	// Print all rows in the rockskip_ancestry table.
	rows, err := db.QueryContext(ctx, `
		SELECT a1.commit_id, a1.height, a2.commit_id
		FROM rockskip_ancestry a1
		JOIN rockskip_ancestry a2 ON a1.ancestor = a2.id
		ORDER BY height ASC
	`)
	if err != nil {
		return errors.Wrap(err, "PrintInternals")
	}
	defer rows.Close()

	for rows.Next() {
		var commit, ancestor string
		var height int
		err = rows.Scan(&commit, &height, &ancestor)
		if err != nil {
			return errors.Wrap(err, "PrintInternals: Scan")
		}
		fmt.Printf("height %3d commit %s ancestor %s\n", height, commit, ancestor)
	}

	fmt.Println()
	fmt.Println("Symbols:")
	fmt.Println()

	rows, err = db.QueryContext(ctx, `
		SELECT id, path, name, added, deleted
		FROM rockskip_symbols
		ORDER BY id ASC
	`)
	if err != nil {
		return errors.Wrap(err, "PrintInternals")
	}
	defer rows.Close()

	for rows.Next() {
		var id int
		var path string
		var name string
		var added, deleted []int64
		err = rows.Scan(&id, &path, &name, pg.Array(&added), pg.Array(&deleted))
		if err != nil {
			return errors.Wrap(err, "PrintInternals: Scan")
		}
		fmt.Printf("  id %d path %-10s symbol %s\n", id, path, name)
		for _, a := range added {
			hash, _, _, _, err := GetCommitById(ctx, db, int(a))
			if err != nil {
				return err
			}
			fmt.Printf("    + %-40s\n", hash)
		}
		fmt.Println()
		for _, d := range deleted {
			hash, _, _, _, err := GetCommitById(ctx, db, int(d))
			if err != nil {
				return err
			}
			fmt.Printf("    - %-40s\n", hash)
		}
		fmt.Println()
	}

	fmt.Println()
	return nil
}

func updateLastAccessedAt(ctx context.Context, db dbutil.DB, repo string) (id int, err error) {
	err = db.QueryRowContext(ctx, `
		INSERT INTO rockskip_repos (repo, last_accessed_at)
		VALUES ($1, now())
		ON CONFLICT (repo)
		DO UPDATE SET last_accessed_at = now()
		RETURNING id
	`, repo).Scan(&id)
	if err != nil {
		return 0, err
	}

	return id, nil
}

func statusADToColumn(status StatusAD) string {
	switch status {
	case AddedAD:
		return "added"
	case DeletedAD:
		return "deleted"
	default:
		fmt.Println("unexpected StatusAD: ", status)
		return "unknown_status"
	}
}

var RW_LOCKS_NAMESPACE = int32(fnv1.HashString32("symbols-rw"))
var INDEXING_LOCKS_NAMESPACE = int32(fnv1.HashString32("symbols-indexing"))

func lock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, namespace int32, name, repo, lockFn, unlockFn string) (func() error, error) {
	key := int32(fnv1.HashString32(repo))

	threadStatus.Tasklog.Start(name)
	_, err := db.ExecContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, lockFn), namespace, key)
	if err != nil {
		return nil, errors.Newf("acquire %s: %s", name, err)
	}
	threadStatus.HoldLock(name)

	release := func() error {
		_, err := db.ExecContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, unlockFn), namespace, key)
		if err != nil {
			return errors.Newf("release %s: %s", name, err)
		}
		threadStatus.ReleaseLock(name)
		return nil
	}

	return release, nil
}

func tryLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, namespace int32, name, repo, lockFn, unlockFn string) (bool, func() error, error) {
	key := int32(fnv1.HashString32(repo))

	threadStatus.Tasklog.Start(name)
	locked, _, err := basestore.ScanFirstBool(db.QueryContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, lockFn), namespace, key))
	if err != nil {
		return false, nil, errors.Newf("try acquire %s: %s", name, err)
	}

	if !locked {
		return false, nil, nil
	}

	threadStatus.HoldLock(name)

	release := func() error {
		_, err := db.ExecContext(ctx, fmt.Sprintf(`SELECT %s($1, $2)`, unlockFn), namespace, key)
		if err != nil {
			return errors.Newf("release %s: %s", name, err)
		}
		threadStatus.ReleaseLock(name)
		return nil
	}

	return true, release, nil
}

// tryRLock attempts to acquire a read lock on the repo.
func tryRLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (bool, func() error, error) {
	return tryLock(ctx, db, threadStatus, RW_LOCKS_NAMESPACE, "rLock", repo, "pg_try_advisory_lock_shared", "pg_advisory_unlock_shared")
}

// wLock acquires the write lock on the repo. It blocks only when another connection holds a read or the
// write lock. That means a single connection can acquire the write lock while holding a read lock.
func wLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (func() error, error) {
	return lock(ctx, db, threadStatus, RW_LOCKS_NAMESPACE, "wLock", repo, "pg_advisory_lock", "pg_advisory_unlock")
}

// iLock acquires the indexing lock on the repo.
func iLock(ctx context.Context, db dbutil.DB, threadStatus *ThreadStatus, repo string) (func() error, error) {
	return lock(ctx, db, threadStatus, INDEXING_LOCKS_NAMESPACE, "iLock", repo, "pg_advisory_lock", "pg_advisory_unlock")
}
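The added/deleted hop arrays above encode symbol visibility compactly: a symbol row is visible at a commit when the commit's set of ancestor hops overlaps `added` and does not overlap `deleted`, which is what the `$hops && added AND NOT $hops && deleted` predicates express. A client-side sketch of the same rule (illustrative only; the real check runs inside Postgres against the GIN index):

```go
package main

import "fmt"

// intersects reports whether two hop sets overlap, mirroring Postgres's && operator.
func intersects(a, b []int) bool {
	set := map[int]struct{}{}
	for _, x := range a {
		set[x] = struct{}{}
	}
	for _, y := range b {
		if _, ok := set[y]; ok {
			return true
		}
	}
	return false
}

// visible mirrors the SQL predicate `hops && added AND NOT hops && deleted`.
func visible(hops, added, deleted []int) bool {
	return intersects(hops, added) && !intersects(hops, deleted)
}

func main() {
	hops := []int{7, 3, 0} // the commit being searched plus its ancestor hops

	fmt.Println(visible(hops, []int{3}, nil))      // true: added at an ancestor hop
	fmt.Println(visible(hops, []int{3}, []int{7})) // false: deleted again by hop 7
}
```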
enterprise/internal/rockskip/search.go (new file, 469 lines)
@@ -0,0 +1,469 @@
package rockskip

import (
	"context"
	"database/sql"
	"database/sql/driver"
	"fmt"
	"strings"
	"time"

	"github.com/grafana/regexp"
	"github.com/grafana/regexp/syntax"
	"github.com/keegancsmith/sqlf"
	pg "github.com/lib/pq"
	"github.com/segmentio/fasthash/fnv1"

	"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
	"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
	"github.com/sourcegraph/sourcegraph/internal/search/result"
	"github.com/sourcegraph/sourcegraph/lib/errors"
)

func (s *Service) Search(ctx context.Context, args types.SearchArgs) (symbols []result.Symbol, err error) {
	repo := string(args.Repo)
	commitHash := string(args.CommitID)

	threadStatus := s.status.NewThreadStatus(fmt.Sprintf("searching %+v", args))
	if s.logQueries {
		defer threadStatus.Tasklog.Print()
	}
	defer threadStatus.End()

	// Acquire a read lock on the repo. Check `locked` before deferring the
	// release, otherwise releaseRLock would be nil in the deferred call.
	locked, releaseRLock, err := tryRLock(ctx, s.db, threadStatus, repo)
	if err != nil {
		return nil, err
	}
	if !locked {
		return nil, errors.Newf("deletion of repo %s is in progress", repo)
	}
	defer func() { err = errors.CombineErrors(err, releaseRLock()) }()

	// Insert the repo or update its last_accessed_at column to now() in the rockskip_repos table.
	threadStatus.Tasklog.Start("update last_accessed_at")
	repoId, err := updateLastAccessedAt(ctx, s.db, repo)
	if err != nil {
		return nil, err
	}

	// Non-blocking send on repoUpdates to notify the background deletion goroutine.
	select {
	case s.repoUpdates <- struct{}{}:
	default:
	}

	// Check if the commit has already been indexed, and if not then index it.
	threadStatus.Tasklog.Start("check commit presence")
	commit, _, present, err := GetCommitByHash(ctx, s.db, repoId, commitHash)
	if err != nil {
		return nil, err
	} else if !present {
		// Try to send an index request.
		done, err := s.emitIndexRequest(repoCommit{repo: repo, commit: commitHash})
		if err != nil {
			return nil, err
		}

		// Wait for indexing to complete or the request to be canceled.
		threadStatus.Tasklog.Start("awaiting indexing completion")
		select {
		case <-done:
			threadStatus.Tasklog.Start("recheck commit presence")
			commit, _, present, err = GetCommitByHash(ctx, s.db, repoId, commitHash)
			if err != nil {
				return nil, err
			}
			if !present {
				return nil, errors.Newf("indexing failed, check server logs")
			}
		case <-ctx.Done():
			return nil, ctx.Err()
		}
	}

	// Finally search.
	symbols, err = s.querySymbols(ctx, args, repoId, commit, threadStatus)
	if err != nil {
		return nil, err
	}

	return symbols, nil
}

func mkIsMatch(args types.SearchArgs) (func(string) bool, error) {
	if !args.IsRegExp {
		if args.IsCaseSensitive {
			return func(symbol string) bool { return strings.Contains(symbol, args.Query) }, nil
		} else {
			return func(symbol string) bool {
				return strings.Contains(strings.ToLower(symbol), strings.ToLower(args.Query))
			}, nil
		}
	}

	expr := args.Query
	if !args.IsCaseSensitive {
		expr = "(?i)" + expr
	}

	regex, err := regexp.Compile(expr)
	if err != nil {
		return nil, err
	}

	if args.IsCaseSensitive {
		return func(symbol string) bool { return regex.MatchString(symbol) }, nil
	} else {
		return func(symbol string) bool { return regex.MatchString(strings.ToLower(symbol)) }, nil
	}
}

func (s *Service) emitIndexRequest(rc repoCommit) (chan struct{}, error) {
	key := fmt.Sprintf("%s@%s", rc.repo, rc.commit)

	s.repoCommitToDoneMu.Lock()

	if done, ok := s.repoCommitToDone[key]; ok {
		s.repoCommitToDoneMu.Unlock()
		return done, nil
	}

	done := make(chan struct{})

	s.repoCommitToDone[key] = done
	s.repoCommitToDoneMu.Unlock()

	go func() {
		<-done
		s.repoCommitToDoneMu.Lock()
		delete(s.repoCommitToDone, key)
		s.repoCommitToDoneMu.Unlock()
	}()

	request := indexRequest{
		repoCommit: repoCommit{
			repo:   rc.repo,
			commit: rc.commit,
		},
		done: done,
	}

	// Route the index request to the indexer associated with the repo.
	ix := int(fnv1.HashString32(rc.repo)) % len(s.indexRequestQueues)

	select {
	case s.indexRequestQueues[ix] <- request:
	default:
		return nil, errors.Newf("the indexing queue is full")
	}

	return done, nil
}

const DEFAULT_LIMIT = 100

func (s *Service) querySymbols(ctx context.Context, args types.SearchArgs, repoId int, commit int, threadStatus *ThreadStatus) ([]result.Symbol, error) {
	hops, err := getHops(ctx, s.db, commit, threadStatus.Tasklog)
	if err != nil {
		return nil, err
	}
	// Drop the null commit.
	hops = hops[:len(hops)-1]

	limit := DEFAULT_LIMIT
	if args.First > 0 {
		limit = args.First
	}

	threadStatus.Tasklog.Start("run query")
	q := sqlf.Sprintf(`
		SELECT DISTINCT path
		FROM rockskip_symbols
		WHERE
			%s && singleton_integer(repo_id)
			AND %s && added
			AND NOT %s && deleted
			AND %s
		LIMIT %s;`,
		pg.Array([]int{repoId}),
		pg.Array(hops),
		pg.Array(hops),
		convertSearchArgsToSqlQuery(args),
		limit,
	)

	start := time.Now()
	var rows *sql.Rows
	rows, err = s.db.QueryContext(ctx, q.Query(sqlf.PostgresBindVar), q.Args()...)
	duration := time.Since(start)
	if err != nil {
		return nil, errors.Wrap(err, "Search")
	}
	defer rows.Close()

	isMatch, err := mkIsMatch(args)
	if err != nil {
		return nil, err
	}

	paths := []string{}
	for rows.Next() {
		var path string
		err = rows.Scan(&path)
		if err != nil {
			return nil, errors.Wrap(err, "Search: Scan")
		}
		paths = append(paths, path)
	}

	stopErr := errors.New("stop iterating")

	symbols := []result.Symbol{}

	parse := s.createParser()

	threadStatus.Tasklog.Start("ArchiveEach")
	err = s.git.ArchiveEach(string(args.Repo), string(args.CommitID), paths, func(path string, contents []byte) error {
		defer threadStatus.Tasklog.Continue("ArchiveEach")

		threadStatus.Tasklog.Start("parse")
		allSymbols, err := parse(path, contents)
		if err != nil {
			return err
		}

		for _, symbol := range allSymbols {
			if isMatch(symbol.Name) {
				symbols = append(symbols, result.Symbol{
					Name:   symbol.Name,
					Path:   path,
					Line:   symbol.Line,
					Kind:   symbol.Kind,
					Parent: symbol.Parent,
				})

				if len(symbols) >= limit {
					return stopErr
				}
			}
		}

		return nil
	})

	if err != nil && err != stopErr {
		return nil, err
	}

	if s.logQueries {
		err = logQuery(ctx, s.db, args, q, duration, len(symbols))
		if err != nil {
			return nil, errors.Wrap(err, "logQuery")
		}
	}

	return symbols, nil
}

func logQuery(ctx context.Context, db dbutil.DB, args types.SearchArgs, q *sqlf.Query, duration time.Duration, symbols int) error {
	sb := &strings.Builder{}

	fmt.Fprintf(sb, "Search args: %+v\n", args)

	fmt.Fprintln(sb, "Query:")
	query, err := sqlfToString(q)
	if err != nil {
		return errors.Wrap(err, "sqlfToString")
	}
	fmt.Fprintln(sb, query)

	fmt.Fprintln(sb, "EXPLAIN:")
	explain, err := db.QueryContext(ctx, sqlf.Sprintf("EXPLAIN %s", q).Query(sqlf.PostgresBindVar), q.Args()...)
	if err != nil {
		return errors.Wrap(err, "EXPLAIN")
	}
	defer explain.Close()
	for explain.Next() {
		var plan string
		err = explain.Scan(&plan)
		if err != nil {
			return errors.Wrap(err, "EXPLAIN Scan")
		}
		fmt.Fprintln(sb, plan)
	}

	fmt.Fprintf(sb, "%.2fms, %d symbols", float64(duration.Microseconds())/1000, symbols)

	fmt.Println(" ")
	fmt.Println(bracket(sb.String()))
	fmt.Println(" ")

	return nil
}

func bracket(text string) string {
	lines := strings.Split(strings.TrimSpace(text), "\n")
	for i, line := range lines {
		if i == 0 {
			lines[i] = "┌ " + line
		} else if i == len(lines)-1 {
			lines[i] = "└ " + line
		} else {
			lines[i] = "│ " + line
		}
	}
	return strings.Join(lines, "\n")
}

func sqlfToString(q *sqlf.Query) (string, error) {
	s := q.Query(sqlf.PostgresBindVar)
	for i, arg := range q.Args() {
		argString, err := argToString(arg)
		if err != nil {
			return "", err
		}
		s = strings.ReplaceAll(s, fmt.Sprintf("$%d", i+1), argString)
	}
	return s, nil
}

func argToString(arg interface{}) (string, error) {
	switch arg := arg.(type) {
	case string:
		return fmt.Sprintf("'%s'", sqlEscapeQuotes(arg)), nil
	case driver.Valuer:
		value, err := arg.Value()
		if err != nil {
			return "", err
		}
		switch value := value.(type) {
		case string:
			return fmt.Sprintf("'%s'", sqlEscapeQuotes(value)), nil
		case int:
			return fmt.Sprintf("'%d'", value), nil
		default:
			return "", errors.Newf("unrecognized array type %T", value)
		}
	case int:
		return fmt.Sprintf("%d", arg), nil
	default:
		return "", errors.Newf("unrecognized type %T", arg)
	}
}

func sqlEscapeQuotes(s string) string {
	return strings.ReplaceAll(s, "'", "''")
}

func convertSearchArgsToSqlQuery(args types.SearchArgs) *sqlf.Query {
	// TODO support non-regexp queries once the frontend supports it.

	conjunctOrNils := []*sqlf.Query{}

	// Query
	conjunctOrNils = append(conjunctOrNils, regexMatch("name", "", args.Query, args.IsCaseSensitive))

	// IncludePatterns
	for _, includePattern := range args.IncludePatterns {
		conjunctOrNils = append(conjunctOrNils, regexMatch("path", "path_prefixes(path)", includePattern, args.IsCaseSensitive))
	}

	// ExcludePattern
	conjunctOrNils = append(conjunctOrNils, negate(regexMatch("path", "path_prefixes(path)", args.ExcludePattern, args.IsCaseSensitive)))

	// Drop nil conjuncts.
	conjuncts := []*sqlf.Query{}
	for _, condition := range conjunctOrNils {
		if condition != nil {
			conjuncts = append(conjuncts, condition)
		}
	}

	if len(conjuncts) == 0 {
		return sqlf.Sprintf("TRUE")
	}

	return sqlf.Join(conjuncts, "AND")
}

func regexMatch(column, columnForLiteralPrefix, regex string, isCaseSensitive bool) *sqlf.Query {
	if regex == "" || regex == "^" {
		return nil
	}

	// Exact match optimization
	if literal, ok, err := isLiteralEquality(regex); err == nil && ok && isCaseSensitive {
		return sqlf.Sprintf(fmt.Sprintf("%%s = %s", column), literal)
	}

	// Prefix match optimization
	if literal, ok, err := isLiteralPrefix(regex); err == nil && ok && isCaseSensitive && columnForLiteralPrefix != "" {
		return sqlf.Sprintf(fmt.Sprintf("%%s && %s", columnForLiteralPrefix), pg.Array([]string{literal}))
	}

	// Regex match
	operator := "~"
	if !isCaseSensitive {
		operator = "~*"
	}

	return sqlf.Sprintf(fmt.Sprintf("%s %s %%s", column, operator), regex)
}

// isLiteralEquality returns true if the given regex matches literal strings exactly.
// If so, it returns true along with the literal search query. If not, it returns false.
func isLiteralEquality(expr string) (string, bool, error) {
	regexp, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return "", false, errors.Wrap(err, "regexp/syntax.Parse")
	}

	// We want a concat of size 3 which is [begin, literal, end].
	if regexp.Op == syntax.OpConcat && len(regexp.Sub) == 3 {
		// starts with ^
		if regexp.Sub[0].Op == syntax.OpBeginLine || regexp.Sub[0].Op == syntax.OpBeginText {
			// is a literal
			if regexp.Sub[1].Op == syntax.OpLiteral {
				// ends with $
				if regexp.Sub[2].Op == syntax.OpEndLine || regexp.Sub[2].Op == syntax.OpEndText {
					return string(regexp.Sub[1].Rune), true, nil
				}
			}
		}
	}

	return "", false, nil
}

// isLiteralPrefix returns true if the given regex matches literal strings by prefix.
// If so, it returns true along with the literal search query. If not, it returns false.
func isLiteralPrefix(expr string) (string, bool, error) {
	regexp, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return "", false, errors.Wrap(err, "regexp/syntax.Parse")
	}

	// We want a concat of size 2 which is [begin, literal].
	if regexp.Op == syntax.OpConcat && len(regexp.Sub) == 2 {
		// starts with ^
		if regexp.Sub[0].Op == syntax.OpBeginLine || regexp.Sub[0].Op == syntax.OpBeginText {
			// is a literal
			if regexp.Sub[1].Op == syntax.OpLiteral {
				return string(regexp.Sub[1].Rune), true, nil
			}
		}
	}

	return "", false, nil
}

func negate(query *sqlf.Query) *sqlf.Query {
	if query == nil {
		return nil
	}

	return sqlf.Sprintf("NOT %s", query)
}
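regexMatch only falls back to a trigram-backed `~` match when neither the exact-match nor the prefix-match optimization applies, so it matters which query shapes isLiteralEquality accepts. A standalone sketch of the same shape check, written against the stdlib regexp/syntax (the grafana fork imported above exposes the same Parse API; stdlib behaves identically for these inputs):

```go
package main

import (
	"fmt"
	"regexp/syntax"
)

// Same shape check as isLiteralEquality above: ^literal$ parses to a
// 3-element concat [begin, literal, end].
func isLiteralEquality(expr string) (string, bool) {
	re, err := syntax.Parse(expr, syntax.Perl)
	if err != nil || re.Op != syntax.OpConcat || len(re.Sub) != 3 {
		return "", false
	}
	begin := re.Sub[0].Op == syntax.OpBeginLine || re.Sub[0].Op == syntax.OpBeginText
	end := re.Sub[2].Op == syntax.OpEndLine || re.Sub[2].Op == syntax.OpEndText
	if begin && re.Sub[1].Op == syntax.OpLiteral && end {
		return string(re.Sub[1].Rune), true
	}
	return "", false
}

func main() {
	fmt.Println(isLiteralEquality(`^Foo$`))   // Foo true  -> becomes `name = 'Foo'`
	fmt.Println(isLiteralEquality(`^Foo.*$`)) //     false -> falls back to `name ~ ...`
}
```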
enterprise/internal/rockskip/server.go (new file, 160 lines)
@@ -0,0 +1,160 @@
package rockskip

import (
	"context"
	"database/sql"
	"sync"

	"github.com/inconshreveable/log15"

	"github.com/sourcegraph/sourcegraph/internal/database/dbutil"
	"github.com/sourcegraph/sourcegraph/lib/errors"
)

type Symbol struct {
	Name   string `json:"name"`
	Parent string `json:"parent"`
	Kind   string `json:"kind"`
	Line   int    `json:"line"`
}

type ParseSymbolsFunc func(path string, bytes []byte) (symbols []Symbol, err error)

const NULL CommitId = 0

type Service struct {
	db                   *sql.DB
	git                  Git
	createParser         func() ParseSymbolsFunc
	status               *ServiceStatus
	repoUpdates          chan struct{}
	maxRepos             int
	logQueries           bool
	repoCommitToDone     map[string]chan struct{}
	repoCommitToDoneMu   sync.Mutex
	indexRequestQueues   []chan indexRequest
	symbolsCacheSize     int
	pathSymbolsCacheSize int
}

func NewService(
	db *sql.DB,
	git Git,
	createParser func() ParseSymbolsFunc,
	maxConcurrentlyIndexing int,
	maxRepos int,
	logQueries bool,
	indexRequestsQueueSize int,
	symbolsCacheSize int,
	pathSymbolsCacheSize int,
) (*Service, error) {
	indexRequestQueues := make([]chan indexRequest, maxConcurrentlyIndexing)
	for i := 0; i < maxConcurrentlyIndexing; i++ {
		indexRequestQueues[i] = make(chan indexRequest, indexRequestsQueueSize)
	}

	service := &Service{
		db:                   db,
		git:                  git,
		createParser:         createParser,
		status:               NewStatus(),
		repoUpdates:          make(chan struct{}, 1),
		maxRepos:             maxRepos,
		logQueries:           logQueries,
		repoCommitToDone:     map[string]chan struct{}{},
		repoCommitToDoneMu:   sync.Mutex{},
		indexRequestQueues:   indexRequestQueues,
		symbolsCacheSize:     symbolsCacheSize,
		pathSymbolsCacheSize: pathSymbolsCacheSize,
	}

	go service.startCleanupLoop()

	for i := 0; i < maxConcurrentlyIndexing; i++ {
		go service.startIndexingLoop(service.indexRequestQueues[i])
	}

	return service, nil
}

func (s *Service) startIndexingLoop(indexRequestQueue chan indexRequest) {
	for indexRequest := range indexRequestQueue {
		err := s.Index(context.Background(), indexRequest.repo, indexRequest.commit)
		close(indexRequest.done)
		if err != nil {
			log15.Error("indexing error", "repo", indexRequest.repo, "commit", indexRequest.commit, "err", err)
		}
	}
}

func (s *Service) startCleanupLoop() {
	for range s.repoUpdates {
		threadStatus := s.status.NewThreadStatus("cleanup")
		err := DeleteOldRepos(context.Background(), s.db, s.maxRepos, threadStatus)
		threadStatus.End()
		if err != nil {
			log15.Error("Failed to delete old repos", "error", err)
		}
	}
}

func getHops(ctx context.Context, tx dbutil.DB, commit int, tasklog *TaskLog) ([]int, error) {
	tasklog.Start("get hops")

	current := commit
	spine := []int{current}

	for {
		_, ancestor, _, present, err := GetCommitById(ctx, tx, current)
		if err != nil {
			return nil, errors.Wrap(err, "GetCommitById")
		} else if !present {
			break
		} else {
			if current == NULL {
				break
			}
			current = ancestor
			spine = append(spine, current)
		}
	}

	return spine, nil
}

func DeleteOldRepos(ctx context.Context, db *sql.DB, maxRepos int, threadStatus *ThreadStatus) error {
	// Get a fresh connection from the DB pool to get deterministic "lock stacking" behavior.
	// See doc/dev/background-information/sql/locking_behavior.md for more details.
	conn, err := db.Conn(context.Background())
	if err != nil {
		return errors.Wrap(err, "failed to get connection for deleting old repos")
	}
	defer conn.Close()

	// Keep deleting repos until we're back to at most maxRepos.
	for {
		more, err := tryDeleteOldestRepo(ctx, conn, maxRepos, threadStatus)
		if err != nil {
			return err
		}
		if !more {
			return nil
		}
	}
}

// ruler computes the ruler sequence:
//
// input : 0, 1, 2, 3, 4, 5, 6, 7, 8, ...
// output: 0, 0, 1, 0, 2, 0, 1, 0, 3, ...
//
// https://oeis.org/A007814
func ruler(n int) int {
	if n == 0 {
		return 0
	}
	if n%2 != 0 {
		return 0
	}
	return 1 + ruler(n/2)
}
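The ruler sequence is what gives the ancestry table its skip-list shape: per the Rockskip design, a commit at height h can carry an ancestor link that jumps roughly 2^ruler(h) commits back, so getHops walks O(log n) rows instead of the whole history. The call site is not in this hunk, so that interpretation is an inference from the design doc; the sequence itself is easy to check:

```go
package main

import "fmt"

// Same definition as ruler above: the exponent of 2 in n (OEIS A007814).
func ruler(n int) int {
	if n == 0 {
		return 0
	}
	if n%2 != 0 {
		return 0
	}
	return 1 + ruler(n/2)
}

func main() {
	for n := 0; n <= 8; n++ {
		fmt.Print(ruler(n), " ")
	}
	fmt.Println() // 0 0 1 0 2 0 1 0 3
}
```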
enterprise/internal/rockskip/server_test.go (new file, 309 lines)
@@ -0,0 +1,309 @@
package rockskip

import (
	"bufio"
	"context"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path"
	"sort"
	"strconv"
	"strings"
	"testing"

	"github.com/google/go-cmp/cmp"

	"github.com/sourcegraph/sourcegraph/cmd/symbols/types"
	"github.com/sourcegraph/sourcegraph/internal/api"
	"github.com/sourcegraph/sourcegraph/internal/database/dbtest"
	"github.com/sourcegraph/sourcegraph/lib/errors"
)

// simpleParse converts each line into a symbol.
func simpleParse(path string, bytes []byte) ([]Symbol, error) {
	symbols := []Symbol{}

	for _, line := range strings.Split(string(bytes), "\n") {
		if line == "" {
			continue
		}

		symbols = append(symbols, Symbol{Name: line})
	}

	return symbols, nil
}

func TestIndex(t *testing.T) {
	fatalIfError := func(err error, message string) {
		if err != nil {
			t.Fatal(errors.Wrap(err, message))
		}
	}

	gitDir, err := os.MkdirTemp("", "rockskip-test-index")
	fatalIfError(err, "MkdirTemp")

	t.Cleanup(func() {
		if t.Failed() {
			t.Logf("git dir %s left intact for inspection", gitDir)
		} else {
			os.RemoveAll(gitDir)
		}
	})

	gitCmd := func(args ...string) *exec.Cmd {
		cmd := exec.Command("git", args...)
		cmd.Dir = gitDir
		return cmd
	}

	gitRun := func(args ...string) {
		fatalIfError(gitCmd(args...).Run(), "git "+strings.Join(args, " "))
	}

	gitStdout := func(args ...string) string {
		stdout, err := gitCmd(args...).Output()
		fatalIfError(err, "git "+strings.Join(args, " "))
		return string(stdout)
	}

	getHead := func() string {
		return strings.TrimSpace(gitStdout("rev-parse", "HEAD"))
	}

	state := map[string][]string{}

	add := func(filename string, contents string) {
		fatalIfError(os.WriteFile(path.Join(gitDir, filename), []byte(contents), 0644), "os.WriteFile")
		gitRun("add", filename)
		symbols, err := simpleParse(filename, []byte(contents))
		fatalIfError(err, "simpleParse")
		state[filename] = []string{}
		for _, symbol := range symbols {
			state[filename] = append(state[filename], symbol.Name)
		}
	}

	rm := func(filename string) {
		gitRun("rm", filename)
		delete(state, filename)
	}

	gitRun("init")

	git, err := NewSubprocessGit(gitDir)
	fatalIfError(err, "NewSubprocessGit")
	defer git.Close()

	db := dbtest.NewDB(t)
	defer db.Close()

	createParser := func() ParseSymbolsFunc { return simpleParse }

	service, err := NewService(db, git, createParser, 1, 1, false, 1, 1, 1)
	fatalIfError(err, "NewService")

	verifyBlobs := func() {
		repo := "somerepo"
		commit := getHead()
		args := types.SearchArgs{Repo: api.RepoName(repo), CommitID: api.CommitID(commit), Query: ""}
		symbols, err := service.Search(context.Background(), args)
		fatalIfError(err, "Search")

		// Make sure the paths match.
		gotPathSet := map[string]struct{}{}
		for _, blob := range symbols {
			gotPathSet[blob.Path] = struct{}{}
		}
		gotPaths := []string{}
		for path := range gotPathSet {
			gotPaths = append(gotPaths, path)
		}
		wantPaths := []string{}
		for path := range state {
			wantPaths = append(wantPaths, path)
		}
		sort.Strings(gotPaths)
		sort.Strings(wantPaths)
		if diff := cmp.Diff(gotPaths, wantPaths); diff != "" {
			fmt.Println("unexpected paths (-got +want)")
			fmt.Println(diff)
			err = PrintInternals(context.Background(), db)
			fatalIfError(err, "PrintInternals")
			t.FailNow()
		}

		gotPathToSymbols := map[string][]string{}
		for _, blob := range symbols {
			gotPathToSymbols[blob.Path] = append(gotPathToSymbols[blob.Path], blob.Name)
		}

		// Make sure the symbols match.
		for path, gotSymbols := range gotPathToSymbols {
			wantSymbols := state[path]
			sort.Strings(gotSymbols)
			sort.Strings(wantSymbols)
			if diff := cmp.Diff(gotSymbols, wantSymbols); diff != "" {
				fmt.Println("unexpected symbols (-got +want)")
				fmt.Println(diff)
				err = PrintInternals(context.Background(), db)
				fatalIfError(err, "PrintInternals")
				t.FailNow()
			}
		}
	}

	commit := func(message string) {
		gitRun("commit", "--allow-empty", "-m", message)
		verifyBlobs()
	}

	add("a.txt", "sym1\n")
	commit("add a file with 1 symbol")

	add("b.txt", "sym1\n")
	commit("add another file with 1 symbol")

	add("c.txt", "sym1\nsym2")
	commit("add another file with 2 symbols")

	add("a.txt", "sym1\nsym2")
	commit("add a symbol to a.txt")

	commit("empty")

	rm("a.txt")
	commit("rm a.txt")
}

type SubprocessGit struct {
	gitDir        string
	catFileCmd    *exec.Cmd
	catFileStdin  io.WriteCloser
	catFileStdout bufio.Reader
}

func NewSubprocessGit(gitDir string) (*SubprocessGit, error) {
	cmd := exec.Command("git", "cat-file", "--batch")
	cmd.Dir = gitDir

	stdin, err := cmd.StdinPipe()
	if err != nil {
		return nil, err
	}

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}

	err = cmd.Start()
	if err != nil {
		return nil, err
	}

	return &SubprocessGit{
		gitDir:        gitDir,
		catFileCmd:    cmd,
		catFileStdin:  stdin,
		catFileStdout: *bufio.NewReader(stdout),
	}, nil
}

// The methods below use pointer receivers so that the buffered reader's state
// is shared across calls instead of being copied with the struct.

func (git *SubprocessGit) Close() error {
	err := git.catFileStdin.Close()
	if err != nil {
		return err
	}
	return git.catFileCmd.Wait()
}

func (git *SubprocessGit) LogReverseEach(repo string, givenCommit string, n int, onLogEntry func(entry LogEntry) error) (returnError error) {
	log := exec.Command("git", LogReverseArgs(n, givenCommit)...)
	log.Dir = git.gitDir
	output, err := log.StdoutPipe()
	if err != nil {
		return err
	}

	err = log.Start()
	if err != nil {
		return err
	}
	defer func() {
		err = log.Wait()
		if err != nil {
			returnError = err
		}
	}()

	return ParseLogReverseEach(output, onLogEntry)
}

func (git *SubprocessGit) RevListEach(repo string, givenCommit string, onCommit func(commit string) (shouldContinue bool, err error)) (returnError error) {
	revList := exec.Command("git", RevListArgs(givenCommit)...)
	revList.Dir = git.gitDir
	output, err := revList.StdoutPipe()
	if err != nil {
		return err
	}

	err = revList.Start()
	if err != nil {
		return err
	}
	defer func() {
		err = revList.Wait()
		if err != nil {
			returnError = err
		}
	}()

	return RevListEach(output, onCommit)
}

func (git *SubprocessGit) ArchiveEach(repo string, commit string, paths []string, onFile func(path string, contents []byte) error) error {
	for _, path := range paths {
		_, err := git.catFileStdin.Write([]byte(fmt.Sprintf("%s:%s\n", commit, path)))
		if err != nil {
			return errors.Wrap(err, "writing to cat-file stdin")
		}

		line, err := git.catFileStdout.ReadString('\n')
		if err != nil {
			return errors.Wrap(err, "read newline")
		}
		line = line[:len(line)-1] // Drop the trailing newline.
		parts := strings.Split(line, " ")
		if len(parts) != 3 {
			return errors.Newf("unexpected cat-file output: %q", line)
		}
		size, err := strconv.ParseInt(parts[2], 10, 64)
		if err != nil {
			return errors.Wrap(err, "parse size")
		}

		fileContents, err := io.ReadAll(io.LimitReader(&git.catFileStdout, size))
		if err != nil {
			return errors.Wrap(err, "read contents")
		}

		discarded, err := git.catFileStdout.Discard(1) // Discard the trailing newline.
		if err != nil {
			return errors.Wrap(err, "discard newline")
		}
		if discarded != 1 {
			return errors.Newf("expected to discard 1 byte, but discarded %d", discarded)
		}

		err = onFile(path, fileContents)
		if err != nil {
			return errors.Wrap(err, "onFile")
		}
	}

	return nil
}
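ArchiveEach drives a single long-lived `git cat-file --batch` subprocess: it writes one `<commit>:<path>` request per line and reads back a `<oid> <type> <size>` header, then exactly `size` bytes of content, then a newline. A standalone sketch of the header-parsing step, mirroring the code above (the example line is hypothetical):

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

func main() {
	// A cat-file --batch response header has the form "<oid> <type> <size>".
	line := "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 blob 11"

	parts := strings.Split(line, " ")
	if len(parts) != 3 {
		panic("unexpected cat-file output")
	}
	size, err := strconv.ParseInt(parts[2], 10, 64)
	if err != nil {
		panic(err)
	}

	// The reader then consumes exactly `size` bytes (io.LimitReader in the
	// real code) and discards the single trailing newline.
	fmt.Println("will read", size, "bytes, then discard 1 trailing newline")
}
```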
enterprise/internal/rockskip/status.go (new file, 312 lines)
@@ -0,0 +1,312 @@
package rockskip

import (
	"fmt"
	"net/http"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/inconshreveable/log15"

	"github.com/sourcegraph/sourcegraph/internal/database/basestore"
)

// RequestId is a unique int for each HTTP request.
type RequestId = int

// ServiceStatus contains the status of all requests.
type ServiceStatus struct {
	threadIdToThreadStatus map[RequestId]*ThreadStatus
	nextThreadId           RequestId
	mu                     sync.Mutex
}

func NewStatus() *ServiceStatus {
	return &ServiceStatus{
		threadIdToThreadStatus: map[int]*ThreadStatus{},
		nextThreadId:           0,
		mu:                     sync.Mutex{},
	}
}

func (s *ServiceStatus) NewThreadStatus(name string) *ThreadStatus {
	s.mu.Lock()
	defer s.mu.Unlock()

	threadId := s.nextThreadId
	s.nextThreadId++

	threadStatus := NewThreadStatus(name, func() {
		s.mu.Lock()
		defer s.mu.Unlock()
		delete(s.threadIdToThreadStatus, threadId)
	})

	s.threadIdToThreadStatus[threadId] = threadStatus

	return threadStatus
}

func (s *Service) HandleStatus(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	repositoryCount, _, err := basestore.ScanFirstInt(s.db.QueryContext(ctx, "SELECT COUNT(*) FROM rockskip_repos"))
	if err != nil {
		log15.Error("Failed to count repos", "error", err)
		w.WriteHeader(http.StatusInternalServerError)
		return
	}

	type repoRow struct {
		repo           string
		lastAccessedAt time.Time
	}

	repoRows := []repoRow{}
	repoSqlRows, err := s.db.QueryContext(ctx, "SELECT repo, last_accessed_at FROM rockskip_repos ORDER BY last_accessed_at DESC LIMIT 5")
	if err != nil {
		log15.Error("Failed to list repoRows", "error", err)
		w.WriteHeader(http.StatusInternalServerError)
		return
	}
	defer repoSqlRows.Close()
	for repoSqlRows.Next() {
		var repo string
		var lastAccessedAt time.Time
		if err := repoSqlRows.Scan(&repo, &lastAccessedAt); err != nil {
			log15.Error("Failed to scan repo", "error", err)
			w.WriteHeader(http.StatusInternalServerError)
			return
		}
		repoRows = append(repoRows, repoRow{repo: repo, lastAccessedAt: lastAccessedAt})
	}

	symbolsSize, _, err := basestore.ScanFirstString(s.db.QueryContext(ctx, "SELECT pg_size_pretty(pg_total_relation_size('rockskip_symbols'))"))
	if err != nil {
		log15.Error("Failed to get size of symbols table", "error", err)
		w.WriteHeader(http.StatusInternalServerError)
		return
	}

	w.WriteHeader(http.StatusOK)
	fmt.Fprintln(w, "This is the symbols service status page.")
	fmt.Fprintln(w, "")

	fmt.Fprintf(w, "Number of repositories: %d\n", repositoryCount)
	fmt.Fprintf(w, "Size of symbols table: %s\n", symbolsSize)
	fmt.Fprintln(w, "")

	if repositoryCount > 0 {
		fmt.Fprintf(w, "Most recently searched repositories (at most 5 shown)\n")
		for _, repo := range repoRows {
			fmt.Fprintf(w, "  %s %s\n", repo.lastAccessedAt, repo.repo)
		}
		fmt.Fprintln(w, "")
	}

	s.status.mu.Lock()
	defer s.status.mu.Unlock()

	if len(s.status.threadIdToThreadStatus) == 0 {
		fmt.Fprintln(w, "No requests in flight.")
		return
	}
	fmt.Fprintln(w, "Here are all in-flight requests:")
	fmt.Fprintln(w, "")

	ids := []int{}
	for id := range s.status.threadIdToThreadStatus {
		ids = append(ids, id)
	}
	sort.Ints(ids)

	for _, id := range ids {
		status := s.status.threadIdToThreadStatus[id]
		status.WithLock(func() {
			fmt.Fprintf(w, "%s\n", status.Name)
			if status.Total > 0 {
				progress := float64(status.Indexed) / float64(status.Total)
				remaining := "unknown"
				if progress != 0 {
					total := status.Tasklog.TotalDuration()
					remaining = fmt.Sprint(time.Duration(total.Seconds()/progress)*time.Second - total)
				}
				fmt.Fprintf(w, "  progress %.2f%% (indexed %d of %d commits), %s remaining\n", progress*100, status.Indexed, status.Total, remaining)
			}
			fmt.Fprintf(w, "  %s\n", status.Tasklog)
			locks := []string{}
			for lock := range status.HeldLocks {
				locks = append(locks, lock)
			}
			sort.Strings(locks)
			for _, lock := range locks {
				fmt.Fprintf(w, "  holding %s\n", lock)
			}
			fmt.Fprintln(w)
		})
	}
}

type ThreadStatus struct {
	Tasklog   *TaskLog
	Name      string
	HeldLocks map[string]struct{}
	Indexed   int
	Total     int
	mu        sync.Mutex
	onEnd     func()
}

func NewThreadStatus(name string, onEnd func()) *ThreadStatus {
	return &ThreadStatus{
		Tasklog:   NewTaskLog(),
		Name:      name,
		HeldLocks: map[string]struct{}{},
		Indexed:   -1,
		Total:     -1,
		mu:        sync.Mutex{},
		onEnd:     onEnd,
	}
}

func (s *ThreadStatus) WithLock(f func()) {
	s.mu.Lock()
	defer s.mu.Unlock()
	f()
}

func (s *ThreadStatus) SetProgress(indexed, total int) {
	s.WithLock(func() { s.Indexed = indexed; s.Total = total })
}
func (s *ThreadStatus) HoldLock(name string)    { s.WithLock(func() { s.HeldLocks[name] = struct{}{} }) }
func (s *ThreadStatus) ReleaseLock(name string) { s.WithLock(func() { delete(s.HeldLocks, name) }) }

func (s *ThreadStatus) End() {
	if s.onEnd != nil {
		s.mu.Lock()
		defer s.mu.Unlock()
		s.onEnd()
	}
}

type TaskLog struct {
	currentName  string
	currentStart time.Time
	nameToTask   map[string]*Task
	// This mutex is only necessary to synchronize with the status page handler.
	mu sync.Mutex
}

type Task struct {
	Duration time.Duration
	Count    int
}

func NewTaskLog() *TaskLog {
	return &TaskLog{
		currentName:  "idle",
		currentStart: time.Now(),
		nameToTask:   map[string]*Task{"idle": {Duration: 0, Count: 1}},
		mu:           sync.Mutex{},
	}
}

func (t *TaskLog) Start(name string) {
	t.mu.Lock()
	defer t.mu.Unlock()

	now := time.Now()

	if _, ok := t.nameToTask[t.currentName]; !ok {
		t.nameToTask[t.currentName] = &Task{Duration: 0, Count: 0}
	}
	t.nameToTask[t.currentName].Duration += now.Sub(t.currentStart)

	if _, ok := t.nameToTask[name]; !ok {
		t.nameToTask[name] = &Task{Duration: 0, Count: 0}
	}
	t.nameToTask[name].Count += 1

	t.currentName = name
	t.currentStart = now
}

func (t *TaskLog) Continue(name string) {
	t.mu.Lock()
	defer t.mu.Unlock()

	now := time.Now()

	if _, ok := t.nameToTask[t.currentName]; !ok {
		t.nameToTask[t.currentName] = &Task{Duration: 0, Count: 0}
	}
	t.nameToTask[t.currentName].Duration += now.Sub(t.currentStart)

	if _, ok := t.nameToTask[name]; !ok {
		t.nameToTask[name] = &Task{Duration: 0, Count: 0}
	}

	t.currentName = name
	t.currentStart = now
}

func (t *TaskLog) Reset() {
	t.mu.Lock()
	defer t.mu.Unlock()

	t.currentName = "idle"
	t.currentStart = time.Now()
	t.nameToTask = map[string]*Task{"idle": {Duration: 0, Count: 1}}
}

func (t *TaskLog) Print() {
	fmt.Println(t)
}

func (t *TaskLog) String() string {
	var s strings.Builder

	t.Continue(t.currentName)

	t.mu.Lock()
	defer t.mu.Unlock()

	var total time.Duration = 0
	totalCount := 0
	for _, task := range t.nameToTask {
		total += task.Duration
		totalCount += task.Count
	}
	fmt.Fprintf(&s, "Tasks (%.2fs total, current %s): ", total.Seconds(), t.currentName)

	type kv struct {
		Key   string
		Value *Task
	}

	var kvs []kv
	for k, v := range t.nameToTask {
		kvs = append(kvs, kv{k, v})
	}

	sort.Slice(kvs, func(i, j int) bool {
		return kvs[i].Value.Duration > kvs[j].Value.Duration
	})

	for _, kv := range kvs {
		fmt.Fprintf(&s, "%s %.2f%% %dx, ", kv.Key, kv.Value.Duration.Seconds()*100/total.Seconds(), kv.Value.Count)
	}

	return s.String()
}

func (t *TaskLog) TotalDuration() time.Duration {
	t.Continue(t.currentName)
	var total time.Duration = 0
	for _, task := range t.nameToTask {
		total += task.Duration
	}
	return total
}
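TaskLog attributes wall-clock time to named phases: Start switches the current task and charges the elapsed time to the previous one (bumping the new task's count), while Continue switches back without bumping the count. A usage sketch, assuming the package is importable at the path shown in this diff:

```go
package main

import (
	"fmt"
	"time"

	"github.com/sourcegraph/sourcegraph/enterprise/internal/rockskip"
)

func main() {
	tasklog := rockskip.NewTaskLog()

	tasklog.Start("get hops")
	time.Sleep(10 * time.Millisecond) // stand-in for real work

	tasklog.Start("run query")
	time.Sleep(20 * time.Millisecond)

	// String() flushes the current task's elapsed time and prints
	// per-task percentages and counts, sorted by duration.
	fmt.Println(tasklog)
}
```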
go.mod (2 changed lines)
@@ -366,7 +366,7 @@ require (
 	gopkg.in/warnings.v0 v0.1.2 // indirect
 	gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
 	k8s.io/kube-openapi v0.0.0-20220124234850-424119656bbf // indirect
-	k8s.io/utils v0.0.0-20220127004650-9b3446523e65 // indirect
+	k8s.io/utils v0.0.0-20220127004650-9b3446523e65
 	mvdan.cc/gofumpt v0.2.1 // indirect
 	sigs.k8s.io/yaml v1.3.0
 )
@@ -742,3 +742,51 @@ Indexes:
    "migration_logs_pkey" PRIMARY KEY, btree (id)

```

# Table "public.rockskip_ancestry"
```
  Column   |         Type          | Collation | Nullable |                    Default
-----------+-----------------------+-----------+----------+-----------------------------------------------
 id        | integer               |           | not null | nextval('rockskip_ancestry_id_seq'::regclass)
 repo_id   | integer               |           | not null |
 commit_id | character varying(40) |           | not null |
 height    | integer               |           | not null |
 ancestor  | integer               |           | not null |
Indexes:
    "rockskip_ancestry_pkey" PRIMARY KEY, btree (id)
    "rockskip_ancestry_repo_id_commit_id_key" UNIQUE CONSTRAINT, btree (repo_id, commit_id)
    "rockskip_ancestry_repo_commit_id" btree (repo_id, commit_id)

```

# Table "public.rockskip_repos"
```
      Column      |           Type           | Collation | Nullable |                  Default
------------------+--------------------------+-----------+----------+--------------------------------------------
 id               | integer                  |           | not null | nextval('rockskip_repos_id_seq'::regclass)
 repo             | text                     |           | not null |
 last_accessed_at | timestamp with time zone |           | not null |
Indexes:
    "rockskip_repos_pkey" PRIMARY KEY, btree (id)
    "rockskip_repos_repo_key" UNIQUE CONSTRAINT, btree (repo)
    "rockskip_repos_last_accessed_at" btree (last_accessed_at)
    "rockskip_repos_repo" btree (repo)

```

# Table "public.rockskip_symbols"
```
 Column  |   Type    | Collation | Nullable |                   Default
---------+-----------+-----------+----------+----------------------------------------------
 id      | integer   |           | not null | nextval('rockskip_symbols_id_seq'::regclass)
 added   | integer[] |           | not null |
 deleted | integer[] |           | not null |
 repo_id | integer   |           | not null |
 path    | text      |           | not null |
 name    | text      |           | not null |
Indexes:
    "rockskip_symbols_pkey" PRIMARY KEY, btree (id)
    "rockskip_symbols_gin" gin (singleton_integer(repo_id) gin__int_ops, added gin__int_ops, deleted gin__int_ops, singleton(path), path_prefixes(path), singleton(name), name gin_trgm_ops)
    "rockskip_symbols_repo_id_path_name" btree (repo_id, path, name)

```
migrations/codeintel/1000000032/down.sql (new file, 3 lines)
@@ -0,0 +1,3 @@
DROP TABLE IF EXISTS rockskip_ancestry;
DROP TABLE IF EXISTS rockskip_symbols;
DROP TABLE IF EXISTS rockskip_repos;

migrations/codeintel/1000000032/metadata.yaml (new file, 2 lines)
@@ -0,0 +1,2 @@
name: 'rockskip'
parent: 1000000031

migrations/codeintel/1000000032/up.sql (new file, 71 lines)
@@ -0,0 +1,71 @@
CREATE TABLE IF NOT EXISTS rockskip_repos (
    id SERIAL PRIMARY KEY,
    repo TEXT NOT NULL,
    last_accessed_at TIMESTAMP WITH TIME ZONE NOT NULL,
    UNIQUE (repo)
);

CREATE TABLE IF NOT EXISTS rockskip_ancestry (
    id SERIAL PRIMARY KEY,
    repo_id INTEGER NOT NULL,
    commit_id VARCHAR(40) NOT NULL,
    height INTEGER NOT NULL,
    ancestor INTEGER NOT NULL,
    UNIQUE (repo_id, commit_id)
);

-- Insert the null commit. repo_id 0 will not conflict with other repos because SERIAL's MINVALUE
-- defaults to 1.
INSERT INTO rockskip_ancestry
       (id, commit_id                                 , repo_id, height, ancestor)
VALUES (0 , '0000000000000000000000000000000000000000', 0      , 0     , 0       )
ON CONFLICT DO NOTHING;

CREATE TABLE IF NOT EXISTS rockskip_symbols (
    -- Globally unique ID of this instance of the symbol.
    id SERIAL PRIMARY KEY,
    added INTEGER[] NOT NULL,
    deleted INTEGER[] NOT NULL,

    -- Since we only support searching by symbol name and we re-parse the file at query time, symbols
    -- with the same name in the same file only need to be stored once. Upon re-parsing the file at query
    -- time we will discover all symbols that match.
    repo_id INTEGER NOT NULL,
    path TEXT NOT NULL,
    name TEXT NOT NULL
);

CREATE OR REPLACE FUNCTION singleton(value TEXT) RETURNS TEXT[] AS $$ BEGIN
    RETURN ARRAY[value];
END; $$ IMMUTABLE language plpgsql;

CREATE OR REPLACE FUNCTION singleton_integer(value INTEGER) RETURNS INTEGER[] AS $$ BEGIN
    RETURN ARRAY[value];
END; $$ IMMUTABLE language plpgsql;

CREATE OR REPLACE FUNCTION path_prefixes(path TEXT) RETURNS TEXT[] AS $$ BEGIN
    RETURN (
        SELECT array_agg(array_to_string(components[:len], '/')) prefixes
        FROM
            (SELECT regexp_split_to_array(path, E'/') components) t,
            generate_series(1, array_length(components, 1)) AS len
    );
END; $$ IMMUTABLE language plpgsql;

CREATE INDEX IF NOT EXISTS rockskip_repos_repo ON rockskip_repos(repo);

CREATE INDEX IF NOT EXISTS rockskip_repos_last_accessed_at ON rockskip_repos(last_accessed_at);

CREATE INDEX IF NOT EXISTS rockskip_ancestry_repo_commit_id ON rockskip_ancestry(repo_id, commit_id);

CREATE INDEX IF NOT EXISTS rockskip_symbols_repo_id_path_name ON rockskip_symbols(repo_id, path, name);

CREATE INDEX IF NOT EXISTS rockskip_symbols_gin ON rockskip_symbols USING GIN (
    singleton_integer(repo_id) gin__int_ops,
    added gin__int_ops,
    deleted gin__int_ops,
    singleton(path),
    path_prefixes(path),
    singleton(name),
    name gin_trgm_ops
);
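path_prefixes is what makes literal path prefixes from IncludePatterns indexable: the GIN index stores every directory prefix of each path, so a predicate like `ARRAY['cmd/symbols'] && path_prefixes(path)` becomes an index lookup instead of a table scan. A Go mirror of the function's semantics:

```go
package main

import (
	"fmt"
	"strings"
)

// pathPrefixes mirrors the SQL path_prefixes() function above: every leading
// directory prefix of a path, including the full path itself.
func pathPrefixes(path string) []string {
	components := strings.Split(path, "/")
	prefixes := make([]string, 0, len(components))
	for i := 1; i <= len(components); i++ {
		prefixes = append(prefixes, strings.Join(components[:i], "/"))
	}
	return prefixes
}

func main() {
	fmt.Println(pathPrefixes("cmd/symbols/main.go"))
	// [cmd cmd/symbols cmd/symbols/main.go]
}
```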
@@ -245,11 +245,6 @@ commands:
   symbols:
     cmd: .bin/symbols
     install: |
-      # Remove old pcre libs that might still be lying around.
-      # TODO delete these two lines after 2021-10-24 (1 month after removal of pcre).
-      rm -f libsqlite3-pcre.dylib || true
-      rm -f libsqlite3-pcre.so || true
-
       if [ -n "$DELVE" ]; then
         export GCFLAGS='all=-N -l'
       fi
@@ -265,6 +260,27 @@ commands:
       - internal
       - cmd/symbols

+  enterprise-symbols:
+    cmd: .bin/enterprise-symbols
+    install: |
+      if [ -n "$DELVE" ]; then
+        export GCFLAGS='all=-N -l'
+      fi
+
+      ./cmd/symbols/build-ctags.sh &&
+      go build -gcflags="$GCFLAGS" -o .bin/enterprise-symbols github.com/sourcegraph/sourcegraph/enterprise/cmd/symbols
+    checkBinary: .bin/enterprise-symbols
+    env:
+      CTAGS_COMMAND: cmd/symbols/universal-ctags-dev
+      CTAGS_PROCESSES: 2
+      USE_ROCKSKIP: 'false'
+    watch:
+      - lib
+      - internal
+      - cmd/symbols
+      - enterprise/cmd/symbols
+      - enterprise/internal/rockskip
+
   searcher:
     cmd: .bin/searcher
     install: |
@@ -425,8 +441,8 @@ commands:
       - enterprise/internal
       - lib/codeintel

-  executor-template: &executor_template
-    # TMPDIR is set here so it's not set in the `install` process, which would trip up `go build`.
+  executor-template:
+    &executor_template # TMPDIR is set here so it's not set in the `install` process, which would trip up `go build`.
     cmd: |
       env TMPDIR="$HOME/.sourcegraph/executor-temp" .bin/executor
     install: |
@@ -450,7 +466,7 @@ commands:
       env TMPDIR="$HOME/.sourcegraph/indexer-temp" .bin/executor
     env:
       EXECUTOR_QUEUE_NAME: codeintel
-      SRC_PROF_HTTP: ":6092"
+      SRC_PROF_HTTP: ':6092'

   batches-executor:
     <<: *executor_template
@@ -459,7 +475,7 @@ commands:
     env:
       EXECUTOR_QUEUE_NAME: batches
       EXECUTOR_MAXIMUM_NUM_JOBS: 8
-      SRC_PROF_HTTP: ":6093"
+      SRC_PROF_HTTP: ':6093'

   # If you want to use this, either start it with `sg run batches-executor-firecracker` or
   # modify the `commandsets.batches` in your local `sg.config.overwrite.yaml`
@@ -472,7 +488,7 @@ commands:
     env:
       EXECUTOR_USE_FIRECRACKER: true
       EXECUTOR_QUEUE_NAME: batches
-      SRC_PROF_HTTP: ":6093"
+      SRC_PROF_HTTP: ':6093'

   minio:
     cmd: |
@@ -604,8 +620,8 @@ commands:
       CONTAINER: grafana
       PORT: 3370
       # docker containers must access things via docker host on non-linux platforms
-      DOCKER_USER: ""
-      ADD_HOST_FLAG: ""
+      DOCKER_USER: ''
+      ADD_HOST_FLAG: ''
       CACHE: false
     watch:
       - monitoring
@@ -655,11 +671,11 @@ commands:
       CONTAINER: prometheus
       PORT: 9090
       CONFIG_DIR: docker-images/prometheus/config
-      DOCKER_USER: ""
-      DOCKER_NET: ""
+      DOCKER_USER: ''
+      DOCKER_NET: ''
       PROM_TARGETS: dev/prometheus/all/prometheus_targets.yml
       SRC_FRONTEND_INTERNAL: host.docker.internal:3090
-      ADD_HOST_FLAG: ""
+      ADD_HOST_FLAG: ''
       DISABLE_SOURCEGRAPH_CONFIG: false

   postgres_exporter:
@@ -686,7 +702,7 @@ commands:
       docker pull index.docker.io/grafana/loki:$LOKI_VERSION
     env:
       LOKI_DISK: $HOME/.sourcegraph-dev/data/loki
-      LOKI_VERSION: "2.3.0"
+      LOKI_VERSION: '2.3.0'
       LOKI_LOG_FILE: $HOME/.sourcegraph-dev/logs/loki/loki.log

   storybook:
@@ -772,7 +788,7 @@ commandsets:
     - enterprise-web
     - gitserver
     - searcher
-    - symbols
+    - enterprise-symbols
     - caddy
     - docsite
     - syntax-highlighter