symbols: Bring this baby into 2021 (#27986)

Co-authored-by: Noah Santschi-Cooney <noah@santschi-cooney.ch>
This commit is contained in:
Eric Fritz 2021-11-30 11:56:45 -06:00 committed by GitHub
parent ca78d2a7bc
commit e2ff2b1f3f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
62 changed files with 3852 additions and 1633 deletions

38
cmd/symbols/config.go Normal file
View File

@ -0,0 +1,38 @@
package main
import (
"os"
"runtime"
"strconv"
"github.com/sourcegraph/sourcegraph/internal/env"
)
// Config holds the environment-derived configuration for the symbols
// service. Populate it by calling Load before use.
type Config struct {
	env.BaseConfig

	// ctagsCommand is the universal-ctags executable to invoke.
	ctagsCommand string
	// ctagsPatternLengthLimit caps the length of patterns output by ctags.
	ctagsPatternLengthLimit int
	// ctagsLogErrors enables ctags error logging (on in dev deployments).
	ctagsLogErrors bool
	// ctagsDebugLogs enables ctags debug logging (currently hard-coded off).
	ctagsDebugLogs bool
	// sanityCheck, when true, checks that go-sqlite3 works and then exits.
	sanityCheck bool
	// cacheDir is the directory in which cached symbols are stored.
	cacheDir string
	// cacheSizeMB is the maximum size of the disk cache in megabytes.
	cacheSizeMB int
	// numCtagsProcesses is the number of concurrent parser processes to run.
	numCtagsProcesses int
}
// config is the process-wide configuration instance; its fields are filled
// in by Load at startup.
var config = &Config{}
// Load reads from the environment and stores the transformed data on the config object for later retrieval.
func (c *Config) Load() {
	c.ctagsCommand = c.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)")
	c.ctagsPatternLengthLimit = c.GetInt("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags")
	// Error logging is enabled only for dev deployments; this reads the raw
	// environment rather than going through BaseConfig.
	c.ctagsLogErrors = os.Getenv("DEPLOY_TYPE") == "dev"
	// Not configurable via the environment; flip manually when debugging ctags.
	c.ctagsDebugLogs = false
	c.sanityCheck = c.GetBool("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not")
	c.cacheDir = c.Get("CACHE_DIR", "/tmp/symbols-cache", "directory in which to store cached symbols")
	c.cacheSizeMB = c.GetInt("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache (in megabytes)")
	// Defaults to one parser per available CPU.
	c.numCtagsProcesses = c.GetInt("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of concurrent parser processes to run")
}

View File

@ -0,0 +1,3 @@
package api
//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitServerClient -o mock_iface_test.go

View File

@ -0,0 +1,79 @@
package api
import (
"context"
"encoding/json"
"net/http"
"github.com/cockroachdb/errors"
"github.com/inconshreveable/log15"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// apiHandler implements the symbols HTTP endpoints. It resolves per-repo
// SQLite databases through cachedDatabaseWriter and records metrics via
// operations.
type apiHandler struct {
	cachedDatabaseWriter writer.CachedDatabaseWriter
	operations           *operations
}
// NewHandler constructs the HTTP handler exposing the symbols service
// routes: /search for symbol queries and /healthz for health checks.
func NewHandler(
	cachedDatabaseWriter writer.CachedDatabaseWriter,
	observationContext *observation.Context,
) http.Handler {
	handler := newAPIHandler(cachedDatabaseWriter, observationContext)

	router := http.NewServeMux()
	router.HandleFunc("/search", handler.handleSearch)
	router.HandleFunc("/healthz", handler.handleHealthCheck)
	return router
}
// newAPIHandler builds the underlying apiHandler with its operations
// (metrics/tracing) derived from the given observation context.
func newAPIHandler(
	cachedDatabaseWriter writer.CachedDatabaseWriter,
	observationContext *observation.Context,
) *apiHandler {
	handler := &apiHandler{
		cachedDatabaseWriter: cachedDatabaseWriter,
		operations:           newOperations(observationContext),
	}
	return handler
}
// maxNumSymbolResults caps the number of symbols returned by a single search.
const maxNumSymbolResults = 500
// handleSearch decodes a JSON types.SearchArgs payload from the request
// body, executes the symbol search, and writes the JSON-encoded results.
func (h *apiHandler) handleSearch(w http.ResponseWriter, r *http.Request) {
	var args types.SearchArgs
	if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	// Clamp negative or oversized limits to the maximum. NOTE(review):
	// First == 0 passes through unchanged — presumably intentional, but
	// verify against the store's LIMIT handling.
	if args.First < 0 || args.First > maxNumSymbolResults {
		args.First = maxNumSymbolResults
	}

	result, err := h.handleSearchInternal(r.Context(), args)
	if err != nil {
		// Ignore reporting errors where client disconnected
		if r.Context().Err() == context.Canceled && errors.Is(err, context.Canceled) {
			return
		}

		log15.Error("Symbol search failed", "args", args, "error", err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	if err := json.NewEncoder(w).Encode(result); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}
// handleHealthCheck responds 200 OK with body "OK"; used by orchestration
// liveness/readiness probes.
func (h *apiHandler) handleHealthCheck(w http.ResponseWriter, r *http.Request) {
	w.WriteHeader(http.StatusOK)

	if _, err := w.Write([]byte("OK")); err != nil {
		// log15 is a structured logger taking a message plus key/value
		// pairs — the original passed a printf-style format string with a
		// dangling value, which mis-renders the error.
		log15.Error("Failed to write response to health check", "error", err)
	}
}

View File

@ -0,0 +1,154 @@
package api
import (
"context"
"net/http/httptest"
"os"
"reflect"
"testing"
"time"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/search"
"github.com/sourcegraph/sourcegraph/internal/search/result"
symbolsclient "github.com/sourcegraph/sourcegraph/internal/symbols"
)
func init() {
	// Register the regexp-enabled sqlite3 driver the store opens.
	database.Init()
}
// TestHandler exercises the search endpoint end-to-end: an httptest server
// wrapping NewHandler, backed by a mock gitserver serving a single file and
// a mock ctags parser that always emits the symbols "x" and "y" for a.js.
func TestHandler(t *testing.T) {
	tmpDir, err := os.MkdirTemp("", "")
	if err != nil {
		t.Fatal(err)
	}
	defer func() { os.RemoveAll(tmpDir) }()

	cache := &diskcache.Store{
		Dir:               tmpDir,
		Component:         "symbols",
		BackgroundTimeout: 20 * time.Minute,
	}

	parserFactory := func() (ctags.Parser, error) {
		return newMockParser("x", "y"), nil
	}
	parserPool, err := parser.NewParserPool(parserFactory, 15)
	if err != nil {
		t.Fatal(err)
	}

	files := map[string]string{
		"a.js": "var x = 1",
	}
	gitserverClient := NewMockGitserverClient()
	gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(files))

	// Named symbolParser (not parser) so the parser package isn't shadowed.
	symbolParser := parser.NewParser(parserPool, fetcher.NewRepositoryFetcher(gitserverClient, 15, &observation.TestContext), &observation.TestContext)
	databaseWriter := writer.NewDatabaseWriter(tmpDir, gitserverClient, symbolParser)
	cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
	handler := NewHandler(cachedDatabaseWriter, &observation.TestContext)

	server := httptest.NewServer(handler)
	defer server.Close()

	client := symbolsclient.Client{
		URL:        server.URL,
		HTTPClient: httpcli.InternalDoer,
	}

	x := result.Symbol{Name: "x", Path: "a.js"}
	y := result.Symbol{Name: "y", Path: "a.js"}

	testCases := map[string]struct {
		args     search.SymbolsParameters
		expected result.Symbols
	}{
		"simple": {
			args:     search.SymbolsParameters{First: 10},
			expected: []result.Symbol{x, y},
		},
		"onematch": {
			args:     search.SymbolsParameters{Query: "x", First: 10},
			expected: []result.Symbol{x},
		},
		"nomatches": {
			args:     search.SymbolsParameters{Query: "foo", First: 10},
			expected: nil,
		},
		"caseinsensitiveexactmatch": {
			args:     search.SymbolsParameters{Query: "^X$", First: 10},
			expected: []result.Symbol{x},
		},
		"casesensitiveexactmatch": {
			args:     search.SymbolsParameters{Query: "^x$", IsCaseSensitive: true, First: 10},
			expected: []result.Symbol{x},
		},
		"casesensitivenoexactmatch": {
			args:     search.SymbolsParameters{Query: "^X$", IsCaseSensitive: true, First: 10},
			expected: nil,
		},
		"caseinsensitiveexactpathmatch": {
			args:     search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, First: 10},
			expected: []result.Symbol{x, y},
		},
		"casesensitiveexactpathmatch": {
			args:     search.SymbolsParameters{IncludePatterns: []string{"^a.js$"}, IsCaseSensitive: true, First: 10},
			expected: []result.Symbol{x, y},
		},
		"casesensitivenoexactpathmatch": {
			args:     search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, IsCaseSensitive: true, First: 10},
			expected: nil,
		},
		"exclude": {
			args:     search.SymbolsParameters{ExcludePattern: "a.js", IsCaseSensitive: true, First: 10},
			expected: nil,
		},
	}

	for label, testCase := range testCases {
		t.Run(label, func(t *testing.T) {
			result, err := client.Search(context.Background(), testCase.args)
			if err != nil {
				t.Fatalf("unexpected error performing search: %s", err)
			}
			if result == nil {
				if testCase.expected != nil {
					t.Errorf("unexpected search result. want=%+v, have=nil", testCase.expected)
				}
			} else if !reflect.DeepEqual(*result, testCase.expected) {
				t.Errorf("unexpected search result. want=%+v, have=%+v", testCase.expected, *result)
			}
		})
	}
}
// mockParser is a ctags.Parser stub that emits a fixed list of symbol names
// regardless of the file it is asked to parse.
type mockParser struct {
	names []string
}
// newMockParser constructs a mockParser emitting one entry per given name.
func newMockParser(names ...string) ctags.Parser {
	p := mockParser{names: names}
	return &p
}
// Parse implements ctags.Parser. It ignores the given file name and content
// entirely and returns one entry per configured symbol name, all attributed
// to path "a.js".
func (m *mockParser) Parse(name string, content []byte) ([]*ctags.Entry, error) {
	entries := make([]*ctags.Entry, 0, len(m.names))
	// The loop variable previously reused "name", shadowing the (unused)
	// parameter; renamed for clarity.
	for _, symbolName := range m.names {
		entries = append(entries, &ctags.Entry{Name: symbolName, Path: "a.js"})
	}
	return entries, nil
}
// Close implements ctags.Parser; the mock holds no resources to release.
func (m *mockParser) Close() {}

View File

@ -0,0 +1,304 @@
// Code generated by go-mockgen 1.1.2; DO NOT EDIT.
package api
import (
"context"
"io"
"sync"
gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
api "github.com/sourcegraph/sourcegraph/internal/api"
)
// MockGitserverClient is a mock implementation of the GitserverClient
// interface (from the package
// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used
// for unit testing.
type MockGitserverClient struct {
// FetchTarFunc is an instance of a mock function object controlling the
// behavior of the method FetchTar.
FetchTarFunc *GitserverClientFetchTarFunc
// GitDiffFunc is an instance of a mock function object controlling the
// behavior of the method GitDiff.
GitDiffFunc *GitserverClientGitDiffFunc
}
// NewMockGitserverClient creates a new mock of the GitserverClient
// interface. All methods return zero values for all results, unless
// overwritten.
func NewMockGitserverClient() *MockGitserverClient {
return &MockGitserverClient{
FetchTarFunc: &GitserverClientFetchTarFunc{
defaultHook: func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
return nil, nil
},
},
GitDiffFunc: &GitserverClientGitDiffFunc{
defaultHook: func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
return gitserver.Changes{}, nil
},
},
}
}
// NewStrictMockGitserverClient creates a new mock of the GitserverClient
// interface. All methods panic on invocation, unless overwritten.
func NewStrictMockGitserverClient() *MockGitserverClient {
return &MockGitserverClient{
FetchTarFunc: &GitserverClientFetchTarFunc{
defaultHook: func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
panic("unexpected invocation of MockGitserverClient.FetchTar")
},
},
GitDiffFunc: &GitserverClientGitDiffFunc{
defaultHook: func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
panic("unexpected invocation of MockGitserverClient.GitDiff")
},
},
}
}
// NewMockGitserverClientFrom creates a new mock of the MockGitserverClient
// interface. All methods delegate to the given implementation, unless
// overwritten.
func NewMockGitserverClientFrom(i gitserver.GitserverClient) *MockGitserverClient {
return &MockGitserverClient{
FetchTarFunc: &GitserverClientFetchTarFunc{
defaultHook: i.FetchTar,
},
GitDiffFunc: &GitserverClientGitDiffFunc{
defaultHook: i.GitDiff,
},
}
}
// GitserverClientFetchTarFunc describes the behavior when the FetchTar
// method of the parent MockGitserverClient instance is invoked.
type GitserverClientFetchTarFunc struct {
defaultHook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)
hooks []func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)
history []GitserverClientFetchTarFuncCall
mutex sync.Mutex
}
// FetchTar delegates to the next hook function in the queue and stores the
// parameter and result values of this invocation.
func (m *MockGitserverClient) FetchTar(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 []string) (io.ReadCloser, error) {
r0, r1 := m.FetchTarFunc.nextHook()(v0, v1, v2, v3)
m.FetchTarFunc.appendCall(GitserverClientFetchTarFuncCall{v0, v1, v2, v3, r0, r1})
return r0, r1
}
// SetDefaultHook sets function that is called when the FetchTar method of
// the parent MockGitserverClient instance is invoked and the hook queue is
// empty.
func (f *GitserverClientFetchTarFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)) {
f.defaultHook = hook
}
// PushHook adds a function to the end of hook queue. Each invocation of the
// FetchTar method of the parent MockGitserverClient instance invokes the
// hook at the front of the queue and discards it. After the queue is empty,
// the default hook function is invoked for any future action.
func (f *GitserverClientFetchTarFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)) {
f.mutex.Lock()
f.hooks = append(f.hooks, hook)
f.mutex.Unlock()
}
// SetDefaultReturn calls SetDefaultDefaultHook with a function that returns
// the given values.
func (f *GitserverClientFetchTarFunc) SetDefaultReturn(r0 io.ReadCloser, r1 error) {
f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
return r0, r1
})
}
// PushReturn calls PushDefaultHook with a function that returns the given
// values.
func (f *GitserverClientFetchTarFunc) PushReturn(r0 io.ReadCloser, r1 error) {
f.PushHook(func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
return r0, r1
})
}
func (f *GitserverClientFetchTarFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
f.mutex.Lock()
defer f.mutex.Unlock()
if len(f.hooks) == 0 {
return f.defaultHook
}
hook := f.hooks[0]
f.hooks = f.hooks[1:]
return hook
}
func (f *GitserverClientFetchTarFunc) appendCall(r0 GitserverClientFetchTarFuncCall) {
f.mutex.Lock()
f.history = append(f.history, r0)
f.mutex.Unlock()
}
// History returns a sequence of GitserverClientFetchTarFuncCall objects
// describing the invocations of this function.
func (f *GitserverClientFetchTarFunc) History() []GitserverClientFetchTarFuncCall {
f.mutex.Lock()
history := make([]GitserverClientFetchTarFuncCall, len(f.history))
copy(history, f.history)
f.mutex.Unlock()
return history
}
// GitserverClientFetchTarFuncCall is an object that describes an invocation
// of method FetchTar on an instance of MockGitserverClient.
type GitserverClientFetchTarFuncCall struct {
// Arg0 is the value of the 1st argument passed to this method
// invocation.
Arg0 context.Context
// Arg1 is the value of the 2nd argument passed to this method
// invocation.
Arg1 api.RepoName
// Arg2 is the value of the 3rd argument passed to this method
// invocation.
Arg2 api.CommitID
// Arg3 is the value of the 4th argument passed to this method
// invocation.
Arg3 []string
// Result0 is the value of the 1st result returned from this method
// invocation.
Result0 io.ReadCloser
// Result1 is the value of the 2nd result returned from this method
// invocation.
Result1 error
}
// Args returns an interface slice containing the arguments of this
// invocation.
func (c GitserverClientFetchTarFuncCall) Args() []interface{} {
return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3}
}
// Results returns an interface slice containing the results of this
// invocation.
func (c GitserverClientFetchTarFuncCall) Results() []interface{} {
return []interface{}{c.Result0, c.Result1}
}
// GitserverClientGitDiffFunc describes the behavior when the GitDiff method
// of the parent MockGitserverClient instance is invoked.
type GitserverClientGitDiffFunc struct {
defaultHook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)
hooks []func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)
history []GitserverClientGitDiffFuncCall
mutex sync.Mutex
}
// GitDiff delegates to the next hook function in the queue and stores the
// parameter and result values of this invocation.
func (m *MockGitserverClient) GitDiff(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 api.CommitID) (gitserver.Changes, error) {
r0, r1 := m.GitDiffFunc.nextHook()(v0, v1, v2, v3)
m.GitDiffFunc.appendCall(GitserverClientGitDiffFuncCall{v0, v1, v2, v3, r0, r1})
return r0, r1
}
// SetDefaultHook sets function that is called when the GitDiff method of
// the parent MockGitserverClient instance is invoked and the hook queue is
// empty.
func (f *GitserverClientGitDiffFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)) {
f.defaultHook = hook
}
// PushHook adds a function to the end of hook queue. Each invocation of the
// GitDiff method of the parent MockGitserverClient instance invokes the
// hook at the front of the queue and discards it. After the queue is empty,
// the default hook function is invoked for any future action.
func (f *GitserverClientGitDiffFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)) {
f.mutex.Lock()
f.hooks = append(f.hooks, hook)
f.mutex.Unlock()
}
// SetDefaultReturn calls SetDefaultDefaultHook with a function that returns
// the given values.
func (f *GitserverClientGitDiffFunc) SetDefaultReturn(r0 gitserver.Changes, r1 error) {
f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
return r0, r1
})
}
// PushReturn calls PushDefaultHook with a function that returns the given
// values.
func (f *GitserverClientGitDiffFunc) PushReturn(r0 gitserver.Changes, r1 error) {
f.PushHook(func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
return r0, r1
})
}
func (f *GitserverClientGitDiffFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
f.mutex.Lock()
defer f.mutex.Unlock()
if len(f.hooks) == 0 {
return f.defaultHook
}
hook := f.hooks[0]
f.hooks = f.hooks[1:]
return hook
}
func (f *GitserverClientGitDiffFunc) appendCall(r0 GitserverClientGitDiffFuncCall) {
f.mutex.Lock()
f.history = append(f.history, r0)
f.mutex.Unlock()
}
// History returns a sequence of GitserverClientGitDiffFuncCall objects
// describing the invocations of this function.
func (f *GitserverClientGitDiffFunc) History() []GitserverClientGitDiffFuncCall {
f.mutex.Lock()
history := make([]GitserverClientGitDiffFuncCall, len(f.history))
copy(history, f.history)
f.mutex.Unlock()
return history
}
// GitserverClientGitDiffFuncCall is an object that describes an invocation
// of method GitDiff on an instance of MockGitserverClient.
type GitserverClientGitDiffFuncCall struct {
// Arg0 is the value of the 1st argument passed to this method
// invocation.
Arg0 context.Context
// Arg1 is the value of the 2nd argument passed to this method
// invocation.
Arg1 api.RepoName
// Arg2 is the value of the 3rd argument passed to this method
// invocation.
Arg2 api.CommitID
// Arg3 is the value of the 4th argument passed to this method
// invocation.
Arg3 api.CommitID
// Result0 is the value of the 1st result returned from this method
// invocation.
Result0 gitserver.Changes
// Result1 is the value of the 2nd result returned from this method
// invocation.
Result1 error
}
// Args returns an interface slice containing the arguments of this
// invocation.
func (c GitserverClientGitDiffFuncCall) Args() []interface{} {
return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3}
}
// Results returns an interface slice containing the results of this
// invocation.
func (c GitserverClientGitDiffFuncCall) Results() []interface{} {
return []interface{}{c.Result0, c.Result1}
}

View File

@ -0,0 +1,34 @@
package api
import (
"fmt"
"github.com/sourcegraph/sourcegraph/internal/metrics"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// operations bundles the observation.Operation values (metrics + tracing)
// used by the symbols API handlers.
type operations struct {
	search *observation.Operation
}
// newOperations creates the RED metrics and observation operations for the
// symbols API endpoints, registered on the given observation context.
func newOperations(observationContext *observation.Context) *operations {
	// Named redMetrics (not metrics) so the metrics package isn't shadowed
	// by the local variable.
	redMetrics := metrics.NewREDMetrics(
		observationContext.Registerer,
		"codeintel_symbols_api",
		metrics.WithLabels("op"),
		metrics.WithCountHelp("Total number of method invocations."),
		metrics.WithDurationBuckets([]float64{1, 2, 5, 10, 30, 60}),
	)

	op := func(name string) *observation.Operation {
		return observationContext.Operation(observation.Op{
			Name:              fmt.Sprintf("codeintel.symbols.api.%s", name),
			MetricLabelValues: []string{name},
			Metrics:           redMetrics,
		})
	}

	return &operations{
		search: op("Search"),
	}
}

View File

@ -0,0 +1,52 @@
package api
import (
"context"
"strings"
"time"
"github.com/cockroachdb/errors"
"github.com/opentracing/opentracing-go/log"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)
const searchTimeout = 60 * time.Second
// handleSearchInternal resolves (creating on demand) the SQLite database
// for the requested repo@commit, then runs the symbol search against it.
// The whole operation, including database construction, is bounded by
// searchTimeout.
func (h *apiHandler) handleSearchInternal(ctx context.Context, args types.SearchArgs) (_ *result.Symbols, err error) {
	ctx, traceLog, endObservation := h.operations.search.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
		log.String("repo", string(args.Repo)),
		log.String("commitID", string(args.CommitID)),
		log.String("query", args.Query),
		log.Bool("isRegExp", args.IsRegExp),
		log.Bool("isCaseSensitive", args.IsCaseSensitive),
		log.Int("numIncludePatterns", len(args.IncludePatterns)),
		log.String("includePatterns", strings.Join(args.IncludePatterns, ":")),
		log.String("excludePattern", args.ExcludePattern),
		log.Int("first", args.First),
	}})
	defer endObservation(1, observation.Args{})

	ctx, cancel := context.WithTimeout(ctx, searchTimeout)
	defer cancel()

	dbFile, err := h.cachedDatabaseWriter.GetOrCreateDatabaseFile(ctx, args)
	if err != nil {
		return nil, errors.Wrap(err, "databaseWriter.GetOrCreateDatabaseFile")
	}
	traceLog(log.String("dbFile", dbFile))

	var results result.Symbols
	// Open the per-repo database just long enough to run the query.
	err = store.WithSQLiteStore(dbFile, func(db store.Store) (err error) {
		if results, err = db.Search(ctx, args); err != nil {
			return errors.Wrap(err, "store.Search")
		}
		return nil
	})
	return &results, err
}

View File

@ -0,0 +1,17 @@
package database
import (
"database/sql"
"regexp"
"github.com/mattn/go-sqlite3"
)
// Init registers the "sqlite3_with_regexp" database/sql driver: a sqlite3
// driver whose connections expose a REGEXP SQL function backed by Go's
// regexp.MatchString. Must be called once before opening any store.
func Init() {
	sql.Register("sqlite3_with_regexp",
		&sqlite3.SQLiteDriver{
			ConnectHook: func(conn *sqlite3.SQLiteConn) error {
				return conn.RegisterFunc("REGEXP", regexp.MatchString, true)
			},
		})
}

View File

@ -0,0 +1,57 @@
package janitor
import (
"context"
"time"
"github.com/cockroachdb/errors"
"github.com/inconshreveable/log15"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/internal/goroutine"
)
// cacheEvicter is a periodic background task that keeps the on-disk
// symbols cache under a configured size limit.
type cacheEvicter struct {
	// cache is the disk backed cache.
	cache *diskcache.Store

	// maxCacheSizeBytes is the maximum size of the cache in bytes. Note that we can
	// be larger than maxCacheSizeBytes temporarily between runs of this handler.
	// When we go over maxCacheSizeBytes we trigger delete files until we get below
	// maxCacheSizeBytes.
	maxCacheSizeBytes int64

	// metrics records cache size, evictions, and errors.
	metrics *Metrics
}
var _ goroutine.Handler = &cacheEvicter{}
var _ goroutine.ErrorHandler = &cacheEvicter{}
// NewCacheEvicter returns a background routine that evicts cache entries
// every interval to keep the cache under maxCacheSizeBytes.
func NewCacheEvicter(interval time.Duration, cache *diskcache.Store, maxCacheSizeBytes int64, metrics *Metrics) goroutine.BackgroundRoutine {
	return goroutine.NewPeriodicGoroutine(context.Background(), interval, &cacheEvicter{
		cache:             cache,
		maxCacheSizeBytes: maxCacheSizeBytes,
		metrics:           metrics,
	})
}
// Handle periodically checks the size of the cache and evicts/deletes items.
func (e *cacheEvicter) Handle(ctx context.Context) error {
	// A zero limit disables eviction entirely.
	if e.maxCacheSizeBytes == 0 {
		return nil
	}

	stats, evictErr := e.cache.Evict(e.maxCacheSizeBytes)
	if evictErr != nil {
		return errors.Wrap(evictErr, "cache.Evict")
	}

	e.metrics.cacheSizeBytes.Set(float64(stats.CacheSize))
	e.metrics.evictions.Add(float64(stats.Evicted))
	return nil
}
// HandleError counts and logs errors returned by Handle; it is invoked by
// the periodic goroutine runner.
func (e *cacheEvicter) HandleError(err error) {
	e.metrics.errors.Inc()
	log15.Error("Failed to evict items from cache", "error", err)
}

View File

@ -0,0 +1,42 @@
package janitor
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// Metrics holds the Prometheus instruments reported by the cache janitor.
type Metrics struct {
	// cacheSizeBytes tracks the total on-disk cache size.
	cacheSizeBytes prometheus.Gauge
	// evictions counts items evicted from the cache.
	evictions prometheus.Counter
	// errors counts failures while evicting.
	errors prometheus.Counter
}
// NewMetrics creates the janitor's Prometheus metrics and registers them on
// the given observation context's registerer.
func NewMetrics(observationContext *observation.Context) *Metrics {
	cacheSizeBytes := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "src",
		Name:      "codeintel_symbols_store_cache_size_bytes",
		Help:      "The total size of items in the on disk cache.",
	})
	observationContext.Registerer.MustRegister(cacheSizeBytes)

	// Both counters share everything but name and help text.
	newCounter := func(name, help string) prometheus.Counter {
		counter := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "src",
			Name:      name,
			Help:      help,
		})
		observationContext.Registerer.MustRegister(counter)
		return counter
	}

	return &Metrics{
		cacheSizeBytes: cacheSizeBytes,
		evictions:      newCounter("codeintel_symbols_store_evictions_total", "The total number of items evicted from the cache."),
		errors:         newCounter("codeintel_symbols_store_errors_total", "The total number of failures evicting items from the cache."),
	}
}

View File

@ -0,0 +1,20 @@
package database
import "github.com/jmoiron/sqlx"
// SanityCheck makes sure that go-sqlite3 was compiled with cgo by seeing if we can actually create a table.
// Returns nil on success; any error indicates an unusable sqlite build.
func SanityCheck() error {
	// An in-memory database avoids touching disk for the probe.
	db, err := sqlx.Open("sqlite3_with_regexp", ":memory:")
	if err != nil {
		return err
	}
	defer db.Close()

	// If go-sqlite3 was not compiled with cgo, the error will be:
	// > Binary was compiled with 'CGO_ENABLED=0', go-sqlite3 requires cgo to work. This is a stub
	if _, err := db.Exec("CREATE TABLE test (col TEXT);"); err != nil {
		return err
	}

	return nil
}

View File

@ -0,0 +1,30 @@
package store
import (
"context"
"github.com/keegancsmith/sqlf"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
)
// CreateMetaTable creates the single-row meta table that records the commit
// revision the database was built from. Idempotent via IF NOT EXISTS.
// Receiver renamed from w to s for consistency with the other store methods.
func (s *store) CreateMetaTable(ctx context.Context) error {
	return s.Exec(ctx, sqlf.Sprintf(`
		CREATE TABLE IF NOT EXISTS meta (
			id INTEGER PRIMARY KEY CHECK (id = 0),
			revision TEXT NOT NULL
		)
	`))
}
// GetCommit returns the revision stored in the meta table, along with a
// boolean indicating whether a row existed.
func (s *store) GetCommit(ctx context.Context) (string, bool, error) {
	return basestore.ScanFirstString(s.Query(ctx, sqlf.Sprintf(`SELECT revision FROM meta`)))
}
// InsertMeta writes the initial meta row (id fixed at 0) for commitID.
func (s *store) InsertMeta(ctx context.Context, commitID string) error {
	return s.Exec(ctx, sqlf.Sprintf(`INSERT INTO meta (id, revision) VALUES (0, %s)`, commitID))
}
// UpdateMeta replaces the stored revision with commitID (the table holds at
// most one row, so no WHERE clause is needed).
func (s *store) UpdateMeta(ctx context.Context, commitID string) error {
	return s.Exec(ctx, sqlf.Sprintf(`UPDATE meta SET revision = %s`, commitID))
}

View File

@ -0,0 +1,143 @@
package store
import (
"context"
"database/sql"
"regexp/syntax"
"strings"
"github.com/cockroachdb/errors"
"github.com/keegancsmith/sqlf"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)
// scanSymbols reads result.Symbol values from the given query result set.
// The scan order must match the column order of the SELECT in store.Search.
// queryErr is threaded through so callers can pass Query results directly.
func scanSymbols(rows *sql.Rows, queryErr error) (symbols []result.Symbol, err error) {
	if queryErr != nil {
		return nil, queryErr
	}
	defer func() { err = basestore.CloseRows(rows, err) }()

	for rows.Next() {
		var symbol result.Symbol
		if err := rows.Scan(
			&symbol.Name,
			&symbol.Path,
			&symbol.Line,
			&symbol.Kind,
			&symbol.Language,
			&symbol.Parent,
			&symbol.ParentKind,
			&symbol.Signature,
			&symbol.Pattern,
			&symbol.FileLimited,
		); err != nil {
			return nil, err
		}

		symbols = append(symbols, symbol)
	}

	return symbols, nil
}
// Search queries the symbols table using conditions derived from args and
// returns up to args.First matching symbols.
func (s *store) Search(ctx context.Context, args types.SearchArgs) ([]result.Symbol, error) {
	return scanSymbols(s.Query(ctx, sqlf.Sprintf(
		`
			SELECT
				name,
				path,
				line,
				kind,
				language,
				parent,
				parentkind,
				signature,
				pattern,
				filelimited
			FROM symbols
			WHERE %s
			LIMIT %s
		`,
		sqlf.Join(makeSearchConditions(args), "AND"),
		args.First,
	)))
}
// makeSearchConditions translates args into WHERE-clause fragments: a name
// match for the query, a negated path match for the exclude pattern, and a
// path match per include pattern. Empty patterns contribute nothing; if no
// condition remains, a single TRUE is returned so the WHERE stays valid.
func makeSearchConditions(args types.SearchArgs) []*sqlf.Query {
	candidates := []*sqlf.Query{
		makeSearchCondition("name", args.Query, args.IsCaseSensitive),
		negate(makeSearchCondition("path", args.ExcludePattern, args.IsCaseSensitive)),
	}
	for _, includePattern := range args.IncludePatterns {
		candidates = append(candidates, makeSearchCondition("path", includePattern, args.IsCaseSensitive))
	}

	conditions := make([]*sqlf.Query, 0, len(candidates))
	for _, candidate := range candidates {
		if candidate == nil {
			continue
		}
		conditions = append(conditions, candidate)
	}

	if len(conditions) == 0 {
		// Ensure we have at least one condition
		conditions = append(conditions, sqlf.Sprintf("TRUE"))
	}

	return conditions
}
// makeSearchCondition builds one WHERE fragment matching column against the
// given regex. An empty regex yields nil (no condition). Anchored literal
// regexes (^literal$) are downgraded to plain equality; the case-insensitive
// variant compares against the precomputed "<column>lowercase" column
// (namelowercase / pathlowercase in the symbols table). Everything else
// falls through to the REGEXP operator, with (?i:...) wrapping for
// case-insensitive matches.
func makeSearchCondition(column string, regex string, isCaseSensitive bool) *sqlf.Query {
	if regex == "" {
		return nil
	}

	if symbolName, isExact, err := isLiteralEquality(regex); err == nil && isExact {
		if isCaseSensitive {
			return sqlf.Sprintf(column+" = %s", symbolName)
		} else {
			return sqlf.Sprintf(column+"lowercase = %s", strings.ToLower(symbolName))
		}
	}

	if !isCaseSensitive {
		regex = "(?i:" + regex + ")"
	}
	return sqlf.Sprintf(column+" REGEXP %s", regex)
}
// isLiteralEquality returns true if the given regex matches literal strings exactly.
// If so, this function returns true along with the literal search query. If not, this
// function returns false.
func isLiteralEquality(expr string) (string, bool, error) {
	re, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return "", false, errors.Wrap(err, "regexp/syntax.Parse")
	}

	// Only a three-part concat [begin-anchor, literal, end-anchor] qualifies.
	if re.Op != syntax.OpConcat || len(re.Sub) != 3 {
		return "", false, nil
	}

	startAnchored := re.Sub[0].Op == syntax.OpBeginLine || re.Sub[0].Op == syntax.OpBeginText
	endAnchored := re.Sub[2].Op == syntax.OpEndLine || re.Sub[2].Op == syntax.OpEndText
	if startAnchored && re.Sub[1].Op == syntax.OpLiteral && endAnchored {
		return string(re.Sub[1].Rune), true, nil
	}

	return "", false, nil
}
// negate wraps a non-nil condition in NOT; a nil condition stays nil.
func negate(query *sqlf.Query) *sqlf.Query {
	if query != nil {
		return sqlf.Sprintf("NOT %s", query)
	}
	return nil
}

View File

@ -0,0 +1,41 @@
package store
import "testing"
// TestIsLiteralEquality checks which regex patterns are recognized as exact
// literal matches and that the extracted literal is correct.
func TestIsLiteralEquality(t *testing.T) {
	for _, test := range []struct {
		regex           string
		noMatch         bool
		expectedLiteral string
	}{
		{regex: `^foo$`, expectedLiteral: "foo"},
		{regex: `^[f]oo$`, expectedLiteral: `foo`},
		{regex: `^\\$`, expectedLiteral: `\`},
		{regex: `^\$`, noMatch: true},
		{regex: `^\($`, expectedLiteral: `(`},
		{regex: `\\`, noMatch: true},
		{regex: `\$`, noMatch: true},
		{regex: `\(`, noMatch: true},
		{regex: `foo$`, noMatch: true},
		{regex: `(^foo$|^bar$)`, noMatch: true},
	} {
		literal, ok, err := isLiteralEquality(test.regex)
		if err != nil {
			t.Fatal(err)
		}
		if !ok {
			if !test.noMatch {
				// Fixed typo: "exected" -> "expected".
				t.Errorf("expected a match")
			}
		} else if test.noMatch {
			t.Errorf("did not expect a match")
		} else if literal != test.expectedLiteral {
			t.Errorf(
				"unexpected literal for %q. want=%q have=%q",
				test.regex,
				test.expectedLiteral,
				literal,
			)
		}
	}
}

View File

@ -0,0 +1,87 @@
package store
import (
"context"
"database/sql"
"github.com/inconshreveable/log15"
"github.com/jmoiron/sqlx"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/database/basestore"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)
// Store is the interface to a single SQLite symbols database file. It mixes
// lifecycle (Close/Transact/Done), schema management, meta-table bookkeeping,
// and symbol read/write operations.
type Store interface {
	Close() error
	Transact(ctx context.Context) (Store, error)
	Done(err error) error

	Search(ctx context.Context, args types.SearchArgs) ([]result.Symbol, error)

	CreateMetaTable(ctx context.Context) error
	GetCommit(ctx context.Context) (string, bool, error)
	InsertMeta(ctx context.Context, commitID string) error
	UpdateMeta(ctx context.Context, commitID string) error

	CreateSymbolsTable(ctx context.Context) error
	CreateSymbolIndexes(ctx context.Context) error
	DeletePaths(ctx context.Context, paths []string) error
	WriteSymbols(ctx context.Context, symbols <-chan result.Symbol) error
}
// store implements Store on top of a sqlx connection wrapped in a basestore
// for query/transaction helpers. db is retained so Close can reach the raw
// handle.
type store struct {
	db *sqlx.DB
	*basestore.Store
}
// NewStore opens (or creates) the SQLite database at dbFile using the
// regexp-enabled "sqlite3_with_regexp" driver (registered by database.Init).
func NewStore(dbFile string) (Store, error) {
	db, err := sqlx.Open("sqlite3_with_regexp", dbFile)
	if err != nil {
		return nil, err
	}

	return &store{
		db:    db,
		Store: basestore.NewWithDB(db, sql.TxOptions{}),
	}, nil
}
// Close closes the underlying database handle.
func (s *store) Close() error {
	return s.db.Close()
}
// Transact begins a transaction and returns a Store bound to it. The same
// db handle is retained so Close still works on the transactional store.
func (s *store) Transact(ctx context.Context) (Store, error) {
	tx, err := s.Store.Transact(ctx)
	if err != nil {
		return nil, err
	}

	return &store{db: s.db, Store: tx}, nil
}
// WithSQLiteStore opens the database at dbFile, invokes callback with it,
// and always closes the store afterwards (close failures are logged, not
// returned).
func WithSQLiteStore(dbFile string, callback func(db Store) error) error {
	db, err := NewStore(dbFile)
	if err != nil {
		return err
	}
	defer func() {
		if err := db.Close(); err != nil {
			log15.Error("Failed to close database", "filename", dbFile, "error", err)
		}
	}()

	return callback(db)
}
// WithSQLiteStoreTransaction opens the SQLite database at dbFile, runs
// callback inside a transaction, and commits or rolls back (via Done)
// depending on the error callback returns. The database is closed before
// this function returns.
func WithSQLiteStoreTransaction(ctx context.Context, dbFile string, callback func(db Store) error) error {
	return WithSQLiteStore(dbFile, func(db Store) (err error) {
		tx, err := db.Transact(ctx)
		if err != nil {
			return err
		}
		// Done commits on nil err and rolls back otherwise; it rewrites the
		// named return value so the callback's error is preserved.
		defer func() { err = tx.Done(err) }()

		return callback(tx)
	})
}

View File

@ -0,0 +1,107 @@
package store
import (
"context"
"strings"
"github.com/keegancsmith/sqlf"
"github.com/sourcegraph/sourcegraph/internal/database/batch"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)
// CreateSymbolsTable creates the symbols table if it does not already exist.
// The *lowercase columns duplicate name/path in lower case so that
// case-insensitive searches can be served from an index.
func (s *store) CreateSymbolsTable(ctx context.Context) error {
	return s.Exec(ctx, sqlf.Sprintf(`
		CREATE TABLE IF NOT EXISTS symbols (
			name VARCHAR(256) NOT NULL,
			namelowercase VARCHAR(256) NOT NULL,
			path VARCHAR(4096) NOT NULL,
			pathlowercase VARCHAR(4096) NOT NULL,
			line INT NOT NULL,
			kind VARCHAR(255) NOT NULL,
			language VARCHAR(255) NOT NULL,
			parent VARCHAR(255) NOT NULL,
			parentkind VARCHAR(255) NOT NULL,
			signature VARCHAR(255) NOT NULL,
			pattern VARCHAR(255) NOT NULL,
			filelimited BOOLEAN NOT NULL
		)
	`))
}
// CreateSymbolIndexes adds the lookup indexes used by symbol search queries.
// It is called once, after the bulk insert of symbols has completed.
func (s *store) CreateSymbolIndexes(ctx context.Context) error {
	for _, column := range []string{"name", "path", "namelowercase", "pathlowercase"} {
		query := `CREATE INDEX idx_` + column + ` ON symbols(` + column + `)`
		if err := s.Exec(ctx, sqlf.Sprintf(query)); err != nil {
			return err
		}
	}
	return nil
}
// DeletePaths removes every symbol row whose path is in paths. A nil or empty
// slice is a no-op.
func (s *store) DeletePaths(ctx context.Context, paths []string) error {
	if len(paths) == 0 {
		return nil
	}

	queries := make([]*sqlf.Query, len(paths))
	for i, p := range paths {
		queries[i] = sqlf.Sprintf("%s", p)
	}

	return s.Exec(ctx, sqlf.Sprintf(`DELETE FROM symbols WHERE path IN (%s)`, sqlf.Join(queries, ",")))
}
// WriteSymbols bulk-inserts all symbols received on the symbols channel into
// the symbols table. A goroutine converts each symbol into a row value slice;
// batch.InsertValues consumes the rows channel until it is closed.
//
// NOTE(review): if InsertValues can stop reading rows early on error, the
// producer goroutine below would block forever on its send — confirm that
// InsertValues drains its input channel on all paths.
func (s *store) WriteSymbols(ctx context.Context, symbols <-chan result.Symbol) (err error) {
	rows := make(chan []interface{})
	go func() {
		defer close(rows)
		for symbol := range symbols {
			rows <- []interface{}{
				symbol.Name,
				strings.ToLower(symbol.Name), // namelowercase: serves case-insensitive name lookups
				symbol.Path,
				strings.ToLower(symbol.Path), // pathlowercase: serves case-insensitive path lookups
				symbol.Line,
				symbol.Kind,
				symbol.Language,
				symbol.Parent,
				symbol.ParentKind,
				symbol.Signature,
				symbol.Pattern,
				symbol.FileLimited,
			}
		}
	}()
	return batch.InsertValues(
		ctx,
		s.Handle().DB(),
		"symbols",
		batch.MaxNumSQLiteParameters,
		[]string{
			"name",
			"namelowercase",
			"path",
			"pathlowercase",
			"line",
			"kind",
			"language",
			"parent",
			"parentkind",
			"signature",
			"pattern",
			"filelimited",
		},
		rows,
	)
}

View File

@ -0,0 +1,53 @@
package writer
import (
"context"
"fmt"
"github.com/cockroachdb/errors"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
)
// CachedDatabaseWriter provides access to a symbols database file for a
// repo@commit, reusing a disk-cached copy when one exists.
type CachedDatabaseWriter interface {
	GetOrCreateDatabaseFile(ctx context.Context, args types.SearchArgs) (string, error)
}

// cachedDatabaseWriter wraps a DatabaseWriter with a diskcache so databases
// are only rebuilt on cache misses.
type cachedDatabaseWriter struct {
	databaseWriter DatabaseWriter
	cache          *diskcache.Store
}

// NewCachedDatabaseWriter creates a CachedDatabaseWriter from the given
// writer and disk cache.
func NewCachedDatabaseWriter(databaseWriter DatabaseWriter, cache *diskcache.Store) CachedDatabaseWriter {
	return &cachedDatabaseWriter{
		databaseWriter: databaseWriter,
		cache:          cache,
	}
}
// The version of the symbols database schema. This is included in the database filenames to prevent a
// newer version of the symbols service from attempting to read from a database created by an older and
// likely incompatible symbols service. Increment this when you change the database schema.
const symbolsDBVersion = 4

// GetOrCreateDatabaseFile returns the path of the cached symbols database for
// the given repo@commit, populating the cache entry via the underlying
// DatabaseWriter on a miss.
func (w *cachedDatabaseWriter) GetOrCreateDatabaseFile(ctx context.Context, args types.SearchArgs) (string, error) {
	// The schema version is part of the key so an old on-disk database is
	// treated as a miss after a schema change.
	key := []string{
		string(args.Repo),
		fmt.Sprintf("%s-%d", args.CommitID, symbolsDBVersion),
	}
	cacheFile, err := w.cache.OpenWithPath(ctx, key, func(fetcherCtx context.Context, tempDBFile string) error {
		if err := w.databaseWriter.WriteDBFile(fetcherCtx, args, tempDBFile); err != nil {
			return errors.Wrap(err, "databaseWriter.WriteDBFile")
		}
		return nil
	})
	if err != nil {
		return "", err
	}
	defer cacheFile.File.Close()

	// NOTE(review): we close our handle and return only the path; a
	// concurrent cache eviction could remove the file before the caller
	// reopens it — confirm the diskcache eviction policy tolerates this.
	//
	// Return an explicit nil: the previous code returned the (necessarily
	// nil) err variable here, which obscured the success path.
	return cacheFile.File.Name(), nil
}

View File

@ -0,0 +1,59 @@
package writer
import (
"io"
"os"
"path/filepath"
"strings"
"time"
)
// findNewestFile lists the directory and returns the newest file's path, prepended with dir.
func findNewestFile(dir string) (string, error) {
files, err := os.ReadDir(dir)
if err != nil {
return "", nil
}
var mostRecentTime time.Time
newest := ""
for _, fi := range files {
if fi.Type().IsRegular() {
if !strings.HasSuffix(fi.Name(), ".zip") {
continue
}
info, err := fi.Info()
if err != nil {
return "", err
}
if newest == "" || info.ModTime().After(mostRecentTime) {
mostRecentTime = info.ModTime()
newest = filepath.Join(dir, fi.Name())
}
}
}
return newest, nil
}
func copyFile(from string, to string) error {
fromFile, err := os.Open(from)
if err != nil {
return err
}
defer fromFile.Close()
toFile, err := os.OpenFile(to, os.O_RDWR|os.O_CREATE, 0666)
if err != nil {
return err
}
defer toFile.Close()
if _, err := io.Copy(toFile, fromFile); err != nil {
return err
}
return nil
}

View File

@ -0,0 +1,156 @@
package writer
import (
"context"
"path/filepath"
"github.com/cockroachdb/errors"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/store"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)
// DatabaseWriter builds a SQLite symbols database file for a repo@commit.
type DatabaseWriter interface {
	WriteDBFile(ctx context.Context, args types.SearchArgs, tempDBFile string) error
}

// databaseWriter parses repository archives fetched from gitserver and writes
// the resulting symbols into SQLite files under path.
type databaseWriter struct {
	path            string
	gitserverClient gitserver.GitserverClient
	parser          parser.Parser
}

// NewDatabaseWriter creates a DatabaseWriter that stores databases under path.
func NewDatabaseWriter(
	path string,
	gitserverClient gitserver.GitserverClient,
	parser parser.Parser,
) DatabaseWriter {
	return &databaseWriter{
		path:            path,
		gitserverClient: gitserverClient,
		parser:          parser,
	}
}
// WriteDBFile writes a complete symbols database for args to dbFile. When a
// database for an older commit of the same repo exists, it first attempts a
// cheaper incremental update based on the git diff; otherwise (or when the
// incremental path declines) it performs a full build.
func (w *databaseWriter) WriteDBFile(ctx context.Context, args types.SearchArgs, dbFile string) error {
	newestDBFile, oldCommit, ok, err := w.getNewestCommit(ctx, args)
	if err != nil {
		return err
	}
	if ok {
		done, err := w.writeFileIncrementally(ctx, args, dbFile, newestDBFile, oldCommit)
		if err != nil || done {
			return err
		}
	}

	return w.writeDBFile(ctx, args, dbFile)
}
// getNewestCommit locates the newest existing database file for args.Repo and
// reads the commit it was built for. ok reports whether such a commit was
// found; ("", "", false, nil) means there is no usable previous database.
func (w *databaseWriter) getNewestCommit(ctx context.Context, args types.SearchArgs) (dbFile string, commit string, ok bool, err error) {
	newest, err := findNewestFile(filepath.Join(w.path, diskcache.EncodeKeyComponent(string(args.Repo))))
	if err != nil || newest == "" {
		return "", "", false, err
	}

	// commit and ok are named results assigned from inside the closure.
	err = store.WithSQLiteStore(newest, func(db store.Store) (err error) {
		if commit, ok, err = db.GetCommit(ctx); err != nil {
			return errors.Wrap(err, "store.GetCommit")
		}
		return nil
	})

	return newest, commit, ok, err
}
// writeDBFile builds a brand-new symbols database at dbFile: it parses the
// whole tree at args.CommitID (paths == nil) and writes schema, commit
// metadata, symbols, and indexes inside a single transaction.
func (w *databaseWriter) writeDBFile(ctx context.Context, args types.SearchArgs, dbFile string) error {
	return w.parseAndWriteInTransaction(ctx, args, nil, dbFile, func(tx store.Store, symbols <-chan result.Symbol) error {
		if err := tx.CreateMetaTable(ctx); err != nil {
			return errors.Wrap(err, "store.CreateMetaTable")
		}
		if err := tx.CreateSymbolsTable(ctx); err != nil {
			return errors.Wrap(err, "store.CreateSymbolsTable")
		}
		if err := tx.InsertMeta(ctx, string(args.CommitID)); err != nil {
			return errors.Wrap(err, "store.InsertMeta")
		}
		if err := tx.WriteSymbols(ctx, symbols); err != nil {
			return errors.Wrap(err, "store.WriteSymbols")
		}
		// Indexes are created after the bulk insert so writes are not slowed
		// down by incremental index maintenance.
		if err := tx.CreateSymbolIndexes(ctx); err != nil {
			return errors.Wrap(err, "store.CreateSymbolIndexes")
		}
		return nil
	})
}
// The maximum number of paths when doing incremental indexing. Diffs with more paths than this will
// not be incrementally indexed, and instead we will process all symbols.
const maxTotalPaths = 999

// The maximum sum of bytes in paths in a diff when doing incremental indexing. Diffs bigger than this
// will not be incrementally indexed, and instead we will process all symbols. Without this limit, we
// could hit HTTP 431 (header fields too large) when sending the list of paths `git archive paths...`.
// The actual limit is somewhere between 372KB and 450KB, and we want to be well under that.
// 100KB seems safe.
const maxTotalPathsLength = 100000

// writeFileIncrementally copies the newest existing database to dbFile and
// applies only the diff between oldCommit and args.CommitID. The bool result
// reports whether incremental indexing was performed; (false, nil) tells the
// caller to fall back to a full build.
func (w *databaseWriter) writeFileIncrementally(ctx context.Context, args types.SearchArgs, dbFile, newestDBFile, oldCommit string) (bool, error) {
	changes, err := w.gitserverClient.GitDiff(ctx, args.Repo, api.CommitID(oldCommit), args.CommitID)
	if err != nil {
		return false, errors.Wrap(err, "gitserverClient.GitDiff")
	}

	// Paths to re-parse
	addedOrModifiedPaths := append(changes.Added, changes.Modified...)

	// Paths to modify in the database. This second append may share backing
	// storage with addedOrModifiedPaths but only writes past its length, so
	// addedOrModifiedPaths itself is unaffected.
	addedModifiedOrDeletedPaths := append(addedOrModifiedPaths, changes.Deleted...)

	// Too many entries
	if len(addedModifiedOrDeletedPaths) > maxTotalPaths {
		return false, nil
	}

	totalPathsLength := 0
	for _, path := range addedModifiedOrDeletedPaths {
		totalPathsLength += len(path)
	}
	// Argument lists too long
	if totalPathsLength > maxTotalPathsLength {
		return false, nil
	}

	if err := copyFile(newestDBFile, dbFile); err != nil {
		return false, err
	}

	return true, w.parseAndWriteInTransaction(ctx, args, addedOrModifiedPaths, dbFile, func(tx store.Store, symbols <-chan result.Symbol) error {
		if err := tx.UpdateMeta(ctx, string(args.CommitID)); err != nil {
			return errors.Wrap(err, "store.UpdateMeta")
		}
		if err := tx.DeletePaths(ctx, addedModifiedOrDeletedPaths); err != nil {
			return errors.Wrap(err, "store.DeletePaths")
		}
		if err := tx.WriteSymbols(ctx, symbols); err != nil {
			return errors.Wrap(err, "store.WriteSymbols")
		}
		return nil
	})
}
// parseAndWriteInTransaction starts parsing the given paths (nil means the
// whole tree) of args and runs callback with the resulting symbol stream
// inside a SQLite transaction on dbFile.
func (w *databaseWriter) parseAndWriteInTransaction(ctx context.Context, args types.SearchArgs, paths []string, dbFile string, callback func(tx store.Store, symbols <-chan result.Symbol) error) error {
	symbols, err := w.parser.Parse(ctx, args, paths)
	if err != nil {
		return errors.Wrap(err, "parser.Parse")
	}

	return store.WithSQLiteStoreTransaction(ctx, dbFile, func(tx store.Store) error {
		return callback(tx, symbols)
	})
}

View File

@ -0,0 +1,3 @@
package fetcher
//go:generate ../../../../dev/mockgen.sh github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver -i GitserverClient -o mock_iface_test.go

View File

@ -0,0 +1,304 @@
// Code generated by go-mockgen 1.1.2; DO NOT EDIT.
package fetcher
import (
"context"
"io"
"sync"
gitserver "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
api "github.com/sourcegraph/sourcegraph/internal/api"
)
// MockGitserverClient is a mock implementation of the GitserverClient
// interface (from the package
// github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver) used
// for unit testing.
type MockGitserverClient struct {
// FetchTarFunc is an instance of a mock function object controlling the
// behavior of the method FetchTar.
FetchTarFunc *GitserverClientFetchTarFunc
// GitDiffFunc is an instance of a mock function object controlling the
// behavior of the method GitDiff.
GitDiffFunc *GitserverClientGitDiffFunc
}
// NewMockGitserverClient creates a new mock of the GitserverClient
// interface. All methods return zero values for all results, unless
// overwritten.
func NewMockGitserverClient() *MockGitserverClient {
return &MockGitserverClient{
FetchTarFunc: &GitserverClientFetchTarFunc{
defaultHook: func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
return nil, nil
},
},
GitDiffFunc: &GitserverClientGitDiffFunc{
defaultHook: func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
return gitserver.Changes{}, nil
},
},
}
}
// NewStrictMockGitserverClient creates a new mock of the GitserverClient
// interface. All methods panic on invocation, unless overwritten.
func NewStrictMockGitserverClient() *MockGitserverClient {
return &MockGitserverClient{
FetchTarFunc: &GitserverClientFetchTarFunc{
defaultHook: func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
panic("unexpected invocation of MockGitserverClient.FetchTar")
},
},
GitDiffFunc: &GitserverClientGitDiffFunc{
defaultHook: func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
panic("unexpected invocation of MockGitserverClient.GitDiff")
},
},
}
}
// NewMockGitserverClientFrom creates a new mock of the MockGitserverClient
// interface. All methods delegate to the given implementation, unless
// overwritten.
func NewMockGitserverClientFrom(i gitserver.GitserverClient) *MockGitserverClient {
return &MockGitserverClient{
FetchTarFunc: &GitserverClientFetchTarFunc{
defaultHook: i.FetchTar,
},
GitDiffFunc: &GitserverClientGitDiffFunc{
defaultHook: i.GitDiff,
},
}
}
// GitserverClientFetchTarFunc describes the behavior when the FetchTar
// method of the parent MockGitserverClient instance is invoked.
type GitserverClientFetchTarFunc struct {
defaultHook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)
hooks []func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)
history []GitserverClientFetchTarFuncCall
mutex sync.Mutex
}
// FetchTar delegates to the next hook function in the queue and stores the
// parameter and result values of this invocation.
func (m *MockGitserverClient) FetchTar(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 []string) (io.ReadCloser, error) {
r0, r1 := m.FetchTarFunc.nextHook()(v0, v1, v2, v3)
m.FetchTarFunc.appendCall(GitserverClientFetchTarFuncCall{v0, v1, v2, v3, r0, r1})
return r0, r1
}
// SetDefaultHook sets function that is called when the FetchTar method of
// the parent MockGitserverClient instance is invoked and the hook queue is
// empty.
func (f *GitserverClientFetchTarFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)) {
f.defaultHook = hook
}
// PushHook adds a function to the end of hook queue. Each invocation of the
// FetchTar method of the parent MockGitserverClient instance invokes the
// hook at the front of the queue and discards it. After the queue is empty,
// the default hook function is invoked for any future action.
func (f *GitserverClientFetchTarFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)) {
f.mutex.Lock()
f.hooks = append(f.hooks, hook)
f.mutex.Unlock()
}
// SetDefaultReturn calls SetDefaultDefaultHook with a function that returns
// the given values.
func (f *GitserverClientFetchTarFunc) SetDefaultReturn(r0 io.ReadCloser, r1 error) {
f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
return r0, r1
})
}
// PushReturn calls PushDefaultHook with a function that returns the given
// values.
func (f *GitserverClientFetchTarFunc) PushReturn(r0 io.ReadCloser, r1 error) {
f.PushHook(func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
return r0, r1
})
}
func (f *GitserverClientFetchTarFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
f.mutex.Lock()
defer f.mutex.Unlock()
if len(f.hooks) == 0 {
return f.defaultHook
}
hook := f.hooks[0]
f.hooks = f.hooks[1:]
return hook
}
func (f *GitserverClientFetchTarFunc) appendCall(r0 GitserverClientFetchTarFuncCall) {
f.mutex.Lock()
f.history = append(f.history, r0)
f.mutex.Unlock()
}
// History returns a sequence of GitserverClientFetchTarFuncCall objects
// describing the invocations of this function.
func (f *GitserverClientFetchTarFunc) History() []GitserverClientFetchTarFuncCall {
f.mutex.Lock()
history := make([]GitserverClientFetchTarFuncCall, len(f.history))
copy(history, f.history)
f.mutex.Unlock()
return history
}
// GitserverClientFetchTarFuncCall is an object that describes an invocation
// of method FetchTar on an instance of MockGitserverClient.
type GitserverClientFetchTarFuncCall struct {
// Arg0 is the value of the 1st argument passed to this method
// invocation.
Arg0 context.Context
// Arg1 is the value of the 2nd argument passed to this method
// invocation.
Arg1 api.RepoName
// Arg2 is the value of the 3rd argument passed to this method
// invocation.
Arg2 api.CommitID
// Arg3 is the value of the 4th argument passed to this method
// invocation.
Arg3 []string
// Result0 is the value of the 1st result returned from this method
// invocation.
Result0 io.ReadCloser
// Result1 is the value of the 2nd result returned from this method
// invocation.
Result1 error
}
// Args returns an interface slice containing the arguments of this
// invocation.
func (c GitserverClientFetchTarFuncCall) Args() []interface{} {
return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3}
}
// Results returns an interface slice containing the results of this
// invocation.
func (c GitserverClientFetchTarFuncCall) Results() []interface{} {
return []interface{}{c.Result0, c.Result1}
}
// GitserverClientGitDiffFunc describes the behavior when the GitDiff method
// of the parent MockGitserverClient instance is invoked.
type GitserverClientGitDiffFunc struct {
defaultHook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)
hooks []func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)
history []GitserverClientGitDiffFuncCall
mutex sync.Mutex
}
// GitDiff delegates to the next hook function in the queue and stores the
// parameter and result values of this invocation.
func (m *MockGitserverClient) GitDiff(v0 context.Context, v1 api.RepoName, v2 api.CommitID, v3 api.CommitID) (gitserver.Changes, error) {
r0, r1 := m.GitDiffFunc.nextHook()(v0, v1, v2, v3)
m.GitDiffFunc.appendCall(GitserverClientGitDiffFuncCall{v0, v1, v2, v3, r0, r1})
return r0, r1
}
// SetDefaultHook sets function that is called when the GitDiff method of
// the parent MockGitserverClient instance is invoked and the hook queue is
// empty.
func (f *GitserverClientGitDiffFunc) SetDefaultHook(hook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)) {
f.defaultHook = hook
}
// PushHook adds a function to the end of hook queue. Each invocation of the
// GitDiff method of the parent MockGitserverClient instance invokes the
// hook at the front of the queue and discards it. After the queue is empty,
// the default hook function is invoked for any future action.
func (f *GitserverClientGitDiffFunc) PushHook(hook func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error)) {
f.mutex.Lock()
f.hooks = append(f.hooks, hook)
f.mutex.Unlock()
}
// SetDefaultReturn calls SetDefaultDefaultHook with a function that returns
// the given values.
func (f *GitserverClientGitDiffFunc) SetDefaultReturn(r0 gitserver.Changes, r1 error) {
f.SetDefaultHook(func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
return r0, r1
})
}
// PushReturn calls PushDefaultHook with a function that returns the given
// values.
func (f *GitserverClientGitDiffFunc) PushReturn(r0 gitserver.Changes, r1 error) {
f.PushHook(func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
return r0, r1
})
}
func (f *GitserverClientGitDiffFunc) nextHook() func(context.Context, api.RepoName, api.CommitID, api.CommitID) (gitserver.Changes, error) {
f.mutex.Lock()
defer f.mutex.Unlock()
if len(f.hooks) == 0 {
return f.defaultHook
}
hook := f.hooks[0]
f.hooks = f.hooks[1:]
return hook
}
func (f *GitserverClientGitDiffFunc) appendCall(r0 GitserverClientGitDiffFuncCall) {
f.mutex.Lock()
f.history = append(f.history, r0)
f.mutex.Unlock()
}
// History returns a sequence of GitserverClientGitDiffFuncCall objects
// describing the invocations of this function.
func (f *GitserverClientGitDiffFunc) History() []GitserverClientGitDiffFuncCall {
f.mutex.Lock()
history := make([]GitserverClientGitDiffFuncCall, len(f.history))
copy(history, f.history)
f.mutex.Unlock()
return history
}
// GitserverClientGitDiffFuncCall is an object that describes an invocation
// of method GitDiff on an instance of MockGitserverClient.
type GitserverClientGitDiffFuncCall struct {
// Arg0 is the value of the 1st argument passed to this method
// invocation.
Arg0 context.Context
// Arg1 is the value of the 2nd argument passed to this method
// invocation.
Arg1 api.RepoName
// Arg2 is the value of the 3rd argument passed to this method
// invocation.
Arg2 api.CommitID
// Arg3 is the value of the 4th argument passed to this method
// invocation.
Arg3 api.CommitID
// Result0 is the value of the 1st result returned from this method
// invocation.
Result0 gitserver.Changes
// Result1 is the value of the 2nd result returned from this method
// invocation.
Result1 error
}
// Args returns an interface slice containing the arguments of this
// invocation.
func (c GitserverClientGitDiffFuncCall) Args() []interface{} {
return []interface{}{c.Arg0, c.Arg1, c.Arg2, c.Arg3}
}
// Results returns an interface slice containing the results of this
// invocation.
func (c GitserverClientGitDiffFuncCall) Results() []interface{} {
return []interface{}{c.Result0, c.Result1}
}

View File

@ -0,0 +1,53 @@
package fetcher
import (
"fmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/sourcegraph/sourcegraph/internal/metrics"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// operations bundles the metrics and traced operations of the repository
// fetcher.
type operations struct {
	fetching               prometheus.Gauge // number of in-flight archive fetches
	fetchQueueSize         prometheus.Gauge // fetches waiting on the concurrency semaphore
	fetchRepositoryArchive *observation.Operation
}

// newOperations registers the fetcher's gauges and RED metrics with the given
// observation context.
func newOperations(observationContext *observation.Context) *operations {
	fetching := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "src",
		Name:      "codeintel_symbols_fetching",
		Help:      "The number of fetches currently running.",
	})
	observationContext.Registerer.MustRegister(fetching)

	fetchQueueSize := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "src",
		Name:      "codeintel_symbols_fetch_queue_size",
		Help:      "The number of fetch jobs enqueued.",
	})
	observationContext.Registerer.MustRegister(fetchQueueSize)

	operationMetrics := metrics.NewREDMetrics(
		observationContext.Registerer,
		"codeintel_symbols_repository_fetcher",
		metrics.WithLabels("op"),
		metrics.WithCountHelp("Total number of method invocations."),
	)

	op := func(name string) *observation.Operation {
		// NOTE(review): the op name says "parser" while the RED metrics use
		// "repository_fetcher" — looks like a copy-paste; confirm before
		// renaming, since dashboards/alerts may key on the current name.
		return observationContext.Operation(observation.Op{
			Name:              fmt.Sprintf("codeintel.symbols.parser.%s", name),
			MetricLabelValues: []string{name},
			Metrics:           operationMetrics,
		})
	}

	return &operations{
		fetching:               fetching,
		fetchQueueSize:         fetchQueueSize,
		fetchRepositoryArchive: op("FetchRepositoryArchive"),
	}
}

View File

@ -0,0 +1,196 @@
package fetcher
import (
"archive/tar"
"bytes"
"context"
"io"
"path"
"strings"
"github.com/cockroachdb/errors"
"github.com/opentracing/opentracing-go/log"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// RepositoryFetcher streams the contents of a repository archive as parse
// requests.
type RepositoryFetcher interface {
	FetchRepositoryArchive(ctx context.Context, args types.SearchArgs, paths []string) <-chan parseRequestOrError
}

// repositoryFetcher fetches tar archives from gitserver, bounded by a
// semaphore of maximumConcurrentFetches slots.
type repositoryFetcher struct {
	gitserverClient gitserver.GitserverClient
	fetchSem        chan int // counting semaphore limiting concurrent fetches
	operations      *operations
}

// ParseRequest is one file to be parsed: its path in the repository and its
// full contents.
type ParseRequest struct {
	Path string
	Data []byte
}

// parseRequestOrError carries either a ParseRequest or a terminal fetch error
// on the output channel.
type parseRequestOrError struct {
	ParseRequest ParseRequest
	Err          error
}

// NewRepositoryFetcher creates a RepositoryFetcher that runs at most
// maximumConcurrentFetches archive fetches at a time.
func NewRepositoryFetcher(gitserverClient gitserver.GitserverClient, maximumConcurrentFetches int, observationContext *observation.Context) RepositoryFetcher {
	return &repositoryFetcher{
		gitserverClient: gitserverClient,
		fetchSem:        make(chan int, maximumConcurrentFetches),
		operations:      newOperations(observationContext),
	}
}
// FetchRepositoryArchive asynchronously fetches the given paths of the
// repo@commit in args and returns a channel of parse requests. On failure a
// single element with non-nil Err is sent. The channel is closed once
// fetching completes.
func (f *repositoryFetcher) FetchRepositoryArchive(ctx context.Context, args types.SearchArgs, paths []string) <-chan parseRequestOrError {
	requestCh := make(chan parseRequestOrError)

	go func() {
		defer close(requestCh)

		if err := f.fetchRepositoryArchive(ctx, args, paths, func(request ParseRequest) {
			requestCh <- parseRequestOrError{ParseRequest: request}
		}); err != nil {
			requestCh <- parseRequestOrError{Err: err}
		}
	}()

	return requestCh
}
// fetchRepositoryArchive streams a tar archive of repo@commit (restricted to
// paths when non-empty) from gitserver and invokes callback once per
// parseable file.
func (f *repositoryFetcher) fetchRepositoryArchive(ctx context.Context, args types.SearchArgs, paths []string, callback func(request ParseRequest)) (err error) {
	ctx, traceLog, endObservation := f.operations.fetchRepositoryArchive.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
		log.String("repo", string(args.Repo)),
		log.String("commitID", string(args.CommitID)),
		log.Int("paths", len(paths)),
		log.String("paths", strings.Join(paths, ":")),
	}})
	defer endObservation(1, observation.Args{})

	// Bound the number of concurrent archive fetches across all requests.
	onDefer, err := f.limitConcurrentFetches(ctx)
	if err != nil {
		return err
	}
	defer onDefer()
	traceLog(log.Event("acquired fetch semaphore"))

	f.operations.fetching.Inc()
	defer f.operations.fetching.Dec()

	rc, err := f.gitserverClient.FetchTar(ctx, args.Repo, args.CommitID, paths)
	if err != nil {
		return errors.Wrap(err, "gitserverClient.FetchTar")
	}
	defer rc.Close()

	return readTar(ctx, tar.NewReader(rc), callback, traceLog)
}
// limitConcurrentFetches blocks until a slot in the fetch semaphore is
// available (or ctx is cancelled) and returns a function that releases the
// slot. The queue-size gauge covers the time spent waiting.
func (f *repositoryFetcher) limitConcurrentFetches(ctx context.Context) (func(), error) {
	f.operations.fetchQueueSize.Inc()
	defer f.operations.fetchQueueSize.Dec()

	select {
	case f.fetchSem <- 1:
		return func() { <-f.fetchSem }, nil
	case <-ctx.Done():
		return func() {}, ctx.Err()
	}
}
// readTar iterates over every entry of the tar stream, invoking callback for
// each parseable file. It stops at EOF, on context cancellation, or on the
// first error. Previously the error returned by readTarHeader was silently
// discarded, so read failures produced truncated results instead of an error.
func readTar(ctx context.Context, tarReader *tar.Reader, callback func(request ParseRequest), traceLog observation.TraceLogger) error {
	for {
		if err := ctx.Err(); err != nil {
			return err
		}

		tarHeader, err := tarReader.Next()
		if err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}

		if err := readTarHeader(tarReader, tarHeader, callback, traceLog); err != nil {
			return err
		}
	}
}
// readTarHeader reads the contents of a single tar entry and, when it looks
// like parseable text, passes it to callback as a ParseRequest. Empty files
// and files whose first bytes contain a NUL (assumed binary) are skipped.
func readTarHeader(tarReader *tar.Reader, tarHeader *tar.Header, callback func(request ParseRequest), traceLog observation.TraceLogger) error {
	if !shouldParse(tarHeader) {
		return nil
	}

	// 32KB (32*1024) — the same buffer size io.Copy uses internally.
	buffer := make([]byte, 32*1024)

	traceLog(log.Event("reading tar header prefix"))

	// Read first chunk of tar header contents
	n, err := tarReader.Read(buffer)
	if err != nil && err != io.EOF {
		return err
	}
	traceLog(log.Int("n", n))

	if n == 0 {
		// Empty file, nothing to parse
		return nil
	}

	// Check to see if first 256 bytes contain a 0x00. If so, we'll assume that
	// the file is binary and skip parsing. Otherwise, we'll have some non-zero
	// contents that passed our filters above to parse.
	m := 256
	if n < m {
		m = n
	}
	if bytes.IndexByte(buffer[:m], 0x00) >= 0 {
		return nil
	}

	// Copy buffer into appropriately-sized slice for return
	data := make([]byte, int(tarHeader.Size))
	copy(data, buffer[:n])

	if n < int(tarHeader.Size) {
		traceLog(log.Event("reading remaining tar header content"))

		// Read the remaining contents
		if _, err := io.ReadFull(tarReader, data[n:]); err != nil {
			return err
		}
		traceLog(log.Int("n", int(tarHeader.Size)-n))
	}

	request := ParseRequest{Path: tarHeader.Name, Data: data}
	callback(request)
	return nil
}
// maxFileSize (512KB) is the maximum size of files we attempt to parse.
const maxFileSize = 1 << 19
func shouldParse(tarHeader *tar.Header) bool {
// We do not search large files
if tarHeader.Size > maxFileSize {
return false
}
// We only care about files
if tarHeader.Typeflag != tar.TypeReg && tarHeader.Typeflag != tar.TypeRegA {
return false
}
// JSON files are symbol-less
if path.Ext(tarHeader.Name) == ".json" {
return false
}
return true
}

View File

@ -0,0 +1,82 @@
package fetcher
import (
"context"
"strings"
"testing"
"github.com/google/go-cmp/cmp"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// TestRepositoryFetcher verifies that the fetcher yields parse requests for
// exactly the parseable files in the archive (JSON and oversized files are
// filtered out), both for a full-tree fetch and for an explicit path list.
func TestRepositoryFetcher(t *testing.T) {
	validParseRequests := map[string]string{
		"a.txt": strings.Repeat("payload a", 1<<8),
		"b.txt": strings.Repeat("payload b", 1<<9),
		"c.txt": strings.Repeat("payload c", 1<<10),
		"d.txt": strings.Repeat("payload d", 1<<11),
		"e.txt": strings.Repeat("payload e", 1<<12),
		"f.txt": strings.Repeat("payload f", 1<<13),
		"g.txt": strings.Repeat("payload g", 1<<14),
	}
	tarContents := map[string]string{}
	for name, content := range validParseRequests {
		tarContents[name] = content
	}

	// JSON is ignored
	tarContents["ignored.json"] = "{}"
	// Large files are ignored
	tarContents["payloads.txt"] = strings.Repeat("oversized load", maxFileSize)

	gitserverClient := NewMockGitserverClient()
	gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(tarContents))

	repositoryFetcher := NewRepositoryFetcher(gitserverClient, 15, &observation.TestContext)
	args := types.SearchArgs{Repo: api.RepoName("foo"), CommitID: api.CommitID("deadbeef")}

	t.Run("all paths", func(t *testing.T) {
		// nil paths means "fetch the whole tree".
		paths := []string(nil)
		ch := repositoryFetcher.FetchRepositoryArchive(context.Background(), args, paths)
		parseRequests := consumeParseRequests(t, ch)

		expectedParseRequests := validParseRequests
		if diff := cmp.Diff(expectedParseRequests, parseRequests); diff != "" {
			t.Errorf("unexpected parse requests (-want +got):\n%s", diff)
		}
	})

	t.Run("selected paths", func(t *testing.T) {
		paths := []string{"a.txt", "b.txt", "c.txt"}
		ch := repositoryFetcher.FetchRepositoryArchive(context.Background(), args, paths)
		parseRequests := consumeParseRequests(t, ch)

		expectedParseRequests := map[string]string{
			"a.txt": validParseRequests["a.txt"],
			"b.txt": validParseRequests["b.txt"],
			"c.txt": validParseRequests["c.txt"],
		}
		if diff := cmp.Diff(expectedParseRequests, parseRequests); diff != "" {
			t.Errorf("unexpected parse requests (-want +got):\n%s", diff)
		}
	})
}
// consumeParseRequests drains ch into a path -> contents map, failing the
// test immediately on any fetch error.
func consumeParseRequests(t *testing.T, ch <-chan parseRequestOrError) map[string]string {
	parseRequests := map[string]string{}
	for v := range ch {
		if v.Err != nil {
			t.Fatalf("unexpected fetch error: %s", v.Err)
		}

		parseRequests[v.ParseRequest.Path] = string(v.ParseRequest.Data)
	}

	return parseRequests
}

View File

@ -0,0 +1,104 @@
package gitserver
import (
"bytes"
"context"
"fmt"
"io"
"strings"
"github.com/cockroachdb/errors"
"github.com/opentracing/opentracing-go/log"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/gitserver"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/vcs/git"
)
type GitserverClient interface {
// FetchTar returns an io.ReadCloser to a tar archive of a repository at the specified Git
// remote URL and commit ID. If the error implements "BadRequest() bool", it will be used to
// determine if the error is a bad request (eg invalid repo).
FetchTar(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)
// GitDiff returns the paths that have changed between two commits.
GitDiff(context.Context, api.RepoName, api.CommitID, api.CommitID) (Changes, error)
}
// Changes are added, deleted, and modified paths.
type Changes struct {
	Added    []string
	Modified []string
	Deleted  []string
}

// gitserverClient is the concrete GitserverClient backed by the shared
// internal/gitserver default client, instrumented with observations.
type gitserverClient struct {
	operations *operations
}

// NewClient creates a GitserverClient using the given observation context for
// tracing and metrics.
func NewClient(observationContext *observation.Context) GitserverClient {
	return &gitserverClient{
		operations: newOperations(observationContext),
	}
}
// FetchTar requests a tar archive of repo@commit from gitserver, restricted
// to the given paths (empty means the whole tree). The caller must close the
// returned reader.
func (c *gitserverClient) FetchTar(ctx context.Context, repo api.RepoName, commit api.CommitID, paths []string) (_ io.ReadCloser, err error) {
	ctx, endObservation := c.operations.fetchTar.With(ctx, &err, observation.Args{LogFields: []log.Field{
		log.String("repo", string(repo)),
		log.String("commit", string(commit)),
		log.Int("paths", len(paths)),
		log.String("paths", strings.Join(paths, ":")),
	}})
	defer endObservation(1, observation.Args{})

	opts := gitserver.ArchiveOptions{
		Treeish: string(commit),
		Format:  "tar",
		Paths:   paths,
	}

	return gitserver.DefaultClient.Archive(ctx, repo, opts)
}
// GitDiff returns the paths that were added, modified, or deleted between the
// two given commits of the repository.
func (c *gitserverClient) GitDiff(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) (_ Changes, err error) {
	ctx, endObservation := c.operations.gitDiff.With(ctx, &err, observation.Args{LogFields: []log.Field{
		log.String("repo", string(repo)),
		log.String("commitA", string(commitA)),
		log.String("commitB", string(commitB)),
	}})
	defer endObservation(1, observation.Args{})

	output, err := git.DiffSymbols(ctx, repo, commitA, commitB)
	if err != nil {
		// Previously this error was silently discarded: err was immediately
		// overwritten by the parse call below, so a failed diff was parsed
		// as if it had succeeded.
		return Changes{}, errors.Wrap(err, "failed to get git diff output")
	}

	changes, err := parseGitDiffOutput(output)
	if err != nil {
		return Changes{}, errors.Wrap(err, "failed to parse git diff output")
	}

	return changes, nil
}
// NUL is the zero byte used by `git diff -z` as a field separator.
var NUL = []byte{0}
// parseGitDiffOutput parses the output of a git diff command, which consists
// of a repeated sequence of `<status> NUL <path> NUL` where NUL is the 0 byte.
// Empty output (identical commits) yields zero-valued Changes.
func parseGitDiffOutput(output []byte) (changes Changes, _ error) {
	if len(output) == 0 {
		// No differences. Previously this fell through to bytes.Split, which
		// returns a single empty element for empty input, producing a
		// spurious "uneven pairs" error.
		return changes, nil
	}

	slices := bytes.Split(bytes.TrimRight(output, string(NUL)), NUL)
	if len(slices)%2 != 0 {
		return changes, fmt.Errorf("uneven pairs")
	}

	for i := 0; i < len(slices); i += 2 {
		if len(slices[i]) == 0 {
			// Defensive: an empty status field would otherwise panic on the
			// index below.
			continue
		}
		switch slices[i][0] {
		case 'A':
			changes.Added = append(changes.Added, string(slices[i+1]))
		case 'M':
			changes.Modified = append(changes.Modified, string(slices[i+1]))
		case 'D':
			changes.Deleted = append(changes.Deleted, string(slices[i+1]))
		}
	}

	return changes, nil
}

View File

@ -0,0 +1,65 @@
package gitserver
import (
"testing"
"github.com/google/go-cmp/cmp"
)
// TestParseGitDiffOutput checks that NUL-delimited status/path pairs are
// bucketed into added/modified/deleted sets and that malformed (odd-length)
// output is rejected.
func TestParseGitDiffOutput(t *testing.T) {
	for _, testCase := range []struct {
		output          []byte
		expectedChanges Changes
		shouldError     bool
	}{
		{
			output: combineBytes(
				[]byte("A"), NUL, []byte("added1.json"), NUL,
				[]byte("M"), NUL, []byte("modified1.json"), NUL,
				[]byte("D"), NUL, []byte("deleted1.json"), NUL,
				[]byte("A"), NUL, []byte("added2.json"), NUL,
				[]byte("M"), NUL, []byte("modified2.json"), NUL,
				[]byte("D"), NUL, []byte("deleted2.json"), NUL,
				[]byte("A"), NUL, []byte("added3.json"), NUL,
				[]byte("M"), NUL, []byte("modified3.json"), NUL,
				[]byte("D"), NUL, []byte("deleted3.json"), NUL,
			),
			expectedChanges: Changes{
				Added:    []string{"added1.json", "added2.json", "added3.json"},
				Modified: []string{"modified1.json", "modified2.json", "modified3.json"},
				Deleted:  []string{"deleted1.json", "deleted2.json", "deleted3.json"},
			},
		},
		{
			// Trailing status with no path: uneven pair count.
			output: combineBytes(
				[]byte("A"), NUL, []byte("added1.json"), NUL,
				[]byte("M"), NUL, []byte("modified1.json"), NUL,
				[]byte("D"), NUL,
			),
			shouldError: true,
		},
	} {
		changes, err := parseGitDiffOutput(testCase.output)
		switch {
		case err != nil && !testCase.shouldError:
			t.Fatalf("unexpected error parsing git diff output: %s", err)
		case err == nil && testCase.shouldError:
			t.Fatalf("expected error, got none")
		}

		if diff := cmp.Diff(testCase.expectedChanges, changes); diff != "" {
			t.Errorf("unexpected changes (-want +got):\n%s", diff)
		}
	}
}
// combineBytes concatenates the given byte slices into one slice, returning
// nil when there is nothing to combine.
func combineBytes(bss ...[]byte) []byte {
	total := 0
	for _, bs := range bss {
		total += len(bs)
	}
	if total == 0 {
		return nil
	}

	out := make([]byte, 0, total)
	for _, bs := range bss {
		out = append(out, bs...)
	}
	return out
}

View File

@ -0,0 +1,35 @@
package gitserver
import (
"fmt"
"github.com/sourcegraph/sourcegraph/internal/metrics"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// operations bundles the observed operations (tracing plus RED metrics) for
// each GitserverClient method.
type operations struct {
	fetchTar *observation.Operation
	gitDiff  *observation.Operation
}
// newOperations constructs the observed operations for the gitserver client,
// registering the shared RED metrics on the given observation context.
func newOperations(observationContext *observation.Context) *operations {
	redMetrics := metrics.NewREDMetrics(
		observationContext.Registerer,
		"codeintel_symbols_gitserver",
		metrics.WithLabels("op"),
		metrics.WithCountHelp("Total number of method invocations."),
	)

	makeOp := func(opName string) *observation.Operation {
		return observationContext.Operation(observation.Op{
			Name:              fmt.Sprintf("codeintel.symbols.gitserver.%s", opName),
			MetricLabelValues: []string{opName},
			Metrics:           redMetrics,
		})
	}

	return &operations{
		fetchTar: makeOp("FetchTar"),
		gitDiff:  makeOp("GitDiff"),
	}
}

View File

@ -0,0 +1,49 @@
package gitserver
import (
"archive/tar"
"bytes"
"context"
"io"
"github.com/sourcegraph/sourcegraph/internal/api"
)
// CreateTestFetchTarFunc returns a function that mimics GitserverClient.FetchTar
// by serving the given path -> contents map as an in-memory tar archive. When
// a non-nil paths slice is supplied, only the named files are included.
func CreateTestFetchTarFunc(tarContents map[string]string) func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error) {
	return func(ctx context.Context, repo api.RepoName, commit api.CommitID, paths []string) (io.ReadCloser, error) {
		var buffer bytes.Buffer
		tarWriter := tar.NewWriter(&buffer)

		for name, content := range tarContents {
			if paths != nil {
				found := false
				for _, path := range paths {
					if path == name {
						found = true
						break // no need to scan the remaining paths
					}
				}
				if !found {
					continue
				}
			}

			tarHeader := &tar.Header{
				Name: name,
				Mode: 0o600,
				Size: int64(len(content)),
			}
			if err := tarWriter.WriteHeader(tarHeader); err != nil {
				return nil, err
			}
			if _, err := tarWriter.Write([]byte(content)); err != nil {
				return nil, err
			}
		}

		if err := tarWriter.Close(); err != nil {
			return nil, err
		}

		return io.NopCloser(bytes.NewReader(buffer.Bytes())), nil
	}
}

View File

@ -0,0 +1,74 @@
package parser
import (
"fmt"
"github.com/prometheus/client_golang/prometheus"
"github.com/sourcegraph/sourcegraph/internal/metrics"
"github.com/sourcegraph/sourcegraph/internal/observation"
)
// operations bundles the parser's Prometheus instruments and observed operations.
type operations struct {
	parsing            prometheus.Gauge   // parse jobs currently running
	parseQueueSize     prometheus.Gauge   // parse jobs waiting for a pooled parser
	parseQueueTimeouts prometheus.Counter // parse jobs that timed out while enqueued
	parseFailed        prometheus.Counter // parse jobs that failed

	parse              *observation.Operation
	handleParseRequest *observation.Operation
}
// newOperations constructs and registers the parser's Prometheus instruments
// and observed operations on the given observation context.
func newOperations(observationContext *observation.Context) *operations {
	// Local helpers: create a gauge/counter in the "src" namespace and
	// register it immediately.
	gauge := func(name, help string) prometheus.Gauge {
		g := prometheus.NewGauge(prometheus.GaugeOpts{
			Namespace: "src",
			Name:      name,
			Help:      help,
		})
		observationContext.Registerer.MustRegister(g)
		return g
	}
	counter := func(name, help string) prometheus.Counter {
		c := prometheus.NewCounter(prometheus.CounterOpts{
			Namespace: "src",
			Name:      name,
			Help:      help,
		})
		observationContext.Registerer.MustRegister(c)
		return c
	}

	parsing := gauge("codeintel_symbols_parsing", "The number of parse jobs currently running.")
	parseQueueSize := gauge("codeintel_symbols_parse_queue_size", "The number of parse jobs enqueued.")
	parseQueueTimeouts := counter("codeintel_symbols_parse_queue_timeouts_total", "The total number of parse jobs that timed out while enqueued.")
	parseFailed := counter("codeintel_symbols_parse_failed_total", "The total number of parse jobs that failed.")

	operationMetrics := metrics.NewREDMetrics(
		observationContext.Registerer,
		"codeintel_symbols_parser",
		metrics.WithLabels("op"),
		metrics.WithCountHelp("Total number of method invocations."),
		metrics.WithDurationBuckets([]float64{1, 5, 10, 60, 300, 1200}),
	)
	op := func(name string) *observation.Operation {
		return observationContext.Operation(observation.Op{
			Name:              fmt.Sprintf("codeintel.symbols.parser.%s", name),
			MetricLabelValues: []string{name},
			Metrics:           operationMetrics,
		})
	}

	return &operations{
		parsing:            parsing,
		parseQueueSize:     parseQueueSize,
		parseQueueTimeouts: parseQueueTimeouts,
		parseFailed:        parseFailed,
		parse:              op("Parse"),
		handleParseRequest: op("HandleParseRequest"),
	}
}

View File

@ -0,0 +1,205 @@
package parser
import (
"context"
"strings"
"sync"
"sync/atomic"
"github.com/cockroachdb/errors"
"github.com/inconshreveable/log15"
"github.com/opentracing/opentracing-go/log"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/types"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)
// Parser parses all requested files of a repository revision and streams the
// resulting symbols.
type Parser interface {
	Parse(ctx context.Context, args types.SearchArgs, paths []string) (<-chan result.Symbol, error)
}

// parser implements Parser on top of a pool of ctags processes and a
// repository archive fetcher.
type parser struct {
	parserPool        ParserPool
	repositoryFetcher fetcher.RepositoryFetcher
	operations        *operations
}

// NewParser returns a Parser backed by the given parser pool and repository
// fetcher, instrumented with the given observation context.
func NewParser(
	parserPool ParserPool,
	repositoryFetcher fetcher.RepositoryFetcher,
	observationContext *observation.Context,
) Parser {
	return &parser{
		parserPool:        parserPool,
		repositoryFetcher: repositoryFetcher,
		operations:        newOperations(observationContext),
	}
}
// Parse fetches the given paths of the repository archive and parses each
// file concurrently, streaming the resulting symbols over the returned
// channel. The channel is closed once all parse requests have been handled.
func (p *parser) Parse(ctx context.Context, args types.SearchArgs, paths []string) (_ <-chan result.Symbol, err error) {
	ctx, traceLog, endObservation := p.operations.parse.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
		log.String("repo", string(args.Repo)),
		log.String("commitID", string(args.CommitID)),
		log.Int("paths", len(paths)),
		log.String("paths", strings.Join(paths, ":")),
	}})
	// NOTE: endObservation is installed to run on defer of a background
	// routine below, which fires when the returned symbols channel is closed.

	// FetchRepositoryArchive does not return an error; failures are delivered
	// as Err values on the channel. (A dead `if err != nil` check here —
	// err is the named return and is always nil at this point — has been
	// removed along with its unreachable error wrap.)
	parseRequestOrErrors := p.repositoryFetcher.FetchRepositoryArchive(ctx, args, paths)

	defer func() {
		if err != nil {
			go func() {
				// Drain channel on early exit
				for range parseRequestOrErrors {
				}
			}()
		}
	}()

	var wg sync.WaitGroup
	var totalSymbols uint32
	symbols := make(chan result.Symbol)
	defer func() {
		// Close the symbols channel (and finish the observation) only after
		// every parse goroutine has completed.
		go func() {
			defer func() {
				endObservation(1, observation.Args{LogFields: []log.Field{
					log.Int("numSymbols", int(totalSymbols)),
				}})
			}()

			wg.Wait()
			close(symbols)
		}()
	}()

	totalRequests := 0
	for v := range parseRequestOrErrors {
		if v.Err != nil {
			return nil, v.Err
		}

		wg.Add(1)
		totalRequests++

		go func(parseRequest fetcher.ParseRequest) {
			defer wg.Done()

			_ = p.handleParseRequest(ctx, symbols, parseRequest, &totalSymbols)
		}(v.ParseRequest)
	}
	traceLog(log.Int("numRequests", totalRequests))

	return symbols, nil
}
// handleParseRequest parses a single file with a pooled ctags parser and
// sends each persistable symbol on the symbols channel, incrementing
// totalSymbols (atomically) for each symbol actually delivered.
func (p *parser) handleParseRequest(ctx context.Context, symbols chan<- result.Symbol, parseRequest fetcher.ParseRequest, totalSymbols *uint32) (err error) {
	ctx, traceLog, endObservation := p.operations.handleParseRequest.WithAndLogger(ctx, &err, observation.Args{LogFields: []log.Field{
		log.Int("fileSize", len(parseRequest.Data)),
	}})
	defer endObservation(1, observation.Args{})

	parser, err := p.parserFromPool(ctx)
	if err != nil {
		return err
	}
	traceLog(log.Event("acquired parser from pool"))

	defer func() {
		// Convert a ctags panic into a regular error so the pool bookkeeping
		// below treats it as a failure.
		if err == nil {
			if e := recover(); e != nil {
				err = errors.Errorf("panic: %s", e)
			}
		}
		if err == nil {
			p.parserPool.Done(parser)
		} else {
			// Close parser and return nil to pool, indicating that the next receiver should create a new parser
			log15.Error("Closing failed parser", "error", err)
			parser.Close()
			p.parserPool.Done(nil)
			p.operations.parseFailed.Inc()
		}
	}()
	p.operations.parsing.Inc()
	defer p.operations.parsing.Dec()

	entries, err := parser.Parse(parseRequest.Path, parseRequest.Data)
	if err != nil {
		return errors.Wrap(err, "parser.Parse")
	}
	traceLog(log.Int("numEntries", len(entries)))

	for _, e := range entries {
		// Drop unnamed/anonymous entries.
		if !shouldPersistEntry(e) {
			continue
		}

		symbol := result.Symbol{
			Name:        e.Name,
			Path:        e.Path,
			Line:        e.Line,
			Kind:        e.Kind,
			Language:    e.Language,
			Parent:      e.Parent,
			ParentKind:  e.ParentKind,
			Signature:   e.Signature,
			Pattern:     e.Pattern,
			FileLimited: e.FileLimited,
		}

		select {
		case symbols <- symbol:
			// Count only symbols actually delivered to the consumer.
			atomic.AddUint32(totalSymbols, 1)
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	return nil
}
// parserFromPool blocks until a parser can be acquired from the pool,
// tracking queue-size and queue-timeout metrics along the way.
func (p *parser) parserFromPool(ctx context.Context) (ctags.Parser, error) {
	p.operations.parseQueueSize.Inc()
	defer p.operations.parseQueueSize.Dec()

	parser, err := p.parserPool.Get(ctx)
	if err == nil {
		return parser, nil
	}

	if err == context.DeadlineExceeded {
		p.operations.parseQueueTimeouts.Inc()
	}
	if err != ctx.Err() {
		// A factory error rather than a cancellation: add context.
		err = errors.Wrap(err, "failed to create parser")
	}
	return parser, err
}
// shouldPersistEntry discards tags without a name as well as
// compiler-generated anonymous symbols (and children thereof).
func shouldPersistEntry(e *ctags.Entry) bool {
	if e.Name == "" {
		return false
	}

	for _, prefix := range []string{"__anon", "AnonymousFunction"} {
		if strings.HasPrefix(e.Name, prefix) {
			return false
		}
		if strings.HasPrefix(e.Parent, prefix) {
			return false
		}
	}

	return true
}

View File

@ -0,0 +1,5 @@
package parser
import "github.com/sourcegraph/go-ctags"
// ParserFactory constructs a new ctags parser instance.
type ParserFactory func() (ctags.Parser, error)

View File

@ -0,0 +1,23 @@
package parser
import (
"log"
"os"
"github.com/sourcegraph/go-ctags"
)
// NewCtagsParserFactory returns a factory producing parsers that invoke the
// given ctags binary with the given pattern length limit and logging flags.
func NewCtagsParserFactory(ctagsCommand string, patternLengthLimit int, logErrors, debugLogs bool) ParserFactory {
	var infoLogger, debugLogger *log.Logger
	if logErrors {
		infoLogger = log.New(os.Stderr, "ctags: ", log.LstdFlags)
	}
	if debugLogs {
		debugLogger = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
	}

	options := ctags.Options{
		Bin:                ctagsCommand,
		PatternLengthLimit: patternLengthLimit,
		Info:               infoLogger,
		Debug:              debugLogger,
	}
	return func() (ctags.Parser, error) { return ctags.New(options) }
}

View File

@ -1,4 +1,4 @@
package symbols
package parser
import (
"os/exec"
@ -10,13 +10,13 @@ import (
"github.com/sourcegraph/go-ctags"
)
func TestParser(t *testing.T) {
func TestCtagsParser(t *testing.T) {
// TODO(sqs): find a way to make it easy to run these tests in local dev (w/o needing to install universal-ctags) and CI
if _, err := exec.LookPath(ctagsCommand); err != nil {
if _, err := exec.LookPath("universal-ctags"); err != nil {
t.Skip("command not in PATH: universal-ctags")
}
p, err := NewParser()
p, err := NewCtagsParserFactory("universal-ctags", 250, false, false)()
if err != nil {
t.Fatal(err)
}

View File

@ -0,0 +1,57 @@
package parser
import (
"context"
"github.com/sourcegraph/go-ctags"
)
// ParserPool is a bounded pool of reusable ctags parser processes.
type ParserPool interface {
	// Get blocks until a parser is available or ctx is done.
	Get(ctx context.Context) (ctags.Parser, error)
	// Done returns a parser (or nil, after a failure) to the pool.
	Done(parser ctags.Parser)
}

// parserPool implements ParserPool using a buffered channel as the free list.
type parserPool struct {
	newParser ParserFactory // used to lazily replace failed (nil) parsers
	pool      chan ctags.Parser
}
// NewParserPool creates a pool of numParserProcesses parsers pre-started via
// the given factory. If any parser fails to start, the parsers created so far
// are closed before returning the error so their processes are not leaked.
func NewParserPool(newParser ParserFactory, numParserProcesses int) (ParserPool, error) {
	pool := make(chan ctags.Parser, numParserProcesses)
	for i := 0; i < numParserProcesses; i++ {
		parser, err := newParser()
		if err != nil {
			// Previously the already-created parsers were abandoned here,
			// leaking their ctags subprocesses.
			for len(pool) > 0 {
				(<-pool).Close()
			}
			return nil, err
		}
		pool <- parser
	}

	return &parserPool{
		newParser: newParser,
		pool:      pool,
	}, nil
}
// Get a parser from the pool. Once this parser is no longer in use, the Done method
// MUST be called with either the parser or a nil value (when encountering an error).
// Nil values will be recreated on-demand via the factory supplied when constructing
// the pool. On success this method returns a non-nil parser with a nil error value;
// on failure (context cancellation or a factory error) it returns a nil parser and
// a non-nil error.
//
// This method blocks until a parser is available or the given context is canceled.
func (p *parserPool) Get(ctx context.Context) (ctags.Parser, error) {
	select {
	case parser := <-p.pool:
		if parser != nil {
			return parser, nil
		}
		// A previous user returned nil after a failure; lazily create a
		// replacement parser.
		return p.newParser()

	case <-ctx.Done():
		return nil, ctx.Err()
	}
}
// Done returns the given parser to the pool. Pass nil when the parser failed;
// the next Get will create a replacement on demand.
func (p *parserPool) Done(parser ctags.Parser) {
	p.pool <- parser
}

View File

@ -1,48 +0,0 @@
package symbols
import (
"log"
"os"
"strconv"
"github.com/cockroachdb/errors"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/internal/env"
)
// debugLogs enables verbose ctags debug logging when true.
const debugLogs = false

// logErrors enables ctags stderr logging in local development deployments.
var logErrors = os.Getenv("DEPLOY_TYPE") == "dev"

// ctagsCommand is the ctags executable to invoke.
var ctagsCommand = env.Get("CTAGS_COMMAND", "universal-ctags", "ctags command (should point to universal-ctags executable compiled with JSON and seccomp support)")

// Increasing this value may increase the size of the symbols cache, but will also stop long lines containing symbols from
// being highlighted improperly. See https://github.com/sourcegraph/sourcegraph/issues/7668.
var rawPatternLengthLimit = env.Get("CTAGS_PATTERN_LENGTH_LIMIT", "250", "the maximum length of the patterns output by ctags")
// NewParser runs the ctags command from the CTAGS_COMMAND environment
// variable, falling back to `universal-ctags`.
func NewParser() (ctags.Parser, error) {
	patternLengthLimit, err := strconv.Atoi(rawPatternLengthLimit)
	if err != nil {
		return nil, errors.Errorf("invalid pattern length limit: %s", rawPatternLengthLimit)
	}

	options := ctags.Options{
		Bin:                ctagsCommand,
		PatternLengthLimit: patternLengthLimit,
	}
	if logErrors {
		options.Info = log.New(os.Stderr, "ctags: ", log.LstdFlags)
	}
	if debugLogs {
		options.Debug = log.New(os.Stderr, "DBUG ctags: ", log.LstdFlags)
	}

	return ctags.New(options)
}

View File

@ -1,151 +0,0 @@
package symbols
import (
"archive/tar"
"bytes"
"context"
"io"
"path"
"github.com/opentracing/opentracing-go/ext"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
)
// parseRequest is a single file to be parsed: its path within the repository
// archive and its raw contents.
type parseRequest struct {
	path string
	data []byte
}
// fetchRepositoryArchive fetches a tar archive of repo@commitID (restricted
// to paths, when non-empty) and emits one parseRequest per parseable file on
// the returned request channel. At most one error is delivered on the
// returned error channel; both channels are closed when fetching completes.
func (s *Service) fetchRepositoryArchive(ctx context.Context, repo api.RepoName, commitID api.CommitID, paths []string) (<-chan parseRequest, <-chan error, error) {
	fetchQueueSize.Inc()
	s.fetchSem <- 1 // acquire concurrent fetches semaphore
	fetchQueueSize.Dec()

	fetching.Inc()

	span, ctx := ot.StartSpanFromContext(ctx, "Store.fetch")
	ext.Component.Set(span, "store")
	span.SetTag("repo", repo)
	span.SetTag("commit", commitID)

	requestCh := make(chan parseRequest, s.NumParserProcesses)
	errCh := make(chan error, 1)

	// Done is called when the returned reader is closed, or if this function
	// returns an error. It should always be called once.
	doneCalled := false
	done := func(err error) {
		if doneCalled {
			panic("Store.fetch.done called twice")
		}
		doneCalled = true

		if err != nil {
			errCh <- err
		}

		<-s.fetchSem // release concurrent fetches semaphore
		close(requestCh)
		close(errCh)

		if err != nil {
			ext.Error.Set(span, true)
			span.SetTag("err", err.Error())
			fetchFailed.Inc()
		}
		fetching.Dec()
		span.Finish()
	}

	r, err := s.FetchTar(ctx, repo, commitID, paths)
	if err != nil {
		done(err)
		return nil, nil, err
	}

	// After this point we are not allowed to return an error. Instead we can
	// return an error via the errChan we return. If you do want to update this
	// code please ensure we still always call done once.

	go func() {
		defer r.Close()

		buf := make([]byte, 32*1024) // 32*1024 is the same size used by io.Copy
		tr := tar.NewReader(r)
		for {
			if ctx.Err() != nil {
				done(ctx.Err())
				return
			}

			hdr, err := tr.Next()
			if err == io.EOF {
				// End of archive: success.
				done(nil)
				return
			}
			if err != nil {
				done(err)
				return
			}

			// NOTE(review): every file with a .json extension is skipped here;
			// presumably intentional, but confirm against callers before
			// relying on it.
			if path.Ext(hdr.Name) == ".json" {
				continue
			}

			// We only care about files
			if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA {
				continue
			}

			// We do not search large files
			if hdr.Size > maxFileSize {
				continue
			}

			// Heuristic: Assume file is binary if the first read (up to the
			// 32KB buffer) contains a 0x00. Best effort, so ignore err.
			n, err := tr.Read(buf)
			if n > 0 && bytes.IndexByte(buf[:n], 0x00) >= 0 {
				continue
			}

			switch err {
			case io.EOF:
				if n == 0 {
					// Zero-length file: nothing to parse.
					continue
				}
			case nil:
			default:
				done(err)
				return
			}

			// Read the file's contents.
			data := make([]byte, int(hdr.Size))
			copy(data, buf[:n])
			if n < int(hdr.Size) {
				_, err = io.ReadFull(tr, data[n:])
				if err != nil {
					done(err)
					return
				}
			}

			requestCh <- parseRequest{path: hdr.Name, data: data}
		}
	}()

	return requestCh, errCh, nil
}
// Prometheus instrumentation for repository archive fetching.
var (
	fetching = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "symbols_store_fetching",
		Help: "The number of fetches currently running.",
	})
	fetchQueueSize = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "symbols_store_fetch_queue_size",
		Help: "The number of fetch jobs enqueued.",
	})
	fetchFailed = promauto.NewCounter(prometheus.CounterOpts{
		Name: "symbols_store_fetch_failed",
		Help: "The total number of archive fetches that failed.",
	})
)

View File

@ -1,212 +0,0 @@
package symbols
import (
"context"
"runtime"
"strings"
"sync"
"github.com/cockroachdb/errors"
"github.com/inconshreveable/log15"
"github.com/opentracing/opentracing-go/ext"
otlog "github.com/opentracing/opentracing-go/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
nettrace "golang.org/x/net/trace"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
)
// startParsers starts the parser process pool, sized by NumParserProcesses
// and defaulting to GOMAXPROCS when unset.
func (s *Service) startParsers() error {
	size := s.NumParserProcesses
	if size == 0 {
		size = runtime.GOMAXPROCS(0)
	}

	s.parsers = make(chan ctags.Parser, size)
	for i := 0; i < size; i++ {
		p, err := s.NewParser()
		if err != nil {
			return errors.Wrap(err, "NewParser")
		}
		s.parsers <- p
	}
	return nil
}
// parseUncached fetches the repo@commitID archive and parses the requested
// paths concurrently, invoking callback for each persistable symbol found.
func (s *Service) parseUncached(ctx context.Context, repo api.RepoName, commitID api.CommitID, paths []string, callback func(symbol result.Symbol) error) (err error) {
	span, ctx := ot.StartSpanFromContext(ctx, "parseUncached")
	defer func() {
		if err != nil {
			ext.Error.Set(span, true)
			span.LogFields(otlog.Error(err))
		}
		span.Finish()
	}()
	span.SetTag("repo", string(repo))
	span.SetTag("commit", string(commitID))

	tr := nettrace.New("parseUncached", string(repo))
	tr.LazyPrintf("commitID: %s", commitID)

	totalSymbols := 0
	defer func() {
		tr.LazyPrintf("symbols=%d", totalSymbols)
		if err != nil {
			tr.LazyPrintf("error: %s", err)
			tr.SetError()
		}
		tr.Finish()
	}()

	tr.LazyPrintf("fetch")
	parseRequests, errChan, err := s.fetchRepositoryArchive(ctx, repo, commitID, paths)
	tr.LazyPrintf("fetch (returned chans)")
	if err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	var (
		mu  sync.Mutex // protects symbols and err
		wg  sync.WaitGroup
		sem = make(chan struct{}, runtime.GOMAXPROCS(0)) // bounds concurrent parse goroutines
	)
	tr.LazyPrintf("parse")
	totalParseRequests := 0
	for req := range parseRequests {
		totalParseRequests++
		if ctx.Err() != nil {
			// Drain parseRequests
			go func() {
				for range parseRequests {
				}
			}()
			return ctx.Err()
		}
		sem <- struct{}{}
		wg.Add(1)
		go func(req parseRequest) {
			defer func() {
				wg.Done()
				<-sem
			}()
			entries, parseErr := s.parse(ctx, req)
			if parseErr != nil && parseErr != context.Canceled && parseErr != context.DeadlineExceeded {
				log15.Error("Error parsing symbols.", "repo", repo, "commitID", commitID, "path", req.path, "dataSize", len(req.data), "error", parseErr)
			}
			if len(entries) > 0 {
				mu.Lock()
				defer mu.Unlock()
				for _, e := range entries {
					// Skip unnamed and compiler-generated anonymous symbols.
					if e.Name == "" || strings.HasPrefix(e.Name, "__anon") || strings.HasPrefix(e.Parent, "__anon") || strings.HasPrefix(e.Name, "AnonymousFunction") || strings.HasPrefix(e.Parent, "AnonymousFunction") {
						continue
					}
					totalSymbols++
					err = callback(entryToSymbol(e))
					if err != nil {
						log15.Error("Failed to add symbol", "symbol", e, "error", err)
						return
					}
				}
			}
		}(req)
	}
	wg.Wait()
	tr.LazyPrintf("parse (done) totalParseRequests=%d symbols=%d", totalParseRequests, totalSymbols)

	// NOTE(review): a callback error assigned to err above is overwritten by
	// this final receive when the fetch itself succeeded — confirm whether
	// swallowing callback failures here is intentional.
	return <-errChan
}
// parse gets a parser from the pool and uses it to satisfy the parse request.
// Failed parsers are closed and replaced with nil in the pool so the next
// receiver creates a fresh one.
func (s *Service) parse(ctx context.Context, req parseRequest) (entries []*ctags.Entry, err error) {
	parseQueueSize.Inc()
	select {
	case <-ctx.Done():
		parseQueueSize.Dec()
		if ctx.Err() == context.DeadlineExceeded {
			parseQueueTimeouts.Inc()
		}
		return nil, ctx.Err()

	case parser, ok := <-s.parsers:
		parseQueueSize.Dec()
		if !ok {
			// Pool channel was closed; nothing to do.
			return nil, nil
		}

		if parser == nil {
			// The parser failed for some previous receiver (who returned a nil parser to the channel). Try
			// creating a parser.
			var err error
			parser, err = s.NewParser()
			if err != nil {
				return nil, err
			}
		}

		defer func() {
			// Convert a ctags panic into a regular error so the pool
			// bookkeeping below treats it as a failure.
			if err == nil {
				if e := recover(); e != nil {
					err = errors.Errorf("panic: %s", e)
				}
			}
			if err == nil {
				// Return parser to pool.
				s.parsers <- parser
			} else {
				// Close parser and return nil to pool, indicating that the next receiver should create a new
				// parser.
				log15.Error("Closing failed parser and creating a new one.", "path", req.path, "error", err)
				parseFailed.Inc()
				parser.Close()
				s.parsers <- nil
			}
		}()
		parsing.Inc()
		defer parsing.Dec()

		return parser.Parse(req.path, req.data)
	}
}
// entryToSymbol converts a raw ctags entry into the service's symbol type.
func entryToSymbol(e *ctags.Entry) result.Symbol {
	return result.Symbol{
		Name:        e.Name,
		Path:        e.Path,
		Line:        e.Line,
		Kind:        e.Kind,
		Language:    e.Language,
		Parent:      e.Parent,
		ParentKind:  e.ParentKind,
		Signature:   e.Signature,
		Pattern:     e.Pattern,
		FileLimited: e.FileLimited,
	}
}
// Prometheus instrumentation for the parse pipeline.
var (
	parsing = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "symbols_parse_parsing",
		Help: "The number of parse jobs currently running.",
	})
	parseQueueSize = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "symbols_parse_parse_queue_size",
		Help: "The number of parse jobs enqueued.",
	})
	parseQueueTimeouts = promauto.NewCounter(prometheus.CounterOpts{
		Name: "symbols_parse_parse_queue_timeouts",
		Help: "The total number of parse jobs that timed out while enqueued.",
	})
	parseFailed = promauto.NewCounter(prometheus.CounterOpts{
		Name: "symbols_parse_parse_failed",
		Help: "The total number of parse jobs that failed.",
	})
)

View File

@ -1,599 +0,0 @@
package symbols
import (
"context"
"database/sql"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"regexp"
"regexp/syntax"
"strings"
"time"
"github.com/mattn/go-sqlite3"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/inconshreveable/log15"
"github.com/jmoiron/sqlx"
"github.com/keegancsmith/sqlf"
"github.com/opentracing/opentracing-go/ext"
otlog "github.com/opentracing/opentracing-go/log"
nettrace "golang.org/x/net/trace"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/protocol"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
"github.com/sourcegraph/sourcegraph/internal/search/result"
)
// init registers a sqlite3 driver variant exposing a REGEXP function (backed
// by Go's regexp.MatchString) so queries can use the REGEXP operator.
func init() {
	sql.Register("sqlite3_with_regexp",
		&sqlite3.SQLiteDriver{
			ConnectHook: func(conn *sqlite3.SQLiteConn) error {
				return conn.RegisterFunc("REGEXP", regexp.MatchString, true)
			},
		})
}

// maxFileSize is the limit on file size in bytes. Only files of at most this
// size are processed (the comparison below uses a strict `>`).
const maxFileSize = 1 << 19 // 512KB
// handleSearch decodes a protocol.SearchArgs request body, runs the symbol
// search, and writes the JSON-encoded result.
func (s *Service) handleSearch(w http.ResponseWriter, r *http.Request) {
	var args protocol.SearchArgs
	if err := json.NewDecoder(r.Body).Decode(&args); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	result, err := s.search(r.Context(), args)
	if err != nil {
		if err == context.Canceled && r.Context().Err() == context.Canceled {
			// Client went away; nothing useful to write.
			return
		}
		log15.Error("Symbol search failed", "args", args, "error", err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	if err := json.NewEncoder(w).Encode(result); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
	}
}
// search performs a symbol search against repo@commit's SQLite database
// (building or updating it on demand via getDBFile), bounded by a 60s timeout.
func (s *Service) search(ctx context.Context, args protocol.SearchArgs) (*result.Symbols, error) {
	var err error
	ctx, cancel := context.WithTimeout(ctx, 60*time.Second)
	defer cancel()

	log15.Debug("Symbol search", "repo", args.Repo, "query", args.Query)
	span, ctx := ot.StartSpanFromContext(ctx, "search")
	span.SetTag("repo", args.Repo)
	span.SetTag("commitID", args.CommitID)
	span.SetTag("query", args.Query)
	span.SetTag("first", args.First)
	defer func() {
		if err != nil {
			ext.Error.Set(span, true)
			span.LogFields(otlog.Error(err))
		}
		span.Finish()
	}()

	tr := nettrace.New("symbols.search", fmt.Sprintf("args:%+v", args))
	defer func() {
		if err != nil {
			tr.LazyPrintf("error: %v", err)
			tr.SetError()
		}
		tr.Finish()
	}()

	// Resolve (and, if needed, build) the on-disk SQLite database for this
	// repo@commit.
	dbFile, err := s.getDBFile(ctx, args)
	if err != nil {
		return nil, err
	}

	db, err := sqlx.Open("sqlite3_with_regexp", dbFile)
	if err != nil {
		return nil, err
	}
	defer db.Close()

	result, err := filterSymbols(ctx, db, args)
	if err != nil {
		return nil, err
	}
	return &result, nil
}
// getDBFile returns the path to the sqlite3 database for the repo@commit
// specified in `args`. If the database doesn't already exist in the disk cache,
// it will create a new one and write all the symbols into it; when a previous
// commit's database is cached, it is copied and updated incrementally from
// the git diff instead.
func (s *Service) getDBFile(ctx context.Context, args protocol.SearchArgs) (string, error) {
	diskcacheFile, err := s.cache.OpenWithPath(ctx, []string{string(args.Repo), fmt.Sprintf("%s-%d", args.CommitID, symbolsDBVersion)}, func(fetcherCtx context.Context, tempDBFile string) error {
		// Find the most recent cached DB for this repo so we can update
		// incrementally from its commit rather than reindexing everything.
		newest, commit, err := findNewestFile(filepath.Join(s.cache.Dir, diskcache.EncodeKeyComponent(string(args.Repo))))
		if err != nil {
			return err
		}

		var changes *Changes
		if commit != "" && s.GitDiff != nil {
			var err error
			// NOTE(review): this uses the outer ctx rather than fetcherCtx —
			// confirm whether that is intentional.
			changes, err = s.GitDiff(ctx, args.Repo, commit, args.CommitID)
			if err != nil {
				return err
			}

			// Avoid sending more files than will fit in HTTP headers.
			totalPathsLength := 0
			paths := []string{}
			paths = append(paths, changes.Added...)
			paths = append(paths, changes.Modified...)
			paths = append(paths, changes.Deleted...)
			for _, path := range paths {
				totalPathsLength += len(path)
			}
			if totalPathsLength > MAX_TOTAL_PATHS_LENGTH {
				changes = nil
			}
		}

		if changes == nil {
			// There are no existing SQLite DBs to reuse, or the diff is too big, so write a completely
			// new one.
			err := s.writeAllSymbolsToNewDB(fetcherCtx, tempDBFile, args.Repo, args.CommitID)
			if err != nil {
				if err == context.Canceled {
					log15.Error("Unable to parse repository symbols within the context", "repo", args.Repo, "commit", args.CommitID, "query", args.Query)
				}
				return err
			}
		} else {
			// Copy the existing DB to a new DB and update the new DB
			err = copyFile(newest, tempDBFile)
			if err != nil {
				return err
			}

			err = s.updateSymbols(fetcherCtx, tempDBFile, args.Repo, args.CommitID, *changes)
			if err != nil {
				if err == context.Canceled {
					log15.Error("updateSymbols: unable to parse repository symbols within the context", "repo", args.Repo, "commit", args.CommitID, "query", args.Query)
				}
				return err
			}
		}

		return nil
	})
	if err != nil {
		return "", err
	}
	// The caller only needs the file name; release our handle to the entry.
	defer diskcacheFile.File.Close()
	return diskcacheFile.File.Name(), err
}
// isLiteralEquality checks if the given regex matches literal strings exactly.
// Returns whether or not the regex is exact, along with the literal string if
// so.
func isLiteralEquality(expr string) (bool, string, error) {
	re, err := syntax.Parse(expr, syntax.Perl)
	if err != nil {
		return false, "", err
	}

	// An exact-match regex parses to a 3-element concat: [Begin, Literal, End].
	if re.Op != syntax.OpConcat || len(re.Sub) != 3 {
		return false, "", nil
	}

	begin, middle, end := re.Sub[0], re.Sub[1], re.Sub[2]
	if begin.Op != syntax.OpBeginLine && begin.Op != syntax.OpBeginText {
		return false, "", nil // does not start with ^
	}
	if end.Op != syntax.OpEndLine && end.Op != syntax.OpEndText {
		return false, "", nil // does not end with $
	}
	if middle.Op != syntax.OpLiteral {
		return false, "", nil // interior is not a plain literal
	}

	return true, string(middle.Rune), nil
}
// filterSymbols queries the symbols table with conditions derived from the
// search arguments and converts matching rows back into result.Symbol values.
func filterSymbols(ctx context.Context, db *sqlx.DB, args protocol.SearchArgs) (res []result.Symbol, err error) {
	span, _ := ot.StartSpanFromContext(ctx, "filterSymbols")
	defer func() {
		if err != nil {
			ext.Error.Set(span, true)
			span.LogFields(otlog.Error(err))
		}
		span.Finish()
	}()

	// Clamp the requested result count.
	const maxFirst = 500
	if args.First < 0 || args.First > maxFirst {
		args.First = maxFirst
	}

	// makeCondition builds zero or one condition for the given column/regex,
	// preferring an indexed equality comparison when the regex is an exact
	// literal match (see isLiteralEquality).
	makeCondition := func(column string, regex string) []*sqlf.Query {
		conditions := []*sqlf.Query{}

		if regex == "" {
			return conditions
		}

		if isExact, symbolName, err := isLiteralEquality(regex); isExact && err == nil {
			// It looks like the user is asking for exact matches, so use `=` to
			// get the speed boost from the index on the column.
			if args.IsCaseSensitive {
				conditions = append(conditions, sqlf.Sprintf(column+" = %s", symbolName))
			} else {
				// Match against the derived `<column>lowercase` column.
				conditions = append(conditions, sqlf.Sprintf(column+"lowercase = %s", strings.ToLower(symbolName)))
			}
		} else {
			if !args.IsCaseSensitive {
				regex = "(?i:" + regex + ")"
			}
			conditions = append(conditions, sqlf.Sprintf(column+" REGEXP %s", regex))
		}
		return conditions
	}

	// negateAll wraps every condition in NOT (used for the exclude pattern).
	negateAll := func(oldConditions []*sqlf.Query) []*sqlf.Query {
		newConditions := []*sqlf.Query{}
		for _, oldCondition := range oldConditions {
			newConditions = append(newConditions, sqlf.Sprintf("NOT %s", oldCondition))
		}
		return newConditions
	}

	var conditions []*sqlf.Query
	conditions = append(conditions, makeCondition("name", args.Query)...)
	for _, includePattern := range args.IncludePatterns {
		conditions = append(conditions, makeCondition("path", includePattern)...)
	}
	conditions = append(conditions, negateAll(makeCondition("path", args.ExcludePattern))...)

	var sqlQuery *sqlf.Query
	if len(conditions) == 0 {
		sqlQuery = sqlf.Sprintf("SELECT * FROM symbols LIMIT %s", args.First)
	} else {
		sqlQuery = sqlf.Sprintf("SELECT * FROM symbols WHERE %s LIMIT %s", sqlf.Join(conditions, "AND"), args.First)
	}

	var symbolsInDB []symbolInDB
	err = db.Select(&symbolsInDB, sqlQuery.Query(sqlf.PostgresBindVar), sqlQuery.Args()...)
	if err != nil {
		return nil, err
	}

	for _, symbolInDB := range symbolsInDB {
		res = append(res, symbolInDBToSymbol(symbolInDB))
	}
	span.SetTag("hits", len(res))
	return res, nil
}
// The version of the symbols database schema. This is included in the database
// filenames to prevent a newer version of the symbols service from attempting
// to read from a database created by an older (and likely incompatible) symbols
// service. Increment this when you change the database schema.
const symbolsDBVersion = 4

// symbolInDB is the same as `protocol.Symbol`, but with two additional columns:
// namelowercase and pathlowercase, which enable indexed case insensitive
// queries.
type symbolInDB struct {
	Name          string
	NameLowercase string // derived from `Name`
	Path          string
	PathLowercase string // derived from `Path`
	Line          int
	Kind          string
	Language      string
	Parent        string
	ParentKind    string
	Signature     string
	Pattern       string

	// Whether or not the symbol is local to the file.
	FileLimited bool
}
// symbolToSymbolInDB converts a result.Symbol into its database row
// representation, deriving the lowercase name/path columns that back the
// indexed case-insensitive queries.
func symbolToSymbolInDB(symbol result.Symbol) symbolInDB {
	var row symbolInDB
	row.Name = symbol.Name
	row.NameLowercase = strings.ToLower(symbol.Name)
	row.Path = symbol.Path
	row.PathLowercase = strings.ToLower(symbol.Path)
	row.Line = symbol.Line
	row.Kind = symbol.Kind
	row.Language = symbol.Language
	row.Parent = symbol.Parent
	row.ParentKind = symbol.ParentKind
	row.Signature = symbol.Signature
	row.Pattern = symbol.Pattern
	row.FileLimited = symbol.FileLimited
	return row
}
// symbolInDBToSymbol converts a database row back into a result.Symbol,
// dropping the derived lowercase columns.
func symbolInDBToSymbol(row symbolInDB) result.Symbol {
	var symbol result.Symbol
	symbol.Name = row.Name
	symbol.Path = row.Path
	symbol.Line = row.Line
	symbol.Kind = row.Kind
	symbol.Language = row.Language
	symbol.Parent = row.Parent
	symbol.ParentKind = row.ParentKind
	symbol.Signature = row.Signature
	symbol.Pattern = row.Pattern
	symbol.FileLimited = row.FileLimited
	return symbol
}
// writeAllSymbolsToNewDB fetches the repo@commit from gitserver, parses all the
// symbols, and writes them to the blank database file `dbFile`.
//
// The named return value err is read by the deferred function below to decide
// between committing and rolling back the transaction.
func (s *Service) writeAllSymbolsToNewDB(ctx context.Context, dbFile string, repoName api.RepoName, commitID api.CommitID) (err error) {
	db, err := sqlx.Open("sqlite3_with_regexp", dbFile)
	if err != nil {
		return err
	}
	defer db.Close()

	// Writing a bunch of rows into sqlite3 is much faster in a transaction.
	tx, err := db.Beginx()
	if err != nil {
		return err
	}
	defer func() {
		if err != nil {
			_ = tx.Rollback()
			return
		}
		err = tx.Commit()
	}()

	// The meta table records which commit this database was built from, so a
	// later request can diff against it (see updateSymbols / findNewestFile).
	_, err = tx.Exec(
		`CREATE TABLE IF NOT EXISTS meta (
		id INTEGER PRIMARY KEY CHECK (id = 0),
		revision TEXT NOT NULL
	)`)
	if err != nil {
		return err
	}

	_, err = tx.Exec(
		`INSERT INTO meta (id, revision) VALUES (0, ?)`,
		string(commitID))
	if err != nil {
		return err
	}

	// The column names are the lowercase version of fields in `symbolInDB`
	// because sqlx lowercases struct fields by default. See
	// http://jmoiron.github.io/sqlx/#query
	_, err = tx.Exec(
		`CREATE TABLE IF NOT EXISTS symbols (
		name VARCHAR(256) NOT NULL,
		namelowercase VARCHAR(256) NOT NULL,
		path VARCHAR(4096) NOT NULL,
		pathlowercase VARCHAR(4096) NOT NULL,
		line INT NOT NULL,
		kind VARCHAR(255) NOT NULL,
		language VARCHAR(255) NOT NULL,
		parent VARCHAR(255) NOT NULL,
		parentkind VARCHAR(255) NOT NULL,
		signature VARCHAR(255) NOT NULL,
		pattern VARCHAR(255) NOT NULL,
		filelimited BOOLEAN NOT NULL
	)`)
	if err != nil {
		return err
	}

	_, err = tx.Exec(`CREATE INDEX name_index ON symbols(name);`)
	if err != nil {
		return err
	}

	_, err = tx.Exec(`CREATE INDEX path_index ON symbols(path);`)
	if err != nil {
		return err
	}

	// `*lowercase_index` enables indexed case insensitive queries.
	_, err = tx.Exec(`CREATE INDEX namelowercase_index ON symbols(namelowercase);`)
	if err != nil {
		return err
	}

	_, err = tx.Exec(`CREATE INDEX pathlowercase_index ON symbols(pathlowercase);`)
	if err != nil {
		return err
	}

	// One prepared statement is reused for every symbol emitted by the parser.
	insertStatement, err := tx.PrepareNamed(insertQuery)
	if err != nil {
		return err
	}

	// Parse every file at this commit and insert one row per symbol.
	return s.parseUncached(ctx, repoName, commitID, []string{}, func(symbol result.Symbol) error {
		symbolInDBValue := symbolToSymbolInDB(symbol)
		_, err := insertStatement.Exec(&symbolInDBValue)
		return err
	})
}
// updateSymbols adds/removes rows from the DB based on a `git diff` between the meta.revision within the
// DB and the given commitID.
//
// The named return value err is read by the deferred function below to decide
// between committing and rolling back the transaction.
func (s *Service) updateSymbols(ctx context.Context, dbFile string, repoName api.RepoName, commitID api.CommitID, changes Changes) (err error) {
	db, err := sqlx.Open("sqlite3_with_regexp", dbFile)
	if err != nil {
		return err
	}
	defer db.Close()

	// Writing a bunch of rows into sqlite3 is much faster in a transaction.
	tx, err := db.Beginx()
	if err != nil {
		return err
	}
	defer func() {
		if err != nil {
			_ = tx.Rollback()
			return
		}
		err = tx.Commit()
	}()

	// Write new commit
	_, err = tx.Exec(`UPDATE meta SET revision = ?`, string(commitID))
	if err != nil {
		return err
	}

	deleteStatement, err := tx.Prepare("DELETE FROM symbols WHERE path = ?")
	if err != nil {
		return err
	}
	insertStatement, err := tx.PrepareNamed(insertQuery)
	if err != nil {
		return err
	}

	// Drop the existing rows for every touched path — added paths included,
	// presumably as a guard against stale rows (TODO confirm) — before
	// re-inserting fresh symbols for the added and modified files below.
	paths := []string{}
	paths = append(paths, changes.Added...)
	paths = append(paths, changes.Modified...)
	paths = append(paths, changes.Deleted...)
	for _, path := range paths {
		_, err := deleteStatement.Exec(path)
		if err != nil {
			return err
		}
	}

	// Re-parse only the added and modified files and insert one row per symbol.
	return s.parseUncached(ctx, repoName, commitID, append(changes.Added, changes.Modified...), func(symbol result.Symbol) error {
		symbolInDBValue := symbolToSymbolInDB(symbol)
		_, err := insertStatement.Exec(&symbolInDBValue)
		return err
	})
}
// insertQuery inserts a single symbol row. The named bind parameters match the
// lowercase column names that sqlx derives from symbolInDB's struct fields.
const insertQuery = `
INSERT INTO symbols ( name, namelowercase, path, pathlowercase, line, kind, language, parent, parentkind, signature, pattern, filelimited)
VALUES (:name, :namelowercase, :path, :pathlowercase, :line, :kind, :language, :parent, :parentkind, :signature, :pattern, :filelimited)`
// SanityCheck makes sure that go-sqlite3 was compiled with cgo by seeing if we
// can actually create a table in an in-memory database.
func SanityCheck() error {
	handle, err := sqlx.Open("sqlite3_with_regexp", ":memory:")
	if err != nil {
		return err
	}
	defer handle.Close()

	// If go-sqlite3 was not compiled with cgo, the error will be:
	//
	// > Binary was compiled with 'CGO_ENABLED=0', go-sqlite3 requires cgo to work. This is a stub
	_, err = handle.Exec("CREATE TABLE test (col TEXT);")
	return err
}
// findNewestFile lists the directory and returns the most recently modified
// .zip file's path (prepended with dir) and the commit recorded in that
// database's meta table.
//
// It returns empty values and a nil error when the directory does not exist
// or contains no .zip files.
func findNewestFile(dir string) (string, api.CommitID, error) {
	files, err := os.ReadDir(dir)
	if err != nil {
		// A missing cache directory simply means there is no previous
		// database to reuse. Any other failure (e.g. permissions) is a real
		// error worth surfacing rather than silently swallowing.
		if os.IsNotExist(err) {
			return "", "", nil
		}
		return "", "", err
	}

	var mostRecentTime time.Time
	newest := ""
	for _, fi := range files {
		if !fi.Type().IsRegular() || !strings.HasSuffix(fi.Name(), ".zip") {
			continue
		}
		info, err := fi.Info()
		if err != nil {
			return "", "", err
		}
		if newest == "" || info.ModTime().After(mostRecentTime) {
			mostRecentTime = info.ModTime()
			newest = filepath.Join(dir, fi.Name())
		}
	}
	if newest == "" {
		return "", "", nil
	}

	db, err := sqlx.Open("sqlite3_with_regexp", newest)
	if err != nil {
		return "", "", err
	}
	defer db.Close()

	// Read the commit this database was built from (written by
	// writeAllSymbolsToNewDB / updateSymbols).
	row := db.QueryRow(`SELECT revision FROM meta`)
	commit := api.CommitID("")
	if err = row.Scan(&commit); err != nil {
		return "", "", err
	}
	return newest, commit, nil
}
// copyFile is like the cp command.
func copyFile(from string, to string) error {
fromFile, err := os.Open(from)
if err != nil {
return err
}
defer fromFile.Close()
toFile, err := os.OpenFile(to, os.O_RDWR|os.O_CREATE, 0666)
if err != nil {
return err
}
defer toFile.Close()
_, err = io.Copy(toFile, fromFile)
return err
}
// Changes are the added, modified, and deleted paths reported by a git diff
// between two commits.
type Changes struct {
	Added    []string
	Modified []string
	Deleted  []string
}
// NewChanges returns a Changes value whose path slices are all initialized to
// empty (non-nil) slices.
func NewChanges() Changes {
	var changes Changes
	changes.Added = []string{}
	changes.Modified = []string{}
	changes.Deleted = []string{}
	return changes
}
// The maximum sum of bytes in paths in a diff when doing incremental indexing. Diffs bigger than this
// will not be incrementally indexed, and instead we will process all symbols. Without this limit, we
// could hit HTTP 431 (header fields too large) when sending the list of paths `git archive paths...`.
// The actual limit is somewhere between 372KB and 450KB, and we want to be well under that. 100KB seems
// safe.
//
// NOTE(review): the ALL_CAPS name is unconventional Go (MixedCaps is idiomatic);
// it is exported, so renaming it would break callers and is left as-is.
const MAX_TOTAL_PATHS_LENGTH = 100000

View File

@ -1,82 +0,0 @@
package symbols
import (
"context"
"fmt"
"os"
"path"
"testing"
"github.com/inconshreveable/log15"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/protocol"
"github.com/sourcegraph/sourcegraph/internal/testutil"
)
func BenchmarkSearch(b *testing.B) {
log15.Root().SetHandler(log15.LvlFilterHandler(log15.LvlError, log15.Root().GetHandler()))
service := Service{
FetchTar: testutil.FetchTarFromGithubWithPaths,
NewParser: NewParser,
Path: "/tmp/symbols-cache",
}
if err := service.Start(); err != nil {
b.Fatal(err)
}
ctx := context.Background()
b.ResetTimer()
indexTests := []protocol.SearchArgs{
{Repo: "github.com/sourcegraph/go-langserver", CommitID: "391a062a7d9977510e7e883e412769b07fed8b5e"},
{Repo: "github.com/moby/moby", CommitID: "6e5c2d639f67ae70f54d9f2285f3261440b074aa"},
}
queryTests := []protocol.SearchArgs{
{Repo: "github.com/sourcegraph/go-langserver", CommitID: "391a062a7d9977510e7e883e412769b07fed8b5e", Query: "^sortedImportRecord$", First: 10},
{Repo: "github.com/sourcegraph/go-langserver", CommitID: "391a062a7d9977510e7e883e412769b07fed8b5e", Query: "1234doesnotexist1234", First: 1},
{Repo: "github.com/moby/moby", CommitID: "6e5c2d639f67ae70f54d9f2285f3261440b074aa", Query: "^fsCache$", First: 10},
{Repo: "github.com/moby/moby", CommitID: "6e5c2d639f67ae70f54d9f2285f3261440b074aa", Query: "1234doesnotexist1234", First: 1},
}
runIndexTest := func(test protocol.SearchArgs) {
b.Run(fmt.Sprintf("indexing %s@%s", path.Base(string(test.Repo)), test.CommitID[:3]), func(b *testing.B) {
for n := 0; n < b.N; n++ {
tempFile, err := os.CreateTemp("", "")
if err != nil {
b.Fatal(err)
}
defer os.Remove(tempFile.Name())
err = service.writeAllSymbolsToNewDB(ctx, tempFile.Name(), test.Repo, test.CommitID)
if err != nil {
b.Fatal(err)
}
}
})
}
runQueryTest := func(test protocol.SearchArgs) {
b.Run(fmt.Sprintf("searching %s@%s %s", path.Base(string(test.Repo)), test.CommitID[:3], test.Query), func(b *testing.B) {
_, err := service.search(ctx, test)
if err != nil {
b.Fatal(err)
}
b.ResetTimer()
for n := 0; n < b.N; n++ {
_, err := service.search(ctx, test)
if err != nil {
b.Fatal(err)
}
}
})
}
for _, test := range indexTests {
runIndexTest(test)
}
for _, test := range queryTests {
runQueryTest(test)
}
}

View File

@ -1,131 +0,0 @@
// Package symbols implements the symbol search service.
package symbols
import (
"context"
"io"
"log"
"net/http"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
)
// Service is the symbols service.
type Service struct {
// FetchTar returns an io.ReadCloser to a tar archive of a repository at the specified Git
// remote URL and commit ID. If the error implements "BadRequest() bool", it will be used to
// determine if the error is a bad request (eg invalid repo).
FetchTar func(context.Context, api.RepoName, api.CommitID, []string) (io.ReadCloser, error)
// GitDiff returns the paths that have changed between two commits.
GitDiff func(context.Context, api.RepoName, api.CommitID, api.CommitID) (*Changes, error)
// MaxConcurrentFetchTar is the maximum number of concurrent calls allowed
// to FetchTar. It defaults to 15.
MaxConcurrentFetchTar int
NewParser func() (ctags.Parser, error)
// NumParserProcesses is the maximum number of ctags parser child processes to run.
NumParserProcesses int
// Path is the directory in which to store the cache.
Path string
// MaxCacheSizeBytes is the maximum size of the cache in bytes. Note:
// We can temporarily be larger than MaxCacheSizeBytes. When we go
// over MaxCacheSizeBytes we trigger delete files until we get below
// MaxCacheSizeBytes.
MaxCacheSizeBytes int64
// cache is the disk backed cache.
cache *diskcache.Store
// fetchSem is a semaphore to limit concurrent calls to FetchTar. The
// semaphore size is controlled by MaxConcurrentFetchTar
fetchSem chan int
// pool of ctags parser child processes
parsers chan ctags.Parser
}
// Start must be called before any requests are handled.
func (s *Service) Start() error {
if err := s.startParsers(); err != nil {
return err
}
if s.MaxConcurrentFetchTar == 0 {
s.MaxConcurrentFetchTar = 15
}
s.fetchSem = make(chan int, s.MaxConcurrentFetchTar)
s.cache = &diskcache.Store{
Dir: s.Path,
Component: "symbols",
BackgroundTimeout: 20 * time.Minute,
}
go s.watchAndEvict()
return nil
}
// Handler returns the http.Handler that should be used to serve requests.
func (s *Service) Handler() http.Handler {
if s.parsers == nil {
panic("must call StartParserPool first")
}
mux := http.NewServeMux()
mux.HandleFunc("/search", s.handleSearch)
mux.HandleFunc("/healthz", s.handleHealthCheck)
return mux
}
func (s *Service) handleHealthCheck(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
_, err := w.Write([]byte("Ok"))
if err != nil {
log.Printf("failed to write response to health check, err: %s", err)
}
}
// watchAndEvict is a loop which periodically checks the size of the cache and
// evicts/deletes items if the store gets too large.
func (s *Service) watchAndEvict() {
if s.MaxCacheSizeBytes == 0 {
return
}
for {
time.Sleep(10 * time.Second)
stats, err := s.cache.Evict(s.MaxCacheSizeBytes)
if err != nil {
log.Printf("failed to Evict: %s", err)
continue
}
cacheSizeBytes.Set(float64(stats.CacheSize))
evictions.Add(float64(stats.Evicted))
}
}
var (
cacheSizeBytes = promauto.NewGauge(prometheus.GaugeOpts{
Name: "symbols_store_cache_size_bytes",
Help: "The total size of items in the on disk cache.",
})
evictions = promauto.NewCounter(prometheus.CounterOpts{
Name: "symbols_store_evictions",
Help: "The total number of items evicted from the cache.",
})
)

View File

@ -1,184 +0,0 @@
package symbols
import (
"archive/tar"
"bytes"
"context"
"io"
"net/http/httptest"
"os"
"reflect"
"testing"
"github.com/sourcegraph/go-ctags"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/httpcli"
"github.com/sourcegraph/sourcegraph/internal/search"
"github.com/sourcegraph/sourcegraph/internal/search/result"
symbolsclient "github.com/sourcegraph/sourcegraph/internal/symbols"
)
func TestIsLiteralEquality(t *testing.T) {
type TestCase struct {
Regex string
WantOk bool
WantLiteral string
}
for _, test := range []TestCase{
{Regex: `^foo$`, WantLiteral: "foo", WantOk: true},
{Regex: `^[f]oo$`, WantLiteral: `foo`, WantOk: true},
{Regex: `^\\$`, WantLiteral: `\`, WantOk: true},
{Regex: `^\$`, WantOk: false},
{Regex: `^\($`, WantLiteral: `(`, WantOk: true},
{Regex: `\\`, WantOk: false},
{Regex: `\$`, WantOk: false},
{Regex: `\(`, WantOk: false},
{Regex: `foo$`, WantOk: false},
{Regex: `(^foo$|^bar$)`, WantOk: false},
} {
gotOk, gotLiteral, err := isLiteralEquality(test.Regex)
if err != nil {
t.Fatal(err)
}
if gotOk != test.WantOk {
t.Errorf("isLiteralEquality(%s) returned %t, wanted %t", test.Regex, gotOk, test.WantOk)
}
if gotLiteral != test.WantLiteral {
t.Errorf(
"isLiteralEquality(%s) returned the literal %s, wanted %s",
test.Regex,
gotLiteral,
test.WantLiteral,
)
}
}
}
func TestService(t *testing.T) {
tmpDir, err := os.MkdirTemp("", "")
if err != nil {
t.Fatal(err)
}
defer func() { os.RemoveAll(tmpDir) }()
files := map[string]string{"a.js": "var x = 1"}
service := Service{
FetchTar: func(ctx context.Context, repo api.RepoName, commit api.CommitID, paths []string) (io.ReadCloser, error) {
return createTar(files)
},
NewParser: func() (ctags.Parser, error) {
return mockParser{"x", "y"}, nil
},
Path: tmpDir,
}
if err := service.Start(); err != nil {
t.Fatal(err)
}
server := httptest.NewServer(service.Handler())
defer server.Close()
client := symbolsclient.Client{
URL: server.URL,
HTTPClient: httpcli.InternalDoer,
}
x := result.Symbol{Name: "x", Path: "a.js"}
y := result.Symbol{Name: "y", Path: "a.js"}
tests := map[string]struct {
args search.SymbolsParameters
want result.Symbols
}{
"simple": {
args: search.SymbolsParameters{First: 10},
want: []result.Symbol{x, y},
},
"onematch": {
args: search.SymbolsParameters{Query: "x", First: 10},
want: []result.Symbol{x},
},
"nomatches": {
args: search.SymbolsParameters{Query: "foo", First: 10},
want: nil,
},
"caseinsensitiveexactmatch": {
args: search.SymbolsParameters{Query: "^X$", First: 10},
want: []result.Symbol{x},
},
"casesensitiveexactmatch": {
args: search.SymbolsParameters{Query: "^x$", IsCaseSensitive: true, First: 10},
want: []result.Symbol{x},
},
"casesensitivenoexactmatch": {
args: search.SymbolsParameters{Query: "^X$", IsCaseSensitive: true, First: 10},
want: nil,
},
"caseinsensitiveexactpathmatch": {
args: search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, First: 10},
want: []result.Symbol{x, y},
},
"casesensitiveexactpathmatch": {
args: search.SymbolsParameters{IncludePatterns: []string{"^a.js$"}, IsCaseSensitive: true, First: 10},
want: []result.Symbol{x, y},
},
"casesensitivenoexactpathmatch": {
args: search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, IsCaseSensitive: true, First: 10},
want: nil,
},
"exclude": {
args: search.SymbolsParameters{ExcludePattern: "a.js", IsCaseSensitive: true, First: 10},
want: nil,
},
}
for label, test := range tests {
t.Run(label, func(t *testing.T) {
result, err := client.Search(context.Background(), test.args)
if err != nil {
t.Fatal(err)
}
if result != nil && !reflect.DeepEqual(*result, test.want) {
t.Errorf("got %+v, want %+v", *result, test.want)
}
if result == nil && test.want != nil {
t.Errorf("got nil, want %+v", test.want)
}
})
}
}
func createTar(files map[string]string) (io.ReadCloser, error) {
buf := new(bytes.Buffer)
w := tar.NewWriter(buf)
for name, body := range files {
hdr := &tar.Header{
Name: name,
Mode: 0o600,
Size: int64(len(body)),
}
if err := w.WriteHeader(hdr); err != nil {
return nil, err
}
if _, err := w.Write([]byte(body)); err != nil {
return nil, err
}
}
err := w.Close()
if err != nil {
return nil, err
}
return io.NopCloser(bytes.NewReader(buf.Bytes())), nil
}
type mockParser []string
func (m mockParser) Parse(name string, content []byte) ([]*ctags.Entry, error) {
entries := make([]*ctags.Entry, len(m))
for i, name := range m {
entries[i] = &ctags.Entry{Name: name, Path: "a.js"}
}
return entries, nil
}
func (mockParser) Close() {}

View File

@ -1,4 +1,4 @@
package protocol
package types
import (
"github.com/sourcegraph/sourcegraph/internal/api"

View File

@ -3,48 +3,65 @@
package main
import (
"bytes"
"context"
"fmt"
"io"
"log"
"net"
"net/http"
"os"
"os/signal"
"runtime"
"strconv"
"time"
"github.com/inconshreveable/log15"
"github.com/opentracing/opentracing-go"
"github.com/prometheus/client_golang/prometheus"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/symbols"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/api"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
sqlite "github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/janitor"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/database/writer"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
"github.com/sourcegraph/sourcegraph/internal/conf"
"github.com/sourcegraph/sourcegraph/internal/debugserver"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
"github.com/sourcegraph/sourcegraph/internal/env"
"github.com/sourcegraph/sourcegraph/internal/gitserver"
"github.com/sourcegraph/sourcegraph/internal/goroutine"
"github.com/sourcegraph/sourcegraph/internal/httpserver"
"github.com/sourcegraph/sourcegraph/internal/logging"
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/profiler"
"github.com/sourcegraph/sourcegraph/internal/sentry"
"github.com/sourcegraph/sourcegraph/internal/trace"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
"github.com/sourcegraph/sourcegraph/internal/tracer"
"github.com/sourcegraph/sourcegraph/internal/vcs/git"
)
const port = "3184"
const addr = ":3184"
func main() {
var (
cacheDir = env.Get("CACHE_DIR", "/tmp/symbols-cache", "directory to store cached symbols")
cacheSizeMB = env.Get("SYMBOLS_CACHE_SIZE_MB", "100000", "maximum size of the disk cache in megabytes")
ctagsProcesses = env.Get("CTAGS_PROCESSES", strconv.Itoa(runtime.GOMAXPROCS(0)), "number of ctags child processes to run")
sanityCheck = env.Get("SANITY_CHECK", "false", "check that go-sqlite3 works then exit 0 if it's ok or 1 if not")
)
config.Load()
if sanityCheck == "true" {
// Set up Google Cloud Profiler when running in Cloud
if err := profiler.Init(); err != nil {
log.Fatalf("Failed to start profiler: %v", err)
}
env.Lock()
env.HandleHelpFlag()
conf.Init()
logging.Init()
tracer.Init(conf.DefaultClient())
sentry.Init(conf.DefaultClient())
trace.Init()
if err := config.Validate(); err != nil {
log.Fatalf("Failed to load configuration: %s", err)
}
if config.sanityCheck {
fmt.Print("Running sanity check...")
if err := symbols.SanityCheck(); err != nil {
if err := sqlite.SanityCheck(); err != nil {
fmt.Println("failed ❌", err)
os.Exit(1)
}
@ -53,112 +70,54 @@ func main() {
os.Exit(0)
}
env.Lock()
env.HandleHelpFlag()
log.SetFlags(0)
conf.Init()
logging.Init()
tracer.Init(conf.DefaultClient())
sentry.Init(conf.DefaultClient())
trace.Init()
// Initialize tracing/metrics
observationContext := &observation.Context{
Logger: log15.Root(),
Tracer: &trace.Tracer{Tracer: opentracing.GlobalTracer()},
Registerer: prometheus.DefaultRegisterer,
}
// Ready immediately
// Start debug server
ready := make(chan struct{})
close(ready)
go debugserver.NewServerRoutine(ready).Start()
service := symbols.Service{
FetchTar: func(ctx context.Context, repo api.RepoName, commit api.CommitID, paths []string) (io.ReadCloser, error) {
return gitserver.DefaultClient.Archive(ctx, repo, gitserver.ArchiveOptions{Treeish: string(commit), Format: "tar", Paths: paths})
},
GitDiff: func(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) (*symbols.Changes, error) {
output, err := git.DiffSymbols(ctx, repo, commitA, commitB)
if err != nil {
return nil, err
}
ctagsParserFactory := parser.NewCtagsParserFactory(
config.ctagsCommand,
config.ctagsPatternLengthLimit,
config.ctagsLogErrors,
config.ctagsDebugLogs,
)
// The output is a repeated sequence of:
//
// <status> NUL <path> NUL
//
// where NUL is the 0 byte.
//
// Example:
//
// M NUL cmd/symbols/internal/symbols/fetch.go NUL
changes := symbols.NewChanges()
slices := bytes.Split(output, []byte{0})
for i := 0; i < len(slices)-1; i += 2 {
statusIdx := i
fileIdx := i + 1
if len(slices[statusIdx]) == 0 {
return nil, fmt.Errorf("unrecognized git diff output (from repo %q, commitA %q, commitB %q): status was empty at index %d", repo, commitA, commitB, i)
}
status := slices[statusIdx][0]
path := string(slices[fileIdx])
switch status {
case 'A':
changes.Added = append(changes.Added, path)
case 'M':
changes.Modified = append(changes.Modified, path)
case 'D':
changes.Deleted = append(changes.Deleted, path)
}
}
return &changes, nil
},
NewParser: symbols.NewParser,
Path: cacheDir,
cache := &diskcache.Store{
Dir: config.cacheDir,
Component: "symbols",
BackgroundTimeout: 20 * time.Minute,
}
if mb, err := strconv.ParseInt(cacheSizeMB, 10, 64); err != nil {
log.Fatalf("Invalid SYMBOLS_CACHE_SIZE_MB: %s", err)
} else {
service.MaxCacheSizeBytes = mb * 1000 * 1000
}
var err error
service.NumParserProcesses, err = strconv.Atoi(ctagsProcesses)
parserPool, err := parser.NewParserPool(ctagsParserFactory, config.numCtagsProcesses)
if err != nil {
log.Fatalf("Invalid CTAGS_PROCESSES: %s", err)
}
if err := service.Start(); err != nil {
log.Fatalln("Start:", err)
log.Fatalf("Failed to parser pool: %s", err)
}
handler := ot.Middleware(trace.HTTPTraceMiddleware(service.Handler(), conf.DefaultClient()))
database.Init()
gitserverClient := gitserver.NewClient(observationContext)
repositoryFetcher := fetcher.NewRepositoryFetcher(gitserverClient, 15, observationContext)
parser := parser.NewParser(parserPool, repositoryFetcher, observationContext)
databaseWriter := writer.NewDatabaseWriter(config.cacheDir, gitserverClient, parser)
cachedDatabaseWriter := writer.NewCachedDatabaseWriter(databaseWriter, cache)
apiHandler := api.NewHandler(cachedDatabaseWriter, observationContext)
host := ""
if env.InsecureDev {
host = "127.0.0.1"
}
addr := net.JoinHostPort(host, port)
server := &http.Server{
server := httpserver.NewFromAddr(addr, &http.Server{
ReadTimeout: 75 * time.Second,
WriteTimeout: 10 * time.Minute,
Addr: addr,
Handler: handler,
}
go shutdownOnSIGINT(server)
Handler: ot.Middleware(trace.HTTPTraceMiddleware(apiHandler, conf.DefaultClient())),
})
log15.Info("symbols: listening", "addr", addr)
err = server.ListenAndServe()
if err != http.ErrServerClosed {
log.Fatal(err)
}
}
evictionInterval := time.Second * 10
cacheSizeBytes := int64(config.cacheSizeMB) * 1000 * 1000
cacheEvicter := janitor.NewCacheEvicter(evictionInterval, cache, cacheSizeBytes, janitor.NewMetrics(observationContext))
func shutdownOnSIGINT(s *http.Server) {
c := make(chan os.Signal, 1)
signal.Notify(c, os.Interrupt)
<-c
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
err := s.Shutdown(ctx)
if err != nil {
log.Fatal("graceful server shutdown failed, will exit:", err)
}
// Mark health server as ready and go!
close(ready)
goroutine.MonitorBackgroundRoutines(context.Background(), server, cacheEvicter)
}

View File

@ -19,6 +19,8 @@ allowed_prefix=(
github.com/sourcegraph/sourcegraph/enterprise/cmd/worker
github.com/sourcegraph/sourcegraph/enterprise/cmd/repo-updater
github.com/sourcegraph/sourcegraph/enterprise/cmd/precise-code-intel-
# Doesn't connect but uses db internals for use with sqlite
github.com/sourcegraph/sourcegraph/cmd/symbols
)
# Create regex ^(a|b|c)

View File

@ -4145,52 +4145,6 @@ with your code hosts connections or networking issues affecting communication wi
<br />
## symbols: store_fetch_failures
<p class="subtitle">store fetch failures every 5m</p>
**Descriptions**
- <span class="badge badge-warning">warning</span> symbols: 5+ store fetch failures every 5m
**Possible solutions**
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-store-fetch-failures).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"warning_symbols_store_fetch_failures"
]
```
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<br />
## symbols: current_fetch_queue_size
<p class="subtitle">current fetch queue size</p>
**Descriptions**
- <span class="badge badge-warning">warning</span> symbols: 25+ current fetch queue size
**Possible solutions**
- Learn more about the related dashboard panel in the [dashboards reference](./dashboards.md#symbols-current-fetch-queue-size).
- **Silence this alert:** If you are aware of this alert and want to silence notifications for it, add the following to your site configuration and set a reminder to re-evaluate the alert:
```json
"observability.silenceAlerts": [
"warning_symbols_current_fetch_queue_size"
]
```
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<br />
## symbols: frontend_internal_api_error_responses
<p class="subtitle">frontend-internal API error responses every 5m by route</p>

View File

@ -10689,11 +10689,13 @@ Query: `sum by(app) (up{app=~".*searcher"}) / count by (app) (up{app=~".*searche
To see this dashboard, visit `/-/debug/grafana/d/symbols/symbols` on your Sourcegraph instance.
#### symbols: store_fetch_failures
### Symbols: Codeintel: Symbols API
<p class="subtitle">Store fetch failures every 5m</p>
#### symbols: codeintel_symbols_api_total
Refer to the [alert solutions reference](./alert_solutions.md#symbols-store-fetch-failures) for 1 alert related to this panel.
<p class="subtitle">Aggregate API operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100000` on your Sourcegraph instance.
@ -10702,17 +10704,17 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100000` o
<details>
<summary>Technical details</summary>
Query: `sum(increase(symbols_store_fetch_failed[5m]))`
Query: `sum(increase(src_codeintel_symbols_api_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: current_fetch_queue_size
#### symbols: codeintel_symbols_api_99th_percentile_duration
<p class="subtitle">Current fetch queue size</p>
<p class="subtitle">Aggregate successful API operation duration distribution over 5m</p>
Refer to the [alert solutions reference](./alert_solutions.md#symbols-current-fetch-queue-size) for 1 alert related to this panel.
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100001` on your Sourcegraph instance.
@ -10721,7 +10723,762 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100001` o
<details>
<summary>Technical details</summary>
Query: `sum(symbols_store_fetch_queue_size)`
Query: `sum by (le)(rate(src_codeintel_symbols_api_duration_seconds_bucket{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_api_errors_total
<p class="subtitle">Aggregate API operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100002` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_api_error_rate
<p class="subtitle">Aggregate API operation error rate over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100003` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m])) / (sum(increase(src_codeintel_symbols_api_total{job=~"^symbols.*"}[5m])) + sum(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m]))) * 100`
</details>
<br />
#### symbols: codeintel_symbols_api_total
<p class="subtitle">API operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100010` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_api_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_api_99th_percentile_duration
<p class="subtitle">99th percentile successful API operation duration over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100011` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_symbols_api_duration_seconds_bucket{job=~"^symbols.*"}[5m])))`
</details>
<br />
#### symbols: codeintel_symbols_api_errors_total
<p class="subtitle">API operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100012` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_api_error_rate
<p class="subtitle">API operation error rate over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100013` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m])) / (sum by (op)(increase(src_codeintel_symbols_api_total{job=~"^symbols.*"}[5m])) + sum by (op)(increase(src_codeintel_symbols_api_errors_total{job=~"^symbols.*"}[5m]))) * 100`
</details>
<br />
### Symbols: Codeintel: Symbols parser
#### symbols: symbols
<p class="subtitle">In-flight parse jobs</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100100` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `max(src_codeintel_symbols_parsing{job=~"^symbols.*"})`
</details>
<br />
#### symbols: symbols
<p class="subtitle">Parser queue size</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100101` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `max(src_codeintel_symbols_parse_queue_size{job=~"^symbols.*"})`
</details>
<br />
#### symbols: symbols
<p class="subtitle">Parse queue timeouts</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100102` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `max(src_codeintel_symbols_parse_queue_timeouts_total{job=~"^symbols.*"})`
</details>
<br />
#### symbols: symbols
<p class="subtitle">Parse failures every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100103` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `rate(src_codeintel_symbols_parse_failed_total{job=~"^symbols.*"}[5m])`
</details>
<br />
#### symbols: codeintel_symbols_parser_total
<p class="subtitle">Aggregate parser operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100110` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_parser_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_parser_99th_percentile_duration
<p class="subtitle">Aggregate successful parser operation duration distribution over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100111` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (le)(rate(src_codeintel_symbols_parser_duration_seconds_bucket{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_parser_errors_total
<p class="subtitle">Aggregate parser operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100112` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_parser_error_rate
<p class="subtitle">Aggregate parser operation error rate over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100113` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m])) / (sum(increase(src_codeintel_symbols_parser_total{job=~"^symbols.*"}[5m])) + sum(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m]))) * 100`
</details>
<br />
#### symbols: codeintel_symbols_parser_total
<p class="subtitle">Parser operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100120` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_parser_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_parser_99th_percentile_duration
<p class="subtitle">99th percentile successful parser operation duration over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100121` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_symbols_parser_duration_seconds_bucket{job=~"^symbols.*"}[5m])))`
</details>
<br />
#### symbols: codeintel_symbols_parser_errors_total
<p class="subtitle">Parser operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100122` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_parser_error_rate
<p class="subtitle">Parser operation error rate over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100123` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m])) / (sum by (op)(increase(src_codeintel_symbols_parser_total{job=~"^symbols.*"}[5m])) + sum by (op)(increase(src_codeintel_symbols_parser_errors_total{job=~"^symbols.*"}[5m]))) * 100`
</details>
<br />
### Symbols: Codeintel: Symbols cache janitor
#### symbols: symbols
<p class="subtitle">Size in bytes of the on-disk cache</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100200` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `src_codeintel_symbols_store_cache_size_bytes`
</details>
<br />
#### symbols: symbols
<p class="subtitle">Cache eviction operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100201` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `rate(src_codeintel_symbols_store_evictions_total[5m])`
</details>
<br />
#### symbols: symbols
<p class="subtitle">Cache eviction operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100202` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `rate(src_codeintel_symbols_store_errors_total[5m])`
</details>
<br />
### Symbols: Codeintel: Symbols repository fetcher
#### symbols: symbols
<p class="subtitle">In-flight repository fetch operations</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100300` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `src_codeintel_symbols_fetching`
</details>
<br />
#### symbols: symbols
<p class="subtitle">Repository fetch queue size</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100301` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `max(src_codeintel_symbols_fetch_queue_size{job=~"^symbols.*"})`
</details>
<br />
#### symbols: codeintel_symbols_repository_fetcher_total
<p class="subtitle">Aggregate fetcher operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100310` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_repository_fetcher_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_repository_fetcher_99th_percentile_duration
<p class="subtitle">Aggregate successful fetcher operation duration distribution over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100311` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (le)(rate(src_codeintel_symbols_repository_fetcher_duration_seconds_bucket{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_repository_fetcher_errors_total
<p class="subtitle">Aggregate fetcher operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100312` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_repository_fetcher_error_rate
<p class="subtitle">Aggregate fetcher operation error rate over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100313` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m])) / (sum(increase(src_codeintel_symbols_repository_fetcher_total{job=~"^symbols.*"}[5m])) + sum(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m]))) * 100`
</details>
<br />
#### symbols: codeintel_symbols_repository_fetcher_total
<p class="subtitle">Fetcher operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100320` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_repository_fetcher_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_repository_fetcher_99th_percentile_duration
<p class="subtitle">99th percentile successful fetcher operation duration over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100321` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_symbols_repository_fetcher_duration_seconds_bucket{job=~"^symbols.*"}[5m])))`
</details>
<br />
#### symbols: codeintel_symbols_repository_fetcher_errors_total
<p class="subtitle">Fetcher operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100322` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_repository_fetcher_error_rate
<p class="subtitle">Fetcher operation error rate over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100323` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m])) / (sum by (op)(increase(src_codeintel_symbols_repository_fetcher_total{job=~"^symbols.*"}[5m])) + sum by (op)(increase(src_codeintel_symbols_repository_fetcher_errors_total{job=~"^symbols.*"}[5m]))) * 100`
</details>
<br />
### Symbols: Codeintel: Symbols gitserver client
#### symbols: codeintel_symbols_gitserver_total
<p class="subtitle">Aggregate gitserver client operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100400` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_gitserver_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_gitserver_99th_percentile_duration
<p class="subtitle">Aggregate successful gitserver client operation duration distribution over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100401` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (le)(rate(src_codeintel_symbols_gitserver_duration_seconds_bucket{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_gitserver_errors_total
<p class="subtitle">Aggregate gitserver client operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100402` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_gitserver_error_rate
<p class="subtitle">Aggregate gitserver client operation error rate over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100403` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m])) / (sum(increase(src_codeintel_symbols_gitserver_total{job=~"^symbols.*"}[5m])) + sum(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m]))) * 100`
</details>
<br />
#### symbols: codeintel_symbols_gitserver_total
<p class="subtitle">Gitserver client operations every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100410` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_gitserver_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_gitserver_99th_percentile_duration
<p class="subtitle">99th percentile successful gitserver client operation duration over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100411` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `histogram_quantile(0.99, sum by (le,op)(rate(src_codeintel_symbols_gitserver_duration_seconds_bucket{job=~"^symbols.*"}[5m])))`
</details>
<br />
#### symbols: codeintel_symbols_gitserver_errors_total
<p class="subtitle">Gitserver client operation errors every 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100412` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m]))`
</details>
<br />
#### symbols: codeintel_symbols_gitserver_error_rate
<p class="subtitle">Gitserver client operation error rate over 5m</p>
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100413` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
<details>
<summary>Technical details</summary>
Query: `sum by (op)(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m])) / (sum by (op)(increase(src_codeintel_symbols_gitserver_total{job=~"^symbols.*"}[5m])) + sum by (op)(increase(src_codeintel_symbols_gitserver_errors_total{job=~"^symbols.*"}[5m]))) * 100`
</details>
@ -10735,7 +11492,7 @@ Query: `sum(symbols_store_fetch_queue_size)`
Refer to the [alert solutions reference](./alert_solutions.md#symbols-frontend-internal-api-error-responses) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100100` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100500` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10766,7 +11523,7 @@ value change independent of deployment events (such as an upgrade), it could ind
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100200` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100600` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10785,7 +11542,7 @@ Query: `count by(name) ((time() - container_last_seen{name=~"^symbols.*"}) > 60)
Refer to the [alert solutions reference](./alert_solutions.md#symbols-container-cpu-usage) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100201` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100601` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10804,7 +11561,7 @@ Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^symbols.*"}`
Refer to the [alert solutions reference](./alert_solutions.md#symbols-container-memory-usage) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100202` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100602` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10826,7 +11583,7 @@ When extremely high, this can indicate a resource usage problem, or can cause pr
This panel has no related alerts.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100203` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100603` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Core application team](https://handbook.sourcegraph.com/engineering/core-application).*</sub>
@ -10847,7 +11604,7 @@ Query: `sum by(name) (rate(container_fs_reads_total{name=~"^symbols.*"}[1h]) + r
Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100300` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100700` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10866,7 +11623,7 @@ Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{na
Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-memory-usage-long-term) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100301` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100701` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10885,7 +11642,7 @@ Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^s
Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100310` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100710` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10904,7 +11661,7 @@ Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^symb
Refer to the [alert solutions reference](./alert_solutions.md#symbols-provisioning-container-memory-usage-short-term) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100311` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100711` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10927,7 +11684,7 @@ A high value here indicates a possible goroutine leak.
Refer to the [alert solutions reference](./alert_solutions.md#symbols-go-goroutines) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100400` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100800` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10946,7 +11703,7 @@ Query: `max by(instance) (go_goroutines{job=~".*symbols"})`
Refer to the [alert solutions reference](./alert_solutions.md#symbols-go-gc-duration-seconds) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100401` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100801` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>
@ -10967,7 +11724,7 @@ Query: `max by(instance) (go_gc_duration_seconds{job=~".*symbols"})`
Refer to the [alert solutions reference](./alert_solutions.md#symbols-pods-available-percentage) for 1 alert related to this panel.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100500` on your Sourcegraph instance.
To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100900` on your Sourcegraph instance.
<sub>*Managed by the [Sourcegraph Code-intel team](https://handbook.sourcegraph.com/engineering/code-intelligence).*</sub>

View File

@ -134,6 +134,7 @@ func (s *Store) CreateBatchSpecWorkspace(ctx context.Context, ws ...*btypes.Batc
ctx,
s.Handle().DB(),
"batch_spec_workspaces",
batch.MaxNumPostgresParameters,
batchSpecWorkspaceInsertColumns,
"",
BatchSpecWorkspaceColums,

View File

@ -107,6 +107,7 @@ func (s *Store) CreateChangesetJob(ctx context.Context, cs ...*btypes.ChangesetJ
ctx,
s.Handle().DB(),
"changeset_jobs",
batch.MaxNumPostgresParameters,
changesetJobInsertColumns,
"",
changesetJobColumns,

View File

@ -127,6 +127,7 @@ func (s *Store) CreateChangesetSpec(ctx context.Context, cs ...*btypes.Changeset
ctx,
s.Handle().DB(),
"changeset_specs",
batch.MaxNumPostgresParameters,
changesetSpecInsertColumns,
"",
changesetSpecColumns,

View File

@ -84,6 +84,7 @@ func CreateBatchSpecWorkspaceExecutionJob(ctx context.Context, s createBatchSpec
ctx,
s.Handle().DB(),
"batch_spec_workspace_execution_jobs",
batch.MaxNumPostgresParameters,
[]string{"batch_spec_workspace_id", "created_at", "updated_at"},
"",
[]string{

View File

@ -423,6 +423,7 @@ func (s *Store) writeVisibleUploads(ctx context.Context, sanitizedInput *sanitiz
ctx,
s.Handle().DB(),
"t_lsif_nearest_uploads",
batch.MaxNumPostgresParameters,
[]string{"commit_bytea", "uploads"},
sanitizedInput.nearestUploadsRowValues,
)
@ -436,6 +437,7 @@ func (s *Store) writeVisibleUploads(ctx context.Context, sanitizedInput *sanitiz
ctx,
s.Handle().DB(),
"t_lsif_nearest_uploads_links",
batch.MaxNumPostgresParameters,
[]string{"commit_bytea", "ancestor_commit_bytea", "distance"},
sanitizedInput.nearestUploadsLinksRowValues,
)
@ -448,6 +450,7 @@ func (s *Store) writeVisibleUploads(ctx context.Context, sanitizedInput *sanitiz
ctx,
s.Handle().DB(),
"t_lsif_uploads_visible_at_tip",
batch.MaxNumPostgresParameters,
[]string{"upload_id", "branch_or_tag_name", "is_default_branch"},
sanitizedInput.uploadsVisibleAtTipRowValues,
)

View File

@ -38,6 +38,7 @@ func (s *Store) UpdatePackages(ctx context.Context, dumpID int, packages []preci
ctx,
tx.Handle().DB(),
"t_lsif_packages",
batch.MaxNumPostgresParameters,
[]string{"scheme", "name", "version"},
loadPackagesChannel(packages),
); err != nil {

View File

@ -38,6 +38,7 @@ func (s *Store) UpdatePackageReferences(ctx context.Context, dumpID int, referen
ctx,
tx.Handle().DB(),
"t_lsif_references",
batch.MaxNumPostgresParameters,
[]string{"scheme", "name", "version", "filter"},
loadReferencesChannel(references),
); err != nil {

View File

@ -303,6 +303,6 @@ ON CONFLICT DO NOTHING
// the maximum number of CPUs that can be executing simultaneously.
func withBatchInserter(ctx context.Context, db dbutil.DB, tableName string, columns []string, f func(inserter *batch.Inserter) error) (err error) {
return goroutine.RunWorkers(goroutine.SimplePoolWorker(func() error {
return batch.WithInserter(ctx, db, tableName, columns, f)
return batch.WithInserter(ctx, db, tableName, batch.MaxNumPostgresParameters, columns, f)
}))
}

View File

@ -434,6 +434,7 @@ func (s *Store) upsertTags(ctx context.Context, tags []string, tableSuffix strin
ctx,
tx.Handle().DB(),
"t_lsif_data_docs_search_tags_"+tableSuffix,
batch.MaxNumPostgresParameters,
[]string{"tags", "tsv"},
inserter,
); err != nil {
@ -566,6 +567,7 @@ func (s *Store) replaceSearchRecords(
ctx,
tx.Handle().DB(),
"t_lsif_data_docs_search_"+tableSuffix,
batch.MaxNumPostgresParameters,
[]string{
"path_id",
"detail",

View File

@ -366,6 +366,7 @@ func (m *Migrator) updateBatch(ctx context.Context, tx *lsifstore.Store, dumpID,
ctx,
tx.Handle().DB(),
temporaryTableName,
batch.MaxNumPostgresParameters,
m.temporaryTableFieldNames,
rowValues,
); err != nil {

View File

@ -34,8 +34,8 @@ type ReturningScanner func(rows *sql.Rows) error
// column names, then reads from the given channel as if they specify values for a single row.
// The inserter will be flushed and any error that occurred during insertion or flush will be
// returned.
func InsertValues(ctx context.Context, db dbutil.DB, tableName string, columnNames []string, values <-chan []interface{}) error {
return WithInserter(ctx, db, tableName, columnNames, func(inserter *Inserter) error {
func InsertValues(ctx context.Context, db dbutil.DB, tableName string, maxNumParameters int, columnNames []string, values <-chan []interface{}) error {
return WithInserter(ctx, db, tableName, maxNumParameters, columnNames, func(inserter *Inserter) error {
outer:
for {
select {
@ -66,10 +66,11 @@ func WithInserter(
ctx context.Context,
db dbutil.DB,
tableName string,
maxNumParameters int,
columnNames []string,
f func(inserter *Inserter) error,
) (err error) {
inserter := NewInserter(ctx, db, tableName, columnNames...)
inserter := NewInserter(ctx, db, tableName, maxNumParameters, columnNames...)
return with(ctx, inserter, f)
}
@ -82,13 +83,14 @@ func WithInserterWithReturn(
ctx context.Context,
db dbutil.DB,
tableName string,
maxNumParameters int,
columnNames []string,
onConflictClause string,
returningColumnNames []string,
returningScanner ReturningScanner,
f func(inserter *Inserter) error,
) (err error) {
inserter := NewInserterWithReturn(ctx, db, tableName, columnNames, onConflictClause, returningColumnNames, returningScanner)
inserter := NewInserterWithReturn(ctx, db, tableName, maxNumParameters, columnNames, onConflictClause, returningColumnNames, returningScanner)
return with(ctx, inserter, f)
}
@ -104,8 +106,8 @@ func with(ctx context.Context, inserter *Inserter, f func(inserter *Inserter) er
// NewInserter creates a new batch inserter using the given database handle, table name,
// and column names. For performance and atomicity, handle should be a transaction.
func NewInserter(ctx context.Context, db dbutil.DB, tableName string, columnNames ...string) *Inserter {
return NewInserterWithReturn(ctx, db, tableName, columnNames, "", nil, nil)
func NewInserter(ctx context.Context, db dbutil.DB, tableName string, maxNumParameters int, columnNames ...string) *Inserter {
return NewInserterWithReturn(ctx, db, tableName, maxNumParameters, columnNames, "", nil, nil)
}
// NewInserterWithReturn creates a new batch inserter using the given database handle, table
@ -118,15 +120,16 @@ func NewInserterWithReturn(
ctx context.Context,
db dbutil.DB,
tableName string,
maxNumParameters int,
columnNames []string,
onConflictClause string,
returningColumnNames []string,
returningScanner ReturningScanner,
) *Inserter {
numColumns := len(columnNames)
maxBatchSize := getMaxBatchSize(numColumns)
maxBatchSize := getMaxBatchSize(numColumns, maxNumParameters)
queryPrefix := makeQueryPrefix(tableName, columnNames)
querySuffix := makeQuerySuffix(numColumns)
querySuffix := makeQuerySuffix(numColumns, maxNumParameters)
onConflictSuffix := makeOnConflictSuffix(onConflictClause)
returningSuffix := makeReturningSuffix(returningColumnNames)
@ -215,13 +218,17 @@ func (i *Inserter) makeQuery(numValues int) string {
return i.queryPrefix + i.querySuffix[:suffixLength] + i.onConflictSuffix + i.returningSuffix
}
// maxNumPostgresParameters is the maximum number of placeholder variables allowed by Postgres
// MaxNumPostgresParameters is the maximum number of placeholder variables allowed by Postgres
// in a single insert statement.
const maxNumParameters = 32767
const MaxNumPostgresParameters = 32767
// MaxNumSQLiteParameters is the maximum number of placeholder variables allowed by SQLite
// in a single insert statement.
const MaxNumSQLiteParameters = 999
// getMaxBatchSize returns the number of rows that can be inserted into a single table with the
// given number of columns via a single insert statement.
func getMaxBatchSize(numColumns int) int {
func getMaxBatchSize(numColumns, maxNumParameters int) int {
return (maxNumParameters / numColumns) * numColumns
}
@ -249,7 +256,7 @@ var querySuffixCacheMutex sync.Mutex
// substring index is efficient.
//
// This method is memoized.
func makeQuerySuffix(numColumns int) string {
func makeQuerySuffix(numColumns, maxNumParameters int) string {
querySuffixCacheMutex.Lock()
defer querySuffixCacheMutex.Unlock()
if cache, ok := querySuffixCache[numColumns]; ok {

View File

@ -51,7 +51,7 @@ func TestBatchInserterWithReturn(t *testing.T) {
setupTestTable(t, db)
tableSizeFactor := 2
numRows := maxNumParameters * tableSizeFactor
numRows := MaxNumPostgresParameters * tableSizeFactor
expectedValues := makeTestValues(tableSizeFactor, 0)
var expectedIDs []int
@ -73,7 +73,7 @@ func TestBatchInserterWithReturnWithConflicts(t *testing.T) {
tableSizeFactor := 2
duplicationFactor := 2
numRows := maxNumParameters * tableSizeFactor
numRows := MaxNumPostgresParameters * tableSizeFactor
expectedValues := makeTestValues(tableSizeFactor, 0)
var expectedIDs []int
@ -134,7 +134,7 @@ func setupTestTable(t testing.TB, db *sql.DB) {
func makeTestValues(tableSizeFactor, payloadSize int) [][]interface{} {
var expectedValues [][]interface{}
for i := 0; i < maxNumParameters*tableSizeFactor; i++ {
for i := 0; i < MaxNumPostgresParameters*tableSizeFactor; i++ {
expectedValues = append(expectedValues, []interface{}{
i,
i + 1,
@ -159,7 +159,7 @@ func makePayload(size int) string {
func testInsert(t testing.TB, db *sql.DB, expectedValues [][]interface{}) {
ctx := context.Background()
inserter := NewInserter(ctx, db, "batch_inserter_test", "col1", "col2", "col3", "col4", "col5")
inserter := NewInserter(ctx, db, "batch_inserter_test", MaxNumPostgresParameters, "col1", "col2", "col3", "col4", "col5")
for _, values := range expectedValues {
if err := inserter.Insert(ctx, values...); err != nil {
t.Fatalf("unexpected error inserting values: %s", err)
@ -178,6 +178,7 @@ func testInsertWithReturn(t testing.TB, db *sql.DB, expectedValues [][]interface
ctx,
db,
"batch_inserter_test",
MaxNumPostgresParameters,
[]string{"col1", "col2", "col3", "col4", "col5"},
"",
[]string{"id"},
@ -212,6 +213,7 @@ func testInsertWithReturnWithConflicts(t testing.TB, db *sql.DB, n int, expected
ctx,
db,
"batch_inserter_test",
MaxNumPostgresParameters,
[]string{"id", "col1", "col2", "col3", "col4", "col5"},
"ON CONFLICT DO NOTHING",
[]string{"id"},

View File

@ -226,6 +226,7 @@ func (l *eventLogStore) BulkInsert(ctx context.Context, events []*Event) error {
ctx,
l.Handle().DB(),
"event_logs",
batch.MaxNumPostgresParameters,
[]string{
"name",
"url",

View File

@ -848,3 +848,221 @@ func (codeIntelligence) NewDependencyReposStoreGroup(containerName string) monit
},
})
}
// NewSymbolsAPIGroup creates a monitoring group for the symbols service HTTP API,
// built from the codeintel_symbols_api observation metrics and broken down by
// operation ("op" label). All panels are informational only — no alerts attached.
func (codeIntelligence) NewSymbolsAPIGroup(containerName string) monitoring.Group {
	// Shared no-alert panel options, reused for both the per-op and the
	// aggregate panels (the aggregate gets its own copy since it is taken
	// by pointer).
	noAlerts := SharedObservationGroupOptions{
		Total:     NoAlertsOption("none"),
		Duration:  NoAlertsOption("none"),
		Errors:    NoAlertsOption("none"),
		ErrorRate: NoAlertsOption("none"),
	}
	aggregate := noAlerts

	return Observation.NewGroup(containerName, monitoring.ObservableOwnerCodeIntel, ObservationGroupOptions{
		GroupConstructorOptions: GroupConstructorOptions{
			Namespace:       "codeintel",
			DescriptionRoot: "Symbols API",
			Hidden:          false,

			ObservableConstructorOptions: ObservableConstructorOptions{
				MetricNameRoot:        "codeintel_symbols_api",
				MetricDescriptionRoot: "API",
				Filters:               []string{},
				By:                    []string{"op"},
			},
		},

		SharedObservationGroupOptions: noAlerts,
		Aggregate:                     &aggregate,
	})
}
// NewSymbolsParserGroup creates a monitoring group for the symbols service ctags
// parser: the standard per-operation observation panels (codeintel_symbols_parser
// metrics), preceded by a row of queue-health gauges (in-flight jobs, queue size,
// timeouts, failure rate). All panels are informational only — no alerts attached.
func (codeIntelligence) NewSymbolsParserGroup(containerName string) monitoring.Group {
	// Shared no-alert panel options; the aggregate panels receive a distinct
	// copy since they are referenced by pointer.
	noAlerts := SharedObservationGroupOptions{
		Total:     NoAlertsOption("none"),
		Duration:  NoAlertsOption("none"),
		Errors:    NoAlertsOption("none"),
		ErrorRate: NoAlertsOption("none"),
	}
	aggregate := noAlerts

	group := Observation.NewGroup(containerName, monitoring.ObservableOwnerCodeIntel, ObservationGroupOptions{
		GroupConstructorOptions: GroupConstructorOptions{
			Namespace:       "codeintel",
			DescriptionRoot: "Symbols parser",
			Hidden:          false,

			ObservableConstructorOptions: ObservableConstructorOptions{
				MetricNameRoot:        "codeintel_symbols_parser",
				MetricDescriptionRoot: "parser",
				Filters:               []string{},
				By:                    []string{"op"},
			},
		},

		SharedObservationGroupOptions: noAlerts,
		Aggregate:                     &aggregate,
	})

	// Queue-health gauges, rendered as the first row above the generated
	// observation panels.
	queueStatusRow := monitoring.Row{
		{
			Name:           containerName,
			Description:    "in-flight parse jobs",
			Owner:          monitoring.ObservableOwnerCodeIntel,
			Query:          `max(src_codeintel_symbols_parsing{job=~"^symbols.*"})`,
			NoAlert:        true,
			Interpretation: "none",
			Panel:          monitoring.Panel(),
		},
		{
			Name:           containerName,
			Description:    "parser queue size",
			Owner:          monitoring.ObservableOwnerCodeIntel,
			Query:          `max(src_codeintel_symbols_parse_queue_size{job=~"^symbols.*"})`,
			NoAlert:        true,
			Interpretation: "none",
			Panel:          monitoring.Panel(),
		},
		{
			Name:           containerName,
			Description:    "parse queue timeouts",
			Owner:          monitoring.ObservableOwnerCodeIntel,
			Query:          `max(src_codeintel_symbols_parse_queue_timeouts_total{job=~"^symbols.*"})`,
			NoAlert:        true,
			Interpretation: "none",
			Panel:          monitoring.Panel(),
		},
		{
			Name:           containerName,
			Description:    "parse failures every 5m",
			Owner:          monitoring.ObservableOwnerCodeIntel,
			Query:          `rate(src_codeintel_symbols_parse_failed_total{job=~"^symbols.*"}[5m])`,
			NoAlert:        true,
			Interpretation: "none",
			Panel:          monitoring.Panel(),
		},
	}

	// Prepend the queue row without mutating the generated slice in place.
	rows := make([]monitoring.Row, 0, len(group.Rows)+1)
	rows = append(rows, queueStatusRow)
	rows = append(rows, group.Rows...)
	group.Rows = rows

	return group
}
// NewSymbolsCacheJanitorGroup creates a hidden monitoring group for the symbols
// service's on-disk cache janitor, tracking the cache size and the rate of
// eviction operations and eviction errors. All panels are informational only —
// no alerts attached.
func (codeIntelligence) NewSymbolsCacheJanitorGroup(containerName string) monitoring.Group {
	return monitoring.Group{
		// A plain literal replaces the previous fmt.Sprintf("%s: %s", ...)
		// over two constants — equivalent output, no formatting machinery.
		Title:  "Codeintel: Symbols cache janitor",
		Hidden: true,
		Rows: []monitoring.Row{
			{
				{
					Name:        containerName,
					Description: "size in bytes of the on-disk cache",
					Owner:       monitoring.ObservableOwnerCodeIntel,
					Query:       "src_codeintel_symbols_store_cache_size_bytes",
					NoAlert:     true,
					// "none" for consistency with the sibling symbols groups
					// (was "no", an apparent typo).
					Interpretation: "none",
					Panel:          monitoring.Panel().Unit(monitoring.Bytes),
				},
				{
					Name:           containerName,
					Description:    "cache eviction operations every 5m",
					Owner:          monitoring.ObservableOwnerCodeIntel,
					Query:          "rate(src_codeintel_symbols_store_evictions_total[5m])",
					NoAlert:        true,
					Interpretation: "none",
					Panel:          monitoring.Panel(),
				},
				{
					Name:           containerName,
					Description:    "cache eviction operation errors every 5m",
					Owner:          monitoring.ObservableOwnerCodeIntel,
					Query:          "rate(src_codeintel_symbols_store_errors_total[5m])",
					NoAlert:        true,
					Interpretation: "none",
					Panel:          monitoring.Panel(),
				},
			},
		},
	}
}
// NewSymbolsRepositoryFetcherGroup creates a hidden monitoring group for the
// symbols service's repository fetcher (codeintel_symbols_repository_fetcher
// metrics, broken down by operation), with a leading row of fetch-queue gauges.
// All panels are informational only — no alerts attached.
func (codeIntelligence) NewSymbolsRepositoryFetcherGroup(containerName string) monitoring.Group {
	// Shared no-alert panel options; the aggregate panels receive a distinct
	// copy since they are referenced by pointer.
	noAlerts := SharedObservationGroupOptions{
		Total:     NoAlertsOption("none"),
		Duration:  NoAlertsOption("none"),
		Errors:    NoAlertsOption("none"),
		ErrorRate: NoAlertsOption("none"),
	}
	aggregate := noAlerts

	group := Observation.NewGroup(containerName, monitoring.ObservableOwnerCodeIntel, ObservationGroupOptions{
		GroupConstructorOptions: GroupConstructorOptions{
			Namespace:       "codeintel",
			DescriptionRoot: "Symbols repository fetcher",
			Hidden:          true,

			ObservableConstructorOptions: ObservableConstructorOptions{
				MetricNameRoot:        "codeintel_symbols_repository_fetcher",
				MetricDescriptionRoot: "fetcher",
				Filters:               []string{},
				By:                    []string{"op"},
			},
		},

		SharedObservationGroupOptions: noAlerts,
		Aggregate:                     &aggregate,
	})

	// Fetch-queue gauges, rendered as the first row above the generated
	// observation panels.
	queueStatusRow := monitoring.Row{
		{
			Name:           containerName,
			Description:    "in-flight repository fetch operations",
			Owner:          monitoring.ObservableOwnerCodeIntel,
			Query:          "src_codeintel_symbols_fetching",
			NoAlert:        true,
			Interpretation: "none",
			Panel:          monitoring.Panel(),
		},
		{
			Name:           containerName,
			Description:    "repository fetch queue size",
			Owner:          monitoring.ObservableOwnerCodeIntel,
			Query:          `max(src_codeintel_symbols_fetch_queue_size{job=~"^symbols.*"})`,
			NoAlert:        true,
			Interpretation: "none",
			Panel:          monitoring.Panel(),
		},
	}

	// Prepend the queue row without mutating the generated slice in place.
	rows := make([]monitoring.Row, 0, len(group.Rows)+1)
	rows = append(rows, queueStatusRow)
	rows = append(rows, group.Rows...)
	group.Rows = rows

	return group
}
// NewSymbolsGitserverClientGroup creates a hidden monitoring group for the
// symbols service's gitserver client, built from the codeintel_symbols_gitserver
// observation metrics and broken down by operation ("op" label). All panels are
// informational only — no alerts attached.
func (codeIntelligence) NewSymbolsGitserverClientGroup(containerName string) monitoring.Group {
	// Shared no-alert panel options, reused for both the per-op and the
	// aggregate panels (the aggregate gets its own copy since it is taken
	// by pointer).
	noAlerts := SharedObservationGroupOptions{
		Total:     NoAlertsOption("none"),
		Duration:  NoAlertsOption("none"),
		Errors:    NoAlertsOption("none"),
		ErrorRate: NoAlertsOption("none"),
	}
	aggregate := noAlerts

	return Observation.NewGroup(containerName, monitoring.ObservableOwnerCodeIntel, ObservationGroupOptions{
		GroupConstructorOptions: GroupConstructorOptions{
			Namespace:       "codeintel",
			DescriptionRoot: "Symbols gitserver client",
			Hidden:          true,

			ObservableConstructorOptions: ObservableConstructorOptions{
				MetricNameRoot:        "codeintel_symbols_gitserver",
				MetricDescriptionRoot: "gitserver client",
				Filters:               []string{},
				By:                    []string{"op"},
			},
		},

		SharedObservationGroupOptions: noAlerts,
		Aggregate:                     &aggregate,
	})
}

View File

@ -13,31 +13,11 @@ func Symbols() *monitoring.Container {
Title: "Symbols",
Description: "Handles symbol searches for unindexed branches.",
Groups: []monitoring.Group{
{
Title: "General",
Rows: []monitoring.Row{
{
{
Name: "store_fetch_failures",
Description: "store fetch failures every 5m",
Query: `sum(increase(symbols_store_fetch_failed[5m]))`,
Warning: monitoring.Alert().GreaterOrEqual(5, nil),
Panel: monitoring.Panel().LegendFormat("failures"),
Owner: monitoring.ObservableOwnerCodeIntel,
PossibleSolutions: "none",
},
{
Name: "current_fetch_queue_size",
Description: "current fetch queue size",
Query: `sum(symbols_store_fetch_queue_size)`,
Warning: monitoring.Alert().GreaterOrEqual(25, nil),
Panel: monitoring.Panel().LegendFormat("size"),
Owner: monitoring.ObservableOwnerCodeIntel,
PossibleSolutions: "none",
},
},
},
},
shared.CodeIntelligence.NewSymbolsAPIGroup(containerName),
shared.CodeIntelligence.NewSymbolsParserGroup(containerName),
shared.CodeIntelligence.NewSymbolsCacheJanitorGroup(containerName),
shared.CodeIntelligence.NewSymbolsRepositoryFetcherGroup(containerName),
shared.CodeIntelligence.NewSymbolsGitserverClientGroup(containerName),
shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),
shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerCodeIntel, nil),

View File

@ -804,6 +804,9 @@ commandsets:
- minio
- precise-code-intel-worker
- codeintel-executor
- jaeger
- grafana
- prometheus
enterprise-codeinsights:
requiresDevPrivate: true