// Package server implements the gitserver service.
package server

import (
	"bufio"
	"bytes"
	"container/list"
	"context"
	"crypto/sha256"
	"encoding/gob"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"math"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/opentracing/opentracing-go/ext"
	otlog "github.com/opentracing/opentracing-go/log"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"go.opentelemetry.io/otel/attribute"
	"golang.org/x/sync/errgroup"
	"golang.org/x/sync/semaphore"
	"golang.org/x/time/rate"

	"github.com/sourcegraph/log"

	"github.com/sourcegraph/sourcegraph/cmd/gitserver/server/internal/accesslog"
	"github.com/sourcegraph/sourcegraph/internal/actor"
	"github.com/sourcegraph/sourcegraph/internal/api"
	"github.com/sourcegraph/sourcegraph/internal/conf"
	"github.com/sourcegraph/sourcegraph/internal/database"
	"github.com/sourcegraph/sourcegraph/internal/env"
	"github.com/sourcegraph/sourcegraph/internal/fileutil"
	"github.com/sourcegraph/sourcegraph/internal/gitserver"
	"github.com/sourcegraph/sourcegraph/internal/gitserver/adapters"
	"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"
	"github.com/sourcegraph/sourcegraph/internal/gitserver/protocol"
	"github.com/sourcegraph/sourcegraph/internal/gitserver/search"
	"github.com/sourcegraph/sourcegraph/internal/honey"
	"github.com/sourcegraph/sourcegraph/internal/lazyregexp"
	"github.com/sourcegraph/sourcegraph/internal/mutablelimiter"
	"github.com/sourcegraph/sourcegraph/internal/observation"
	"github.com/sourcegraph/sourcegraph/internal/ratelimit"
	streamhttp "github.com/sourcegraph/sourcegraph/internal/search/streaming/http"
	"github.com/sourcegraph/sourcegraph/internal/syncx"
	"github.com/sourcegraph/sourcegraph/internal/trace"
	"github.com/sourcegraph/sourcegraph/internal/trace/ot"
	"github.com/sourcegraph/sourcegraph/internal/types"
	"github.com/sourcegraph/sourcegraph/internal/vcs"
	"github.com/sourcegraph/sourcegraph/internal/wrexec"
	"github.com/sourcegraph/sourcegraph/lib/errors"
)
// tempDirName is the name used for the temporary directory under ReposDir.
const tempDirName = ".tmp"

// P4HomeName is the name used for the directory that git p4 will use as $HOME
// and where it will store cache data.
const P4HomeName = ".p4home"

// traceLogs is controlled via the env SRC_GITSERVER_TRACE. If true, we trace
// logs to stderr.
var traceLogs bool

var (
	lastCheckAt    = make(map[api.RepoName]time.Time)
	lastCheckMutex sync.Mutex
)
// debounce provides some filtering to prevent spammy requests for the same
// repository. If the last fetch of the repository was within the given
// duration, it returns false; otherwise it returns true and updates the last
// fetch timestamp.
func debounce(name api.RepoName, since time.Duration) bool {
	lastCheckMutex.Lock()
	defer lastCheckMutex.Unlock()
	if t, ok := lastCheckAt[name]; ok && time.Now().Before(t.Add(since)) {
		return false
	}
	lastCheckAt[name] = time.Now()
	return true
}
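
// The sketch below is an illustrative addition (not part of the original
// file): it shows the intended call pattern for debounce, which is to guard
// a fetch so it runs at most once per interval per repo. The function name
// and the 10-second interval are hypothetical.
func exampleDebounceUsage(name api.RepoName, fetch func() error) error {
	// debounce returns false if a fetch for this repo happened within the
	// last interval; in that case we skip the work entirely.
	if !debounce(name, 10*time.Second) {
		return nil
	}
	return fetch()
}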

func init() {
	traceLogs, _ = strconv.ParseBool(env.Get("SRC_GITSERVER_TRACE", "false", "Toggles trace logging to stderr"))
}

// runCommandMock is set by tests. When non-nil it is run instead of
// runCommand.
var runCommandMock func(context.Context, *exec.Cmd) (int, error)

// runCommand runs the command and returns the exit status. All clients of this function should set the context
// in cmd themselves, but we have to pass the context separately here for the sake of tracing.
func runCommand(ctx context.Context, cmd wrexec.Cmder) (exitCode int, err error) {
	if runCommandMock != nil {
		return runCommandMock(ctx, cmd.Unwrap())
	}
	span, _ := ot.StartSpanFromContext(ctx, "runCommand") //nolint:staticcheck // OT is deprecated
	span.SetTag("path", cmd.Unwrap().Path)
	span.SetTag("args", cmd.Unwrap().Args)
	span.SetTag("dir", cmd.Unwrap().Dir)
	defer func() {
		if err != nil {
			ext.Error.Set(span, true)
			span.SetTag("err", err.Error())
			span.SetTag("exitCode", exitCode)
		}
		span.Finish()
	}()

	err = cmd.Run()
	exitStatus := -10810 // sentinel value to indicate not set
	if cmd.Unwrap().ProcessState != nil { // is nil if process failed to start
		exitStatus = cmd.Unwrap().ProcessState.Sys().(syscall.WaitStatus).ExitStatus()
	}
	return exitStatus, err
}

// runCommandGraceful runs the command and returns the exit status. If the
// supplied context is canceled, we attempt to send SIGINT to the command to
// allow it to shut down gracefully. All clients of this function should pass
// in a command *without* a context.
func runCommandGraceful(ctx context.Context, logger log.Logger, cmd wrexec.Cmder) (exitCode int, err error) {
	span, _ := ot.StartSpanFromContext(ctx, "runCommandGraceful") //nolint:staticcheck // OT is deprecated
	c := cmd.Unwrap()
	span.SetTag("path", c.Path)
	span.SetTag("args", c.Args)
	span.SetTag("dir", c.Dir)
	defer func() {
		if err != nil {
			ext.Error.Set(span, true)
			span.SetTag("err", err.Error())
			span.SetTag("exitCode", exitCode)
		}
		span.Finish()
	}()

	exitCode = -10810 // sentinel value to indicate not set
	err = cmd.Start()
	if err != nil {
		return exitCode, err
	}

	done := make(chan struct{})
	go func() {
		defer close(done)
		err = cmd.Wait()
		if err != nil {
			logger.Error("running command", log.Error(err))
		}
	}()

	// Wait for the command to exit or the context to be done.
	select {
	case <-ctx.Done():
		logger.Debug("context cancelled, sending SIGINT")
		// Attempt to send SIGINT.
		if err := cmd.Unwrap().Process.Signal(syscall.SIGINT); err != nil {
			logger.Warn("Sending SIGINT to command", log.Error(err))
			if err := cmd.Unwrap().Process.Kill(); err != nil {
				logger.Warn("killing process", log.Error(err))
			}
			return exitCode, err
		}
		// Now, continue waiting for the command for up to two seconds before killing it.
		timer := time.NewTimer(2 * time.Second)
		select {
		case <-done:
			logger.Debug("process exited after SIGINT sent")
			timer.Stop()
			if err == nil {
				exitCode = 0
			}
		case <-timer.C:
			logger.Debug("timed out, killing process")
			if err := cmd.Unwrap().Process.Kill(); err != nil {
				logger.Warn("killing process", log.Error(err))
			}
			logger.Debug("process killed, waiting for done")
			// Wait again to ensure we can access cmd.ProcessState below.
			<-done
		}

		if exitError, ok := err.(*exec.ExitError); ok {
			exitCode = exitError.ExitCode()
		}
		err = ctx.Err()
		return exitCode, err
	case <-done:
		// Happy path: the command exited.
	}

	if exitError, ok := err.(*exec.ExitError); ok {
		exitCode = exitError.ExitCode()
	}
	if err == nil {
		exitCode = 0
	}
	return exitCode, err
}
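
// A minimal usage sketch for runCommandGraceful (illustrative, not part of
// the original file). Per the doc comment, the command is constructed
// without a context; cancellation arrives via ctx so the function can try
// SIGINT before resorting to Kill. The wrexec wrapper construction is
// elided here because it depends on how the caller's RecordingCommandFactory
// is configured:
//
//	ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
//	defer cancel()
//	cmd := exec.Command("git", "gc") // note: no exec.CommandContext
//	exitCode, err := runCommandGraceful(ctx, logger, wrappedCmd)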

// cloneJob abstracts away a repo and necessary metadata to clone it. In the future it may be
// possible to simplify this, but to do that, doClone will need to do a lot less than it does at the
// moment.
type cloneJob struct {
	repo   api.RepoName
	dir    GitDir
	syncer VCSSyncer

	// TODO: cloneJobConsumer should acquire a new lock. We are trying to keep the changes simple
	// for the time being. When we start using the new approach of using long lived goroutines for
	// cloning we will refactor doClone to acquire a new lock.
	lock *RepositoryLock

	remoteURL *vcs.URL
	options   *cloneOptions
}

// cloneQueue is a threadsafe list.List of cloneJobs that functions as a queue in practice.
type cloneQueue struct {
	mu   sync.Mutex
	jobs *list.List

	cmu  sync.Mutex
	cond *sync.Cond
}

// push will queue the cloneJob to the end of the queue.
func (c *cloneQueue) push(cj *cloneJob) {
	c.mu.Lock()
	defer c.mu.Unlock()

	c.jobs.PushBack(cj)
	c.cond.Signal()
}

// pop will return the next cloneJob. If there's no next job available, it returns nil.
func (c *cloneQueue) pop() *cloneJob {
	c.mu.Lock()
	defer c.mu.Unlock()

	next := c.jobs.Front()
	if next == nil {
		return nil
	}

	return c.jobs.Remove(next).(*cloneJob)
}

func (c *cloneQueue) empty() bool {
	c.mu.Lock()
	defer c.mu.Unlock()

	return c.jobs.Len() == 0
}

// NewCloneQueue initializes a new cloneQueue.
func NewCloneQueue(jobs *list.List) *cloneQueue {
	cq := cloneQueue{jobs: jobs}
	cq.cond = sync.NewCond(&cq.cmu)

	return &cq
}
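
// exampleCloneQueueUsage is an illustrative sketch (not part of the original
// file) of the queue contract used by the clone pipeline below: push appends
// and signals the condition variable, and pop returns nil once the queue has
// been drained.
func exampleCloneQueueUsage(job *cloneJob) {
	q := NewCloneQueue(list.New())
	q.push(job)
	for {
		next := q.pop()
		if next == nil {
			break // drained; a real consumer waits on q.cond for more work
		}
		_ = next // process the job
	}
}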

// Server is a gitserver server.
type Server struct {
	// Logger should be used for all logging and logger creation.
	Logger log.Logger

	// ObservationCtx is used to initialize an operations struct
	// with the appropriate metrics register etc.
	ObservationCtx *observation.Context

	// ReposDir is the path to the base directory for gitserver storage.
	ReposDir string

	// DesiredPercentFree is the desired percentage of disk space to keep free.
	DesiredPercentFree int

	// DiskSizer tells how much disk is free and how large the disk is.
	DiskSizer DiskSizer

	// GetRemoteURLFunc is a function which returns the remote URL for a
	// repository. This is used when cloning or fetching a repository. In
	// production this will speak to the database to look up the clone URL. In
	// tests this is usually set to clone a local repository or intentionally
	// error.
	//
	// Note: internal uses should call getRemoteURL which will handle
	// GetRemoteURLFunc being nil.
	GetRemoteURLFunc func(context.Context, api.RepoName) (string, error)

	// GetVCSSyncer is a function which returns the VCS syncer for a repository.
	// This is used when cloning or fetching a repository. In production this will
	// speak to the database to determine the code host type. In tests this is
	// usually set to return a GitRepoSyncer.
	GetVCSSyncer func(context.Context, api.RepoName) (VCSSyncer, error)

	// Hostname is how we identify this instance of gitserver. Generally it is the
	// actual hostname but can also be overridden by the HOSTNAME environment variable.
	Hostname string

	// DB is the shared database handle.
	DB database.DB

	// CloneQueue is a threadsafe queue used by DoBackgroundClones to process incoming clone
	// requests asynchronously.
	CloneQueue *cloneQueue

	// skipCloneForTests is set by tests to avoid clones.
	skipCloneForTests bool

	// ctx is the context we use for all background jobs. It is done when the
	// server is stopped. Do not use this directly; rather, call
	// Server.serverContext().
	ctx      context.Context
	cancel   context.CancelFunc // used to shut down background jobs
	cancelMu sync.Mutex         // protects canceled
	canceled bool
	wg       sync.WaitGroup // tracks running background jobs

	locker *RepositoryLocker

	// cloneLimiter and cloneableLimiter limit the number of concurrent
	// clones and ls-remotes respectively. Use s.acquireCloneLimiter() and
	// s.acquireCloneableLimiter() instead of using these directly.
	cloneLimiter     *mutablelimiter.Limiter
	cloneableLimiter *mutablelimiter.Limiter

	// rpsLimiter limits the remote code host git operations done per second
	// per gitserver instance.
	rpsLimiter *ratelimit.InstrumentedLimiter

	repoUpdateLocksMu sync.Mutex // protects the map below and also updates to locks.once
	repoUpdateLocks   map[api.RepoName]*locks

	// GlobalBatchLogSemaphore is a semaphore shared between all requests to ensure that a
	// maximum number of Git subprocesses are active for all /batch-log requests combined.
	GlobalBatchLogSemaphore *semaphore.Weighted

	// operations provide uniform observability via internal/observation. This value is
	// set by RegisterMetrics when compiled as part of the gitserver binary. The server
	// method ensureOperations should be used in all references to avoid a nil pointer
	// dereference.
	operations *operations

	// recordingCommandFactory is a factory that creates recordable commands by wrapping os/exec.Commands.
	// The factory creates recordable commands with a set predicate, which is used to determine whether a
	// particular command should be recorded or not.
	recordingCommandFactory *wrexec.RecordingCommandFactory
}

type locks struct {
	once *sync.Once  // consolidates multiple waiting updates
	mu   *sync.Mutex // prevents updates running in parallel
}

// shortGitCommandTimeout returns the timeout for git commands that should not
// take a long time. Some commands such as "git archive" are allowed more time
// than "git rev-parse", so this will return an appropriate timeout given the
// command.
func shortGitCommandTimeout(args []string) time.Duration {
	if len(args) < 1 {
		return time.Minute
	}
	switch args[0] {
	case "archive":
		// This is a long time, but this never blocks a user request for this
		// long. Even repos that are not that large can take a long time, for
		// example a search over all repos in an organization may have several
		// large repos. All of those repos will be competing for IO => we need
		// a larger timeout.
		return conf.GitLongCommandTimeout()

	case "ls-remote":
		return 30 * time.Second

	default:
		return time.Minute
	}
}

// shortGitCommandSlow returns the threshold for regarding a git command as
// slow. Some commands such as "git archive" are inherently slower than "git
// rev-parse", so this will return an appropriate threshold given the command.
func shortGitCommandSlow(args []string) time.Duration {
	if len(args) < 1 {
		return time.Second
	}
	switch args[0] {
	case "archive":
		return 1 * time.Minute

	case "blame", "ls-tree", "log", "show":
		return 5 * time.Second

	default:
		return 2500 * time.Millisecond
	}
}

// 🚨 SECURITY: headerXRequestedWithMiddleware will ensure that the X-Requested-With
// header contains the correct value. See "What does X-Requested-With do, anyway?" in
// https://github.com/sourcegraph/sourcegraph/pull/27931.
func headerXRequestedWithMiddleware(next http.Handler) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		l := log.Scoped("gitserver", "headerXRequestedWithMiddleware")

		// Do not apply the middleware to /ping and /git endpoints.
		//
		// 1. /ping is used by health check services which most likely don't set this
		// header at all.
		//
		// 2. /git may be used to run "git fetch" from another gitserver instance over
		// HTTP and the fetchCommand does not set this header yet.
		if strings.HasPrefix(r.URL.Path, "/ping") || strings.HasPrefix(r.URL.Path, "/git") {
			next.ServeHTTP(w, r)
			return
		}

		if value := r.Header.Get("X-Requested-With"); value != "Sourcegraph" {
			l.Error("header X-Requested-With is not set or is invalid", log.String("path", r.URL.Path))
			http.Error(w, "header X-Requested-With is not set or is invalid", http.StatusBadRequest)
			return
		}

		next.ServeHTTP(w, r)
	}
}

// recordCommandsOnRepos returns a ShouldRecordFunc which determines whether the given command should be recorded
// for a particular repository.
func recordCommandsOnRepos(repos []string) wrexec.ShouldRecordFunc {
	// If repos is empty, we should never record, since there is nothing to match on.
	if len(repos) == 0 {
		return func(ctx context.Context, c *exec.Cmd) bool {
			return false
		}
	}

	// We won't record any git commands that use these subcommands, since they
	// are not considered destructive.
	ignoredGitCommands := map[string]struct{}{
		"show":      {},
		"rev-parse": {},
		"log":       {},
		"diff":      {},
		"ls-tree":   {},
	}
	return func(ctx context.Context, cmd *exec.Cmd) bool {
		base := filepath.Base(cmd.Path)
		if base != "git" {
			return false
		}

		repoMatch := false
		for _, repo := range repos {
			if strings.Contains(cmd.Dir, repo) {
				repoMatch = true
				break
			}
		}

		// If the repo doesn't match, there is no use in checking whether it is a command we should record.
		if !repoMatch {
			return false
		}
		// We have to scan the args, since it isn't guaranteed that the arg at index 1 is the git subcommand:
		// git -c "protocol.version=2" remote show
		for _, arg := range cmd.Args {
			if _, ok := ignoredGitCommands[arg]; ok {
				return false
			}
		}
		return true
	}
}
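
// exampleRecordingFactoryWiring is an illustrative sketch (not part of the
// original file) of how the predicate above is installed, mirroring what
// Handler does when site config changes. The repo name and buffer size here
// are arbitrary placeholders.
func exampleRecordingFactoryWiring() *wrexec.RecordingCommandFactory {
	factory := wrexec.NewRecordingCommandFactory(nil, 0)
	// Every recordable command created by the factory from now on consults
	// this predicate to decide whether it should be recorded.
	factory.Update(recordCommandsOnRepos([]string{"github.com/foo/bar"}), 10)
	return factory
}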

// Handler returns the http.Handler that should be used to serve requests.
func (s *Server) Handler() http.Handler {
	s.ctx, s.cancel = context.WithCancel(context.Background())
	s.locker = &RepositoryLocker{}
	s.repoUpdateLocks = make(map[api.RepoName]*locks)

	s.recordingCommandFactory = wrexec.NewRecordingCommandFactory(nil, 0)
	conf.Watch(func() {
		// We update the factory with a predicate func. Each subsequent recordable command will use this predicate
		// to determine whether a command should be recorded or not.
		recordingConf := conf.Get().SiteConfig().GitRecorder
		if recordingConf == nil {
			return
		}
		s.recordingCommandFactory.Update(recordCommandsOnRepos(recordingConf.Repos), recordingConf.Size)
	})

	// GitMaxConcurrentClones controls the maximum number of clones that
	// can happen at once on a single gitserver.
	// Used to prevent throttle limits from a code host. Defaults to 5.
	//
	// The new repo-updater scheduler enforces the rate limit across all gitservers,
	// so ideally this logic could be removed here; however, ensureRevision can also
	// cause an update to happen and it is called on every exec command.
	maxConcurrentClones := conf.GitMaxConcurrentClones()
	s.cloneLimiter = mutablelimiter.New(maxConcurrentClones)
	s.cloneableLimiter = mutablelimiter.New(maxConcurrentClones)

	conf.Watch(func() {
		limit := conf.GitMaxConcurrentClones()
		s.cloneLimiter.SetLimit(limit)
		s.cloneableLimiter.SetLimit(limit)
	})

	s.rpsLimiter = ratelimit.NewInstrumentedLimiter("RpsLimiter", rate.NewLimiter(rate.Inf, 10))
	setRPSLimiter := func() {
		if maxRequestsPerSecond := conf.GitMaxCodehostRequestsPerSecond(); maxRequestsPerSecond == -1 {
			// As a special case, -1 means no limiting.
			s.rpsLimiter.SetLimit(rate.Inf)
			s.rpsLimiter.SetBurst(10)
		} else if maxRequestsPerSecond == 0 {
			// A limiter with a zero limit but a non-zero burst is not rejecting all events,
			// because the bucket is initially full with N tokens and is refilled with N tokens
			// every second, where N is the burst size. See
			// https://github.com/golang/go/issues/18763 for details.
			s.rpsLimiter.SetLimit(0)
			s.rpsLimiter.SetBurst(0)
		} else {
			s.rpsLimiter.SetLimit(rate.Limit(maxRequestsPerSecond))
			s.rpsLimiter.SetBurst(10)
		}
	}
	conf.Watch(func() {
		setRPSLimiter()
	})

	mux := http.NewServeMux()
	mux.HandleFunc("/archive", trace.WithRouteName("archive", accesslog.HTTPMiddleware(
		s.Logger.Scoped("archive.accesslog", "archive endpoint access log"),
		conf.DefaultClient(),
		s.handleArchive,
	)))
	mux.HandleFunc("/exec", trace.WithRouteName("exec", accesslog.HTTPMiddleware(
		s.Logger.Scoped("exec.accesslog", "exec endpoint access log"),
		conf.DefaultClient(),
		s.handleExec,
	)))
	mux.HandleFunc("/search", trace.WithRouteName("search", s.handleSearch))
	mux.HandleFunc("/batch-log", trace.WithRouteName("batch-log", s.handleBatchLog))
	mux.HandleFunc("/p4-exec", trace.WithRouteName("p4-exec", accesslog.HTTPMiddleware(
		s.Logger.Scoped("p4-exec.accesslog", "p4-exec endpoint access log"),
		conf.DefaultClient(),
		s.handleP4Exec,
	)))
	mux.HandleFunc("/list-gitolite", trace.WithRouteName("list-gitolite", s.handleListGitolite))
	mux.HandleFunc("/is-repo-cloneable", trace.WithRouteName("is-repo-cloneable", s.handleIsRepoCloneable))
	mux.HandleFunc("/repos-stats", trace.WithRouteName("repos-stats", s.handleReposStats))
	mux.HandleFunc("/repo-clone-progress", trace.WithRouteName("repo-clone-progress", s.handleRepoCloneProgress))
	mux.HandleFunc("/delete", trace.WithRouteName("delete", s.handleRepoDelete))
	mux.HandleFunc("/repo-update", trace.WithRouteName("repo-update", s.handleRepoUpdate))
	mux.HandleFunc("/repo-clone", trace.WithRouteName("repo-clone", s.handleRepoClone))
	mux.HandleFunc("/create-commit-from-patch-binary", trace.WithRouteName("create-commit-from-patch-binary", s.handleCreateCommitFromPatchBinary))
	mux.HandleFunc("/create-commit-from-patch", trace.WithRouteName("create-commit-from-patch", s.handleCreateCommitFromPatch))
	mux.HandleFunc("/ping", trace.WithRouteName("ping", func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	}))

	// This endpoint allows us to expose gitserver itself as a "git service"
	// (ETOOMANYGITS!) that allows other services to run commands like "git fetch"
	// directly against a gitserver replica and treat it as a git remote.
	//
	// An example use case is a repo migration from one replica to another during
	// scaling events: the new destination gitserver replica can directly clone from
	// the gitserver replica which currently hosts the repository.
	mux.HandleFunc("/git/", trace.WithRouteName("git", accesslog.HTTPMiddleware(
		s.Logger.Scoped("git.accesslog", "git endpoint access log"),
		conf.DefaultClient(),
		func(rw http.ResponseWriter, r *http.Request) {
			http.StripPrefix("/git", s.gitServiceHandler()).ServeHTTP(rw, r)
		},
	)))

	// Migration to hexagonal architecture starting here:

	gitAdapter := &adapters.Git{
		ReposDir: s.ReposDir,
	}
	getObjectService := gitdomain.GetObjectService{
		RevParse:      gitAdapter.RevParse,
		GetObjectType: gitAdapter.GetObjectType,
	}
	getObjectFunc := gitdomain.GetObjectFunc(func(ctx context.Context, repo api.RepoName, objectName string) (*gitdomain.GitObject, error) {
		// Tracing is a server concern, so add it here. Once generics land we should be
		// able to create some simple wrappers.
		span, ctx := ot.StartSpanFromContext(ctx, "Git: GetObject") //nolint:staticcheck // OT is deprecated
		span.SetTag("objectName", objectName)
		defer span.Finish()
		return getObjectService.GetObject(ctx, repo, objectName)
	})

	mux.HandleFunc("/commands/get-object", trace.WithRouteName("commands/get-object",
		accesslog.HTTPMiddleware(
			s.Logger.Scoped("commands/get-object.accesslog", "commands/get-object endpoint access log"),
			conf.DefaultClient(),
			handleGetObject(s.Logger.Scoped("commands/get-object", "handles get object"), getObjectFunc),
		)))

	// 🚨 SECURITY: This must be wrapped in headerXRequestedWithMiddleware.
	return headerXRequestedWithMiddleware(mux)
}

// Janitor does clean up tasks over s.ReposDir and is expected to run in a
// background goroutine.
func (s *Server) Janitor(ctx context.Context, interval time.Duration) {
	for {
		gitserverAddrs := currentGitserverAddresses()
		s.cleanupRepos(actor.WithInternalActor(ctx), gitserverAddrs)
		time.Sleep(interval)
	}
}

// SyncRepoState syncs state on disk to the database for all repos and is
// expected to run in a background goroutine. We perform a full sync if the known
// gitserver addresses have changed since the last run. Otherwise, we only sync
// repos that have not yet been assigned a shard.
func (s *Server) SyncRepoState(interval time.Duration, batchSize, perSecond int) {
	var previousAddrs string
	var previousPinned string
	for {
		gitServerAddrs := currentGitserverAddresses()
		addrs := gitServerAddrs.Addresses
		// We turn addrs into a string here for easy comparison and storage of previous
		// addresses since we'd need to take a copy of the slice anyway.
		currentAddrs := strings.Join(addrs, ",")
		fullSync := currentAddrs != previousAddrs
		previousAddrs = currentAddrs

		// We turn PinnedServers into a string here for easy comparison and storage
		// of previous pins.
		pinnedServerPairs := make([]string, 0, len(gitServerAddrs.PinnedServers))
		for k, v := range gitServerAddrs.PinnedServers {
			pinnedServerPairs = append(pinnedServerPairs, fmt.Sprintf("%s=%s", k, v))
		}
		sort.Strings(pinnedServerPairs)
		currentPinned := strings.Join(pinnedServerPairs, ",")
		fullSync = fullSync || currentPinned != previousPinned
		previousPinned = currentPinned

		if err := s.syncRepoState(gitServerAddrs, batchSize, perSecond, fullSync); err != nil {
			s.Logger.Error("Syncing repo state", log.Error(err))
		}

		time.Sleep(interval)
	}
}

func (s *Server) addrForRepo(ctx context.Context, repoName api.RepoName, gitServerAddrs gitserver.GitServerAddresses) (string, error) {
	return gitserver.AddrForRepo(ctx, filepath.Base(os.Args[0]), repoName, gitServerAddrs)
}

func currentGitserverAddresses() gitserver.GitServerAddresses {
	cfg := conf.Get()
	gitServerAddrs := gitserver.GitServerAddresses{
		Addresses: cfg.ServiceConnectionConfig.GitServers,
	}
	if cfg.ExperimentalFeatures != nil {
		gitServerAddrs.PinnedServers = cfg.ExperimentalFeatures.GitServerPinnedRepos
	}

	return gitServerAddrs
}

// StartClonePipeline clones repos asynchronously. It creates a producer-consumer
// pipeline.
func (s *Server) StartClonePipeline(ctx context.Context) {
	jobs := make(chan *cloneJob)

	go s.cloneJobConsumer(ctx, jobs)
	go s.cloneJobProducer(ctx, jobs)
}

func (s *Server) cloneJobProducer(ctx context.Context, jobs chan<- *cloneJob) {
	defer close(jobs)

	for {
		// Acquire the cond mutex lock and wait for a signal if the queue is empty.
		s.CloneQueue.cmu.Lock()
		if s.CloneQueue.empty() {
			s.CloneQueue.cond.Wait()
		}

		// The queue is not empty, so we have a job to process. Unlock the cond mutex
		// here, as we don't need to hold the lock beyond this point.
		s.CloneQueue.cmu.Unlock()

		// Keep popping from the queue until the queue is empty again, in which case we start all
		// over again from the top.
		for {
			job := s.CloneQueue.pop()
			if job == nil {
				break
			}

			select {
			case jobs <- job:
			case <-ctx.Done():
				s.Logger.Error("cloneJobProducer: ", log.Error(ctx.Err()))
				return
			}
		}
	}
}

func (s *Server) cloneJobConsumer(ctx context.Context, jobs <-chan *cloneJob) {
	logger := s.Logger.Scoped("cloneJobConsumer", "process clone jobs")

	for j := range jobs {
		logger := logger.With(log.String("job.repo", string(j.repo)))

		select {
		case <-ctx.Done():
			logger.Error("context done", log.Error(ctx.Err()))
			return
		default:
		}

		ctx, cancel, err := s.acquireCloneLimiter(ctx)
		if err != nil {
			logger.Error("acquireCloneLimiter", log.Error(err))
			continue
		}

		go func(job *cloneJob) {
			defer cancel()

			err := s.doClone(ctx, job.repo, job.dir, job.syncer, job.lock, job.remoteURL, job.options)
			if err != nil {
				logger.Error("failed to clone repo", log.Error(err))
			}
			// Use a different context in case we failed because the original context failed.
			s.setLastErrorNonFatal(s.ctx, job.repo, err)
		}(j)
	}
}

// hostnameMatch checks whether the hostname matches the given address.
// If we don't find an exact match, we look at the initial prefix.
func (s *Server) hostnameMatch(addr string) bool {
	if !strings.HasPrefix(addr, s.Hostname) {
		return false
	}
	if addr == s.Hostname {
		return true
	}
	// We know that s.Hostname is shorter than addr so we can safely check the next
	// char.
	next := addr[len(s.Hostname)]
	return next == '.' || next == ':'
}
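
// exampleHostnameMatch is an illustrative sketch (not part of the original
// file) of why the extra next-character check matters: a plain prefix test
// would wrongly match "gitserver-10" against a server named "gitserver-1".
func exampleHostnameMatch() []bool {
	s := &Server{Hostname: "gitserver-1"}
	return []bool{
		s.hostnameMatch("gitserver-1"),                // true: exact match
		s.hostnameMatch("gitserver-1:3178"),           // true: next char is ':'
		s.hostnameMatch("gitserver-1.gitserver:3178"), // true: next char is '.'
		s.hostnameMatch("gitserver-10:3178"),          // false: next char is '0'
	}
}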

var (
	repoSyncStateCounter = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "src_repo_sync_state_counter",
		Help: "Incremented each time we check the state of repo",
	}, []string{"type"})
	repoStateUpsertCounter = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "src_repo_sync_state_upsert_counter",
		Help: "Incremented each time we upsert repo state in the database",
	}, []string{"success"})
	wrongShardReposTotal = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "src_gitserver_repo_wrong_shard",
		Help: "The number of repos that are on disk on the wrong shard",
	})
	wrongShardReposSizeTotalBytes = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "src_gitserver_repo_wrong_shard_bytes",
		Help: "Size (in bytes) of repos that are on disk on the wrong shard",
	})
	wrongShardReposDeletedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "src_gitserver_repo_wrong_shard_deleted",
		Help: "The number of repos on the wrong shard that we deleted",
	})
)

func (s *Server) syncRepoState(gitServerAddrs gitserver.GitServerAddresses, batchSize, perSecond int, fullSync bool) error {
	s.Logger.Debug("starting syncRepoState", log.Bool("fullSync", fullSync))
	addrs := gitServerAddrs.Addresses

	// When fullSync is true we'll scan all repos in the database and ensure we set
	// their clone state and assign any that belong to this shard with the correct
	// shard_id.
	//
	// When fullSync is false, we assume that we only need to check repos that have
	// not yet had their shard_id allocated.

	// Sanity check that our host exists in addrs before starting any work.
	var found bool
	for _, a := range addrs {
		if s.hostnameMatch(a) {
			found = true
			break
		}
	}
	if !found {
		return errors.Errorf("gitserver hostname, %q, not found in list", s.Hostname)
	}

	ctx := s.ctx
	store := s.DB.GitserverRepos()

	// The rate limit should be enforced across all instances, so divide it by the
	// number of shards. Guard against the integer division yielding zero, which
	// would otherwise make the limiter reject every wait.
	perSecond = perSecond / len(addrs)
	if perSecond <= 0 {
		perSecond = 1
	}
	limiter := ratelimit.NewInstrumentedLimiter("SyncRepoState", rate.NewLimiter(rate.Limit(perSecond), perSecond))

	// The rate limiter doesn't allow writes that are larger than the burst size,
	// which we've set to perSecond.
	if batchSize > perSecond {
		batchSize = perSecond
	}

	batch := make([]*types.GitserverRepo, 0)

	writeBatch := func() {
		if len(batch) == 0 {
			return
		}
		// We always clear the batch.
		defer func() {
			batch = batch[0:0]
		}()
		err := limiter.WaitN(ctx, len(batch))
		if err != nil {
			s.Logger.Error("Waiting for rate limiter", log.Error(err))
			return
		}

		if err := store.Update(ctx, batch...); err != nil {
			repoStateUpsertCounter.WithLabelValues("false").Add(float64(len(batch)))
			s.Logger.Error("Updating GitserverRepos", log.Error(err))
			return
		}
		repoStateUpsertCounter.WithLabelValues("true").Add(float64(len(batch)))
	}

	// Make sure we fetch at least a good chunk of records, assuming that most
	// would not need an update anyways. Don't fetch too many though to keep the
	// DB load at a reasonable level and constrain memory usage.
	iteratePageSize := batchSize * 2
	if iteratePageSize < 500 {
		iteratePageSize = 500
	}

	options := database.IterateRepoGitserverStatusOptions{
		// We also want to include deleted repos as they may still be cloned on disk.
		IncludeDeleted: true,
		BatchSize:      iteratePageSize,
	}
	if !fullSync {
		options.OnlyWithoutShard = true
	}
	for {
		repos, nextRepo, err := store.IterateRepoGitserverStatus(ctx, options)
		if err != nil {
			return err
		}
		for _, repo := range repos {
			repoSyncStateCounter.WithLabelValues("check").Inc()

			// We may have a deleted repo; we need to extract the original name both to
			// ensure that the shard check is correct and also so that we can find the
			// directory.
			repo.Name = api.UndeletedRepoName(repo.Name)

			// Ensure we're only dealing with repos we are responsible for.
			addr, err := s.addrForRepo(ctx, repo.Name, gitServerAddrs)
			if err != nil {
				return err
			}
			if !s.hostnameMatch(addr) {
				repoSyncStateCounter.WithLabelValues("other_shard").Inc()
				continue
			}
			repoSyncStateCounter.WithLabelValues("this_shard").Inc()

			dir := s.dir(repo.Name)
			cloned := repoCloned(dir)
			_, cloning := s.locker.Status(dir)

			var shouldUpdate bool
			if repo.ShardID != s.Hostname {
				repo.ShardID = s.Hostname
				shouldUpdate = true
			}
			cloneStatus := cloneStatus(cloned, cloning)
			if repo.CloneStatus != cloneStatus {
				repo.CloneStatus = cloneStatus
				// Since the repo has been recloned or is being cloned
				// we can reset the corruption.
				repo.CorruptedAt = time.Time{}
				shouldUpdate = true
			}

			if !shouldUpdate {
				continue
			}

			batch = append(batch, repo.GitserverRepo)

			if len(batch) >= batchSize {
				writeBatch()
			}
		}

		if nextRepo == 0 {
			break
		}

		options.NextCursor = nextRepo
	}

	// Attempt a final write.
	writeBatch()

	return nil
}

// Stop cancels the running background jobs and returns when done.
func (s *Server) Stop() {
	// idempotent so we can just always set and cancel
	s.cancel()
	s.cancelMu.Lock()
	s.canceled = true
	s.cancelMu.Unlock()
	s.wg.Wait()
}

// serverContext returns a child context tied to the lifecycle of the server.
func (s *Server) serverContext() (context.Context, context.CancelFunc) {
	// If we are already canceled, don't increment our WaitGroup. This is to
	// prevent a loop somewhere from keeping us from ever finishing the
	// WaitGroup, even though all calls fail instantly due to the canceled
	// context.
	s.cancelMu.Lock()
	if s.canceled {
		s.cancelMu.Unlock()
		return s.ctx, func() {}
	}
	s.wg.Add(1)
	s.cancelMu.Unlock()

	ctx, cancel := context.WithCancel(s.ctx)

	// We need to track if we have called cancel, since we are only allowed to
	// call wg.Done() once, but CancelFuncs can be called any number of times.
	var canceled int32
	return ctx, func() {
		ok := atomic.CompareAndSwapInt32(&canceled, 0, 1)
		if ok {
			cancel()
			s.wg.Done()
		}
	}
}
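
// exampleServerContextUsage is an illustrative sketch (not part of the
// original file) of the serverContext contract: each background job runs
// with a child context and must invoke the returned cancel func when done,
// which both cancels the child context and releases the job's WaitGroup
// slot so Stop can return.
func (s *Server) exampleServerContextUsage(job func(context.Context)) {
	ctx, cancel := s.serverContext()
	defer cancel() // safe to call repeatedly; wg.Done runs exactly once
	job(ctx)
}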

func (s *Server) getRemoteURL(ctx context.Context, name api.RepoName) (*vcs.URL, error) {
	if s.GetRemoteURLFunc == nil {
		return nil, errors.New("gitserver GetRemoteURLFunc is unset")
	}

	remoteURL, err := s.GetRemoteURLFunc(ctx, name)
	if err != nil {
		return nil, errors.Wrap(err, "GetRemoteURLFunc")
	}

	return vcs.ParseURL(remoteURL)
}

// acquireCloneLimiter acquires a cancellable context associated with the
// clone limiter.
func (s *Server) acquireCloneLimiter(ctx context.Context) (context.Context, context.CancelFunc, error) {
	pendingClones.Inc()
	defer pendingClones.Dec()
	return s.cloneLimiter.Acquire(ctx)
}

func (s *Server) acquireCloneableLimiter(ctx context.Context) (context.Context, context.CancelFunc, error) {
	lsRemoteQueue.Inc()
	defer lsRemoteQueue.Dec()
	return s.cloneableLimiter.Acquire(ctx)
}

// tempDir is a wrapper around os.MkdirTemp, but using the server's
// temporary directory filepath.Join(s.ReposDir, tempDirName).
//
// This directory is cleaned up by gitserver and will be ignored by repository
// listing operations.
func (s *Server) tempDir(prefix string) (name string, err error) {
	dir := filepath.Join(s.ReposDir, tempDirName)

	// Create the temporary directory if it doesn't exist yet.
	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
		return "", err
	}

	return os.MkdirTemp(dir, prefix)
}

func (s *Server) ignorePath(path string) bool {
	// We ignore any path which starts with .tmp or .p4home in ReposDir.
	if filepath.Dir(path) != s.ReposDir {
		return false
	}
	base := filepath.Base(path)
	return strings.HasPrefix(base, tempDirName) || strings.HasPrefix(base, P4HomeName)
}
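
// exampleIgnorePath is an illustrative sketch (not part of the original
// file) of what ignorePath filters: only direct children of ReposDir whose
// base name starts with tempDirName or P4HomeName are ignored.
func exampleIgnorePath() []bool {
	s := &Server{ReposDir: "/repos"}
	return []bool{
		s.ignorePath("/repos/.tmp"),           // true: the temp dir itself
		s.ignorePath("/repos/.tmp-123"),       // true: a .tmp-prefixed direct child
		s.ignorePath("/repos/.p4home"),        // true: the p4 home dir
		s.ignorePath("/repos/github.com/a/b"), // false: not a direct child of ReposDir
		s.ignorePath("/repos/myrepo"),         // false: a regular repo dir
	}
}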

func (s *Server) handleIsRepoCloneable(w http.ResponseWriter, r *http.Request) {
	var req protocol.IsRepoCloneableRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	if req.Repo == "" {
		http.Error(w, "no Repo given", http.StatusBadRequest)
		return
	}

	var syncer VCSSyncer
	// We use an internal actor here as the repo may be private. It is safe since all
	// we return is a bool indicating whether the repo is cloneable or not. Perhaps
	// the only thing that could leak here is whether a private repo exists, although
	// the endpoint is only available internally so it's low risk.
	remoteURL, err := s.getRemoteURL(actor.WithInternalActor(r.Context()), req.Repo)
	if err != nil {
		// We use this endpoint to verify if a repo exists without consuming
		// API rate limit, since many users visit private or bogus repos,
		// so we deduce the unauthenticated clone URL from the repo name.
		remoteURL, _ = vcs.ParseURL("https://" + string(req.Repo) + ".git")

		// At this point we are assuming it's a git repo.
		syncer = &GitRepoSyncer{}
	} else {
		syncer, err = s.GetVCSSyncer(r.Context(), req.Repo)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
	}

	resp := protocol.IsRepoCloneableResponse{
		Cloned: repoCloned(s.dir(req.Repo)),
	}
	if err := syncer.IsCloneable(r.Context(), remoteURL); err == nil {
		resp.Cloneable = true
	} else {
		resp.Reason = err.Error()
	}

	if err := json.NewEncoder(w).Encode(resp); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
}

// handleRepoUpdate is a synchronous (waits for the update to complete or
// time out) method so it can yield errors. Updates are not
// unconditional; we debounce them based on the provided
// interval, to avoid spam.
func (s *Server) handleRepoUpdate(w http.ResponseWriter, r *http.Request) {
	logger := s.Logger.Scoped("handleRepoUpdate", "synchronous http handler for repo updates")
	var req protocol.RepoUpdateRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	var resp protocol.RepoUpdateResponse
	req.Repo = protocol.NormalizeRepo(req.Repo)
	dir := s.dir(req.Repo)

	// Despite the existence of a context on the request, we don't want to
	// cancel the git commands partway through if the request terminates.
	ctx, cancel1 := s.serverContext()
	defer cancel1()
	ctx, cancel2 := context.WithTimeout(ctx, conf.GitLongCommandTimeout())
	defer cancel2()
	if !repoCloned(dir) && !s.skipCloneForTests {
		// We do not need to check if req.CloneFromShard is non-zero here since that has no effect on
		// the code path at this point. Since the repo is already not cloned at this point, either
		// this request was received for a repo migration or a regular clone - for both of which we
		// want to go ahead and clone the repo. The responsibility of figuring out where to clone
		// the repo from (upstream URL of the external service or the gitserver instance) lies with
		// the implementation details of cloneRepo.
		_, err := s.cloneRepo(ctx, req.Repo, &cloneOptions{Block: true, CloneFromShard: req.CloneFromShard})
		if err != nil {
			logger.Warn("error cloning repo", log.String("repo", string(req.Repo)), log.Error(err))
			resp.Error = err.Error()
		}
	} else {
		var statusErr, updateErr error

		if debounce(req.Repo, req.Since) {
			updateErr = s.doRepoUpdate(ctx, req.Repo, "")
		}

		// Attempts to acquire these values are not contingent on the success of
		// the update.
		lastFetched, err := repoLastFetched(dir)
		if err != nil {
			statusErr = err
		} else {
			resp.LastFetched = &lastFetched
		}
		lastChanged, err := repoLastChanged(dir)
		if err != nil {
			statusErr = err
		} else {
			resp.LastChanged = &lastChanged
		}
		if statusErr != nil {
			logger.Error("failed to get status of repo", log.String("repo", string(req.Repo)), log.Error(statusErr))
			// Report this error in-band, but still produce a valid response with the
			// other information.
			resp.Error = statusErr.Error()
		}
		// If an error occurred during the update, report it but don't actually make
		// it into an http error; we want the client to get the information cleanly.
		// An update error "wins" over a status error.
		if updateErr != nil {
			resp.Error = updateErr.Error()
		}
	}

	if err := json.NewEncoder(w).Encode(resp); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
}

// handleRepoClone is an asynchronous (does not wait for the clone to complete
// or time out) call to clone a repository.
// Asynchronous errors will have to be checked in the gitserver_repos table under last_error.
func (s *Server) handleRepoClone(w http.ResponseWriter, r *http.Request) {
	logger := s.Logger.Scoped("handleRepoClone", "asynchronous http handler for repo clones")
	var req protocol.RepoCloneRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	var resp protocol.RepoCloneResponse
	req.Repo = protocol.NormalizeRepo(req.Repo)

	_, err := s.cloneRepo(context.Background(), req.Repo, &cloneOptions{Block: false})
	if err != nil {
		logger.Warn("error cloning repo", log.String("repo", string(req.Repo)), log.Error(err))
		resp.Error = err.Error()
	}

	if err := json.NewEncoder(w).Encode(resp); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
}

func (s *Server) handleArchive(w http.ResponseWriter, r *http.Request) {
	var (
		logger    = s.Logger.Scoped("handleArchive", "http handler for repo archive")
		q         = r.URL.Query()
		treeish   = q.Get("treeish")
		repo      = q.Get("repo")
		format    = q.Get("format")
		pathspecs = q["path"]
	)

	// Log which actor is accessing the repo.
	accesslog.Record(r.Context(), repo,
		log.String("treeish", treeish),
		log.String("format", format),
		log.Strings("path", pathspecs),
	)

	if err := checkSpecArgSafety(treeish); err != nil {
		w.WriteHeader(http.StatusBadRequest)
		s.Logger.Error("gitserver.archive.CheckSpecArgSafety", log.Error(err))
		return
	}

	if repo == "" || format == "" {
		w.WriteHeader(http.StatusBadRequest)
		logger.Error("gitserver.archive", log.String("error", "empty repo or format"))
		return
	}

	req := &protocol.ExecRequest{
		Repo: api.RepoName(repo),
		Args: []string{
			"archive",

			// Suppresses fatal error when the repo contains paths matching **/.git/** and instead
			// includes those files (to allow archiving invalid such repos). This is unexpected
			// behavior; the --worktree-attributes flag should merely let us specify a gitattributes
			// file that contains `**/.git/** export-ignore`, but it actually makes everything work as
			// desired. Tested by the "repo with .git dir" test case.
			"--worktree-attributes",

			"--format=" + format,
		},
	}

	if format == string(gitserver.ArchiveFormatZip) {
		// Compression level of 0 (no compression) seems to perform the
		// best overall on fast network links, but this has not been tuned
		// thoroughly.
		req.Args = append(req.Args, "-0")
	}

	req.Args = append(req.Args, treeish, "--")
	req.Args = append(req.Args, pathspecs...)

	s.exec(w, r, req)
}

func (s *Server) handleSearch(w http.ResponseWriter, r *http.Request) {
	logger := s.Logger.Scoped("handleSearch", "http handler for search")
	tr, ctx := trace.New(r.Context(), "search", "")
	defer tr.Finish()

	// Decode the request.
	protocol.RegisterGob()
	var args protocol.SearchRequest
	if err := gob.NewDecoder(r.Body).Decode(&args); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	tr.SetAttributes(
		attribute.String("repo", string(args.Repo)),
		attribute.Bool("include_diff", args.IncludeDiff),
		attribute.String("query", args.Query.String()),
		attribute.Int("limit", args.Limit),
		attribute.Bool("include_modified_files", args.IncludeModifiedFiles),
	)

	searchStart := time.Now()
	searchRunning.Inc()
	defer searchRunning.Dec()

	observeLatency := syncx.OnceFunc(func() {
		searchLatency.Observe(time.Since(searchStart).Seconds())
	})

	eventWriter, err := streamhttp.NewWriter(w)
	if err != nil {
		tr.SetError(err)
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	matchesBuf := streamhttp.NewJSONArrayBuf(8*1024, func(data []byte) error {
		tr.AddEvent("flushing data", attribute.Int("data.len", len(data)))
		observeLatency()
		return eventWriter.EventBytes("matches", data)
	})

	// Run the search.
	limitHit, searchErr := s.search(ctx, &args, matchesBuf)
	if writeErr := eventWriter.Event("done", protocol.NewSearchEventDone(limitHit, searchErr)); writeErr != nil {
		if !errors.Is(writeErr, syscall.EPIPE) {
			logger.Error("failed to send done event", log.Error(writeErr))
		}
	}
	tr.AddEvent("done", attribute.Bool("limit_hit", limitHit))
	tr.SetError(searchErr)
	searchDuration.
		WithLabelValues(strconv.FormatBool(searchErr != nil)).
		Observe(time.Since(searchStart).Seconds())

	if honey.Enabled() || traceLogs {
		act := actor.FromContext(ctx)
		ev := honey.NewEvent("gitserver-search")
		ev.SetSampleRate(honeySampleRate("", act))
		ev.AddField("repo", args.Repo)
		ev.AddField("revisions", args.Revisions)
		ev.AddField("include_diff", args.IncludeDiff)
		ev.AddField("include_modified_files", args.IncludeModifiedFiles)
		ev.AddField("actor", act.UIDString())
		ev.AddField("query", args.Query.String())
		ev.AddField("limit", args.Limit)
		ev.AddField("duration_ms", time.Since(searchStart).Milliseconds())
		if searchErr != nil {
			ev.AddField("error", searchErr.Error())
		}
		if traceID := trace.ID(ctx); traceID != "" {
			ev.AddField("traceID", traceID)
			ev.AddField("trace", trace.URL(traceID, conf.DefaultClient()))
		}
		if honey.Enabled() {
			_ = ev.Send()
		}
		if traceLogs {
			logger.Debug("TRACE gitserver search", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
		}
	}
}

// search handles the core logic of the search. It is passed a matchesBuf so it doesn't need to
// concern itself with event types, and all instrumentation is handled in the calling function.
func (s *Server) search(ctx context.Context, args *protocol.SearchRequest, matchesBuf *streamhttp.JSONArrayBuf) (limitHit bool, err error) {
	args.Repo = protocol.NormalizeRepo(args.Repo)
	if args.Limit == 0 {
		args.Limit = math.MaxInt32
	}

	dir := s.dir(args.Repo)
	if !repoCloned(dir) {
		if conf.Get().DisableAutoGitUpdates {
			s.Logger.Debug("not cloning on demand as DisableAutoGitUpdates is set")
			return false, &gitdomain.RepoNotExistError{
				Repo: args.Repo,
			}
		}

		cloneProgress, cloneInProgress := s.locker.Status(dir)
		if cloneInProgress {
			return false, &gitdomain.RepoNotExistError{
				Repo:            args.Repo,
				CloneInProgress: true,
				CloneProgress:   cloneProgress,
			}
		}

		cloneProgress, err := s.cloneRepo(ctx, args.Repo, nil)
		if err != nil {
			s.Logger.Debug("error starting repo clone", log.String("repo", string(args.Repo)), log.Error(err))
			return false, &gitdomain.RepoNotExistError{
				Repo:            args.Repo,
				CloneInProgress: false,
			}
		}

		return false, &gitdomain.RepoNotExistError{
			Repo:            args.Repo,
			CloneInProgress: true,
			CloneProgress:   cloneProgress,
		}
	}

	for _, rev := range args.Revisions {
		// TODO: add the result to the trace
		if rev.RevSpec != "" {
			_ = s.ensureRevision(ctx, args.Repo, rev.RevSpec, dir)
		} else if rev.RefGlob != "" {
			_ = s.ensureRevision(ctx, args.Repo, rev.RefGlob, dir)
		}
	}

	g, ctx := errgroup.WithContext(ctx)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// Search all commits, sending matching commits down resultChan.
	resultChan := make(chan *protocol.CommitMatch, 128)
	g.Go(func() error {
		defer close(resultChan)
		done := ctx.Done()

		mt, err := search.ToMatchTree(args.Query)
		if err != nil {
			return err
		}

		// Ensure that we populate ModifiedFiles when we have a DiffModifiesFile filter.
		// --name-status is not zero cost, so we don't do it on every search.
		hasDiffModifiesFile := false
		search.Visit(mt, func(mt search.MatchTree) {
			switch mt.(type) {
			case *search.DiffModifiesFile:
				hasDiffModifiesFile = true
			}
		})

		searcher := &search.CommitSearcher{
			Logger:               s.Logger,
			RepoName:             args.Repo,
			RepoDir:              dir.Path(),
			Revisions:            args.Revisions,
			Query:                mt,
			IncludeDiff:          args.IncludeDiff,
			IncludeModifiedFiles: args.IncludeModifiedFiles || hasDiffModifiesFile,
		}

		return searcher.Search(ctx, func(match *protocol.CommitMatch) {
			select {
			case <-done:
			case resultChan <- match:
			}
		})
	})

	// Write matching commits to the stream, flushing occasionally.
	g.Go(func() error {
		defer cancel()
		defer matchesBuf.Flush()

		flushTicker := time.NewTicker(50 * time.Millisecond)
		defer flushTicker.Stop()

		sentCount := 0
		firstMatch := true
		for {
			select {
			case result, ok := <-resultChan:
				if !ok {
					return nil
				}

				if sentCount >= args.Limit {
					limitHit = true
					return nil
				}
				sentCount += matchCount(result)

				_ = matchesBuf.Append(result) // EOF only

				// Send immediately if this is the first result we've seen.
				if firstMatch {
					_ = matchesBuf.Flush() // EOF only
					firstMatch = false
				}
			case <-flushTicker.C:
				_ = matchesBuf.Flush() // EOF only
			}
		}
	})

	return limitHit, g.Wait()
}

// matchCount returns either:
// 1) the number of diff matches if there are any
// 2) the number of message matches if there are any
// 3) one, to represent matching the commit, but nothing inside it
func matchCount(cm *protocol.CommitMatch) int {
	if len(cm.Diff.MatchedRanges) > 0 {
		return len(cm.Diff.MatchedRanges)
	}
	if len(cm.Message.MatchedRanges) > 0 {
		return len(cm.Message.MatchedRanges)
	}
	return 1
}
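
// Illustrative note (not part of the original file): matchCount feeds the
// limit accounting in search above. A commit whose diff has three matched
// ranges counts as 3 even if its message also matched, since diff matches
// take precedence; a commit with no diff or message ranges still counts
// as 1, so every streamed commit consumes at least one slot of the limit.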

func (s *Server) handleBatchLog(w http.ResponseWriter, r *http.Request) {
	// 🚨 SECURITY: Only allow POST requests.
	if strings.ToUpper(r.Method) != http.MethodPost {
		http.Error(w, "", http.StatusMethodNotAllowed)
		return
	}

	operations := s.ensureOperations()

	// Run git log for a single repository.
	// Invoked multiple times from the handler defined below.
	performGitLogCommand := func(ctx context.Context, repoCommit api.RepoCommit, format string) (output string, isRepoCloned bool, err error) {
		ctx, _, endObservation := operations.batchLogSingle.With(ctx, &err, observation.Args{
			LogFields: append(
				[]otlog.Field{
					otlog.String("format", format),
				},
				repoCommit.LogFields()...,
			),
		})
		defer func() {
			endObservation(1, observation.Args{LogFields: []otlog.Field{
				otlog.Bool("isRepoCloned", isRepoCloned),
			}})
		}()

		dir := s.dir(repoCommit.Repo)
		if !repoCloned(dir) {
			return "", false, nil
		}

		var buf bytes.Buffer

		commitId := string(repoCommit.CommitID)
		// Make sure the commit ID cannot be misparsed as a command-line flag.
		if commitId[0] == '-' {
			return "", true, errors.New("commit ID starting with - is not allowed")
		}

		cmd := s.recordingCommandFactory.Command(ctx, s.Logger, "git", "log", "-n", "1", "--name-only", format, commitId)
		dir.Set(cmd.Unwrap())
		cmd.Unwrap().Stdout = &buf

		if _, err := runCommand(ctx, cmd); err != nil {
			return "", true, err
		}

		return buf.String(), true, nil
	}

	// Handles the /batch-log route.
	instrumentedHandler := func(ctx context.Context) (statusCodeOnError int, err error) {
		ctx, logger, endObservation := operations.batchLog.With(ctx, &err, observation.Args{})
		defer func() {
			endObservation(1, observation.Args{LogFields: []otlog.Field{
				otlog.Int("statusCodeOnError", statusCodeOnError),
			}})
		}()

		// Read the request body.
		var req protocol.BatchLogRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			return http.StatusBadRequest, err
		}
		logger.AddEvent("read request.body", req.SpanAttributes()...)

		// Validate request parameters.
		if len(req.RepoCommits) == 0 {
			// Early exit: implicitly writes 200 OK.
			_ = json.NewEncoder(w).Encode(protocol.BatchLogResponse{Results: []protocol.BatchLogResult{}})
			return 0, nil
		}
		if !strings.HasPrefix(req.Format, "--format=") {
			return http.StatusUnprocessableEntity, errors.New("format parameter expected to be of the form `--format=<git log format>`")
		}

		// Perform requests in each repository in the input batch. We perform these commands
		// concurrently, but only allow for so many commands to be in-flight at a time so that
		// we don't overwhelm a shard with either a large request or too many concurrent batch
		// requests.

		g, ctx := errgroup.WithContext(ctx)
		results := make([]protocol.BatchLogResult, len(req.RepoCommits))

		if s.GlobalBatchLogSemaphore == nil {
			return http.StatusInternalServerError, errors.New("s.GlobalBatchLogSemaphore not initialized")
		}

		for i, repoCommit := range req.RepoCommits {
			// Avoid capture of loop variables.
			i, repoCommit := i, repoCommit

			start := time.Now()
			if err := s.GlobalBatchLogSemaphore.Acquire(ctx, 1); err != nil {
				return http.StatusInternalServerError, err
			}
			s.operations.batchLogSemaphoreWait.Observe(time.Since(start).Seconds())

			g.Go(func() error {
				defer s.GlobalBatchLogSemaphore.Release(1)

				output, isRepoCloned, err := performGitLogCommand(ctx, repoCommit, req.Format)
				if err == nil && !isRepoCloned {
					err = errors.Newf("repo not found")
				}
				var errMessage string
				if err != nil {
					errMessage = err.Error()
				}

				// Concurrently write results to the shared slice. This slice is already properly
				// sized, and each goroutine writes to a unique index exactly once. There should
				// be no data race conditions possible here.
				results[i] = protocol.BatchLogResult{
					RepoCommit:    repoCommit,
					CommandOutput: output,
					CommandError:  errMessage,
				}
				return nil
			})
		}

		if err := g.Wait(); err != nil {
			return http.StatusInternalServerError, err
		}

		// Write the payload to the client: implicitly writes 200 OK.
		_ = json.NewEncoder(w).Encode(protocol.BatchLogResponse{Results: results})
		return 0, nil
	}

	// Handle unexpected error conditions. We expect the instrumented handler to not
	// have written the status code or any of the body if this error value is non-nil.
	if statusCodeOnError, err := instrumentedHandler(r.Context()); err != nil {
		http.Error(w, err.Error(), statusCodeOnError)
		return
	}
}

// ensureOperations returns the non-nil operations value supplied to this server
// via RegisterMetrics (when constructed as part of the gitserver binary), or
// constructs and memoizes a no-op operations value (for use in tests).
func (s *Server) ensureOperations() *operations {
	if s.operations == nil {
		s.operations = newOperations(s.ObservationCtx)
	}

	return s.operations
}
|
|
|
|
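// handleExec handles the /exec endpoint: it decodes a protocol.ExecRequest
// from the request body, records which actor ran which command, and then
// executes the given git command against the requested repository.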
func (s *Server) handleExec(w http.ResponseWriter, r *http.Request) {
	// 🚨 SECURITY: Only allow POST requests.
	// See https://github.com/sourcegraph/security-issues/issues/213.
	if strings.ToUpper(r.Method) != http.MethodPost {
		http.Error(w, "", http.StatusMethodNotAllowed)
		return
	}

	var req protocol.ExecRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	// Log which actor is accessing the repo.
	args := req.Args
	cmd := ""
	if len(req.Args) > 0 {
		cmd = req.Args[0]
		args = args[1:]
	}
	accesslog.Record(r.Context(), string(req.Repo),
		log.String("cmd", cmd),
		log.Strings("args", args),
	)

	s.exec(w, r, &req)
}

var blockedCommandExecutedCounter = promauto.NewCounter(prometheus.CounterOpts{
	Name: "src_gitserver_exec_blocked_command_received",
	Help: "Incremented each time a command not in the allowlist for gitserver is executed",
})

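// exec runs the requested git command in the given repository, streaming
// stdout to the client and reporting the exit status, stderr, and any
// execution error via HTTP trailers.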
func (s *Server) exec(w http.ResponseWriter, r *http.Request, req *protocol.ExecRequest) {
	logger := s.Logger.Scoped("exec", "").With(log.Strings("req.Args", req.Args))

	// Flush writes more aggressively than standard net/http so that clients
	// with a context deadline see as much partial response body as possible.
	if fw := newFlushingResponseWriter(logger, w); fw != nil {
		w = fw
		defer fw.Close()
	}

	// 🚨 SECURITY: Ensure that only commands in the allowed list are executed.
	// See https://github.com/sourcegraph/security-issues/issues/213.
	if !gitdomain.IsAllowedGitCmd(logger, req.Args) {
		blockedCommandExecutedCounter.Inc()
		logger.Warn("exec: bad command", log.String("RemoteAddr", r.RemoteAddr))

		w.WriteHeader(http.StatusBadRequest)
		_, _ = w.Write([]byte("invalid command"))
		return
	}

	ctx := r.Context()

	if !req.NoTimeout {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, shortGitCommandTimeout(req.Args))
		defer cancel()
	}

	start := time.Now()
	var cmdStart time.Time // set once we have ensured commit
	exitStatus := -10810   // sentinel value to indicate not set
	var stdoutN, stderrN int64
	var status string
	var execErr error
	ensureRevisionStatus := "noop"

	req.Repo = protocol.NormalizeRepo(req.Repo)

	// Instrumentation
	{
		cmd := ""
		if len(req.Args) > 0 {
			cmd = req.Args[0]
		}
		args := strings.Join(req.Args, " ")

		var tr *trace.Trace
		tr, ctx = trace.New(ctx, "exec."+cmd, string(req.Repo))
		tr.SetAttributes(
			attribute.String("args", args),
			attribute.String("ensure_revision", req.EnsureRevision),
		)
		logger = logger.WithTrace(trace.Context(ctx))

		execRunning.WithLabelValues(cmd).Inc()
		defer func() {
			tr.AddEvent(
				"done",
				attribute.String("status", status),
				attribute.Int64("stdout", stdoutN),
				attribute.Int64("stderr", stderrN),
				attribute.String("ensure_revision_status", ensureRevisionStatus),
			)
			tr.SetError(execErr)
			tr.Finish()

			duration := time.Since(start)
			execRunning.WithLabelValues(cmd).Dec()
			execDuration.WithLabelValues(cmd, status).Observe(duration.Seconds())

			var cmdDuration time.Duration
			var fetchDuration time.Duration
			if !cmdStart.IsZero() {
				cmdDuration = time.Since(cmdStart)
				fetchDuration = cmdStart.Sub(start)
			}

			isSlow := cmdDuration > shortGitCommandSlow(req.Args)
			isSlowFetch := fetchDuration > 10*time.Second
			if honey.Enabled() || traceLogs || isSlow || isSlowFetch {
				act := actor.FromContext(ctx)
				ev := honey.NewEvent("gitserver-exec")
				ev.SetSampleRate(honeySampleRate(cmd, act))
				ev.AddField("repo", req.Repo)
				ev.AddField("cmd", cmd)
				ev.AddField("args", args)
				ev.AddField("actor", act.UIDString())
				ev.AddField("ensure_revision", req.EnsureRevision)
				ev.AddField("ensure_revision_status", ensureRevisionStatus)
				ev.AddField("client", r.UserAgent())
				ev.AddField("duration_ms", duration.Milliseconds())
				ev.AddField("stdin_size", len(req.Stdin))
				ev.AddField("stdout_size", stdoutN)
				ev.AddField("stderr_size", stderrN)
				ev.AddField("exit_status", exitStatus)
				ev.AddField("status", status)
				if execErr != nil {
					ev.AddField("error", execErr.Error())
				}
				if !cmdStart.IsZero() {
					ev.AddField("cmd_duration_ms", cmdDuration.Milliseconds())
					ev.AddField("fetch_duration_ms", fetchDuration.Milliseconds())
				}

				if traceID := trace.ID(ctx); traceID != "" {
					ev.AddField("traceID", traceID)
					ev.AddField("trace", trace.URL(traceID, conf.DefaultClient()))
				}

				if honey.Enabled() {
					_ = ev.Send()
				}

				if traceLogs {
					logger.Debug("TRACE gitserver exec", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
				}
				if isSlow {
					logger.Warn("Long exec request", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
				}
				if isSlowFetch {
					logger.Warn("Slow fetch/clone for exec request", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
				}
			}
		}()
	}

	if notFoundPayload, cloned := s.maybeStartClone(ctx, logger, req.Repo); !cloned {
		if notFoundPayload.CloneInProgress {
			status = "clone-in-progress"
		} else {
			status = "repo-not-found"
		}
		w.WriteHeader(http.StatusNotFound)
		_ = json.NewEncoder(w).Encode(notFoundPayload)
		return
	}

	dir := s.dir(req.Repo)
	if s.ensureRevision(ctx, req.Repo, req.EnsureRevision, dir) {
		ensureRevisionStatus = "fetched"
	}

	w.Header().Set("Content-Type", "application/octet-stream")
	w.Header().Set("Cache-Control", "no-cache")

	w.Header().Set("Trailer", "X-Exec-Error")
	w.Header().Add("Trailer", "X-Exec-Exit-Status")
	w.Header().Add("Trailer", "X-Exec-Stderr")
	w.WriteHeader(http.StatusOK)

	// Special-case `git rev-parse HEAD` requests. These are invoked by search queries for every repo in scope.
	// For searches over large repo sets (> 1k), this leads to too many child process execs, which can lead
	// to a persistent failure mode where every exec takes > 10s, which is disastrous for gitserver performance.
	if len(req.Args) == 2 && req.Args[0] == "rev-parse" && req.Args[1] == "HEAD" {
		if resolved, err := quickRevParseHead(dir); err == nil && isAbsoluteRevision(resolved) {
			_, _ = w.Write([]byte(resolved))
			w.Header().Set("X-Exec-Error", "")
			w.Header().Set("X-Exec-Exit-Status", "0")
			w.Header().Set("X-Exec-Stderr", "")
			return
		}
	}
	// Special-case `git symbolic-ref HEAD` requests. These are invoked by resolvers determining the default branch of a repo.
	// For searches over large repo sets (> 1k), this leads to too many child process execs, which can lead
	// to a persistent failure mode where every exec takes > 10s, which is disastrous for gitserver performance.
	if len(req.Args) == 2 && req.Args[0] == "symbolic-ref" && req.Args[1] == "HEAD" {
		if resolved, err := quickSymbolicRefHead(dir); err == nil {
			_, _ = w.Write([]byte(resolved))
			w.Header().Set("X-Exec-Error", "")
			w.Header().Set("X-Exec-Exit-Status", "0")
			w.Header().Set("X-Exec-Stderr", "")
			return
		}
	}

	var stderrBuf bytes.Buffer
	stdoutW := &writeCounter{w: w}
	stderrW := &writeCounter{w: &limitWriter{W: &stderrBuf, N: 1024}}

	cmdStart = time.Now()
	cmd := s.recordingCommandFactory.Command(ctx, s.Logger, "git", req.Args...)
	dir.Set(cmd.Unwrap())
	cmd.Unwrap().Stdout = stdoutW
	cmd.Unwrap().Stderr = stderrW
	cmd.Unwrap().Stdin = bytes.NewReader(req.Stdin)

	exitStatus, execErr = runCommand(ctx, cmd)

	status = strconv.Itoa(exitStatus)
	stdoutN = stdoutW.n
	stderrN = stderrW.n

	stderr := stderrBuf.String()
	s.logIfCorrupt(ctx, req.Repo, dir, stderr)

	// write trailer
	w.Header().Set("X-Exec-Error", errorString(execErr))
	w.Header().Set("X-Exec-Exit-Status", status)
	w.Header().Set("X-Exec-Stderr", stderr)
}

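// handleP4Exec handles the /p4-exec endpoint: it decodes a
// protocol.P4ExecRequest, checks that the p4 subcommand is on the allowlist
// and that the supplied credentials are valid, and then runs the command.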
func (s *Server) handleP4Exec(w http.ResponseWriter, r *http.Request) {
	var req protocol.P4ExecRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	if len(req.Args) < 1 {
		http.Error(w, "at least one argument is required", http.StatusBadRequest)
		return
	}

	// Make sure the subcommand is explicitly allowed
	allowlist := []string{"protects", "groups", "users", "group"}
	allowed := false
	for _, arg := range allowlist {
		if req.Args[0] == arg {
			allowed = true
			break
		}
	}
	if !allowed {
		http.Error(w, fmt.Sprintf("subcommand %q is not allowed", req.Args[0]), http.StatusBadRequest)
		return
	}

	// Log which actor is accessing p4-exec.
	//
	// p4-exec is currently only used for fetching user based permissions information,
	// so we don't have a repo name.
	accesslog.Record(r.Context(), "<no-repo>",
		log.String("p4user", req.P4User),
		log.String("p4port", req.P4Port),
		log.Strings("args", req.Args),
	)

	// Make sure credentials are valid before heavier operation
	err := p4pingWithTrust(r.Context(), req.P4Port, req.P4User, req.P4Passwd)
	if err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}

	s.p4exec(w, r, &req)
}

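// p4exec runs the requested p4 command, streaming stdout to the client and
// reporting the exit status, stderr, and any execution error via HTTP
// trailers.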
func (s *Server) p4exec(w http.ResponseWriter, r *http.Request, req *protocol.P4ExecRequest) {
	logger := s.Logger.Scoped("p4exec", "")

	// Flush writes more aggressively than standard net/http so that clients
	// with a context deadline see as much partial response body as possible.
	if fw := newFlushingResponseWriter(logger, w); fw != nil {
		w = fw
		defer fw.Close()
	}

	ctx, cancel := context.WithTimeout(r.Context(), time.Minute)
	defer cancel()

	start := time.Now()
	var cmdStart time.Time // set once we have ensured commit
	exitStatus := -10810   // sentinel value to indicate not set
	var stdoutN, stderrN int64
	var status string
	var execErr error

	// Instrumentation
	{
		cmd := ""
		if len(req.Args) > 0 {
			cmd = req.Args[0]
		}
		args := strings.Join(req.Args, " ")

		var tr *trace.Trace
		tr, ctx = trace.New(ctx, "p4exec."+cmd, req.P4Port)
		tr.SetAttributes(attribute.String("args", args))
		logger = logger.WithTrace(trace.Context(ctx))

		execRunning.WithLabelValues(cmd).Inc()
		defer func() {
			tr.AddEvent("done",
				attribute.String("status", status),
				attribute.Int64("stdout", stdoutN),
				attribute.Int64("stderr", stderrN),
			)
			tr.SetError(execErr)
			tr.Finish()

			duration := time.Since(start)
			execRunning.WithLabelValues(cmd).Dec()
			execDuration.WithLabelValues(cmd, status).Observe(duration.Seconds())

			var cmdDuration time.Duration
			if !cmdStart.IsZero() {
				cmdDuration = time.Since(cmdStart)
			}

			isSlow := cmdDuration > 30*time.Second
			if honey.Enabled() || traceLogs || isSlow {
				act := actor.FromContext(ctx)
				ev := honey.NewEvent("gitserver-p4exec")
				ev.SetSampleRate(honeySampleRate(cmd, act))
				ev.AddField("p4port", req.P4Port)
				ev.AddField("cmd", cmd)
				ev.AddField("args", args)
				ev.AddField("actor", act.UIDString())
				ev.AddField("client", r.UserAgent())
				ev.AddField("duration_ms", duration.Milliseconds())
				ev.AddField("stdout_size", stdoutN)
				ev.AddField("stderr_size", stderrN)
				ev.AddField("exit_status", exitStatus)
				ev.AddField("status", status)
				if execErr != nil {
					ev.AddField("error", execErr.Error())
				}
				if !cmdStart.IsZero() {
					ev.AddField("cmd_duration_ms", cmdDuration.Milliseconds())
				}

				if traceID := trace.ID(ctx); traceID != "" {
					ev.AddField("traceID", traceID)
					ev.AddField("trace", trace.URL(traceID, conf.DefaultClient()))
				}

				_ = ev.Send()

				if traceLogs {
					logger.Debug("TRACE gitserver p4exec", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
				}
				if isSlow {
					logger.Warn("Long p4exec request", log.Object("ev.Fields", mapToLoggerField(ev.Fields())...))
				}
			}
		}()
	}

	w.Header().Set("Trailer", "X-Exec-Error")
	w.Header().Add("Trailer", "X-Exec-Exit-Status")
	w.Header().Add("Trailer", "X-Exec-Stderr")
	w.WriteHeader(http.StatusOK)

	var stderrBuf bytes.Buffer
	stdoutW := &writeCounter{w: w}
	stderrW := &writeCounter{w: &limitWriter{W: &stderrBuf, N: 1024}}

	cmdStart = time.Now()
	cmd := exec.CommandContext(ctx, "p4", req.Args...)
	cmd.Env = append(os.Environ(),
		"P4PORT="+req.P4Port,
		"P4USER="+req.P4User,
		"P4PASSWD="+req.P4Passwd,
	)
	cmd.Stdout = stdoutW
	cmd.Stderr = stderrW

	exitStatus, execErr = runCommand(ctx, s.recordingCommandFactory.Wrap(ctx, s.Logger, cmd))

	status = strconv.Itoa(exitStatus)
	stdoutN = stdoutW.n
	stderrN = stderrW.n

	stderr := stderrBuf.String()

	// write trailer
	w.Header().Set("X-Exec-Error", errorString(execErr))
	w.Header().Set("X-Exec-Exit-Status", status)
	w.Header().Set("X-Exec-Stderr", stderr)
}

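// setLastFetched reads the last-fetched and last-changed timestamps from the
// repo on disk and records them, together with this shard's hostname, in the
// database.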
func (s *Server) setLastFetched(ctx context.Context, name api.RepoName) error {
	dir := s.dir(name)

	lastFetched, err := repoLastFetched(dir)
	if err != nil {
		return errors.Wrapf(err, "failed to get last fetched for %s", name)
	}

	lastChanged, err := repoLastChanged(dir)
	if err != nil {
		return errors.Wrapf(err, "failed to get last changed for %s", name)
	}

	return s.DB.GitserverRepos().SetLastFetched(ctx, name, database.GitserverFetchData{
		LastFetched: lastFetched,
		LastChanged: lastChanged,
		ShardID:     s.Hostname,
	})
}

// setLastErrorNonFatal will set the last_error column for the repo in the gitserver table.
func (s *Server) setLastErrorNonFatal(ctx context.Context, name api.RepoName, err error) {
	var errString string
	if err != nil {
		errString = err.Error()
	}

	if err := s.DB.GitserverRepos().SetLastError(ctx, name, errString, s.Hostname); err != nil {
		s.Logger.Warn("Setting last error in DB", log.Error(err))
	}
}

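// setCloneStatus records the clone status for the repo, along with this
// shard's hostname, in the database.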
func (s *Server) setCloneStatus(ctx context.Context, name api.RepoName, status types.CloneStatus) (err error) {
	return s.DB.GitserverRepos().SetCloneStatus(ctx, name, status, s.Hostname)
}

// setCloneStatusNonFatal is the same as setCloneStatus but only logs errors
func (s *Server) setCloneStatusNonFatal(ctx context.Context, name api.RepoName, status types.CloneStatus) {
	if err := s.setCloneStatus(ctx, name, status); err != nil {
		s.Logger.Warn("Setting clone status in DB", log.Error(err))
	}
}

// setRepoSize calculates the size of the repo and stores it in the database.
func (s *Server) setRepoSize(ctx context.Context, name api.RepoName) error {
	return s.DB.GitserverRepos().SetRepoSize(ctx, name, dirSize(s.dir(name).Path(".")), s.Hostname)
}

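// logIfCorrupt inspects the stderr output of a git command and, if it
// indicates that the repository may be corrupt, records the corruption (with
// stderr as the reason) in the database.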
func (s *Server) logIfCorrupt(ctx context.Context, repo api.RepoName, dir GitDir, stderr string) {
	if checkMaybeCorruptRepo(s.Logger, repo, dir, stderr) {
		reason := stderr
		if err := s.DB.GitserverRepos().LogCorruption(ctx, repo, reason, s.Hostname); err != nil {
			s.Logger.Warn("failed to log repo corruption", log.String("repo", string(repo)), log.Error(err))
		}
	}
}

// setGitAttributes writes our global gitattributes to
// gitDir/info/attributes. This will override .gitattributes inside of
// repositories. It is used to unset attributes such as export-ignore.
func setGitAttributes(dir GitDir) error {
	infoDir := dir.Path("info")
	if err := os.Mkdir(infoDir, os.ModePerm); err != nil && !os.IsExist(err) {
		return errors.Wrap(err, "failed to set git attributes")
	}

	_, err := fileutil.UpdateFileIfDifferent(
		filepath.Join(infoDir, "attributes"),
		[]byte(`# Managed by Sourcegraph gitserver.

# We want every file to be present in git archive.
* -export-ignore
`))
	if err != nil {
		return errors.Wrap(err, "failed to set git attributes")
	}
	return nil
}

// testRepoCorrupter is used by tests to disrupt a cloned repository (e.g. deleting
// HEAD, zeroing it out, etc.)
var testRepoCorrupter func(ctx context.Context, tmpDir GitDir)

// cloneOptions specify optional behaviour for the cloneRepo function.
type cloneOptions struct {
	// Block will wait for the clone to finish before returning. If the clone
	// fails, the error will be returned. The passed in context is
	// respected. When not blocking, the clone is done with a server background
	// context.
	Block bool

	// Overwrite will overwrite the existing clone.
	Overwrite bool

	// CloneFromShard is the hostname of the gitserver instance which is the current owner of the
	// repository. If this is a non-zero string, then gitserver will attempt to clone the repo from
	// that gitserver instance instead of the upstream repo URL of the external service.
	CloneFromShard string
}

// cloneRepo performs a clone operation for the given repository. It is
// non-blocking by default.
func (s *Server) cloneRepo(ctx context.Context, repo api.RepoName, opts *cloneOptions) (cloneProgress string, err error) {
	if isAlwaysCloningTest(repo) {
		return "This will never finish cloning", nil
	}

	// We always want to store whether there was an error cloning the repo
	defer func() {
		// Use a different context in case we failed because the original context failed.
		s.setLastErrorNonFatal(s.ctx, repo, err)
	}()

	dir := s.dir(repo)

	// PERF: Before doing the network request to check if isCloneable, let's
	// ensure we are not already cloning.
	if progress, cloneInProgress := s.locker.Status(dir); cloneInProgress {
		return progress, nil
	}

	syncer, err := s.GetVCSSyncer(ctx, repo)
	if err != nil {
		return "", errors.Wrap(err, "get VCS syncer")
	}

	var remoteURL *vcs.URL
	if opts != nil && opts.CloneFromShard != "" {
		// are we cloning from the same gitserver instance?
		if s.hostnameMatch(strings.TrimPrefix(opts.CloneFromShard, "http://")) {
			return "", errors.Errorf("cannot clone from the same gitserver instance")
		}

		remoteURL, err = vcs.ParseURL(opts.CloneFromShard)
		if err != nil {
			return "", err
		}
		remoteURL = remoteURL.JoinPath("git", string(repo))
	} else {
		// We may be attempting to clone a private repo so we need an internal actor.
		remoteURL, err = s.getRemoteURL(actor.WithInternalActor(ctx), repo)
		if err != nil {
			return "", err
		}
	}

	// isCloneable causes a network request, so we limit the number that can
	// run at one time. We use a semaphore separate from the cloning one,
	// since having these checks blocked by a few slow clones would lead to
	// poor feedback to users. We can defer since the rest of the function
	// does not block this goroutine.
	ctx, cancel, err := s.acquireCloneableLimiter(ctx)
	if err != nil {
		return "", err // err will be a context error
	}
	defer cancel()

	if err = s.rpsLimiter.Wait(ctx); err != nil {
		return "", err
	}

	if err := syncer.IsCloneable(ctx, remoteURL); err != nil {
		redactedErr := newURLRedactor(remoteURL).redact(err.Error())
		return "", errors.Errorf("error cloning repo: repo %s not cloneable: %s", repo, redactedErr)
	}

	// Mark this repo as currently being cloned. We have to check again if someone else isn't already
	// cloning since we released the lock. We released the lock since isCloneable is a potentially
	// slow operation.
	lock, ok := s.locker.TryAcquire(dir, "starting clone")
	if !ok {
		// Someone else beat us to it
		status, _ := s.locker.Status(dir)
		return status, nil
	}

	if s.skipCloneForTests {
		lock.Release()
		return "", nil
	}

	// We clone to a temporary location first to avoid having incomplete
	// clones in the repo tree. This also avoids leaving behind corrupt clones
	// if the clone is interrupted.
	if opts != nil && opts.Block {
		ctx, cancel, err := s.acquireCloneLimiter(ctx)
		if err != nil {
			return "", err
		}
		defer cancel()

		// We are blocking, so use the passed in context.
		err = s.doClone(ctx, repo, dir, syncer, lock, remoteURL, opts)
		err = errors.Wrapf(err, "failed to clone %s", repo)
		return "", err
	}

	// We push the cloneJob to a queue and let the producer-consumer pipeline take over from this
	// point. See definitions of cloneJobProducer and cloneJobConsumer to understand how these jobs
	// are processed.
	s.CloneQueue.push(&cloneJob{
		repo:      repo,
		dir:       dir,
		syncer:    syncer,
		lock:      lock,
		remoteURL: remoteURL,
		options:   opts,
	})

	return "", nil
}

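// doClone clones the repository into a temporary directory, prepares it
// (removes bad refs and sets HEAD, the repository type, the last-changed
// stamp, git attributes, and gc settings), and then moves it into place. The
// lock is released once the clone attempt finishes.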
func (s *Server) doClone(ctx context.Context, repo api.RepoName, dir GitDir, syncer VCSSyncer, lock *RepositoryLock, remoteURL *vcs.URL, opts *cloneOptions) (err error) {
	logger := s.Logger.Scoped("doClone", "").With(log.String("repo", string(repo)))

	defer lock.Release()
	defer func() {
		if err != nil {
			repoCloneFailedCounter.Inc()
		}
	}()
	if err := s.rpsLimiter.Wait(ctx); err != nil {
		return err
	}
	ctx, cancel2 := context.WithTimeout(ctx, conf.GitLongCommandTimeout())
	defer cancel2()

	dstPath := string(dir)
	overwrite := opts != nil && opts.Overwrite
	if !overwrite {
		// We clone to a temporary directory first, so check that the
		// destination does not already exist to avoid wasting resources.
		if _, err := os.Stat(dstPath); err == nil {
			return &os.PathError{
				Op:   "cloneRepo",
				Path: dstPath,
				Err:  os.ErrExist,
			}
		}
	}

	tmpPath, err := s.tempDir("clone-")
	if err != nil {
		return err
	}
	defer os.RemoveAll(tmpPath)
	tmpPath = filepath.Join(tmpPath, ".git")
	tmp := GitDir(tmpPath)

	// It may already be cloned
	if !repoCloned(dir) {
		s.setCloneStatusNonFatal(ctx, repo, types.CloneStatusCloning)
	}
	defer func() {
		// Use a background context to ensure we still update the DB even if we time out
		s.setCloneStatusNonFatal(context.Background(), repo, cloneStatus(repoCloned(dir), false))
	}()

	cmd, err := syncer.CloneCommand(ctx, remoteURL, tmpPath)
	if err != nil {
		return errors.Wrap(err, "get clone command")
	}
	if cmd.Env == nil {
		cmd.Env = os.Environ()
	}

	// see issue #7322: skip LFS content in repositories with Git LFS configured
	cmd.Env = append(cmd.Env, "GIT_LFS_SKIP_SMUDGE=1")
	logger.Info("cloning repo", log.String("tmp", tmpPath), log.String("dst", dstPath))

	pr, pw := io.Pipe()
	defer pw.Close()

	go readCloneProgress(logger, newURLRedactor(remoteURL), lock, pr, repo)

	if output, err := runWith(ctx, s.recordingCommandFactory.Wrap(ctx, s.Logger, cmd), true, pw); err != nil {
		return errors.Wrapf(err, "clone failed. Output: %s", string(output))
	}

	if testRepoCorrupter != nil {
		testRepoCorrupter(ctx, tmp)
	}

	removeBadRefs(ctx, tmp)

	if err := setHEAD(ctx, logger, s.recordingCommandFactory, tmp, syncer, remoteURL); err != nil {
		logger.Warn("Failed to ensure HEAD exists", log.Error(err))
		return errors.Wrap(err, "failed to ensure HEAD exists")
	}

	if err := setRepositoryType(tmp, syncer.Type()); err != nil {
		return errors.Wrap(err, `git config set "sourcegraph.type"`)
	}

	// Update the last-changed stamp.
	if err := setLastChanged(logger, tmp); err != nil {
		return errors.Wrapf(err, "failed to update last changed time")
	}

	// Set gitattributes
	if err := setGitAttributes(tmp); err != nil {
		return err
	}

	// Set gc.auto depending on gitGCMode.
	if err := gitSetAutoGC(tmp); err != nil {
		return err
	}

	if overwrite {
		// remove the current repo by putting it into our temporary directory
		err := fileutil.RenameAndSync(dstPath, filepath.Join(filepath.Dir(tmpPath), "old"))
		if err != nil && !os.IsNotExist(err) {
			return errors.Wrapf(err, "failed to remove old clone")
		}
	}

	if err := os.MkdirAll(filepath.Dir(dstPath), os.ModePerm); err != nil {
		return err
	}
	if err := fileutil.RenameAndSync(tmpPath, dstPath); err != nil {
		return err
	}

	// Successfully updated, best-effort updating of db fetch state based on
	// disk state.
	if err := s.setLastFetched(ctx, repo); err != nil {
		logger.Warn("failed setting last fetch in DB", log.Error(err))
	}

	// Successfully updated, best-effort calculation of the repo size.
	if err := s.setRepoSize(ctx, repo); err != nil {
		logger.Warn("failed setting repo size", log.Error(err))
	}

	logger.Info("repo cloned")
	repoClonedCounter.Inc()

	return nil
}

// readCloneProgress scans the reader and saves the most recent line of output
// as the lock status.
func readCloneProgress(logger log.Logger, redactor *urlRedactor, lock *RepositoryLock, pr io.Reader, repo api.RepoName) {
	var logFile *os.File
	var err error

	if conf.Get().CloneProgressLog {
		logFile, err = os.CreateTemp("", "")
		if err != nil {
			logger.Warn("failed to create temporary clone log file", log.Error(err), log.String("repo", string(repo)))
		} else {
			logger.Info("logging clone output", log.String("file", logFile.Name()), log.String("repo", string(repo)))
			defer logFile.Close()
		}
	}

	scan := bufio.NewScanner(pr)
	scan.Split(scanCRLF)
	for scan.Scan() {
		progress := scan.Text()

		// 🚨 SECURITY: The output could include the clone url, which may contain a sensitive token.
		// Redact the full url and any found HTTP credentials to be safe.
		//
		// e.g.
		// $ git clone http://token@github.com/foo/bar
		// Cloning into 'nick'...
		// fatal: repository 'http://token@github.com/foo/bar/' not found
		redactedProgress := redactor.redact(progress)

		lock.SetStatus(redactedProgress)

		if logFile != nil {
			// Failing to write here is non-fatal and we don't want to spam our logs if there
			// are issues
			_, _ = fmt.Fprintln(logFile, progress)
		}
	}
	if err := scan.Err(); err != nil {
		logger.Error("error reporting progress", log.Error(err))
	}
}

// urlRedactor redacts all sensitive strings from a message.
type urlRedactor struct {
	// sensitive are sensitive strings to be redacted.
	// The strings should not be empty.
	sensitive []string
}

// newURLRedactor returns a new urlRedactor that redacts
// credentials found in rawurl, and the rawurl itself.
func newURLRedactor(parsedURL *vcs.URL) *urlRedactor {
	var sensitive []string
	pw, _ := parsedURL.User.Password()
	u := parsedURL.User.Username()
	if pw != "" && u != "" {
		// Only block password if we have both as we can
		// assume that the username isn't sensitive in this case
		sensitive = append(sensitive, pw)
	} else {
		if pw != "" {
			sensitive = append(sensitive, pw)
		}
		if u != "" {
			sensitive = append(sensitive, u)
		}
	}
	sensitive = append(sensitive, parsedURL.String())
	return &urlRedactor{sensitive: sensitive}
}

// redact returns a redacted version of message.
// Sensitive strings are replaced with "<redacted>".
func (r *urlRedactor) redact(message string) string {
	for _, s := range r.sensitive {
		message = strings.ReplaceAll(message, s, "<redacted>")
	}
	return message
}

// scanCRLF is similar to bufio.ScanLines except it splits on both '\r' and '\n'
// and it does not return tokens that contain only whitespace.
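// For example, the input "a\r\nb\rc\n" yields the tokens "a", "b", and "c",
// while a line consisting only of whitespace yields no token at all.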
func scanCRLF(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if atEOF && len(data) == 0 {
		return 0, nil, nil
	}
	trim := func(data []byte) []byte {
		data = bytes.TrimSpace(data)
		if len(data) == 0 {
			// Don't pass back a token that is all whitespace.
			return nil
		}
		return data
	}
	if i := bytes.IndexAny(data, "\r\n"); i >= 0 {
		// We have a full newline-terminated line.
		return i + 1, trim(data[:i]), nil
	}
	// If we're at EOF, we have a final, non-terminated line. Return it.
	if atEOF {
		return len(data), trim(data), nil
	}
	// Request more data.
	return 0, nil, nil
}

// testGitRepoExists is a test fixture that overrides the return value for
// GitRepoSyncer.IsCloneable when it is set.
var testGitRepoExists func(ctx context.Context, remoteURL *vcs.URL) error

var (
	execRunning = promauto.NewGaugeVec(prometheus.GaugeOpts{
		Name: "src_gitserver_exec_running",
		Help: "number of gitserver.GitCommand running concurrently.",
	}, []string{"cmd"})
	execDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "src_gitserver_exec_duration_seconds",
		Help:    "gitserver.GitCommand latencies in seconds.",
		Buckets: trace.UserLatencyBuckets,
	}, []string{"cmd", "status"})

	searchRunning = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "src_gitserver_search_running",
		Help: "number of gitserver.Search running concurrently.",
	})
	searchDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "src_gitserver_search_duration_seconds",
		Help:    "gitserver.Search duration in seconds.",
		Buckets: []float64{0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30},
	}, []string{"error"})
	searchLatency = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "src_gitserver_search_latency_seconds",
		Help:    "gitserver.Search latency (time until first result is sent) in seconds.",
		Buckets: []float64{0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30},
	})

	pendingClones = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "src_gitserver_clone_queue",
		Help: "number of repos waiting to be cloned.",
	})
	lsRemoteQueue = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "src_gitserver_lsremote_queue",
		Help: "number of repos waiting to check existence on remote code host (git ls-remote).",
	})
	repoClonedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "src_gitserver_repo_cloned",
		Help: "number of successful git clones run",
	})
	repoCloneFailedCounter = promauto.NewCounter(prometheus.CounterOpts{
		Name: "src_gitserver_repo_cloned_failed",
		Help: "number of failed git clones",
	})
)

// Send 1 in 16 events to honeycomb. This is hardcoded since we only use this
// for Sourcegraph.com.
//
// 2020-05-29 1 in 4. We are currently at the top tier for honeycomb (before
// enterprise) and using double our quota. This gives us room to grow. If you
// find we keep bumping this / missing data we care about we can look into
// more dynamic ways to sample in our application code.
//
// 2020-07-20 1 in 16. Again hitting very high usage. Likely due to recent
// scaling up of the indexed search cluster. Will require more investigation,
// but we should probably segment user request path traffic vs internal batch
// traffic.
//
// 2020-11-02 Dynamically sample. Again hitting very high usage. Same root
// cause as before, scaling out indexed search cluster. We update our sampling
// to instead be dynamic, since "rev-parse" is 12 times more likely than the
// next most common command.
//
// 2021-08-20 over two hours we did 128 * 128 * 1e6 rev-parse requests
// internally. So we update our sampling to heavily downsample internal
// rev-parse, while upping our sampling for non-internal.
// https://ui.honeycomb.io/sourcegraph/datasets/gitserver-exec/result/67e4bLvUddg
func honeySampleRate(cmd string, actor *actor.Actor) uint {
	// HACK(keegan) 2022-11-02 IsInternal on sourcegraph.com is always
	// returning false. For now I am also marking it internal if UID is not
	// set to work around us hammering honeycomb.
	internal := actor.IsInternal() || actor.UID == 0
	switch {
	case cmd == "rev-parse" && internal:
		return 1 << 14 // 16384

	case internal:
		// we care more about user requests, so downsample internal more.
		return 16

	default:
		return 8
	}
}

var headBranchPattern = lazyregexp.New(`HEAD branch: (.+?)\n`)

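// doRepoUpdate fetches the latest contents of repo from its remote. Only one
// update per repo runs at a time; concurrent callers wait for the in-flight
// update to finish before returning. If ctx is done before the update
// completes, the context's error is returned instead.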
func (s *Server) doRepoUpdate(ctx context.Context, repo api.RepoName, revspec string) error {
	span, ctx := ot.StartSpanFromContext(ctx, "Server.doRepoUpdate") //nolint:staticcheck // OT is deprecated
	span.SetTag("repo", repo)
	defer span.Finish()

	if msg, ok := isPaused(filepath.Join(s.ReposDir, string(protocol.NormalizeRepo(repo)))); ok {
		s.Logger.Warn("doRepoUpdate paused", log.String("repo", string(repo)), log.String("reason", msg))
		return nil
	}

	s.repoUpdateLocksMu.Lock()
	l, ok := s.repoUpdateLocks[repo]
	if !ok {
		l = &locks{
			once: new(sync.Once),
			mu:   new(sync.Mutex),
		}
		s.repoUpdateLocks[repo] = l
	}
	once := l.once
	mu := l.mu
	s.repoUpdateLocksMu.Unlock()

	// doBackgroundRepoUpdate can block longer than our context deadline. done will
	// close when it's done. We can return when either done is closed or our
	// deadline has passed.
	done := make(chan struct{})
	err := errors.New("another operation is already in progress")
	go func() {
		defer close(done)
		once.Do(func() {
			mu.Lock() // Prevent multiple updates in parallel. It works fine, but it wastes resources.
			defer mu.Unlock()

			s.repoUpdateLocksMu.Lock()
			l.once = new(sync.Once) // Make new requests wait for next update.
			s.repoUpdateLocksMu.Unlock()

			err = s.doBackgroundRepoUpdate(repo, revspec)
			if err != nil {
				// We don't want to spam our logs when the rate limiter has been set to block all
				// updates
				if !errors.Is(err, ratelimit.ErrBlockAll) {
					s.Logger.Error("performing background repo update", log.Error(err))
				}

				// The repo update might have failed due to the repo being corrupt
				var gitErr *GitCommandError
				if errors.As(err, &gitErr) {
					s.logIfCorrupt(ctx, repo, s.dir(repo), gitErr.Output)
				}
			}
			s.setLastErrorNonFatal(s.ctx, repo, err)
		})
	}()

	select {
	case <-done:
		return errors.Wrapf(err, "repo %s:", repo)
	case <-ctx.Done():
		span.LogFields(otlog.String("event", "context canceled"))
		return ctx.Err()
	}
}

var doBackgroundRepoUpdateMock func(api.RepoName) error

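// doBackgroundRepoUpdate fetches the repo from its remote using a server
// background context (bounded by the long git command timeout) and then
// refreshes the repo's on-disk and database metadata.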
func (s *Server) doBackgroundRepoUpdate(repo api.RepoName, revspec string) error {
	logger := s.Logger.Scoped("backgroundRepoUpdate", "").With(log.String("repo", string(repo)))

	if doBackgroundRepoUpdateMock != nil {
		return doBackgroundRepoUpdateMock(repo)
	}
	// background context.
	ctx, cancel1 := s.serverContext()
	defer cancel1()

	// ensure the background update doesn't hang forever
	ctx, cancel2 := context.WithTimeout(ctx, conf.GitLongCommandTimeout())
	defer cancel2()

	// This background process should use our internal actor
	ctx = actor.WithInternalActor(ctx)

	ctx, cancel2, err := s.acquireCloneLimiter(ctx)
	if err != nil {
		return err
	}
	defer cancel2()

	if err = s.rpsLimiter.Wait(ctx); err != nil {
		return err
	}

	repo = protocol.NormalizeRepo(repo)
	dir := s.dir(repo)

	remoteURL, err := s.getRemoteURL(ctx, repo)
	if err != nil {
		return errors.Wrap(err, "failed to determine Git remote URL")
	}

	syncer, err := s.GetVCSSyncer(ctx, repo)
	if err != nil {
		return errors.Wrap(err, "get VCS syncer")
	}

	// drop temporary pack files after a fetch. this function won't
	// return until this fetch has completed or definitely-failed,
	// either way they can't still be in use. we don't care exactly
	// when the cleanup happens, just that it does.
	defer s.cleanTmpFiles(dir)

	err = syncer.Fetch(ctx, remoteURL, dir, revspec)
	if err != nil {
		return errors.Wrapf(err, "failed to fetch repo %q", repo)
	}

	removeBadRefs(ctx, dir)

	if err := setHEAD(ctx, logger, s.recordingCommandFactory, dir, syncer, remoteURL); err != nil {
		return errors.Wrapf(err, "failed to ensure HEAD exists for repo %q", repo)
	}

	if err := setRepositoryType(dir, syncer.Type()); err != nil {
		return errors.Wrapf(err, "failed to set repository type for repo %q", repo)
	}

	// Update the last-changed stamp on disk.
	if err := setLastChanged(logger, dir); err != nil {
		logger.Warn("failed to update last changed time", log.Error(err))
	}

	// Successfully updated, best-effort updating of db fetch state based on
	// disk state.
	if err := s.setLastFetched(ctx, repo); err != nil {
		logger.Warn("failed to set last_fetched in DB", log.Error(err))
	}

	// Successfully updated, best-effort calculation of the repo size.
	if err := s.setRepoSize(ctx, repo); err != nil {
		logger.Warn("failed to set repo size", log.Error(err))
	}

	return nil
}

// older versions of git do not remove tags case insensitively, so we generate
// every possible case of HEAD (2^4 = 16)
var badRefs = syncx.OnceValue(func() []string {
	refs := make([]string, 0, 1<<4)
	for bits := uint8(0); bits < (1 << 4); bits++ {
		s := []byte("HEAD")
		for i, c := range s {
			// lowercase if the i'th bit of bits is 1
			if bits&(1<<i) != 0 {
				s[i] = c - 'A' + 'a'
			}
		}
		refs = append(refs, string(s))
	}
	return refs
})

// removeBadRefs removes bad refs and tags from the git repo at dir. This
// should be run after a clone or fetch. If your repository contains a ref or
// tag called HEAD (case insensitive), most commands will output a warning
// from git:
//
//	warning: refname 'HEAD' is ambiguous.
//
// Instead we just remove this ref.
func removeBadRefs(ctx context.Context, dir GitDir) {
	args := append([]string{"branch", "-D"}, badRefs()...)
	cmd := exec.CommandContext(ctx, "git", args...)
	dir.Set(cmd)
	_ = cmd.Run()

	args = append([]string{"tag", "-d"}, badRefs()...)
	cmd = exec.CommandContext(ctx, "git", args...)
	dir.Set(cmd)
	_ = cmd.Run()
}

// ensureHEAD verifies that there is a HEAD file within the repo, and that it
// is of non-zero length. If either condition fails, we configure a
// best-effort default.
func ensureHEAD(dir GitDir) {
	head, err := os.Stat(dir.Path("HEAD"))
	if os.IsNotExist(err) || head.Size() == 0 {
		os.WriteFile(dir.Path("HEAD"), []byte("ref: refs/heads/master"), 0o600)
	}
}

// setHEAD configures git repo defaults (such as what HEAD is) which are
// needed for git commands to work.
func setHEAD(ctx context.Context, logger log.Logger, rf *wrexec.RecordingCommandFactory, dir GitDir, syncer VCSSyncer, remoteURL *vcs.URL) error {
	// Verify that there is a HEAD file within the repo, and that it is of
	// non-zero length.
	ensureHEAD(dir)

	// Fall back to git's default branch name if git remote show fails.
	headBranch := "master"

	// try to fetch HEAD from origin
	cmd, err := syncer.RemoteShowCommand(ctx, remoteURL)
	if err != nil {
		return errors.Wrap(err, "get remote show command")
	}
	dir.Set(cmd)
	output, err := runWith(ctx, rf.Wrap(ctx, logger, cmd), true, nil)
	if err != nil {
		logger.Error("Failed to fetch remote info", log.Error(err), log.String("output", string(output)))
		return errors.Wrap(err, "failed to fetch remote info")
	}

	submatches := headBranchPattern.FindSubmatch(output)
	if len(submatches) == 2 {
		submatch := string(submatches[1])
		if submatch != "(unknown)" {
			headBranch = submatch
		}
	}

	// check if branch pointed to by HEAD exists
	cmd = exec.CommandContext(ctx, "git", "rev-parse", headBranch, "--")
	dir.Set(cmd)
	if err := cmd.Run(); err != nil {
		// branch does not exist, pick first branch
		cmd := exec.CommandContext(ctx, "git", "branch")
		dir.Set(cmd)
		output, err := cmd.Output()
		if err != nil {
			logger.Error("Failed to list branches", log.Error(err), log.String("output", string(output)))
			return errors.Wrap(err, "failed to list branches")
		}
		lines := strings.Split(string(output), "\n")
		branch := strings.TrimPrefix(strings.TrimPrefix(lines[0], "* "), " ")
		if branch != "" {
			headBranch = branch
		}
	}

	// set HEAD
	cmd = exec.CommandContext(ctx, "git", "symbolic-ref", "HEAD", "refs/heads/"+headBranch)
	dir.Set(cmd)
	if output, err := cmd.CombinedOutput(); err != nil {
		logger.Error("Failed to set HEAD", log.Error(err), log.String("output", string(output)))
		return errors.Wrap(err, "Failed to set HEAD")
	}

	return nil
}

// setLastChanged discerns an approximate last-changed timestamp for a
// repository. This can be approximate; it's used to determine how often we
// should run `git fetch`, but is not relied on strongly. The basic plan
// is as follows: If a repository has never had a timestamp before, we
// guess that the right stamp is *probably* the timestamp of the most
// chronologically-recent commit. If there are no commits, we just use the
// current time because that's probably usually a temporary state.
//
// If a timestamp already exists, we want to update it if and only if
// the set of references (as determined by `git show-ref`) has changed.
//
// To accomplish this, we assert that the file `sg_refhash` in the git
// directory should, if it exists, contain a hash of the output of
// `git show-ref`, and have a timestamp of "the last time this changed",
// except that if we're creating that file for the first time, we set
// it to the timestamp of the top commit. We then compute the hash of
// the show-ref output, and store it in the file if and only if it's
// different from the current contents.
//
// If show-ref fails, we use rev-list to determine whether that's just
// an empty repository (not an error) or some kind of actual error
// that is possibly causing our data to be incorrect, which should
// be reported.
func setLastChanged(logger log.Logger, dir GitDir) error {
	hashFile := dir.Path("sg_refhash")

	hash, err := computeRefHash(dir)
	if err != nil {
		return errors.Wrapf(err, "computeRefHash failed for %s", dir)
	}

	var stamp time.Time
	if _, err := os.Stat(hashFile); os.IsNotExist(err) {
		// This is the first time we are calculating the hash. Give a more
		// appropriate timestamp for sg_refhash than the current time.
		stamp = computeLatestCommitTimestamp(logger, dir)
	}

	_, err = fileutil.UpdateFileIfDifferent(hashFile, hash)
	if err != nil {
		return errors.Wrapf(err, "failed to update %s", hashFile)
	}

	// If stamp is non-zero we have a more appropriate mtime.
	if !stamp.IsZero() {
		err = os.Chtimes(hashFile, stamp, stamp)
		if err != nil {
			return errors.Wrapf(err, "failed to set mtime to the latest commit timestamp for %s", dir)
		}
	}

	return nil
}

// computeLatestCommitTimestamp returns the timestamp of the most recent
// commit if any. If there are no commits or the latest commit is in the
// future, or there is any error, time.Now is returned.
func computeLatestCommitTimestamp(logger log.Logger, dir GitDir) time.Time {
	logger = logger.Scoped("computeLatestCommitTimestamp", "compute the timestamp of the most recent commit").
		With(log.String("repo", string(dir)))

	now := time.Now() // return current time if we don't find a more accurate time
	cmd := exec.Command("git", "rev-list", "--all", "--timestamp", "-n", "1")
	dir.Set(cmd)
	output, err := cmd.Output()
	// If we don't have a more specific stamp, we'll return the current time,
	// and possibly an error.
	if err != nil {
		logger.Warn("failed to execute, defaulting to time.Now", log.Error(err))
		return now
	}

	words := bytes.Split(output, []byte(" "))
	// An empty rev-list output, without an error, is okay.
	if len(words) < 2 {
		return now
	}

	// We should have a timestamp and a commit hash; format is
	// 1521316105 ff03fac223b7f16627b301e03bf604e7808989be
	epoch, err := strconv.ParseInt(string(words[0]), 10, 64)
	if err != nil {
		logger.Warn("ignoring corrupted timestamp, defaulting to time.Now", log.String("timestamp", string(words[0])))
		return now
	}
	stamp := time.Unix(epoch, 0)
	if stamp.After(now) {
		return now
	}
	return stamp
}

// computeRefHash returns a hash of the refs for dir. The hash should only
// change if the set of refs and the commits they point to change.
func computeRefHash(dir GitDir) ([]byte, error) {
	// Do not use CommandContext since this is a fast operation we do not want
	// to interrupt.
	cmd := exec.Command("git", "show-ref")
	dir.Set(cmd)
	output, err := cmd.Output()
	if err != nil {
		// Ignore the failure for an empty repository: show-ref fails with
		// empty output and an exit code of 1
		var e *exec.ExitError
		if !errors.As(err, &e) || len(output) != 0 || len(e.Stderr) != 0 || e.Sys().(syscall.WaitStatus).ExitStatus() != 1 {
			return nil, err
		}
	}

	lines := bytes.Split(output, []byte("\n"))
	sort.Slice(lines, func(i, j int) bool {
		return bytes.Compare(lines[i], lines[j]) < 0
	})
	hasher := sha256.New()
	for _, b := range lines {
		_, _ = hasher.Write(b)
		_, _ = hasher.Write([]byte("\n"))
	}
	hash := make([]byte, hex.EncodedLen(hasher.Size()))
	hex.Encode(hash, hasher.Sum(nil))
	return hash, nil
}

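// ensureRevision checks whether rev resolves in the repo on disk and, if it
// does not, kicks off a repo update before returning. It reports whether an
// update was attempted.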
func (s *Server) ensureRevision(ctx context.Context, repo api.RepoName, rev string, repoDir GitDir) (didUpdate bool) {
	if rev == "" || rev == "HEAD" {
		return false
	}
	if conf.Get().DisableAutoGitUpdates {
		// ensureRevision may kick off a git fetch operation which we don't want if we've
		// configured DisableAutoGitUpdates.
		return false
	}

	// rev-parse on an OID does not check if the commit actually exists, so it
	// always works. So we append ^0 to force the check.
	if isAbsoluteRevision(rev) {
		rev = rev + "^0"
	}
	cmd := exec.Command("git", "rev-parse", rev, "--")
	repoDir.Set(cmd)
	if err := cmd.Run(); err == nil {
		return false
	}
	// Revision not found, update before returning.
	err := s.doRepoUpdate(ctx, repo, rev)
	if err != nil {
		s.Logger.Warn("failed to perform background repo update", log.Error(err), log.String("repo", string(repo)), log.String("rev", rev))
	}
	return true
}

const headFileRefPrefix = "ref: "

// quickSymbolicRefHead best-effort mimics the execution of `git symbolic-ref HEAD`, but doesn't exec a child process.
// It just reads the .git/HEAD file from the bare git repository directory.
func quickSymbolicRefHead(dir GitDir) (string, error) {
	// See if HEAD contains a commit hash and fail if so.
	head, err := os.ReadFile(dir.Path("HEAD"))
	if err != nil {
		return "", err
	}
	head = bytes.TrimSpace(head)
	if isAbsoluteRevision(string(head)) {
		return "", errors.New("ref HEAD is not a symbolic ref")
	}

	// HEAD doesn't contain a commit hash. It contains something like "ref: refs/heads/master".
	if !bytes.HasPrefix(head, []byte(headFileRefPrefix)) {
		return "", errors.New("unrecognized HEAD file format")
	}
	headRef := bytes.TrimPrefix(head, []byte(headFileRefPrefix))
	return string(headRef), nil
}

// quickRevParseHead best-effort mimics the execution of `git rev-parse HEAD`, but doesn't exec a child process.
// It just reads the relevant files from the bare git repository directory.
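// For example, a symbolic HEAD file contains a line such as
// "ref: refs/heads/master", whereas a detached HEAD contains a bare
// 40-character commit hash.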
func quickRevParseHead(dir GitDir) (string, error) {
	// See if HEAD contains a commit hash and return it if so.
	head, err := os.ReadFile(dir.Path("HEAD"))
	if err != nil {
		return "", err
	}
	head = bytes.TrimSpace(head)
	if h := string(head); isAbsoluteRevision(h) {
		return h, nil
	}

	// HEAD doesn't contain a commit hash. It contains something like "ref: refs/heads/master".
	if !bytes.HasPrefix(head, []byte(headFileRefPrefix)) {
		return "", errors.New("unrecognized HEAD file format")
	}
	// Look for the file in refs/heads. If it exists, it contains the commit hash.
	headRef := bytes.TrimPrefix(head, []byte(headFileRefPrefix))
	if bytes.HasPrefix(headRef, []byte("../")) || bytes.Contains(headRef, []byte("/../")) || bytes.HasSuffix(headRef, []byte("/..")) {
		// 🚨 SECURITY: prevent leakage of file contents outside repo dir
		return "", errors.Errorf("invalid ref format: %s", headRef)
	}
	headRefFile := dir.Path(filepath.FromSlash(string(headRef)))
	if refs, err := os.ReadFile(headRefFile); err == nil {
		return string(bytes.TrimSpace(refs)), nil
	}

	// File didn't exist in refs/heads. Look for it in packed-refs.
	f, err := os.Open(dir.Path("packed-refs"))
	if err != nil {
		return "", err
	}
	defer f.Close()
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		fields := bytes.Fields(scanner.Bytes())
		if len(fields) != 2 {
			continue
		}
		commit, ref := fields[0], fields[1]
		if bytes.Equal(ref, headRef) {
			return string(commit), nil
		}
	}
	if err := scanner.Err(); err != nil {
		return "", err
	}

	// Didn't find refs/heads/$HEAD_BRANCH in packed-refs
	return "", errors.New("could not compute `git rev-parse HEAD` in-process, try running `git` process")
}

// errorString returns the error string. If err is nil it returns the empty
// string.
func errorString(err error) string {
	if err == nil {
		return ""
	}
	return err.Error()
}

// isAbsoluteRevision checks if the revision is a git OID SHA string.
//
// Note: This doesn't mean the SHA exists in a repository, nor does it mean it
// isn't a ref. Git allows 40-char hexadecimal strings to be references.
//
// copied from internal/vcs/git to avoid cyclic import
func isAbsoluteRevision(s string) bool {
	if len(s) != 40 {
		return false
	}
	for _, r := range s {
		if !(('0' <= r && r <= '9') ||
			('a' <= r && r <= 'f') ||
			('A' <= r && r <= 'F')) {
			return false
		}
	}
	return true
}