prevent disk overflow on gitserver (#3675)

This commit is contained in:
Issac Trotts 2019-05-01 00:15:54 -07:00 committed by GitHub
parent 12796f388b
commit cd9cc2f6be
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 322 additions and 55 deletions

View File

@ -12,8 +12,8 @@ import (
"time"
"github.com/opentracing-contrib/go-stdlib/nethttp"
opentracing "github.com/opentracing/opentracing-go"
log15 "gopkg.in/inconshreveable/log15.v2"
"github.com/opentracing/opentracing-go"
"gopkg.in/inconshreveable/log15.v2"
"github.com/sourcegraph/sourcegraph/cmd/gitserver/server"
"github.com/sourcegraph/sourcegraph/pkg/debugserver"
@ -21,11 +21,11 @@ import (
"github.com/sourcegraph/sourcegraph/pkg/tracer"
)
const janitorInterval = 24 * time.Hour
var (
reposDir = env.Get("SRC_REPOS_DIR", "/data/repos", "Root dir containing repos.")
runRepoCleanup, _ = strconv.ParseBool(env.Get("SRC_RUN_REPO_CLEANUP", "", "Periodically remove inactive repositories."))
wantFreeG = env.Get("SRC_REPOS_DESIRED_FREE_GB", "10", "How many gigabytes of space to keep free on the disk with the repos")
janitorInterval = env.Get("SRC_REPOS_JANITOR_INTERVAL", "1m", "Interval between cleanup runs")
)
func main() {
@ -40,9 +40,14 @@ func main() {
log.Fatalf("failed to create SRC_REPOS_DIR: %s", err)
}
wantFreeG2, err := strconv.Atoi(wantFreeG)
if err != nil {
log.Fatalf("parsing $SRC_REPOS_DESIRED_FREE_GB: %v", err)
}
gitserver := server.Server{
ReposDir: reposDir,
DeleteStaleRepositories: runRepoCleanup,
DesiredFreeDiskSpace: uint64(wantFreeG2 * 1024 * 1024 * 1024),
}
gitserver.RegisterMetrics()
@ -59,10 +64,14 @@ func main() {
go debugserver.Start()
janitorInterval2, err := time.ParseDuration(janitorInterval)
if err != nil {
log.Fatalf("parsing $SRC_REPOS_JANITOR_INTERVAL: %v", err)
}
go func() {
for {
gitserver.Janitor()
time.Sleep(janitorInterval)
time.Sleep(janitorInterval2)
}
}()

View File

@ -2,24 +2,26 @@ package server
import (
"context"
"fmt"
"io/ioutil"
"math/rand"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"syscall"
"time"
multierror "github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-multierror"
"github.com/pkg/errors"
"github.com/sourcegraph/sourcegraph/pkg/api"
"github.com/sourcegraph/sourcegraph/pkg/gitserver/protocol"
"github.com/prometheus/client_golang/prometheus"
log15 "gopkg.in/inconshreveable/log15.v2"
"gopkg.in/inconshreveable/log15.v2"
)
func init() {
@ -27,9 +29,6 @@ func init() {
prometheus.MustRegister(reposRecloned)
}
// inactiveRepoTTL is the amount of time a repository will remain on a
// gitserver without being updated before it is removed.
const inactiveRepoTTL = time.Hour * 24 * 20
const repoTTL = time.Hour * 24 * 45
var reposRemoved = prometheus.NewCounter(prometheus.CounterOpts{
@ -71,30 +70,6 @@ func (s *Server) cleanupRepos() {
return true, nil
}
maybeRemoveInactive := func(gitDir string) (done bool, err error) {
// We rewrite the HEAD file whenever we update a repo, and repos are
// updated in response to user traffic. Check to see the last time
// HEAD was rewritten to determine whether to consider this repo
// inactive. Note: This is only accurate for installations which set
// disableAutoGitUpdates=true. This is true for sourcegraph.com and
// maybeRemoveInactive should only be run for sourcegraph.com
head, err := os.Stat(filepath.Join(gitDir, "HEAD"))
if err != nil {
return false, err
}
lastUpdated := head.ModTime()
if time.Since(lastUpdated) <= inactiveRepoTTL {
return false, nil
}
log15.Info("removing inactive repo", "repo", gitDir)
if err := s.removeRepoDirectory(gitDir); err != nil {
return true, err
}
reposRemoved.Inc()
return true, nil
}
ensureGitAttributes := func(gitDir string) (done bool, err error) {
return false, setGitAttributes(gitDir)
}
@ -180,20 +155,13 @@ func (s *Server) cleanupRepos() {
// info/attributes.
{"ensure git attributes", ensureGitAttributes},
}
if s.DeleteStaleRepositories {
// Sourcegraph.com can potentially clone all of github.com, so we
// delete repos which have not been used for a period of
// time. s.DeleteStaleRepositories should only be true for
// sourcegraph.com.
cleanups = append(cleanups, cleanupFn{"maybe remove inactive", maybeRemoveInactive})
}
// Old git clones accumulate loose git objects that waste space and
// slow down git operations. Periodically do a fresh clone to avoid
// these problems. git gc is slow and resource intensive. It is
// cheaper and faster to just reclone the repository.
cleanups = append(cleanups, cleanupFn{"maybe reclone", maybeReclone})
filepath.Walk(s.ReposDir, func(gitDir string, fi os.FileInfo, fileErr error) error {
err := filepath.Walk(s.ReposDir, func(gitDir string, fi os.FileInfo, fileErr error) error {
if fileErr != nil {
return nil
}
@ -221,6 +189,190 @@ func (s *Server) cleanupRepos() {
}
return filepath.SkipDir
})
if err != nil {
log15.Error("cleanup: error iterating over repositories", "error", err)
}
actualFreeBytes, err := s.bytesFreeOnDisk()
if err != nil {
log15.Error("cleanup: finding the amount of space free on disk", "error", err)
return
}
howManyBytesToFree := int64(s.DesiredFreeDiskSpace) - int64(actualFreeBytes)
if err := s.freeUpSpace(howManyBytesToFree); err != nil {
log15.Error("cleanup: error freeing up space", "error", err)
}
}
// bytesFreeOnDisk reports, in bytes, how much space is available on the
// filesystem that holds s.ReposDir.
//
// It first resolves the mount point above s.ReposDir and then queries that
// path with statfs. A failure of either step is returned as a wrapped error
// together with a zero byte count.
func (s *Server) bytesFreeOnDisk() (uint64, error) {
	mountPoint, err := findMountPoint(s.ReposDir)
	if err != nil {
		return 0, errors.Wrap(err, "finding mount point for dir containing repos")
	}

	var stat syscall.Statfs_t
	err = syscall.Statfs(mountPoint, &stat)
	if err != nil {
		return 0, errors.Wrap(err, "finding out how much disk space is free")
	}

	// Bavail is a block count, so scale it by the block size to get bytes.
	available := stat.Bavail * uint64(stat.Bsize)
	log15.Info("computed free space", "repo dir", s.ReposDir, "mount point", mountPoint, "free space bytes", available)
	return available, nil
}
// findMountPoint searches upwards starting from the directory d to find the
// mount point.
//
// d is first made absolute. The search then visits d and each of its
// ancestors in turn and returns the first one that isMount reports as a mount
// point. An error is returned if d cannot be made absolute or if isMount
// fails for any visited directory.
func findMountPoint(d string) (string, error) {
	// Keep the caller's path in d: filepath.Abs returns "" on failure, and the
	// error message should still name the path we were asked about.
	dir, err := filepath.Abs(d)
	if err != nil {
		return "", errors.Wrapf(err, "getting absolute version of %s", d)
	}
	for {
		mounted, err := isMount(dir)
		if err != nil {
			return "", errors.Wrapf(err, "finding out if %s is a mount point", dir)
		}
		if mounted {
			return dir, nil
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			// Nothing above this directory; stop here rather than loop forever.
			return parent, nil
		}
		dir = parent
	}
}
// isMount tells whether the directory d is a mount point.
//
// d counts as a mount point when it is its own parent (the filesystem root)
// or when its device id differs from that of its parent directory. An error
// is returned if the device id of d or of its parent cannot be determined.
func isMount(d string) (bool, error) {
	ddev, err := device(d)
	if err != nil {
		return false, errors.Wrapf(err, "getting device id for %s", d)
	}
	parent := filepath.Dir(d)
	if parent == d {
		// filepath.Dir is a fixed point only at the root.
		return true, nil
	}
	pdev, err := device(parent)
	if err != nil {
		return false, errors.Wrapf(err, "getting device id for %s", parent)
	}
	return pdev != ddev, nil
}
// device returns the id of the device on which the file f resides, as
// reported by os.Stat.
//
// It fails if f cannot be stat'ed or if the platform's stat result is not a
// *syscall.Stat_t.
func device(f string) (int64, error) {
	info, statErr := os.Stat(f)
	if statErr != nil {
		return 0, errors.Wrapf(statErr, "running stat on %s", f)
	}
	if sys, isStatT := info.Sys().(*syscall.Stat_t); isStatT {
		return int64(sys.Dev), nil
	}
	return 0, fmt.Errorf("failed to get stat details for %s", f)
}
// freeUpSpace deletes repositories under s.ReposDir, starting with the least
// recently used, until at least howManyBytesToFree bytes have been reclaimed.
//
// "Recently used" is judged by the modification time of each git directory's
// HEAD file. A non-positive howManyBytesToFree is a no-op. An error is
// returned when the git directories cannot be listed, inspected or removed,
// or when every repository has been removed and the target still has not been
// reached.
func (s *Server) freeUpSpace(howManyBytesToFree int64) error {
	if howManyBytesToFree <= 0 {
		log15.Info("cleanup: skipping repository cleanup, don't need to free disk space", "howManyBytesToFree", howManyBytesToFree)
		return nil
	}

	// Collect every git directory along with the time it was last touched.
	repos, err := s.findGitDirs(s.ReposDir)
	if err != nil {
		return errors.Wrap(err, "finding git dirs")
	}
	lastUsed := make(map[string]time.Time, len(repos))
	for _, gitDir := range repos {
		modTime, err := gitDirModTime(gitDir)
		if err != nil {
			return errors.Wrap(err, "computing mod time of git dir")
		}
		lastUsed[gitDir] = modTime
	}

	// Oldest first, so the stalest repositories are the first to go.
	sort.Slice(repos, func(a, b int) bool {
		return lastUsed[repos[a]].Before(lastUsed[repos[b]])
	})

	var freed int64
	for _, gitDir := range repos {
		size, err := dirSize(gitDir)
		if err != nil {
			return errors.Wrapf(err, "computing size of directory %s", gitDir)
		}
		log15.Info("cleanup: removing repo dir that hasn't been used in a while", "repodir", gitDir, "howlong", time.Since(lastUsed[gitDir]))
		// The git directory lives inside the repository directory; remove the
		// enclosing directory rather than just the .git entry.
		if err := os.RemoveAll(filepath.Dir(gitDir)); err != nil {
			return errors.Wrap(err, "removing repo directory")
		}
		if freed += size; freed >= howManyBytesToFree {
			return nil
		}
	}

	// Falling out of the loop means the target was never reached.
	return fmt.Errorf("only freed %d bytes, wanted to free %d", freed, howManyBytesToFree)
}
// gitDirModTime returns the modification time of the HEAD file inside the git
// directory d. freeUpSpace uses this value to rank repositories by how
// recently they were used.
func gitDirModTime(d string) (time.Time, error) {
	headPath := filepath.Join(d, "HEAD")
	info, err := os.Stat(headPath)
	if err == nil {
		return info.ModTime(), nil
	}
	return time.Time{}, errors.Wrap(err, "getting repository modification time")
}
// findGitDirs returns the .git directories below d.
//
// Entries that cannot be read are skipped, as are paths for which
// s.ignorePath reports true (ignored directories are not descended into).
// The walk does not descend into a .git directory once it has been recorded,
// so the contents of each repository are never traversed.
func (s *Server) findGitDirs(d string) ([]string, error) {
	var dirs []string
	err := filepath.Walk(d, func(path string, fi os.FileInfo, fileErr error) error {
		if fileErr != nil {
			// Best effort: an unreadable entry must not abort the search.
			return nil
		}
		if s.ignorePath(path) {
			if fi.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}
		if !fi.IsDir() || fi.Name() != ".git" {
			return nil
		}
		dirs = append(dirs, path)
		// Everything below a .git directory is repository data; walking it
		// would stat every object file for no benefit.
		return filepath.SkipDir
	})
	if err != nil {
		return nil, errors.Wrapf(err, "walking dir tree from %s to find git dirs", d)
	}
	return dirs, nil
}
// dirSize adds up the sizes, in bytes, of every non-directory entry found
// while walking the tree rooted at d. Entries that cannot be read are left
// out of the total instead of failing the walk.
func dirSize(d string) (int64, error) {
	var total int64
	walkErr := filepath.Walk(d, func(_ string, info os.FileInfo, entryErr error) error {
		if entryErr == nil && !info.IsDir() {
			total += info.Size()
		}
		return nil
	})
	if walkErr != nil {
		return 0, errors.Wrapf(walkErr, "walking dir tree from %s to find size", d)
	}
	return total, nil
}
// removeRepoDirectory atomically removes a directory from s.ReposDir.
@ -360,7 +512,7 @@ func (s *Server) SetupAndClearTmp() (string, error) {
}
go func(path string) {
if err := os.RemoveAll(path); err != nil {
log15.Error("failed to remove old temporary directory", "path", path, "error", err)
log15.Error("cleanup: failed to remove old temporary directory", "path", path, "error", err)
}
}(filepath.Join(s.ReposDir, f.Name()))
}

View File

@ -14,6 +14,8 @@ import (
"strings"
"testing"
"time"
"github.com/pkg/errors"
)
const (
@ -34,19 +36,10 @@ func TestCleanupInactive(t *testing.T) {
if err := cmd.Run(); err != nil {
t.Fatal(err)
}
repoB := path.Join(root, testRepoB, ".git")
cmd = exec.Command("git", "--bare", "init", repoB)
if err := cmd.Run(); err != nil {
t.Fatal(err)
}
repoC := path.Join(root, testRepoC, ".git")
if err := os.MkdirAll(repoC, os.ModePerm); err != nil {
t.Fatal(err)
}
filepath.Walk(repoB, func(p string, _ os.FileInfo, _ error) error {
// Rollback the mtime for these files to simulate an old repo.
return os.Chtimes(p, time.Now().Add(-inactiveRepoTTL-time.Hour), time.Now().Add(-inactiveRepoTTL-time.Hour))
})
s := &Server{ReposDir: root, DeleteStaleRepositories: true}
s.Handler() // Handler as a side-effect sets up Server
@ -55,9 +48,6 @@ func TestCleanupInactive(t *testing.T) {
if _, err := os.Stat(repoA); os.IsNotExist(err) {
t.Error("expected repoA not to be removed")
}
if _, err := os.Stat(repoB); err == nil {
t.Error("expected repoB to be removed during clean up")
}
if _, err := os.Stat(repoC); err == nil {
t.Error("expected corrupt repoC to be removed during clean up")
}
@ -409,3 +399,116 @@ func isEmptyDir(path string) (bool, error) {
}
return false, err
}
// TestFreeUpSpace checks that freeUpSpace is a no-op when no space is
// requested, fails when space is requested but nothing can be removed, and
// removes the least recently used repository first.
func TestFreeUpSpace(t *testing.T) {
	t.Run("no error if no space requested and no repos", func(t *testing.T) {
		s := &Server{}
		if err := s.freeUpSpace(0); err != nil {
			t.Fatal(err)
		}
	})
	t.Run("error if space requested and no repos", func(t *testing.T) {
		s := &Server{}
		if err := s.freeUpSpace(1); err == nil {
			t.Fatal("want error")
		}
	})
	t.Run("oldest repo gets removed to free up space", func(t *testing.T) {
		// Set up.
		rd, err := ioutil.TempDir("", "freeUpSpace")
		if err != nil {
			t.Fatal(err)
		}
		// Do not leave the fake repositories behind after the test.
		defer os.RemoveAll(rd)

		r1 := filepath.Join(rd, "repo1")
		r2 := filepath.Join(rd, "repo2")
		if err := makeFakeRepo(r1, 1000); err != nil {
			t.Fatal(err)
		}
		if err := makeFakeRepo(r2, 1000); err != nil {
			t.Fatal(err)
		}

		// Force the modification time of r2 to be after that of r1.
		// freeUpSpace ranks repositories by the mtime of .git/HEAD, so that is
		// the file whose timestamp has to be read and adjusted.
		head1 := filepath.Join(r1, ".git", "HEAD")
		head2 := filepath.Join(r2, ".git", "HEAD")
		fi1, err := os.Stat(head1)
		if err != nil {
			t.Fatal(err)
		}
		mtime2 := fi1.ModTime().Add(time.Second)
		if err := os.Chtimes(head2, time.Now(), mtime2); err != nil {
			t.Fatal(err)
		}

		// Run.
		s := Server{
			ReposDir: rd,
		}
		if err := s.freeUpSpace(1000); err != nil {
			t.Fatal(err)
		}

		// Check.
		files, err := ioutil.ReadDir(rd)
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != 1 {
			t.Fatalf("got %d items in %s, want exactly 1", len(files), rd)
		}
		if files[0].Name() != "repo2" {
			t.Errorf("name of only item in repos dir is %q, want repo2", files[0].Name())
		}
		rds, err := dirSize(rd)
		if err != nil {
			t.Fatal(err)
		}
		wantSize := int64(1000)
		if rds > wantSize {
			t.Errorf("repo dir size is %d, want no more than %d", rds, wantSize)
		}
	})
}
// makeFakeRepo lays out a minimal stand-in for a repository at d: a .git
// directory holding an empty HEAD file and a space_eater file of sizeBytes
// zero bytes. Missing parent directories are created as needed.
func makeFakeRepo(d string, sizeBytes int) error {
	gitDir := filepath.Join(d, ".git")
	err := os.MkdirAll(gitDir, 0700)
	if err != nil {
		return errors.Wrap(err, "creating .git dir and any parents")
	}

	headPath := filepath.Join(gitDir, "HEAD")
	err = ioutil.WriteFile(headPath, nil, 0666)
	if err != nil {
		return errors.Wrap(err, "creating HEAD file")
	}

	// The filler file is what gives the fake repository its size.
	filler := make([]byte, sizeBytes)
	err = ioutil.WriteFile(filepath.Join(gitDir, "space_eater"), filler, 0666)
	if err != nil {
		return errors.Wrap(err, "writing to space_eater file")
	}
	return nil
}
// Test_findMountPoint runs findMountPoint over a table of directories and
// compares both the returned mount point and whether an error occurred.
func Test_findMountPoint(t *testing.T) {
	cases := []struct {
		name    string
		dir     string
		want    string
		wantErr bool
	}{
		{name: "mount point of root is root", dir: "/", want: "/"},
		// What else can we portably count on?
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := findMountPoint(tc.dir)
			if failed := err != nil; failed != tc.wantErr {
				t.Errorf("findMountPoint() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			if got != tc.want {
				t.Errorf("findMountPoint() = %v, want %v", got, tc.want)
			}
		})
	}
}

View File

@ -111,6 +111,9 @@ type Server struct {
// Janitor job runs.
DeleteStaleRepositories bool
// DesiredFreeDiskSpace is how much space we need to keep free in bytes.
DesiredFreeDiskSpace uint64
// skipCloneForTests is set by tests to avoid clones.
skipCloneForTests bool