codeintel: Send tar files via SendDB (#11074)

This commit is contained in:
Eric Fritz 2020-06-01 13:46:47 -05:00 committed by GitHub
parent fc15df5136
commit 5ff1e04154
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 267 additions and 146 deletions

1
.github/CODEOWNERS vendored
View File

@ -46,6 +46,7 @@
/internal/src-cli @efritz
/internal/linkheader @efritz
/internal/sqliteutil @efritz
/internal/tar @efritz
/renovate.json @felixfbecker
/.stylelintrc.json @felixfbecker
/.stylelintignore @felixfbecker

View File

@ -47,7 +47,7 @@ func makeFileWithSize(path string, size int) error {
return err
}
return ioutil.WriteFile(path, make([]byte, size), 0600)
return ioutil.WriteFile(path, make([]byte, size), os.ModePerm)
}
func getFilenames(root string) ([]string, error) {

View File

@ -7,7 +7,6 @@ import (
"io"
"net/http"
"os"
"path/filepath"
"strings"
"github.com/gorilla/mux"
@ -22,6 +21,7 @@ import (
"github.com/sourcegraph/sourcegraph/internal/codeintel/bundles/persistence"
sqlitereader "github.com/sourcegraph/sourcegraph/internal/codeintel/bundles/persistence/sqlite"
"github.com/sourcegraph/sourcegraph/internal/codeintel/bundles/types"
"github.com/sourcegraph/sourcegraph/internal/tar"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
)
@ -127,19 +127,20 @@ func (s *Server) handlePostDatabasePart(w http.ResponseWriter, r *http.Request)
// POST /dbs/{id:[0-9]+}/stitch
func (s *Server) handlePostDatabaseStitch(w http.ResponseWriter, r *http.Request) {
id := idFromRequest(r)
filename := paths.SQLiteDBFilename(s.bundleDir, id)
dirname := paths.DBDir(s.bundleDir, id)
makePartFilename := func(index int) string {
return paths.DBPartFilename(s.bundleDir, id, int64(index))
}
if err := os.MkdirAll(filepath.Dir(filename), os.ModePerm); err != nil {
log15.Error("Failed to create directory", "err", err)
stitchedReader, err := codeintelutils.StitchFilesReader(makePartFilename, false)
if err != nil {
log15.Error("Failed to stitch multipart database", "err", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
if err := codeintelutils.StitchFiles(filename, makePartFilename, false); err != nil {
log15.Error("Failed to stitch multipart database", "err", err)
if err := tar.Extract(dirname, stitchedReader); err != nil {
log15.Error("Failed to extract database archive", "err", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}

View File

@ -8,6 +8,7 @@ import (
"github.com/pkg/errors"
"github.com/sourcegraph/sourcegraph/internal/codeintel/db"
"github.com/sourcegraph/sourcegraph/internal/codeintel/gitserver"
"github.com/sourcegraph/sourcegraph/internal/tar"
)
func fetchRepository(ctx context.Context, db db.DB, gitserverClient gitserver.Client, repositoryID int, commit string) (string, error) {
@ -26,8 +27,8 @@ func fetchRepository(ctx context.Context, db db.DB, gitserverClient gitserver.Cl
return "", errors.Wrap(err, "gitserver.Archive")
}
if err := extractTarfile(tempDir, archive); err != nil {
return "", errors.Wrap(err, "extractTarfile")
if err := tar.Extract(tempDir, archive); err != nil {
return "", errors.Wrap(err, "tar.Extract")
}
return tempDir, nil

View File

@ -1,35 +0,0 @@
package indexer
import (
"io/ioutil"
"os"
"testing"
"github.com/google/go-cmp/cmp"
)
// TestExtractTarfile extracts the ./testdata/clone.tar fixture into a fresh
// temporary directory and compares the result of readFiles on that directory
// against expectedCloneTarSizes.
func TestExtractTarfile(t *testing.T) {
	tempDir, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatalf("unexpected error creating temp dir: %s", err)
	}
	defer os.RemoveAll(tempDir)

	tarfile, err := os.Open("./testdata/clone.tar")
	if err != nil {
		t.Fatalf("unexpected error opening test tarfile: %s", err)
	}
	// Release the file descriptor once the test finishes; the original never closed it.
	defer tarfile.Close()

	if err := extractTarfile(tempDir, tarfile); err != nil {
		t.Fatalf("unexpected error extracting tarfile: %s", err)
	}

	sizes, err := readFiles(tempDir)
	if err != nil {
		t.Fatalf("unexpected error reading directory: %s", err)
	}

	if diff := cmp.Diff(expectedCloneTarSizes, sizes); diff != "" {
		t.Errorf("unexpected file sizes (-want +got):\n%s", diff)
	}
}

View File

@ -7,7 +7,6 @@ import (
"os"
"path/filepath"
"github.com/google/uuid"
"github.com/hashicorp/go-multierror"
"github.com/inconshreveable/log15"
"github.com/pkg/errors"
@ -33,22 +32,22 @@ type processor struct {
// Process converts a raw upload into a dump within the given transaction context.
func (p *processor) Process(ctx context.Context, tx db.DB, upload db.Upload) (err error) {
// Create scratch directory that we can clean on completion/failure
name, err := ioutil.TempDir("", "")
tempDir, err := ioutil.TempDir("", "")
if err != nil {
return err
}
defer func() {
if cleanupErr := os.RemoveAll(name); cleanupErr != nil {
log15.Warn("Failed to remove temporary directory", "path", name, "err", cleanupErr)
if cleanupErr := os.RemoveAll(tempDir); cleanupErr != nil {
log15.Warn("Failed to remove temporary directory", "path", tempDir, "err", cleanupErr)
}
}()
// Create target file for converted database
uuid, err := uuid.NewRandom()
if err != nil {
return err
}
newFilename := filepath.Join(name, uuid.String())
// // Create target file for converted database
// uuid, err := uuid.NewRandom()
// if err != nil {
// return err
// }
// newFilename := filepath.Join(tempDir, uuid.String())
// Pull raw uploaded data from bundle manager
r, err := p.bundleManagerClient.GetUpload(ctx, upload.ID)
@ -64,12 +63,10 @@ func (p *processor) Process(ctx context.Context, tx db.DB, upload db.Upload) (er
}
}()
// Read raw upload and write converted database to newFilename. This process also correlates
// and returns the data we need to insert into Postgres to support cross-dump/repo queries.
packages, packageReferences, err := convert(
ctx,
r,
newFilename,
tempDir,
upload.ID,
upload.Root,
func(dirnames []string) (map[string][]string, error) {
@ -131,7 +128,7 @@ func (p *processor) Process(ctx context.Context, tx db.DB, upload db.Upload) (er
}
// Send converted database file to bundle manager
if err := p.bundleManagerClient.SendDB(ctx, upload.ID, newFilename); err != nil {
if err := p.bundleManagerClient.SendDB(ctx, upload.ID, tempDir); err != nil {
return errors.Wrap(err, "bundleManager.SendDB")
}
@ -177,20 +174,13 @@ func (p *processor) updateCommitsAndVisibility(ctx context.Context, db db.DB, re
}
// convert correlates the raw input data and commits the correlated data to disk.
func convert(
ctx context.Context,
r io.Reader,
newFilename string,
dumpID int,
root string,
getChildren existence.GetChildrenFunc,
) (_ []types.Package, _ []types.PackageReference, err error) {
func convert(ctx context.Context, r io.Reader, tempDir string, dumpID int, root string, getChildren existence.GetChildrenFunc) ([]types.Package, []types.PackageReference, error) {
groupedBundleData, err := correlation.Correlate(r, dumpID, root, getChildren)
if err != nil {
return nil, nil, errors.Wrap(err, "correlation.Correlate")
}
if err := write(ctx, newFilename, groupedBundleData); err != nil {
if err := write(ctx, tempDir, groupedBundleData); err != nil {
return nil, nil, err
}
@ -198,8 +188,8 @@ func convert(
}
// write commits the correlated data to disk.
func write(ctx context.Context, filename string, groupedBundleData *correlation.GroupedBundleData) (err error) {
writer, err := sqlitewriter.NewWriter(ctx, filename)
func write(ctx context.Context, tempDir string, groupedBundleData *correlation.GroupedBundleData) (err error) {
writer, err := sqlitewriter.NewWriter(ctx, filepath.Join(tempDir, "sqlite.db"))
if err != nil {
return err
}
@ -237,5 +227,6 @@ func write(ctx context.Context, filename string, groupedBundleData *correlation.
err = multierror.Append(err, writeErr)
}
}
return err
}

3
go.mod
View File

@ -117,7 +117,7 @@ require (
github.com/sirupsen/logrus v1.5.0 // indirect
github.com/sloonz/go-qprintable v0.0.0-20160203160305-775b3a4592d5 // indirect
github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d // indirect
github.com/sourcegraph/codeintelutils v0.0.0-20200527005717-5df0d7f50241
github.com/sourcegraph/codeintelutils v0.0.0-20200528142143-a2f46204e2e9
github.com/sourcegraph/ctxvfs v0.0.0-20180418081416-2b65f1b1ea81
github.com/sourcegraph/go-diff v0.5.3
github.com/sourcegraph/go-jsonschema v0.0.0-20191222043427-cdbee60427af
@ -126,6 +126,7 @@ require (
github.com/sourcegraph/gosyntect v0.0.0-20200331033347-c35e64c39373
github.com/sourcegraph/jsonx v0.0.0-20190114210550-ba8cb36a8614
github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/sqs/httpgzip v0.0.0-20180622165210-91da61ed4dff
github.com/src-d/enry/v2 v2.1.0
github.com/stripe/stripe-go v70.11.0+incompatible

10
go.sum
View File

@ -876,16 +876,12 @@ github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9
github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM=
github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d h1:yKm7XZV6j9Ev6lojP2XaIshpT4ymkqhMeSghO5Ps00E=
github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE=
github.com/sourcegraph/codeintelutils v0.0.0-20200526222402-589c34503031 h1:q0Hg6HiBtM1d7ddCBg0Fl52bGyjqTDlJOnEXqPDQF3w=
github.com/sourcegraph/codeintelutils v0.0.0-20200526222402-589c34503031/go.mod h1:3Y0EB0hYuJho8mmu2VONjt/wdSdcALuVUn/TfwFL8gM=
github.com/sourcegraph/codeintelutils v0.0.0-20200527005717-5df0d7f50241 h1:sqN0aI5oQIBUrPAEQy9wQf0tD8S4rSbz/9VoN1IWz/o=
github.com/sourcegraph/codeintelutils v0.0.0-20200527005717-5df0d7f50241/go.mod h1:HplI8gRslTrTUUsSYwu28hSOderix7m5dHNca7xBzeo=
github.com/sourcegraph/codeintelutils v0.0.0-20200528142143-a2f46204e2e9 h1:AUUSD/me2hphB7WyWGuljtUEdFBpgWjDII3wd9CmeX8=
github.com/sourcegraph/codeintelutils v0.0.0-20200528142143-a2f46204e2e9/go.mod h1:HplI8gRslTrTUUsSYwu28hSOderix7m5dHNca7xBzeo=
github.com/sourcegraph/ctxvfs v0.0.0-20180418081416-2b65f1b1ea81 h1:v4/JVxZSPWifxmICRqgXK7khThjw03RfdGhyeA2S4EQ=
github.com/sourcegraph/ctxvfs v0.0.0-20180418081416-2b65f1b1ea81/go.mod h1:xIvvI5FiHLxhv8prbzVpaMHaaGPFPFQSuTcxC91ryOo=
github.com/sourcegraph/go-diff v0.5.1 h1:gO6i5zugwzo1RVTvgvfwCOSVegNuvnNi6bAD1QCmkHs=
github.com/sourcegraph/go-diff v0.5.1/go.mod h1:j2dHj3m8aZgQO8lMTcTnBcXkRRRqi34cd2MNlA9u1mE=
github.com/sourcegraph/go-diff v0.5.2 h1:aREwkyV8nKvCkMW0129XBB4+ZmE/zyLkdZU569ylqmQ=
github.com/sourcegraph/go-diff v0.5.2/go.mod h1:v9JDtjCE4HHHCZGId75rg8gkKKa98RVjBcBGsVmMmak=
github.com/sourcegraph/go-diff v0.5.3 h1:lhIKJ2nXLZZ+AfbHpYxTn0pXpNTTui0DX7DO3xeb1Zs=
github.com/sourcegraph/go-diff v0.5.3/go.mod h1:v9JDtjCE4HHHCZGId75rg8gkKKa98RVjBcBGsVmMmak=
github.com/sourcegraph/go-jsonschema v0.0.0-20191222043427-cdbee60427af h1:VmLIjAWL5tLYoqW7l1Q/E3CLuqdLyBuSyQJYOOS01JA=
@ -910,6 +906,8 @@ github.com/sourcegraph/zoekt v0.0.0-20200511113954-b56036a3b745 h1:o0pdeZagP1z3/
github.com/sourcegraph/zoekt v0.0.0-20200511113954-b56036a3b745/go.mod h1:WleTVLMEfvGF6uZ/mSWXVUH1H4NPxAcu6YbJ0TORdWc=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v0.0.0-20170901052352-ee1bd8ee15a1/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
github.com/spf13/afero v1.2.2 h1:5jhuqJyZCZf2JRofRvN/nIFgIWNzPa3/Vz8mYylgbWc=

View File

@ -22,6 +22,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/sourcegraph/codeintelutils"
"github.com/sourcegraph/sourcegraph/internal/metrics"
"github.com/sourcegraph/sourcegraph/internal/tar"
"github.com/sourcegraph/sourcegraph/internal/trace/ot"
"golang.org/x/net/context/ctxhttp"
)
@ -57,9 +58,8 @@ type BundleManagerClient interface {
// from the bundle manager.
GetUpload(ctx context.Context, bundleID int) (io.ReadCloser, error)
// SendDB transfers a converted database to the bundle manager to be stored on disk. This
// will also remove the original upload file with the same identifier from disk.
SendDB(ctx context.Context, bundleID int, filename string) error
// SendDB transfers a converted database archive to the bundle manager to be stored on disk.
SendDB(ctx context.Context, bundleID int, path string) error
// Exists determines if a file exists on disk for all the supplied identifiers.
Exists(ctx context.Context, bundleIDs []int) (map[int]bool, error)
@ -253,9 +253,9 @@ func (c *bundleManagerClientImpl) getUploadChunk(ctx context.Context, w io.Write
return c.ioCopy(w, body)
}
// SendDB transfers a converted database to the bundle manager to be stored on disk.
func (c *bundleManagerClientImpl) SendDB(ctx context.Context, bundleID int, filename string) (err error) {
files, cleanup, err := codeintelutils.SplitFile(filename, c.maxPayloadSizeBytes)
// SendDB transfers a converted database archive to the bundle manager to be stored on disk.
func (c *bundleManagerClientImpl) SendDB(ctx context.Context, bundleID int, path string) (err error) {
files, cleanup, err := codeintelutils.SplitReader(tar.Archive(path), c.maxPayloadSizeBytes)
if err != nil {
return err
}

View File

@ -18,6 +18,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/inconshreveable/log15"
"github.com/sourcegraph/sourcegraph/internal/tar"
)
func TestMain(m *testing.M) {
@ -305,8 +306,13 @@ func TestGetUploadBadResponse(t *testing.T) {
}
func TestSendDB(t *testing.T) {
var paths []string
tempDir, err := ioutil.TempDir("", "")
if err != nil {
t.Fatalf("unexpected error creating temp dir: %s", err)
}
defer os.RemoveAll(tempDir)
var paths []string
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
paths = append(paths, r.URL.Path)
if r.URL.Path == "/dbs/42/stitch" {
@ -317,47 +323,45 @@ func TestSendDB(t *testing.T) {
t.Errorf("unexpected path. want=%s have=%s", "/dbs/42/0", r.URL.Path)
}
rawContent, err := ioutil.ReadAll(r.Body)
if err != nil {
t.Fatalf("unexpected error reading payload: %s", err)
}
gzipReader, err := gzip.NewReader(bytes.NewReader(rawContent))
gzipReader, err := gzip.NewReader(r.Body)
if err != nil {
t.Fatalf("unexpected error decompressing payload: %s", err)
}
defer gzipReader.Close()
content, err := ioutil.ReadAll(gzipReader)
if err != nil {
t.Fatalf("unexpected error reading decompressed payload: %s", err)
}
if diff := cmp.Diff([]byte("payload\n"), content); diff != "" {
t.Errorf("unexpected contents (-want +got):\n%s", diff)
if err := tar.Extract(filepath.Join(tempDir, "dest"), gzipReader); err != nil {
t.Fatalf("unexpected error extracting payload: %s", err)
}
}))
defer ts.Close()
tempDir, err := ioutil.TempDir("", "")
if err != nil {
t.Fatalf("unexpected error creating temp directory: %s", err)
}
defer os.RemoveAll(tempDir)
filename := filepath.Join(tempDir, "test.db")
filename := filepath.Join(tempDir, "test")
if err := ioutil.WriteFile(filename, []byte("payload\n"), os.ModePerm); err != nil {
t.Fatalf("unexpected error writing file: %s", err)
t.Fatalf("unexpected error writing temp file: %s", err)
}
client := &bundleManagerClientImpl{bundleManagerURL: ts.URL, maxPayloadSizeBytes: 1000}
if err := client.SendDB(context.Background(), 42, filename); err != nil {
client := &bundleManagerClientImpl{bundleManagerURL: ts.URL, maxPayloadSizeBytes: 10000}
if err := client.SendDB(context.Background(), 42, tempDir); err != nil {
t.Fatalf("unexpected error sending db: %s", err)
}
contents, err := ioutil.ReadFile(filepath.Join(tempDir, "dest", "test"))
if err != nil {
t.Fatalf("unexpected error reading file: %s", err)
}
if diff := cmp.Diff([]byte("payload\n"), contents); diff != "" {
t.Errorf("unexpected contents (-want +got):\n%s", diff)
}
}
func TestSendDBMultipart(t *testing.T) {
tempDir, err := ioutil.TempDir("", "")
if err != nil {
t.Fatalf("unexpected error creating temp dir: %s", err)
}
defer os.RemoveAll(tempDir)
const maxPayloadSizeBytes = 1000
var fullContents []byte
@ -367,7 +371,6 @@ func TestSendDBMultipart(t *testing.T) {
var paths []string
var sentContent []byte
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
paths = append(paths, r.URL.Path)
if r.URL.Path == "/dbs/42/stitch" {
@ -398,37 +401,33 @@ func TestSendDBMultipart(t *testing.T) {
}))
defer ts.Close()
tempDir, err := ioutil.TempDir("", "")
if err != nil {
t.Fatalf("unexpected error creating temp directory: %s", err)
}
defer os.RemoveAll(tempDir)
filename := filepath.Join(tempDir, "test.db")
filename := filepath.Join(tempDir, "test")
if err := ioutil.WriteFile(filename, fullContents, os.ModePerm); err != nil {
t.Fatalf("unexpected error writing file: %s", err)
t.Fatalf("unexpected error writing temp file: %s", err)
}
client := &bundleManagerClientImpl{bundleManagerURL: ts.URL, maxPayloadSizeBytes: maxPayloadSizeBytes}
if err := client.SendDB(context.Background(), 42, filename); err != nil {
if err := client.SendDB(context.Background(), 42, tempDir); err != nil {
t.Fatalf("unexpected error sending db: %s", err)
}
expectedPaths := []string{
"/dbs/42/0",
"/dbs/42/1",
"/dbs/42/2",
"/dbs/42/3",
"/dbs/42/4",
"/dbs/42/5",
"/dbs/42/stitch",
if len(paths) < 5 {
t.Errorf("unexpected number of requests. want>=%d have=%d", 5, len(paths))
}
if diff := cmp.Diff(expectedPaths, paths); diff != "" {
t.Errorf("unexpected paths (-want +got):\n%s", diff)
if paths[len(paths)-1] != "/dbs/42/stitch" {
t.Errorf("unexpected final request path. want=%s have=%s", "/dbs/42/stitch", paths[len(paths)-1])
}
if diff := cmp.Diff(fullContents, sentContent); diff != "" {
if err := tar.Extract(filepath.Join(tempDir, "dest"), bytes.NewReader(sentContent)); err != nil {
t.Fatalf("unexpected error extracting payload: %s", err)
}
contents, err := ioutil.ReadFile(filepath.Join(tempDir, "dest", "test"))
if err != nil {
t.Fatalf("unexpected error reading file: %s", err)
}
if diff := cmp.Diff(fullContents, contents); diff != "" {
t.Errorf("unexpected contents (-want +got):\n%s", diff)
}
}
@ -441,17 +440,12 @@ func TestSendDBBadResponse(t *testing.T) {
tempDir, err := ioutil.TempDir("", "")
if err != nil {
t.Fatalf("unexpected error creating temp directory: %s", err)
t.Fatalf("unexpected error creating temp dir: %s", err)
}
defer os.RemoveAll(tempDir)
filename := filepath.Join(tempDir, "test.db")
if err := ioutil.WriteFile(filename, []byte("payload\n"), os.ModePerm); err != nil {
t.Fatalf("unexpected error writing file: %s", err)
}
client := &bundleManagerClientImpl{bundleManagerURL: ts.URL, maxPayloadSizeBytes: 1000}
if err := client.SendDB(context.Background(), 42, filename); err == nil {
if err := client.SendDB(context.Background(), 42, tempDir); err == nil {
t.Fatalf("unexpected nil error sending db")
}
}

67
internal/tar/archive.go Normal file
View File

@ -0,0 +1,67 @@
package tar
import (
"archive/tar"
"io"
"os"
"path/filepath"
"strings"
"github.com/hashicorp/go-multierror"
)
// Archive walks the files rooted at the given path and streams them to a tar archive
// contained in the resulting reader. Any errors that occur while reading files on disk
// are exposed through Read calls on the the resulting reader.
func Archive(root string) io.Reader {
pr, pw := io.Pipe()
go func() {
defer pw.Close()
tw := tar.NewWriter(pw)
defer tw.Close()
err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if !info.Mode().IsRegular() {
return nil
}
header, err := tar.FileInfoHeader(info, info.Name())
if err != nil {
return err
}
header.Name = strings.TrimPrefix(strings.TrimPrefix(path, root), string(filepath.Separator))
if err := tw.WriteHeader(header); err != nil {
return err
}
return archiveFile(tw, path)
})
if err != nil {
_ = pw.CloseWithError(err)
}
}()
return pr
}
// archiveFile opens the file at the given path and copies its entire contents
// into w. An error from closing the file is appended to whatever error the
// copy produced, so neither is lost.
func archiveFile(w io.Writer, filename string) (err error) {
	file, openErr := os.Open(filename)
	if openErr != nil {
		return openErr
	}

	defer func() {
		closeErr := file.Close()
		if closeErr == nil {
			return
		}
		err = multierror.Append(err, closeErr)
	}()

	if _, copyErr := io.Copy(w, file); copyErr != nil {
		return copyErr
	}
	return nil
}

View File

@ -1,15 +1,17 @@
package indexer
package tar
import (
"archive/tar"
"io"
"os"
"path/filepath"
"github.com/hashicorp/go-multierror"
)
func extractTarfile(root string, r io.Reader) error {
// Extract reads tar archive data from r and extracts it into files under the given root.
func Extract(root string, r io.Reader) error {
tr := tar.NewReader(r)
for {
header, err := tr.Next()
if err != nil {
@ -55,7 +57,7 @@ func extractDir(root string, tr *tar.Reader, header *tar.Header) error {
return nil
}
func extractFile(root string, tr *tar.Reader, header *tar.Header) error {
func extractFile(root string, tr *tar.Reader, header *tar.Header) (err error) {
target := filepath.Join(root, header.Name)
// It's possible for a file to exist in a directory for which there is
@ -68,11 +70,12 @@ func extractFile(root string, tr *tar.Reader, header *tar.Header) error {
if err != nil {
return err
}
defer f.Close()
defer func() {
if closeErr := f.Close(); closeErr != nil {
err = multierror.Append(err, closeErr)
}
}()
if _, err := io.Copy(f, tr); err != nil {
return err
}
return nil
_, err = io.Copy(f, tr)
return err
}

99
internal/tar/tar_test.go Normal file
View File

@ -0,0 +1,99 @@
package tar
import (
"io/ioutil"
"os"
"path/filepath"
"testing"
)
// TestArchiveAndExtract writes a small directory tree (top-level files plus
// nested directories) into a temporary source directory, pipes the output of
// Archive straight into Extract targeting a second temporary directory, and
// verifies that every file is present there with identical contents.
func TestArchiveAndExtract(t *testing.T) {
	tempDirSource, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatalf("unexpected error creating temp dir: %s", err)
	}
	defer os.RemoveAll(tempDirSource)

	tempDirDestination, err := ioutil.TempDir("", "")
	if err != nil {
		t.Fatalf("unexpected error creating temp dir: %s", err)
	}
	defer os.RemoveAll(tempDirDestination)

	// Keys are slash-separated paths relative to the source directory.
	fileContents := map[string]string{
		"0":         "Aenean maximus dolor id mi condimentum fringilla.",
		"1":         "Aliquam interdum feugiat auctor.",
		"2":         "Aliquam molestie pulvinar tellus, eget auctor sapien mattis non.",
		"3":         "Aliquam venenatis tortor eros, id sodales turpis blandit id.",
		"4":         "Cras tempus quam odio, sit amet tincidunt tortor pellentesque sit amet.",
		"5":         "Donec commodo, dui quis fringilla mollis, est elit venenatis sapien, eget laoreet quam ante eu odio.",
		"6":         "Donec malesuada accumsan gravida.",
		"7":         "Donec tincidunt lectus metus, at lobortis nisi maximus in.",
		"8":         "Donec tristique enim non turpis dignissim placerat.",
		"9":         "Duis bibendum eu eros eu faucibus.",
		"foo/0":     "Etiam a dignissim urna, quis porttitor nulla.",
		"foo/1":     "Etiam in finibus ligula, ut dictum tellus.",
		"foo/2":     "Fusce semper metus vel quam tempus, quis sollicitudin turpis condimentum.",
		"foo/3":     "In a vestibulum augue.",
		"foo/4":     "In convallis dui ut urna auctor maximus.",
		"foo/5":     "In est neque, pulvinar eget velit quis, tristique facilisis nibh.",
		"foo/6":     "In pharetra eros vitae tempus faucibus.",
		"foo/7":     "In vel turpis lectus.",
		"foo/8":     "Integer condimentum vel metus ac accumsan.",
		"foo/9":     "Integer et mauris faucibus, tempor purus a, bibendum magna.",
		"bar/0":     "Integer varius ultrices rhoncus.",
		"bar/1":     "Integer vel egestas felis, ac porta augue.",
		"bar/2":     "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
		"bar/3":     "Maecenas porta, enim sit amet blandit luctus, mi justo vestibulum quam, eu rhoncus lacus dui eget felis.",
		"bar/4":     "Maecenas sed neque tristique, volutpat mauris eu, fermentum leo.",
		"bar/5":     "Mauris eu libero augue.",
		"bar/6":     "Mauris hendrerit, ante fermentum facilisis congue, quam odio blandit sem, placerat ultricies mauris nibh eget felis.",
		"bar/7":     "Morbi blandit, felis vitae gravida imperdiet, lacus ipsum varius quam, quis ullamcorper felis nisi nec elit.",
		"bar/8":     "Morbi commodo at urna non laoreet.",
		"bar/9":     "Morbi dapibus malesuada dolor in consequat.",
		"foo/bar/0": "Morbi semper semper ex quis semper.",
		"foo/bar/1": "Morbi tincidunt tellus turpis, eget finibus magna congue in.",
		"foo/bar/2": "Morbi ut nisi nec purus sollicitudin feugiat.",
		"foo/bar/3": "Morbi vehicula sodales ante, eu sodales purus dapibus ac.",
		"foo/bar/4": "Nulla feugiat elementum ligula a imperdiet.",
		"foo/bar/5": "Nulla sed augue augue.",
		"foo/bar/6": "Nulla tempor sapien eu posuere dignissim.",
		"foo/bar/7": "Nullam blandit nisl non enim accumsan, ac vulputate elit fringilla.",
		"foo/bar/8": "Nullam cursus eros a ipsum laoreet commodo.",
		"foo/bar/9": "Nunc imperdiet lacus quis cursus sodales.",
		"baz/foo/0": "Pellentesque non magna luctus, sodales erat non, sollicitudin sapien.",
		"baz/foo/1": "Proin facilisis nisi est, id ornare enim congue ut.",
		"baz/foo/2": "Proin laoreet, tellus sed rhoncus ultricies, nisi odio egestas est, ac porttitor odio augue sit amet ante.",
		"baz/foo/3": "Quisque a metus libero.",
		"baz/foo/4": "Sed in lectus et quam malesuada dapibus eu id diam.",
		"baz/foo/5": "Sed leo est, pretium quis dignissim ac, placerat ac justo.",
		"baz/foo/6": "Sed ut nunc in purus pharetra consequat.",
		"baz/foo/7": "Sed vitae lacus felis.",
		"baz/foo/8": "Suspendisse a urna turpis.",
		"baz/foo/9": "Suspendisse non orci vel ex bibendum feugiat a a nibh.",
	}

	// Populate the source tree, creating parent directories on demand.
	for filename, contents := range fileContents {
		sourcePath := filepath.Join(tempDirSource, filename)

		if err := os.MkdirAll(filepath.Dir(sourcePath), os.ModePerm); err != nil {
			t.Fatalf("unexpected error creating directory for file %s: %s", filename, err)
		}
		if err := ioutil.WriteFile(sourcePath, []byte(contents), os.ModePerm); err != nil {
			t.Fatalf("unexpected error writing file %s: %s", filename, err)
		}
	}

	// Round trip: the reader returned by Archive is consumed directly by Extract.
	if err := Extract(tempDirDestination, Archive(tempDirSource)); err != nil {
		t.Fatalf("unexpected error archiving and extracting: %s", err)
	}

	// Every source file must exist under the destination with the same contents.
	for filename, expected := range fileContents {
		actual, err := ioutil.ReadFile(filepath.Join(tempDirDestination, filename))
		if err != nil {
			t.Fatalf("unexpected error reading file %s: %s", filename, err)
		}

		if string(actual) != expected {
			t.Errorf("unexpected content for file %s. want=%q have=%q", filename, expected, actual)
		}
	}
}