cargo: don't fail unpacking on duplicate files (#38557)

Turns out that crates.io sometimes has duplicate files in its `tar.gz`
archives.

    $ wget 'https://crates.io/api/v1/crates/proc/0.1.4/download' -O proc-0.1.4.tar.gz
    $ tar -ztvf  proc-0.1.4.tar.gz
    -rw-r--r--  0 1000   1000       18 Aug 14  2015 proc-0.1.4/.gitignore
    -rw-r--r--  0 1000   1000     1107 Aug 21  2015 proc-0.1.4/.travis.yml
    -rw-r--r--  0 1000   1000      304 Aug 29  2015 proc-0.1.4/Cargo.toml
    -rw-r--r--  0 1000   1000    10847 Aug 21  2015 proc-0.1.4/LICENSE-APACHE
    -rw-r--r--  0 1000   1000     1071 Aug 21  2015 proc-0.1.4/LICENSE-MIT
    -rw-r--r--  0 1000   1000     1149 Aug 29  2015 proc-0.1.4/README.md
    -rw-r--r--  0 1000   1000       22 Aug 29  2015 proc-0.1.4/src/lib.rs
    -rw-r--r--  0 1000   1000      304 Aug 29  2015 proc-0.1.4/Cargo.toml

There are two `Cargo.toml` entries in there.
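
With the new `SkipDuplicates` option, unpacking such an archive keeps the first `Cargo.toml` and skips the second instead of failing. A minimal sketch of how a caller could use it; only the `Opts` fields and the `Tar(io.Reader, string, Opts)` signature come from the diff below, while the import path, the explicit gzip step, and the target directory are illustrative assumptions:

    package main

    import (
        "compress/gzip"
        "log"
        "os"

        // Assumed import path for the internal unpack package changed below.
        "github.com/sourcegraph/sourcegraph/internal/unpack"
    )

    func main() {
        // The crates.io tarball that contains two Cargo.toml entries.
        f, err := os.Open("proc-0.1.4.tar.gz")
        if err != nil {
            log.Fatal(err)
        }
        defer f.Close()

        // The archive is gzip-compressed, so decompress before handing it to Tar.
        gz, err := gzip.NewReader(f)
        if err != nil {
            log.Fatal(err)
        }
        defer gz.Close()

        // SkipDuplicates keeps the first Cargo.toml and skips the second one
        // instead of failing the whole unpack.
        err = unpack.Tar(gz, "proc-0.1.4-unpacked", unpack.Opts{
            SkipInvalid:    true,
            SkipDuplicates: true,
        })
        if err != nil {
            log.Fatal(err)
        }
    }
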
Thorsten Ball 2022-07-11 16:29:21 +02:00 committed by GitHub
parent 0a72dca385
commit 0dc2e5d88d
3 changed files with 54 additions and 8 deletions

@@ -91,7 +91,8 @@ func (s *rustDependencySource) Download(ctx context.Context, dir string, dep rep
 func unpackRustPackage(pkg []byte, workDir string) error {
     r := bytes.NewReader(pkg)
     opts := unpack.Opts{
-        SkipInvalid: true,
+        SkipInvalid:    true,
+        SkipDuplicates: true,
         Filter: func(path string, file fs.FileInfo) bool {
             size := file.Size()

@@ -34,6 +34,11 @@ type Opts struct {
     // the whole unpack.
     SkipInvalid bool
+    // SkipDuplicates makes unpacking skip any files that couldn't be extracted
+    // because of os.FileExist errors. In practice, this means the first file
+    // wins if the tar contains two or more entries with the same filename.
+    SkipDuplicates bool
     // Filter filters out files that do not match the given predicate.
     Filter func(path string, file fs.FileInfo) bool
 }
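
For context on where the `os.ErrExist` handled below comes from: the extract helpers presumably create each target file exclusively, so the second archive entry for an already-extracted path fails, and that failure is exactly what `SkipDuplicates` swallows. A standalone sketch of this first-file-wins behavior; the use of `O_EXCL` is an assumption about the helpers, not something shown in this diff:

    package main

    import (
        "errors"
        "fmt"
        "os"
        "path/filepath"
    )

    func main() {
        dir, err := os.MkdirTemp("", "unpack-demo")
        if err != nil {
            panic(err)
        }
        defer os.RemoveAll(dir)

        path := filepath.Join(dir, "Cargo.toml")

        // First entry: exclusive create succeeds and the file is written.
        f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o644)
        if err != nil {
            panic(err)
        }
        f.Close()

        // Second entry with the same path: exclusive create fails.
        _, err = os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o644)
        fmt.Println(errors.Is(err, os.ErrExist)) // true, so SkipDuplicates can skip it
    }
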
@@ -62,6 +67,9 @@ func Zip(r io.ReaderAt, size int64, dir string, opt Opts) error {
         err = extractZipFile(f, dir)
         if err != nil {
+            if opt.SkipDuplicates && errors.Is(err, os.ErrExist) {
+                continue
+            }
             return err
         }
     }
@@ -135,6 +143,9 @@ func Tar(r io.Reader, dir string, opt Opts) error {
         err = extractTarFile(tr, header, dir)
         if err != nil {
+            if opt.SkipDuplicates && errors.Is(err, os.ErrExist) {
+                continue
+            }
             return err
         }
     }
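
Both the Zip and the Tar loops check the error with `errors.Is` rather than `==`. That matters because a failed create does not return the `os.ErrExist` sentinel directly; it normally arrives wrapped in a `*fs.PathError`. A small illustration with the wrapped error constructed by hand:

    package main

    import (
        "errors"
        "fmt"
        "io/fs"
        "os"
    )

    func main() {
        // What a failed exclusive create typically looks like: the ErrExist
        // sentinel wrapped in a *fs.PathError carrying the op and the path.
        var err error = &fs.PathError{Op: "open", Path: "proc-0.1.4/Cargo.toml", Err: fs.ErrExist}

        fmt.Println(err == os.ErrExist)          // false: the sentinel is wrapped
        fmt.Println(errors.Is(err, os.ErrExist)) // true: Is unwraps the PathError
    }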

@@ -11,6 +11,7 @@ import (
     "os"
     "path"
     "path/filepath"
+    "strings"
     "testing"
     "time"
@@ -46,11 +47,12 @@ func TestUnpack(t *testing.T) {
     type testCase struct {
         packer
-        name string
-        opts Opts
-        in   []*fileInfo
-        out  []*fileInfo
-        err  string
+        name        string
+        opts        Opts
+        in          []*fileInfo
+        out         []*fileInfo
+        err         string
+        errContains string
     }
     var testCases []testCase
@@ -161,6 +163,30 @@ func TestUnpack(t *testing.T) {
                     {path: "dir/file4", contents: "x", mode: 0600, size: 1},
                 },
             },
+            {
+                packer: p,
+                name:   "duplicates",
+                in: []*fileInfo{
+                    {path: "bar", contents: "bar", mode: 0655},
+                    {path: "bar", contents: "bar", mode: 0655},
+                },
+                errContains: "/bar: file exists",
+                out: []*fileInfo{
+                    {path: "bar", contents: "bar", mode: 0655, size: 3},
+                },
+            },
+            {
+                packer: p,
+                name:   "skip-duplicates",
+                opts:   Opts{SkipDuplicates: true},
+                in: []*fileInfo{
+                    {path: "bar", contents: "bar", mode: 0655},
+                    {path: "bar", contents: "bar", mode: 0655},
+                },
+                out: []*fileInfo{
+                    {path: "bar", contents: "bar", mode: 0655, size: 3},
+                },
+            },
         }...)
     }
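
The new cases rely on the tar format itself happily recording two entries with the same name; `makeTar` presumably just serializes whatever `fileInfo` list it is handed. A standalone sketch of building such an archive with `archive/tar` (none of these names come from the test file):

    package main

    import (
        "archive/tar"
        "bytes"
        "fmt"
    )

    func main() {
        var buf bytes.Buffer
        tw := tar.NewWriter(&buf)

        // Write the same path twice; tar records both entries in order.
        for i := 0; i < 2; i++ {
            contents := []byte("bar")
            hdr := &tar.Header{Name: "bar", Mode: 0655, Size: int64(len(contents))}
            if err := tw.WriteHeader(hdr); err != nil {
                panic(err)
            }
            if _, err := tw.Write(contents); err != nil {
                panic(err)
            }
        }
        if err := tw.Close(); err != nil {
            panic(err)
        }

        fmt.Printf("%d bytes of tar containing two %q entries\n", buf.Len(), "bar")
    }
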
@@ -174,7 +200,7 @@ func TestUnpack(t *testing.T) {
                 tc.opts,
             )
-            assertError(t, err, tc.err)
+            assertError(t, err, tc.err, tc.errContains)
             assertUnpack(t, dir, tc.out)
         })
     }
@@ -249,7 +275,15 @@ func makeTar(t testing.TB, files ...*fileInfo) []byte {
     return buf.Bytes()
 }

-func assertError(t testing.TB, have error, want string) {
+func assertError(t testing.TB, have error, want string, wantContains string) {
+    if want == "" && wantContains != "" {
+        haveMessage := fmt.Sprint(have)
+        if !strings.Contains(haveMessage, wantContains) {
+            t.Fatalf("error should contain %q, but doesn't: %q", wantContains, haveMessage)
+        }
+        return
+    }
     if want == "" {
         want = "<nil>"
     }