From fafdf1873eb64059d18f24c0e934abae23132167 Mon Sep 17 00:00:00 2001 From: Petri-Johan Last Date: Tue, 2 Apr 2024 15:39:55 +0200 Subject: [PATCH] [gitserver] ArchiveReader correctly handles paths with spaces (#61522) --- cmd/gitserver/internal/git/gitcli/BUILD.bazel | 1 + .../internal/git/gitcli/archivereader.go | 34 +++++++++---------- .../internal/git/gitcli/archivereader_test.go | 30 ++++++++++++++++ internal/byteutils/BUILD.bazel | 5 ++- internal/byteutils/nullscanner.go | 24 +++++++++++++ 5 files changed, 75 insertions(+), 19 deletions(-) create mode 100644 internal/byteutils/nullscanner.go diff --git a/cmd/gitserver/internal/git/gitcli/BUILD.bazel b/cmd/gitserver/internal/git/gitcli/BUILD.bazel index e3269c75195..f09deb7d63b 100644 --- a/cmd/gitserver/internal/git/gitcli/BUILD.bazel +++ b/cmd/gitserver/internal/git/gitcli/BUILD.bazel @@ -25,6 +25,7 @@ go_library( "//cmd/gitserver/internal/git", "//internal/actor", "//internal/api", + "//internal/byteutils", "//internal/collections", "//internal/conf", "//internal/gitserver/gitdomain", diff --git a/cmd/gitserver/internal/git/gitcli/archivereader.go b/cmd/gitserver/internal/git/gitcli/archivereader.go index fe03337263d..c419ad11ff7 100644 --- a/cmd/gitserver/internal/git/gitcli/archivereader.go +++ b/cmd/gitserver/internal/git/gitcli/archivereader.go @@ -1,12 +1,14 @@ package gitcli import ( + "bufio" "bytes" "context" "io" "os" "github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/git" + "github.com/sourcegraph/sourcegraph/internal/byteutils" "github.com/sourcegraph/sourcegraph/internal/collections" "github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain" "github.com/sourcegraph/sourcegraph/lib/errors" @@ -44,7 +46,7 @@ func buildArchiveArgs(format git.ArchiveFormat, treeish string, paths []string) func pathspecLiteral(s string) string { return ":(literal)" + s } func (g *gitCLIBackend) verifyPaths(ctx context.Context, treeish string, paths []string) error { - args := []string{"ls-tree", treeish, "--"} + args := []string{"ls-tree", "-z", "--name-only", treeish, "--"} args = append(args, paths...) r, err := g.NewCommand(ctx, WithArguments(args...)) if err != nil { @@ -52,7 +54,13 @@ func (g *gitCLIBackend) verifyPaths(ctx context.Context, treeish string, paths [ } defer r.Close() - stdout, err := io.ReadAll(r) + scanner := bufio.NewScanner(r) + scanner.Split(byteutils.ScanNullLines) + fileSet := make(collections.Set[string], len(paths)) + for scanner.Scan() { + fileSet.Add(scanner.Text()) + } + err = scanner.Err() if err != nil { // If exit code is 128 and `not a tree object` is part of stderr, most likely we // are referencing a commit that does not exist. @@ -65,27 +73,17 @@ func (g *gitCLIBackend) verifyPaths(ctx context.Context, treeish string, paths [ return err } + // Check if the resulting objects match the requested + // paths. If not, one or more of the requested + // file paths don't exist. + if len(paths) == 0 { return nil } - // Check if the resulting objects match the requested - // paths. If not, one or more of the requested - // file paths don't exist. - gotPaths := bytes.Split(bytes.TrimSpace(stdout), []byte("\n")) - fileSet := collections.NewSet[string]() - for _, p := range gotPaths { - if len(p) == 0 { - continue - } - pathSegments := bytes.Fields(p) - fileSet.Add(string(pathSegments[len(pathSegments)-1])) - } + pathsSet := make(collections.Set[string], len(paths)) + pathsSet.Add(paths...) - pathsSet := collections.NewSet[string]() - for _, path := range paths { - pathsSet.Add(path) - } diff := pathsSet.Difference(fileSet) if len(diff) != 0 { diff --git a/cmd/gitserver/internal/git/gitcli/archivereader_test.go b/cmd/gitserver/internal/git/gitcli/archivereader_test.go index 52189aa0b91..c055c311d80 100644 --- a/cmd/gitserver/internal/git/gitcli/archivereader_test.go +++ b/cmd/gitserver/internal/git/gitcli/archivereader_test.go @@ -70,7 +70,12 @@ func TestGitCLIBackend_ArchiveReader(t *testing.T) { "echo abcd > file1", "mkdir dir1", "echo efgh > dir1/file2", + `echo ijkl > " file3"`, + `echo mnop > "dir1/file with spaces"`, + "echo qrst > 我的工作", + "git add 我的工作", "git add file1", + `git add " file3"`, "git add dir1", "git commit -m commit --author='Foo Author '", ) @@ -123,6 +128,31 @@ func TestGitCLIBackend_ArchiveReader(t *testing.T) { require.Equal(t, "efgh\n", contents) }) + t.Run("read file with space in name", func(t *testing.T) { + r, err := backend.ArchiveReader(ctx, "tar", string(commitID), []string{" file3", "dir1/file with spaces"}) + require.NoError(t, err) + t.Cleanup(func() { r.Close() }) + tr := tar.NewReader(r) + contents := readFileContentsFromTar(t, tr, " file3") + require.Equal(t, "ijkl\n", contents) + + r, err = backend.ArchiveReader(ctx, "tar", string(commitID), []string{" file3", "dir1/file with spaces"}) + require.NoError(t, err) + t.Cleanup(func() { r.Close() }) + tr = tar.NewReader(r) + contents = readFileContentsFromTar(t, tr, "dir1/file with spaces") + require.Equal(t, "mnop\n", contents) + }) + + t.Run("read non-ascii filename", func(t *testing.T) { + r, err := backend.ArchiveReader(ctx, "tar", string(commitID), []string{" file3", "我的工作"}) + require.NoError(t, err) + t.Cleanup(func() { r.Close() }) + tr := tar.NewReader(r) + contents := readFileContentsFromTar(t, tr, "我的工作") + require.Equal(t, "qrst\n", contents) + }) + t.Run("non existent commit", func(t *testing.T) { _, err := backend.ArchiveReader(ctx, "tar", "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef", nil) require.Error(t, err) diff --git a/internal/byteutils/BUILD.bazel b/internal/byteutils/BUILD.bazel index 07cde6941d8..f0dc4f558d9 100644 --- a/internal/byteutils/BUILD.bazel +++ b/internal/byteutils/BUILD.bazel @@ -3,7 +3,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "byteutils", - srcs = ["linereader.go"], + srcs = [ + "linereader.go", + "nullscanner.go", + ], importpath = "github.com/sourcegraph/sourcegraph/internal/byteutils", visibility = ["//:__subpackages__"], ) diff --git a/internal/byteutils/nullscanner.go b/internal/byteutils/nullscanner.go new file mode 100644 index 00000000000..0289a6ac9b0 --- /dev/null +++ b/internal/byteutils/nullscanner.go @@ -0,0 +1,24 @@ +package byteutils + +import "bytes" + +// ScanNullLines is a split function for a [Scanner] that returns each null-terminated +// line of text, stripped of any trailing end-of-line marker. The returned line may +// be empty. The end-of-line marker is a null character. +// The last non-empty line of input will be returned even if it has no +// null character. +func ScanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, '\x00'); i >= 0 { + // We have a full null-terminated line. + return i + 1, data[0:i], nil + } + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), data, nil + } + // Request more data. + return 0, nil, nil +}