[gitserver] ArchiveReader correctly handles paths with spaces (#61522)

This commit is contained in:
Petri-Johan Last 2024-04-02 15:39:55 +02:00 committed by GitHub
parent a9608c7bf2
commit fafdf1873e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 75 additions and 19 deletions

View File

@ -25,6 +25,7 @@ go_library(
"//cmd/gitserver/internal/git",
"//internal/actor",
"//internal/api",
"//internal/byteutils",
"//internal/collections",
"//internal/conf",
"//internal/gitserver/gitdomain",

View File

@ -1,12 +1,14 @@
package gitcli
import (
"bufio"
"bytes"
"context"
"io"
"os"
"github.com/sourcegraph/sourcegraph/cmd/gitserver/internal/git"
"github.com/sourcegraph/sourcegraph/internal/byteutils"
"github.com/sourcegraph/sourcegraph/internal/collections"
"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"
"github.com/sourcegraph/sourcegraph/lib/errors"
@ -44,7 +46,7 @@ func buildArchiveArgs(format git.ArchiveFormat, treeish string, paths []string)
func pathspecLiteral(s string) string { return ":(literal)" + s }
func (g *gitCLIBackend) verifyPaths(ctx context.Context, treeish string, paths []string) error {
args := []string{"ls-tree", treeish, "--"}
args := []string{"ls-tree", "-z", "--name-only", treeish, "--"}
args = append(args, paths...)
r, err := g.NewCommand(ctx, WithArguments(args...))
if err != nil {
@ -52,7 +54,13 @@ func (g *gitCLIBackend) verifyPaths(ctx context.Context, treeish string, paths [
}
defer r.Close()
stdout, err := io.ReadAll(r)
scanner := bufio.NewScanner(r)
scanner.Split(byteutils.ScanNullLines)
fileSet := make(collections.Set[string], len(paths))
for scanner.Scan() {
fileSet.Add(scanner.Text())
}
err = scanner.Err()
if err != nil {
// If exit code is 128 and `not a tree object` is part of stderr, most likely we
// are referencing a commit that does not exist.
@ -65,27 +73,17 @@ func (g *gitCLIBackend) verifyPaths(ctx context.Context, treeish string, paths [
return err
}
// Check if the resulting objects match the requested
// paths. If not, one or more of the requested
// file paths don't exist.
if len(paths) == 0 {
return nil
}
// Check if the resulting objects match the requested
// paths. If not, one or more of the requested
// file paths don't exist.
gotPaths := bytes.Split(bytes.TrimSpace(stdout), []byte("\n"))
fileSet := collections.NewSet[string]()
for _, p := range gotPaths {
if len(p) == 0 {
continue
}
pathSegments := bytes.Fields(p)
fileSet.Add(string(pathSegments[len(pathSegments)-1]))
}
pathsSet := make(collections.Set[string], len(paths))
pathsSet.Add(paths...)
pathsSet := collections.NewSet[string]()
for _, path := range paths {
pathsSet.Add(path)
}
diff := pathsSet.Difference(fileSet)
if len(diff) != 0 {

View File

@ -70,7 +70,12 @@ func TestGitCLIBackend_ArchiveReader(t *testing.T) {
"echo abcd > file1",
"mkdir dir1",
"echo efgh > dir1/file2",
`echo ijkl > " file3"`,
`echo mnop > "dir1/file with spaces"`,
"echo qrst > 我的工作",
"git add 我的工作",
"git add file1",
`git add " file3"`,
"git add dir1",
"git commit -m commit --author='Foo Author <foo@sourcegraph.com>'",
)
@ -123,6 +128,31 @@ func TestGitCLIBackend_ArchiveReader(t *testing.T) {
require.Equal(t, "efgh\n", contents)
})
t.Run("read file with space in name", func(t *testing.T) {
r, err := backend.ArchiveReader(ctx, "tar", string(commitID), []string{" file3", "dir1/file with spaces"})
require.NoError(t, err)
t.Cleanup(func() { r.Close() })
tr := tar.NewReader(r)
contents := readFileContentsFromTar(t, tr, " file3")
require.Equal(t, "ijkl\n", contents)
r, err = backend.ArchiveReader(ctx, "tar", string(commitID), []string{" file3", "dir1/file with spaces"})
require.NoError(t, err)
t.Cleanup(func() { r.Close() })
tr = tar.NewReader(r)
contents = readFileContentsFromTar(t, tr, "dir1/file with spaces")
require.Equal(t, "mnop\n", contents)
})
t.Run("read non-ascii filename", func(t *testing.T) {
r, err := backend.ArchiveReader(ctx, "tar", string(commitID), []string{" file3", "我的工作"})
require.NoError(t, err)
t.Cleanup(func() { r.Close() })
tr := tar.NewReader(r)
contents := readFileContentsFromTar(t, tr, "我的工作")
require.Equal(t, "qrst\n", contents)
})
t.Run("non existent commit", func(t *testing.T) {
_, err := backend.ArchiveReader(ctx, "tar", "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef", nil)
require.Error(t, err)

View File

@ -3,7 +3,10 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "byteutils",
srcs = ["linereader.go"],
srcs = [
"linereader.go",
"nullscanner.go",
],
importpath = "github.com/sourcegraph/sourcegraph/internal/byteutils",
visibility = ["//:__subpackages__"],
)

View File

@ -0,0 +1,24 @@
package byteutils
import "bytes"
// ScanNullLines is a split function for a [Scanner] that returns each null-terminated
// line of text, stripped of any trailing end-of-line marker. The returned line may
// be empty. The end-of-line marker is a null character.
// The last non-empty line of input will be returned even if it has no
// null character.
func ScanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i := bytes.IndexByte(data, '\x00'); i >= 0 {
// We have a full null-terminated line.
return i + 1, data[0:i], nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}
// Request more data.
return 0, nil, nil
}