lib/process: always pipe _exactly_ what the command outputs (#36574)

Right now, we use the default scanner split function, which is
`bufio.SplitLines`. This removes the trailing newline and any preceding
`\r` before it, then returns the remainder of the token. `PipeOutput`
then puts the newline back by invoking `fmt.Fprintln`.

This has the positive effect that we essentially normalise `\r\n` and
`\n` into `\n`, but the negative effect that every line of output
becomes newline-terminated, even if it's a final line that doesn't
include a newline.

This matters because this output is then used for output variables when
executing batch specs, which means that something like `echo -n hi` will
end up with an extraneous newline, even though `echo -n` has only
written two bytes to stdout.

Instead, let's set the split function to one that splits by line, but
_retains_ the newline, and then we can write it to the piped writer(s)
with `fmt.Fprint` directly.

The one tradeoff here is that I haven't reimplemented the `\r`
swallowing that `bufio.SplitLines` does. In practice, I don't believe we
ever actually used this behaviour: Docker _may_ return extraneous `\r`
bytes when running with TTY mode enabled, but we never enable that when
executing batch specs, and if the user's container actually outputs `\r`
we should actually retain it, rather than trying to filter their output.

Fixes #36562.
This commit is contained in:
Adam Harvey 2022-06-03 15:13:27 -07:00 committed by GitHub
parent 28e535a370
commit 79226578ec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 1 deletions

View File

@ -2,6 +2,7 @@ package process
import (
"bufio"
"bytes"
"context"
"fmt"
"io"
@ -35,12 +36,13 @@ type cmdPiper interface {
func PipeOutput(ctx context.Context, c cmdPiper, stdoutWriter, stderrWriter io.Writer) (*errgroup.Group, error) {
pipe := func(w io.Writer, r io.Reader) error {
scanner := bufio.NewScanner(r)
scanner.Split(scanLinesWithNewline)
buf := make([]byte, initialBufSize)
scanner.Buffer(buf, maxTokenSize)
for scanner.Scan() {
fmt.Fprintln(w, scanner.Text())
fmt.Fprint(w, scanner.Text())
}
return scanner.Err()
@ -92,3 +94,24 @@ func pipeProcessOutput(ctx context.Context, c cmdPiper, stdoutWriter, stderrWrit
return eg, nil
}
// scanLinesWithNewline is a modified version of bufio.ScanLines that retains
// the trailing newline byte(s) in the returned token.
func scanLinesWithNewline(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i := bytes.IndexByte(data, '\n'); i >= 0 {
// We have a full newline-terminated line.
return i + 1, data[0 : i+1], nil
}
// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}
// Request more data.
return 0, nil, nil
}

View File

@ -58,11 +58,21 @@ func TestPipeOutput(t *testing.T) {
waitForWrite(t, out)
wantBytesWritten(t, out, 8)
// Finally, we'll write a line that isn't terminated by a newline, then EOF
write(t, d.stdout, "e")
// stdout should *not* be written yet
wantBytesWritten(t, out, 8)
d.stdout.Close()
d.stderr.Close()
if err := eg.Wait(); err != nil {
t.Fatalf("errgroup has err: %s", err)
}
// stdout should now be written with the one extra byte we wrote without a
// newline
waitForWrite(t, out)
wantBytesWritten(t, out, 9)
}
func TestPipeOutputUnbuffered(t *testing.T) {