lib/process: always pipe _exactly_ what the command outputs (#36574)

Right now, we use the default scanner split function, which is `bufio.SplitLines`. This removes the trailing newline and any preceding `\r` before it, then returns the remainder of the token. `PipeOutput` then puts the newline back by invoking `fmt.Fprintln`. This has the positive effect that we essentially normalise `\r\n` and `\n` into `\n`, but the negative effect that every line of output becomes newline-terminated, even if it's a final line that doesn't include a newline. This matters because this output is then used for output variables when executing batch specs, which means that something like `echo -n hi` will end up with an extraneous newline, even though `echo -n` has only written two bytes to stdout. Instead, let's set the split function to one that splits by line, but _retains_ the newline, and then we can write it to the piped writer(s) with `fmt.Fprint` directly. The one tradeoff here is that I haven't reimplemented the `\r` swallowing that `bufio.SplitLines` does. In practice, I don't believe we ever actually used this behaviour: Docker _may_ return extraneous `\r` bytes when running with TTY mode enabled, but we never enable that when executing batch specs, and if the user's container actually outputs `\r` we should actually retain it, rather than trying to filter their output. Fixes #36562.
2026-02-06 12:51:55 +00:00 · 2022-06-03 15:13:27 -07:00 · 2022-06-03 15:13:27 -07:00 · 79226578ec
commit 79226578ec
parent 28e535a370
2 changed files with 34 additions and 1 deletions
--- a/lib/process/pipe.go
+++ b/lib/process/pipe.go
@ -2,6 +2,7 @@ package process

 import (
 	"bufio"
+	"bytes"
 	"context"
 	"fmt"
 	"io"
@ -35,12 +36,13 @@ type cmdPiper interface {
 func PipeOutput(ctx context.Context, c cmdPiper, stdoutWriter, stderrWriter io.Writer) (*errgroup.Group, error) {
 	pipe := func(w io.Writer, r io.Reader) error {
 		scanner := bufio.NewScanner(r)
+		scanner.Split(scanLinesWithNewline)

 		buf := make([]byte, initialBufSize)
 		scanner.Buffer(buf, maxTokenSize)

 		for scanner.Scan() {
-			fmt.Fprintln(w, scanner.Text())
+			fmt.Fprint(w, scanner.Text())
 		}

 		return scanner.Err()
@ -92,3 +94,24 @@ func pipeProcessOutput(ctx context.Context, c cmdPiper, stdoutWriter, stderrWrit

 	return eg, nil
 }
+
+// scanLinesWithNewline is a modified version of bufio.ScanLines that retains
+// the trailing newline byte(s) in the returned token.
+func scanLinesWithNewline(data []byte, atEOF bool) (advance int, token []byte, err error) {
+	if atEOF && len(data) == 0 {
+		return 0, nil, nil
+	}
+
+	if i := bytes.IndexByte(data, '\n'); i >= 0 {
+		// We have a full newline-terminated line.
+		return i + 1, data[0 : i+1], nil
+	}
+
+	// If we're at EOF, we have a final, non-terminated line. Return it.
+	if atEOF {
+		return len(data), data, nil
+	}
+
+	// Request more data.
+	return 0, nil, nil
+}
--- a/lib/process/pipe_test.go
+++ b/lib/process/pipe_test.go
@ -58,11 +58,21 @@ func TestPipeOutput(t *testing.T) {
 	waitForWrite(t, out)
 	wantBytesWritten(t, out, 8)

+	// Finally, we'll write a line that isn't terminated by a newline, then EOF
+	write(t, d.stdout, "e")
+	// stdout should *not* be written yet
+	wantBytesWritten(t, out, 8)
+
 	d.stdout.Close()
 	d.stderr.Close()
 	if err := eg.Wait(); err != nil {
 		t.Fatalf("errgroup has err: %s", err)
 	}
+
+	// stdout should now be written with the one extra byte we wrote without a
+	// newline
+	waitForWrite(t, out)
+	wantBytesWritten(t, out, 9)
 }

 func TestPipeOutputUnbuffered(t *testing.T) {