sourcegraph/internal/byteutils/linereader.go
Erik Seliger 211d5430c6
Add linereader helper (#49965)
This adds a new helper to read byte slices line by line without the
overhead of requiring 24 bytes per subslice it creates from
`bytes.Split`. This is also different to bufio.Scanner in that it
doesn't need to allocate a large buffer to support long line lengths.
I've added a bunch of benchmarks for this; it seems to perform generally
better, with less memory allocated. In a follow-up PR, I will make use
of this helper.

```
goos: darwin
goarch: arm64
pkg: github.com/sourcegraph/sourcegraph/internal/byteutils
BenchmarkNewLineReader-10               12786025                87.10 ns/op            0 B/op          0 allocs/op
BenchmarkBytesSplit-10                   9051704               125.6 ns/op           176 B/op          1 allocs/op
BenchmarkNewLineReaderLongLine-10         689241              1673 ns/op               0 B/op          0 allocs/op
BenchmarkBytesSplitLongLine-10            518218              2198 ns/op              24 B/op          1 allocs/op
BenchmarkNewLineReaderManyLines-10          8460            137936 ns/op               0 B/op          0 allocs/op
BenchmarkBytesSplitManyLines-10             7690            149148 ns/op          245760 B/op          1 allocs/op
PASS
```
2023-03-24 17:39:49 +01:00

70 lines
2.0 KiB
Go

package byteutils
import "bytes"
// NewLineReader returns a lineReader that walks over the lines of data one
// at a time.
//
// Compared to bytes.Split it is more memory efficient: instead of building a
// slice that needs 24 bytes for every subslice it generates, each line is
// handed out individually as a subslice of data. The benchmarks in the test
// file compare the two approaches.
//
// Note that the behavior differs slightly from bytes.Split:
//   - For an empty input it yields no lines at all, whereas bytes.Split
//     would yield a single empty line.
//   - If the input is terminated by a newline, no final empty line is
//     yielded.
//
// data is the byte slice to read lines from.
//
// Example:
//
//	data := []byte("hello\nworld\n")
//	reader := byteutils.NewLineReader(data)
//
//	for reader.Scan() {
//		line := reader.Line()
//		// Use line...
//	}
func NewLineReader(data []byte) lineReader {
	var reader lineReader
	reader.data = data
	return reader
}
// lineReader iterates over the lines of a byte slice without copying it.
// Lines are separated by '\n'; a '\r' preceding the newline is not stripped.
type lineReader struct {
	// i is the offset into data at which the next line starts.
	i int
	// data is the input that is being split into lines.
	data []byte
	// current is the line found by the most recent successful Scan.
	current []byte
}

// Scan advances the lineReader to the next line and returns true, or returns
// false if there are no more lines. After a successful call the line is
// available through Line.
//
// Empty input yields no lines, and a trailing newline at the end of the input
// does not produce a final empty line.
func (r *lineReader) Scan() bool {
	// Everything has been consumed. This also covers empty input and the
	// position right after a terminating newline.
	if r.i >= len(r.data) {
		return false
	}

	start := r.i

	// Without a further newline the line runs to the end of the data.
	end := len(r.data)
	next := end
	if n := bytes.IndexByte(r.data[start:], '\n'); n >= 0 {
		// Exclude the newline from the line and resume right after it.
		end = start + n
		next = end + 1
	}

	// Clamp the capacity to the length of the line (as bytes.Split does), so
	// that a caller appending to the returned slice gets a fresh allocation
	// instead of silently overwriting the bytes that follow in data.
	r.current = r.data[start:end:end]
	r.i = next
	return true
}

// Line returns the current line, without its terminating newline.
// The returned slice is a subslice of the data the reader was created with;
// it is not a copy. Its capacity equals its length.
// Scan must be called before calling Line; before the first successful Scan
// the result is nil.
func (r *lineReader) Line() []byte {
	return r.current
}