mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 19:21:50 +00:00
fix: code insights - language stats - prevent data race condition and drop lock file exclusion
Co-authored-by: Michael Bahr <michael.bahr@sourcegraph.com>
This commit is contained in:
parent
7d80a583c7
commit
53c2e6d955
@ -15,7 +15,6 @@ go_library(
|
||||
"//lib/errors",
|
||||
"@com_github_go_enry_go_enry_v2//:go-enry",
|
||||
"@com_github_go_enry_go_enry_v2//data",
|
||||
"@com_github_grafana_regexp//:regexp",
|
||||
"@com_github_sourcegraph_conc//iter",
|
||||
"@com_github_sourcegraph_log//:log",
|
||||
"@io_opentelemetry_go_otel//attribute",
|
||||
@ -29,7 +28,6 @@ go_test(
|
||||
"inventory_test.go",
|
||||
],
|
||||
embed = [":inventory"],
|
||||
race = "off",
|
||||
deps = [
|
||||
"//internal/fileutil",
|
||||
"//lib/errors",
|
||||
|
||||
@ -8,23 +8,15 @@ import (
|
||||
"sort"
|
||||
)
|
||||
|
||||
// fileReadBufferSize is the size of the buffer we'll use while reading file contents
|
||||
const fileReadBufferSize = 16 * 1024
|
||||
|
||||
// Entries computes the inventory of languages for the given entries. It traverses trees recursively
|
||||
// and caches results for each subtree. Results for listed files are cached.
|
||||
//
|
||||
// If a file is referenced more than once (e.g., because it is a descendent of a subtree and it is
|
||||
// passed directly), it will be double-counted in the result.
|
||||
func (c *Context) Entries(ctx context.Context, entries ...fs.FileInfo) (inv Inventory, err error) {
|
||||
buf := make([]byte, fileReadBufferSize)
|
||||
return c.entries(ctx, entries, buf)
|
||||
}
|
||||
|
||||
func (c *Context) entries(ctx context.Context, entries []fs.FileInfo, buf []byte) (Inventory, error) {
|
||||
func (c *Context) Entries(ctx context.Context, entries ...fs.FileInfo) (Inventory, error) {
|
||||
invs := make([]Inventory, len(entries))
|
||||
for i, entry := range entries {
|
||||
var f func(context.Context, fs.FileInfo, []byte) (Inventory, error)
|
||||
var f func(context.Context, fs.FileInfo) (Inventory, error)
|
||||
switch {
|
||||
case entry.Mode().IsRegular():
|
||||
f = c.file
|
||||
@ -36,7 +28,7 @@ func (c *Context) entries(ctx context.Context, entries []fs.FileInfo, buf []byte
|
||||
}
|
||||
|
||||
var err error
|
||||
invs[i], err = f(ctx, entry, buf)
|
||||
invs[i], err = f(ctx, entry)
|
||||
if err != nil {
|
||||
return Inventory{}, err
|
||||
}
|
||||
@ -45,7 +37,7 @@ func (c *Context) entries(ctx context.Context, entries []fs.FileInfo, buf []byte
|
||||
return Sum(invs), nil
|
||||
}
|
||||
|
||||
func (c *Context) tree(ctx context.Context, tree fs.FileInfo, buf []byte) (inv Inventory, err error) {
|
||||
func (c *Context) tree(ctx context.Context, tree fs.FileInfo) (inv Inventory, err error) {
|
||||
// Get and set from the cache.
|
||||
if c.CacheGet != nil {
|
||||
if inv, ok := c.CacheGet(ctx, tree); ok {
|
||||
@ -72,10 +64,10 @@ func (c *Context) tree(ctx context.Context, tree fs.FileInfo, buf []byte) (inv I
|
||||
// Don't individually cache files that we found during tree traversal. The hit rate for
|
||||
// those cache entries is likely to be much lower than cache entries for files whose
|
||||
// inventory was directly requested.
|
||||
lang, err := getLang(ctx, e, buf, c.NewFileReader)
|
||||
lang, err := getLang(ctx, e, c.NewFileReader)
|
||||
return Inventory{Languages: []Lang{lang}}, err
|
||||
case e.Mode().IsDir(): // subtree
|
||||
subtreeInv, err := c.tree(ctx, e, buf)
|
||||
subtreeInv, err := c.tree(ctx, e)
|
||||
return subtreeInv, err
|
||||
default:
|
||||
// Skip symlinks, submodules, etc.
|
||||
@ -91,7 +83,7 @@ func (c *Context) tree(ctx context.Context, tree fs.FileInfo, buf []byte) (inv I
|
||||
}
|
||||
|
||||
// file computes the inventory of a single file. It caches the result.
|
||||
func (c *Context) file(ctx context.Context, file fs.FileInfo, buf []byte) (inv Inventory, err error) {
|
||||
func (c *Context) file(ctx context.Context, file fs.FileInfo) (inv Inventory, err error) {
|
||||
// Get and set from the cache.
|
||||
if c.CacheGet != nil {
|
||||
if inv, ok := c.CacheGet(ctx, file); ok {
|
||||
@ -106,7 +98,7 @@ func (c *Context) file(ctx context.Context, file fs.FileInfo, buf []byte) (inv I
|
||||
}()
|
||||
}
|
||||
|
||||
lang, err := getLang(ctx, file, buf, c.NewFileReader)
|
||||
lang, err := getLang(ctx, file, c.NewFileReader)
|
||||
if err != nil {
|
||||
return Inventory{}, errors.Wrapf(err, "inventory file %q", file.Name())
|
||||
}
|
||||
|
||||
@ -8,7 +8,6 @@ import (
|
||||
"fmt"
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/go-enry/go-enry/v2/data"
|
||||
"github.com/grafana/regexp"
|
||||
"github.com/sourcegraph/sourcegraph/internal/trace"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"io"
|
||||
@ -19,6 +18,9 @@ import (
|
||||
"github.com/sourcegraph/sourcegraph/lib/errors"
|
||||
)
|
||||
|
||||
// fileReadBufferSize is the size of the buffer we'll use while reading file contents
|
||||
const fileReadBufferSize = 4 * 1024
|
||||
|
||||
// Inventory summarizes a tree's contents (e.g., which programming
|
||||
// languages are used).
|
||||
type Inventory struct {
|
||||
@ -41,35 +43,13 @@ type Lang struct {
|
||||
|
||||
var newLine = []byte{'\n'}
|
||||
|
||||
func isExcluded(name string) (bool, error) {
|
||||
// Exclude lock files by default. We can later make the patterns configurable.
|
||||
excludedFileNamePatterns := []string{".*\\.lock"}
|
||||
for _, pattern := range excludedFileNamePatterns {
|
||||
matched, err := regexp.MatchString(pattern, name)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if matched {
|
||||
return matched, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func getLang(ctx context.Context, file fs.FileInfo, buf []byte, getFileReader func(ctx context.Context, path string) (io.ReadCloser, error)) (Lang, error) {
|
||||
func getLang(ctx context.Context, file fs.FileInfo, getFileReader func(ctx context.Context, path string) (io.ReadCloser, error)) (Lang, error) {
|
||||
if file == nil {
|
||||
return Lang{}, nil
|
||||
}
|
||||
if !file.Mode().IsRegular() || enry.IsVendor(file.Name()) {
|
||||
return Lang{}, nil
|
||||
}
|
||||
fileExcluded, err := isExcluded(file.Name())
|
||||
if err != nil {
|
||||
return Lang{}, errors.Wrap(err, "Failed to evaluate regex of excluded files.")
|
||||
}
|
||||
if fileExcluded {
|
||||
return Lang{}, nil
|
||||
}
|
||||
|
||||
trc, ctx := trace.New(ctx, "getLang")
|
||||
defer trc.End()
|
||||
@ -99,6 +79,8 @@ func getLang(ctx context.Context, file fs.FileInfo, buf []byte, getFileReader fu
|
||||
return lang, nil
|
||||
}
|
||||
|
||||
buf := make([]byte, fileReadBufferSize)
|
||||
|
||||
if !safe {
|
||||
// Detect language from content
|
||||
n, err := io.ReadFull(rc, buf)
|
||||
|
||||
@ -65,7 +65,6 @@ func TestGetLang_language(t *testing.T) {
|
||||
t.Run(label, func(t *testing.T) {
|
||||
lang, err := getLang(context.Background(),
|
||||
test.file,
|
||||
make([]byte, fileReadBufferSize),
|
||||
makeFileReader(test.file.Contents))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
@ -130,7 +129,7 @@ func TestGet_readFile(t *testing.T) {
|
||||
for _, test := range tests {
|
||||
t.Run(test.file.Name(), func(t *testing.T) {
|
||||
fr := makeFileReader(test.file.(fi).Contents)
|
||||
lang, err := getLang(context.Background(), test.file, make([]byte, fileReadBufferSize), fr)
|
||||
lang, err := getLang(context.Background(), test.file, fr)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@ -160,12 +159,11 @@ func BenchmarkGetLang(b *testing.B) {
|
||||
b.Fatal(err)
|
||||
}
|
||||
fr := newFileReader(files)
|
||||
buf := make([]byte, fileReadBufferSize)
|
||||
b.Logf("Calling Get on %d files.", len(files))
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
for _, file := range files {
|
||||
_, err = getLang(context.Background(), file, buf, fr)
|
||||
_, err = getLang(context.Background(), file, fr)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
@ -256,27 +254,3 @@ export function baz(n) {
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsExcluded(t *testing.T) {
|
||||
t.Run("should exclude lock file", func(t *testing.T) {
|
||||
excluded, err := isExcluded("yarn.lock")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if !excluded {
|
||||
t.Errorf("expected file to be excluded")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("should not exclude non-lock file", func(t *testing.T) {
|
||||
excluded, err := isExcluded("hello-world.md")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if excluded {
|
||||
t.Errorf("expected file to not be excluded")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user