diff --git a/enterprise/internal/embeddings/embed/files.go b/enterprise/internal/embeddings/embed/files.go index f08720634c8..dbf4daa5d57 100644 --- a/enterprise/internal/embeddings/embed/files.go +++ b/enterprise/internal/embeddings/embed/files.go @@ -2,7 +2,6 @@ package embed import ( "path/filepath" - "strings" "github.com/sourcegraph/sourcegraph/enterprise/internal/paths" @@ -21,9 +20,16 @@ var textFileExtensions = map[string]struct{}{ } var defaultExcludedFilePathPatterns = []string{ + ".*ignore", // Files like .gitignore, .eslintignore + ".gitattributes", + ".mailmap", + "*.csv", "*.sql", "*.svg", "*.json", + "*.jsonc", + "*.jsonl", + "*.xml", "*.yml", "*.yaml", "__fixtures__/", diff --git a/enterprise/internal/embeddings/embed/files_test.go b/enterprise/internal/embeddings/embed/files_test.go index 6c2feafcd01..91d325fd1e2 100644 --- a/enterprise/internal/embeddings/embed/files_test.go +++ b/enterprise/internal/embeddings/embed/files_test.go @@ -19,9 +19,13 @@ func TestExcludingFilePaths(t *testing.T) { "vendor/README.md", "LICENSE.txt", "nested/vendor/file.py", + ".prettierignore", + "client/web/.gitattributes", + "no_ignore", + "data/names.csv", } - expectedFiles := []string{"cool.go", "Dockerfile", "README.md", "LICENSE.txt"} + expectedFiles := []string{"cool.go", "Dockerfile", "README.md", "LICENSE.txt", "no_ignore"} gotFiles := []string{} excludedGlobPatterns := GetDefaultExcludedFilePathPatterns()