From 772e1497298b7e5de36bfe9b47e9e51021aa06d4 Mon Sep 17 00:00:00 2001 From: Matthew Manela Date: Thu, 1 Aug 2024 10:58:59 -0400 Subject: [PATCH] feat(search): Add support to all Apex language extensions (#64194) Part of [GRAPH-759](https://linear.app/sourcegraph/issue/GRAPH-759/issue-with-apex-extension-not-appearing-for-langapex) Linguist only supports a subset of the file extensions often used for the Apex programming language. This PR adds support for the main set commonly used. **Key changes** 1. Adds all extensions for Apex 2. Updates our logic to handle multiple extensions for one language 3. Updates tests to ensure we only manually map languages if they don't exist OR have different extensions in go-enry (prevents us from completely duplicating entries from go-enry) ## Test plan - [x] Update unit tests - [x] Validate locally by testing the language filter --- lib/codeintel/languages/extensions.go | 17 +++++--- lib/codeintel/languages/extensions_test.go | 50 ++++++++++++++++------ 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/lib/codeintel/languages/extensions.go b/lib/codeintel/languages/extensions.go index 2d0c536bbf3..d222f456bb4 100644 --- a/lib/codeintel/languages/extensions.go +++ b/lib/codeintel/languages/extensions.go @@ -41,8 +41,8 @@ func GetLanguageByNameOrAlias(nameOrAlias string) (lang string, ok bool) { // Mutually consistent with getLanguagesByExtension, see the tests // for the exact invariants. 
func GetLanguageExtensions(language string) []string { - if lang, ok := unsupportedByEnryNameToExtensionMap[language]; ok { - return []string{lang} + if langs, ok := unsupportedByEnryNameToExtensionMap[language]; ok { + return langs } ignoreExts, isNiche := nicheExtensionUsages[language] @@ -114,6 +114,13 @@ var overrideAmbiguousExtensionsMap = map[string]string{ } var unsupportedByEnryExtensionToNameMap = map[string]string{ + // Extensions for the Apex programming language + // See https://developer.salesforce.com/docs/atlas.en-us.apexcode.meta/apexcode/apex_dev_guide.htm + ".apex": "Apex", + ".apxt": "Apex", + ".apxc": "Apex", + ".cls": "Apex", + ".trigger": "Apex", // See TODO(id: remove-pkl-special-case) ".pkl": "Pkl", ".magik": "Magik", @@ -171,10 +178,10 @@ var unsupportedByEnryAliasMap = func() map[string]string { return out }() -func reverseMap(m map[string]string) map[string]string { - n := make(map[string]string, len(m)) +func reverseMap(m map[string]string) map[string][]string { + n := make(map[string][]string, len(m)) for k, v := range m { - n[v] = k + n[v] = append(n[v], k) } return n } diff --git a/lib/codeintel/languages/extensions_test.go b/lib/codeintel/languages/extensions_test.go index 12557122310..645765c3cce 100644 --- a/lib/codeintel/languages/extensions_test.go +++ b/lib/codeintel/languages/extensions_test.go @@ -12,7 +12,12 @@ import ( // Languages/extensions that we don't want to regress var nonAmbiguousExtensionsCheck = map[string]string{ - ".js": "JavaScript", + ".apex": "Apex", + ".apxt": "Apex", + ".apxc": "Apex", + ".cls": "Apex", + ".trigger": "Apex", + ".js": "JavaScript", // Linguist removed JSX (but not TSX) as a separate language: // https://github.com/github-linguist/linguist/pull/5133 ".jsx": "JavaScript", @@ -60,19 +65,23 @@ func TestGetLanguageByAlias_NonAmbiguousLanguages(t *testing.T) { } func TestGetLanguageExtensions_UnsupportedExtensions(t *testing.T) { - for language, ext := range unsupportedByEnryNameToExtensionMap { + 
for language, exts := range unsupportedByEnryNameToExtensionMap { extensions := GetLanguageExtensions(language) - require.Contains(t, extensions, ext, - "maybe a typo in `unsupportedByEnryNameToExtensionMap`?") + for _, ext := range exts { + require.Contains(t, extensions, ext, + "maybe a typo in `unsupportedByEnryNameToExtensionMap`?") + } } } func TestGetLanguageExtensions_NonAmbiguousExtensions(t *testing.T) { langMap := reverseMap(nonAmbiguousExtensionsCheck) - for language, ext := range langMap { + for language, exts := range langMap { extensions := GetLanguageExtensions(language) - require.Contains(t, extensions, ext, - "If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating") + for _, ext := range exts { + require.Contains(t, extensions, ext, + "If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating") + } } } @@ -169,15 +178,30 @@ func TestExtensionsConsistency2(t *testing.T) { // cases for Pkl. func TestUnsupportedByEnry(t *testing.T) { for lang := range unsupportedByEnryNameToExtensionMap { - _, found := enrydata.ExtensionsByLanguage[lang] - require.False(t, found, "looks like language %q is supported by enry; remove it from unsupportedByEnryNameToExtensionMap") + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] + if found { + validateLanguageAgainstGoEnry(t, "unsupportedByEnryNameToExtensionMap", enry_extensions, lang) + } } for _, lang := range unsupportedByEnryAliasMap { - _, found := enrydata.ExtensionsByLanguage[lang] - require.False(t, found, "looks like language %q is supported by enry; remove it from unsupportedByEnryAliasMap") + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] + if found { + validateLanguageAgainstGoEnry(t, "unsupportedByEnryAliasMap", enry_extensions, lang) + } } for _, lang := range unsupportedByEnryExtensionToNameMap { - _, found := enrydata.ExtensionsByLanguage[lang] - require.False(t, found, "looks like language %q is 
supported by enry; remove it from unsupportedByEnryExtensionToNameMap") + enry_extensions, found := enrydata.ExtensionsByLanguage[lang] + if found { + validateLanguageAgainstGoEnry(t, "unsupportedByEnryExtensionToNameMap", enry_extensions, lang) + } } } + +func validateLanguageAgainstGoEnry(t *testing.T, name string, enryExtensions []string, lang string) { + enryExtensions = slices.Clone(enryExtensions) + slices.Sort(enryExtensions) + sgExtensions := slices.Clone(unsupportedByEnryNameToExtensionMap[lang]) + slices.Sort(sgExtensions) + + require.NotEqualf(t, enryExtensions, sgExtensions, "looks like language %q is supported by enry with the same extensions; remove it from %q", lang, name) +}