feat(search): Add support for all Apex language extensions (#64194)

Part of
[GRAPH-759](https://linear.app/sourcegraph/issue/GRAPH-759/issue-with-apex-extension-not-appearing-for-langapex)

Linguist only supports a subset of the file extensions commonly used for
the Apex programming language. This PR adds support for the main set of
extensions in common use.

**Key changes**
1. Adds all extensions for Apex
2. Updates our logic to handle multiple extensions for one language
3. Updates tests to ensure we only manually map languages if they don't
exist in go-enry OR have different extensions there (prevents us from
completely duplicating entries from go-enry)

## Test plan
- [x] Update unit tests
- [x] Validate locally by testing the language filter
This commit is contained in:
Matthew Manela 2024-08-01 10:58:59 -04:00 committed by GitHub
parent cd38adb4a7
commit 772e149729
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 49 additions and 18 deletions

View File

@ -41,8 +41,8 @@ func GetLanguageByNameOrAlias(nameOrAlias string) (lang string, ok bool) {
// Mutually consistent with getLanguagesByExtension, see the tests
// for the exact invariants.
func GetLanguageExtensions(language string) []string {
if lang, ok := unsupportedByEnryNameToExtensionMap[language]; ok {
return []string{lang}
if langs, ok := unsupportedByEnryNameToExtensionMap[language]; ok {
return langs
}
ignoreExts, isNiche := nicheExtensionUsages[language]
@ -114,6 +114,13 @@ var overrideAmbiguousExtensionsMap = map[string]string{
}
var unsupportedByEnryExtensionToNameMap = map[string]string{
// Extensions for the Apex programming language
// See https://developer.salesforce.com/docs/atlas.en-us.apexcode.meta/apexcode/apex_dev_guide.htm
".apex": "Apex",
".apxt": "Apex",
".apxc": "Apex",
".cls": "Apex",
".trigger": "Apex",
// See TODO(id: remove-pkl-special-case)
".pkl": "Pkl",
".magik": "Magik",
@ -171,10 +178,10 @@ var unsupportedByEnryAliasMap = func() map[string]string {
return out
}()
// reverseMap inverts m, grouping every key of m under the value it maps to.
//
// Several keys may share one value (for example, multiple file extensions
// that belong to the same language), so each distinct value of m becomes a
// key of the result holding all of the original keys that pointed at it.
// The order of the keys inside each slice follows map iteration order and is
// therefore unspecified.
func reverseMap(m map[string]string) map[string][]string {
	n := make(map[string][]string, len(m))
	for k, v := range m {
		// Appending to the (possibly nil) existing slice keeps every key
		// instead of letting the last one win.
		n[v] = append(n[v], k)
	}
	return n
}

View File

@ -12,7 +12,12 @@ import (
// Languages/extensions that we don't want to regress
var nonAmbiguousExtensionsCheck = map[string]string{
".js": "JavaScript",
".apex": "Apex",
".apxt": "Apex",
".apxc": "Apex",
".cls": "Apex",
".trigger": "Apex",
".js": "JavaScript",
// Linguist removed JSX (but not TSX) as a separate language:
// https://github.com/github-linguist/linguist/pull/5133
".jsx": "JavaScript",
@ -60,19 +65,23 @@ func TestGetLanguageByAlias_NonAmbiguousLanguages(t *testing.T) {
}
func TestGetLanguageExtensions_UnsupportedExtensions(t *testing.T) {
for language, ext := range unsupportedByEnryNameToExtensionMap {
for language, exts := range unsupportedByEnryNameToExtensionMap {
extensions := GetLanguageExtensions(language)
require.Contains(t, extensions, ext,
"maybe a typo in `unsupportedByEnryNameToExtensionMap`?")
for _, ext := range exts {
require.Contains(t, extensions, ext,
"maybe a typo in `unsupportedByEnryNameToExtensionMap`?")
}
}
}
func TestGetLanguageExtensions_NonAmbiguousExtensions(t *testing.T) {
langMap := reverseMap(nonAmbiguousExtensionsCheck)
for language, ext := range langMap {
for language, exts := range langMap {
extensions := GetLanguageExtensions(language)
require.Contains(t, extensions, ext,
"If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating")
for _, ext := range exts {
require.Contains(t, extensions, ext,
"If this test fails when updating enry, maybe `overrideAmbiguousExtensionsMap` needs updating")
}
}
}
@ -169,15 +178,30 @@ func TestExtensionsConsistency2(t *testing.T) {
// cases for Pkl.
func TestUnsupportedByEnry(t *testing.T) {
	// A language listed in one of our manual maps may also be known to enry;
	// in that case it is handed to validateLanguageAgainstGoEnry rather than
	// being rejected outright.
	for lang := range unsupportedByEnryNameToExtensionMap {
		if enryExtensions, found := enrydata.ExtensionsByLanguage[lang]; found {
			validateLanguageAgainstGoEnry(t, "unsupportedByEnryNameToExtensionMap", enryExtensions, lang)
		}
	}
	// The alias map is keyed by alias; its values are the language names.
	for _, lang := range unsupportedByEnryAliasMap {
		if enryExtensions, found := enrydata.ExtensionsByLanguage[lang]; found {
			validateLanguageAgainstGoEnry(t, "unsupportedByEnryAliasMap", enryExtensions, lang)
		}
	}
	// The extension map is keyed by extension; its values are the language names.
	for _, lang := range unsupportedByEnryExtensionToNameMap {
		if enryExtensions, found := enrydata.ExtensionsByLanguage[lang]; found {
			validateLanguageAgainstGoEnry(t, "unsupportedByEnryExtensionToNameMap", enryExtensions, lang)
		}
	}
}
// validateLanguageAgainstGoEnry fails the test when the extensions go-enry
// lists for lang are exactly the ones recorded for lang in
// unsupportedByEnryNameToExtensionMap, compared without regard to order.
//
// name is the map being validated and is used only in the failure message;
// the comparison always reads unsupportedByEnryNameToExtensionMap.
// enryExtensions is the extension list go-enry reports for lang.
func validateLanguageAgainstGoEnry(t *testing.T, name string, enryExtensions []string, lang string) {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	// Sort copies so neither enry's data nor our own map is mutated.
	enryExtensions = slices.Clone(enryExtensions)
	slices.Sort(enryExtensions)
	sgExtensions := slices.Clone(unsupportedByEnryNameToExtensionMap[lang])
	slices.Sort(sgExtensions)

	require.NotEqualf(t, enryExtensions, sgExtensions, "looks like language %q is supported by enry with the same extensions; remove it from %q", lang, name)
}