mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 14:51:44 +00:00
chore: Centralize languages package as source-of-truth (#63292)
This patch does a few things: - Adds `go-enry` packages to depguard, so that people do not accidentally use enry APIs instead of the corresponding APIs in the `languages` package. - Adds more tests for different functions in the languages package to ensure mutual consistency in how language<->extension mappings are handled. - Adds tests for enry upgrades - Adds comments with IDs so that related parts in the code can be pieced together easily
This commit is contained in:
parent
fea35a2733
commit
3437f8253d
File diff suppressed because it is too large
Load Diff
@ -392,7 +392,6 @@ go_test(
|
||||
"access_tokens_test.go",
|
||||
"client_configuration_test.go",
|
||||
"code_hosts_test.go",
|
||||
"enry_test.go",
|
||||
"event_log_test.go",
|
||||
"event_logs_test.go",
|
||||
"executor_secrets_test.go",
|
||||
@ -541,7 +540,6 @@ go_test(
|
||||
"@com_github_davecgh_go_spew//spew",
|
||||
"@com_github_derision_test_go_mockgen_v2//testutil/assert",
|
||||
"@com_github_derision_test_go_mockgen_v2//testutil/require",
|
||||
"@com_github_go_enry_go_enry_v2//:go-enry",
|
||||
"@com_github_golang_jwt_jwt_v4//:jwt",
|
||||
"@com_github_google_go_cmp//cmp",
|
||||
"@com_github_google_go_cmp//cmp/cmpopts",
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
package graphqlbackend
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
var matlabFile string = `% matlab function to compute square of a value
|
||||
function [out] = square(x)
|
||||
out = x * x;
|
||||
end
|
||||
|
||||
function [out] = fourthpower(x)
|
||||
out = square(square(x));
|
||||
end`
|
||||
|
||||
func TestEnryLangs(t *testing.T) {
|
||||
langs := enry.GetLanguages("foo.m", []byte(matlabFile))
|
||||
require.Equal(t, []string{"MATLAB"}, langs)
|
||||
}
|
||||
@ -9,14 +9,13 @@ import (
|
||||
"io"
|
||||
"io/fs"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/go-enry/go-enry/v2/data"
|
||||
"github.com/go-enry/go-enry/v2" //nolint:depguard - FIXME: replace this usage of enry with languages package
|
||||
"github.com/go-enry/go-enry/v2/data" //nolint:depguard - FIXME: replace this usage of enry with languages package
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/trace"
|
||||
|
||||
"github.com/sourcegraph/log"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/internal/trace"
|
||||
"github.com/sourcegraph/sourcegraph/lib/codeintel/languages"
|
||||
"github.com/sourcegraph/sourcegraph/lib/errors"
|
||||
)
|
||||
|
||||
@ -13,7 +13,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/go-enry/go-enry/v2" //nolint:depguard - FIXME: replace this usage of enry with languages package
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/lib/errors"
|
||||
)
|
||||
|
||||
@ -24,6 +24,9 @@ var Deny map[string]string = map[string]string{
|
||||
"regexp$": "Use github.com/grafana/regexp instead",
|
||||
"github.com/hexops/autogold$": "Use github.com/hexops/autogold/v2 instead",
|
||||
"github.com/google/go-github/github$": "Use github.com/google/go-github/v55/github instead. To convert between v48 and v55, use the internal/extsvc/github/githubconvert package",
|
||||
"github.com/go-enry/go-enry$": "Use github.com/sourcegraph/sourcegraph/lib/codeintel/languages instead. If some docs are not clear, please ask in #discuss-graph.",
|
||||
"github.com/go-enry/go-enry/v2$": "Use github.com/sourcegraph/sourcegraph/lib/codeintel/languages instead. If some docs are not clear, please ask in #discuss-graph.",
|
||||
"github.com/go-enry/go-enry/v2/data$": "Use github.com/sourcegraph/sourcegraph/lib/codeintel/languages instead. If some needed API is missing, please ask in #discuss-graph.",
|
||||
}
|
||||
|
||||
func createAnalyzer() *analysis.Analyzer {
|
||||
|
||||
@ -1066,8 +1066,12 @@ func (s *Service) SyntacticUsages(
|
||||
// (Meaning we just need a single Searcher/Zoekt search)
|
||||
searchSymbol := symbolsAtRange[0]
|
||||
|
||||
langs, langErr := languages.GetLanguages(path, func() ([]byte, error) { return nil, errors.New("Ambiguous language") })
|
||||
if langErr != nil || len(langs) == 0 {
|
||||
langs, _ := languages.GetLanguages(path, nil)
|
||||
if len(langs) != 1 {
|
||||
langErr := errors.New("Unknown language")
|
||||
if len(langs) > 1 {
|
||||
langErr = errors.New("Ambiguous language")
|
||||
}
|
||||
return nil, &SyntacticUsagesError{
|
||||
Code: SU_FailedToSearch,
|
||||
UnderlyingError: langErr,
|
||||
|
||||
@ -273,12 +273,14 @@ func NewMetaEnvironment(r searchresult.Match, content string) *MetaEnvironment {
|
||||
Content: string(m.Name),
|
||||
}
|
||||
case *searchresult.FileMatch:
|
||||
// GetLanguages can return multiple matches for ambiguous languages. If there are multiple
|
||||
// we will take the first one.
|
||||
languages, _ := languages.GetLanguages(m.Path, nil)
|
||||
var lang string
|
||||
if len(languages) > 0 {
|
||||
lang = languages[0]
|
||||
// FIXME(id: language-detection-failure-handling):
|
||||
// Handle failure in language detection as well as ambiguity
|
||||
langs, _ := languages.GetLanguages(m.Path, func() ([]byte, error) {
|
||||
return []byte(content), nil
|
||||
})
|
||||
lang := ""
|
||||
if len(langs) > 0 {
|
||||
lang = langs[0]
|
||||
}
|
||||
return &MetaEnvironment{
|
||||
Repo: string(m.Repo.Name),
|
||||
@ -298,12 +300,14 @@ func NewMetaEnvironment(r searchresult.Match, content string) *MetaEnvironment {
|
||||
}
|
||||
case *searchresult.CommitDiffMatch:
|
||||
path := m.Path()
|
||||
// GetLanguages can return multiple matches for ambiguous languages. If there are multiple
|
||||
// we will take the first one.
|
||||
languages, _ := languages.GetLanguages(path, nil)
|
||||
var lang string
|
||||
if len(languages) > 0 {
|
||||
lang = languages[0]
|
||||
// FIXME(id: language-detection-failure-handling):
|
||||
// Handle failure in language detection as well as ambiguity
|
||||
langs, _ := languages.GetLanguages(path, func() ([]byte, error) {
|
||||
return []byte(content), nil
|
||||
})
|
||||
lang := ""
|
||||
if len(langs) > 0 {
|
||||
lang = langs[0]
|
||||
}
|
||||
return &MetaEnvironment{
|
||||
Repo: string(m.Repo.Name),
|
||||
|
||||
@ -722,7 +722,7 @@ func toTextPatternInfo(b query.Basic, resultTypes result.Types, feat *search.Fea
|
||||
func toLangFilters(aliases []string) []string {
|
||||
var filters []string
|
||||
for _, alias := range aliases {
|
||||
lang, _ := languages.GetLanguageByAlias(alias) // Invariant: lang is valid.
|
||||
lang, _ := languages.GetLanguageByNameOrAlias(alias) // Invariant: lang is valid.
|
||||
if !slices.Contains(filters, lang) {
|
||||
filters = append(filters, lang)
|
||||
}
|
||||
|
||||
@ -4,7 +4,7 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/go-enry/go-enry/v2/data"
|
||||
"github.com/go-enry/go-enry/v2/data" //nolint:depguard - FIXME: Expose needed APIs in codeintel/languages
|
||||
"github.com/grafana/regexp"
|
||||
|
||||
"github.com/sourcegraph/sourcegraph/lib/codeintel/languages"
|
||||
@ -58,7 +58,7 @@ var filenamesFromLanguage = func() map[string][]string {
|
||||
// LangToFileRegexp converts a lang: parameter to its corresponding file
|
||||
// patterns for file filters. The lang value must be valid, cf. validate.go
|
||||
func LangToFileRegexp(lang string) string {
|
||||
lang, _ = languages.GetLanguageByAlias(lang) // Invariant: lang is valid.
|
||||
lang, _ = languages.GetLanguageByNameOrAlias(lang) // Invariant: lang is valid.
|
||||
extensions := languages.GetLanguageExtensions(lang)
|
||||
patterns := make([]string, len(extensions))
|
||||
for i, e := range extensions {
|
||||
|
||||
@ -199,7 +199,7 @@ func validateField(field, value string, negated bool, seen map[string]struct{})
|
||||
}
|
||||
|
||||
isLanguage := func() error {
|
||||
_, ok := languages.GetLanguageByAlias(value)
|
||||
_, ok := languages.GetLanguageByNameOrAlias(value)
|
||||
if !ok {
|
||||
return errors.Errorf("unknown language: %q", value)
|
||||
}
|
||||
|
||||
@ -19,8 +19,8 @@ go_library(
|
||||
"//internal/search/query",
|
||||
"//internal/search/repos",
|
||||
"//internal/search/streaming",
|
||||
"//lib/codeintel/languages",
|
||||
"//lib/errors",
|
||||
"@com_github_go_enry_go_enry_v2//:go-enry",
|
||||
"@com_github_grafana_regexp//:regexp",
|
||||
"@io_opentelemetry_go_otel//attribute",
|
||||
"@org_gonum_v1_gonum//stat/combin",
|
||||
|
||||
@ -6,9 +6,9 @@ import (
|
||||
"regexp/syntax" //nolint:depguard // using the grafana fork of regexp clashes with zoekt, which uses the std regexp/syntax.
|
||||
"strings"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/grafana/regexp"
|
||||
"github.com/sourcegraph/sourcegraph/internal/search/query"
|
||||
"github.com/sourcegraph/sourcegraph/lib/codeintel/languages"
|
||||
)
|
||||
|
||||
// rule represents a transformation function on a Basic query. Transformation
|
||||
@ -442,7 +442,7 @@ func langPatterns(b query.Basic) *query.Basic {
|
||||
var lang string // store the first pattern that matches a recognized language.
|
||||
isNegated := false
|
||||
newPattern := query.MapPattern(rawPatternTree, func(value string, negated bool, annotation query.Annotation) query.Node {
|
||||
langAlias, ok := enry.GetLanguageByAlias(value)
|
||||
langAlias, ok := languages.GetLanguageByNameOrAlias(value)
|
||||
if !ok || changed {
|
||||
return query.Pattern{
|
||||
Value: value,
|
||||
|
||||
@ -81,7 +81,7 @@ func QueryToZoektQuery(b query.Basic, resultTypes result.Types, feat *search.Fea
|
||||
}
|
||||
|
||||
func toLangFilter(lang string) zoekt.Q {
|
||||
lang, _ = languages.GetLanguageByAlias(lang) // Invariant: lang is valid.
|
||||
lang, _ = languages.GetLanguageByNameOrAlias(lang) // Invariant: lang is valid.
|
||||
return &zoekt.Language{Language: lang}
|
||||
}
|
||||
|
||||
|
||||
@ -4,6 +4,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
|
||||
go_library(
|
||||
name = "languages",
|
||||
srcs = [
|
||||
"enry_vendored.go",
|
||||
"extensions.go",
|
||||
"languages.go",
|
||||
],
|
||||
@ -23,6 +24,7 @@ go_test(
|
||||
tags = [TAG_PLATFORM_GRAPH],
|
||||
deps = [
|
||||
"@com_github_go_enry_go_enry_v2//:go-enry",
|
||||
"@com_github_go_enry_go_enry_v2//data",
|
||||
"@com_github_stretchr_testify//require",
|
||||
"@net_pgregory_rapid//:rapid",
|
||||
],
|
||||
|
||||
15
lib/codeintel/languages/enry_vendored.go
Normal file
15
lib/codeintel/languages/enry_vendored.go
Normal file
@ -0,0 +1,15 @@
|
||||
package languages
|
||||
|
||||
import "strings"
|
||||
|
||||
// This file contains private functions
|
||||
// vendored from the go-enry codebase.
|
||||
|
||||
// convertToAliasKey normalizes a language name or alias into the key format
// used for alias lookups: everything from the first comma onwards is dropped,
// spaces are replaced by underscores, and the result is lowercased.
//
// It is vendored from go-enry to make sure we're normalizing strings the
// same way, so its observable behavior must stay in sync with upstream.
func convertToAliasKey(langName string) string {
	// Only the part before the first comma is significant; when there is no
	// comma, Cut returns the whole input.
	ak, _, _ := strings.Cut(langName, `,`)
	ak = strings.ReplaceAll(ak, ` `, `_`)
	return strings.ToLower(ak)
}
|
||||
@ -2,30 +2,59 @@ package languages
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"slices"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/go-enry/go-enry/v2" //nolint:depguard - Only this package can use enry
|
||||
)
|
||||
|
||||
// getLanguagesByAlias is a replacement for enry.GetLanguagesByAlias
|
||||
// It supports languages that are missing in go-enry
|
||||
func GetLanguageByAlias(alias string) (lang string, ok bool) {
|
||||
normalizedAlias := strings.ToLower(alias)
|
||||
if lang, ok = unsupportedByEnryAliasMap[normalizedAlias]; ok {
|
||||
// GetLanguageByNameOrAlias returns the standardized name for
|
||||
// a language based on its name (in which case this is an identity operation)
|
||||
// or based on its alias, which is potentially an alternate name for
|
||||
// the language.
|
||||
//
|
||||
// Aliases are fully lowercase, and map N-1 to languages.
|
||||
//
|
||||
// For example,
|
||||
//
|
||||
// GetLanguageByNameOrAlias("ada") == "Ada", true
|
||||
// GetLanguageByNameOrAlias("ada95") == "Ada", true
|
||||
//
|
||||
// Historical note: This function was added for replacing usages of
|
||||
// enry.GetLanguageByAlias, which, unlike the name suggests, also
|
||||
// handles non-normalized names such as those with spaces.
|
||||
func GetLanguageByNameOrAlias(nameOrAlias string) (lang string, ok bool) {
|
||||
alias := convertToAliasKey(nameOrAlias)
|
||||
if lang, ok = unsupportedByEnryAliasMap[alias]; ok {
|
||||
return lang, true
|
||||
}
|
||||
|
||||
return enry.GetLanguageByAlias(normalizedAlias)
|
||||
return enry.GetLanguageByAlias(alias)
|
||||
}
|
||||
|
||||
// getLanguagesByExtension is a replacement for enry.GetLanguagesByExtension
|
||||
// It supports languages that are missing in go-enry
|
||||
func GetLanguageExtensions(alias string) []string {
|
||||
if lang, ok := unsupportedByEnryNameToExtensionMap[alias]; ok {
|
||||
// GetLanguageExtensions returns the list of file extensions for a given
|
||||
// language. Returned extensions are always prefixed with a '.'.
|
||||
//
|
||||
// The returned slice will be empty iff the language is not known.
|
||||
//
|
||||
// Handles more languages than enry.GetLanguageExtensions.
|
||||
//
|
||||
// Mutually consistent with getLanguagesByExtension, see the tests
|
||||
// for the exact invariants.
|
||||
func GetLanguageExtensions(language string) []string {
|
||||
if lang, ok := unsupportedByEnryNameToExtensionMap[language]; ok {
|
||||
return []string{lang}
|
||||
}
|
||||
|
||||
return enry.GetLanguageExtensions(alias)
|
||||
ignoreExts, isNiche := nicheExtensionUsages[language]
|
||||
// Force a copy to avoid accidentally modifying the global variable
|
||||
enryExts := slices.Clone(enry.GetLanguageExtensions(language))
|
||||
if !isNiche {
|
||||
return slices.Clone(enryExts)
|
||||
}
|
||||
return slices.DeleteFunc(enryExts, func(ext string) bool {
|
||||
_, shouldIgnore := ignoreExts[ext]
|
||||
return shouldIgnore
|
||||
})
|
||||
}
|
||||
|
||||
// getLanguagesByExtension is a replacement for enry.GetLanguagesByExtension
|
||||
@ -85,23 +114,62 @@ var overrideAmbiguousExtensionsMap = map[string]string{
|
||||
}
|
||||
|
||||
var unsupportedByEnryExtensionToNameMap = map[string]string{
|
||||
// Pkl Configuration Language (https://pkl-lang.org/)
|
||||
// NOTE: Add to linguist on 6/7/24
|
||||
// can remove once go-enry package updates
|
||||
// to that linguist version
|
||||
".pkl": "Pkl",
|
||||
// Magik Language
|
||||
// See TODO(id: remove-pkl-special-case)
|
||||
".pkl": "Pkl",
|
||||
".magik": "Magik",
|
||||
}
|
||||
|
||||
// nicheExtensionUsages keeps track of which (lang, extension) mappings
|
||||
// should not be considered.
|
||||
//
|
||||
// We cannot wholesale ignore these languages, as this list includes
|
||||
// languages like XML, but it can contain unusual extensions like '.tsx'
|
||||
// which we generally want to classify as TypeScript.
|
||||
var nicheExtensionUsages = func() map[string]map[string]struct{} {
|
||||
niche := map[string]map[string]struct{}{}
|
||||
considered := map[string]struct{}{}
|
||||
for _, lang := range overrideAmbiguousExtensionsMap {
|
||||
considered[lang] = struct{}{}
|
||||
}
|
||||
for ext := range overrideAmbiguousExtensionsMap {
|
||||
langs := enry.GetLanguagesByExtension("foo"+ext, nil, nil)
|
||||
for _, lang := range langs {
|
||||
if _, found := considered[lang]; !found {
|
||||
if m, hasMap := niche[lang]; hasMap {
|
||||
m[ext] = struct{}{}
|
||||
} else {
|
||||
niche[lang] = map[string]struct{}{ext: {}}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for specialOverrideExt, lang := range unsupportedByEnryExtensionToNameMap {
|
||||
considered[lang] = struct{}{}
|
||||
langs := enry.GetLanguagesByExtension("foo"+specialOverrideExt, nil, nil)
|
||||
for _, lang := range langs {
|
||||
if _, found := considered[lang]; !found {
|
||||
if m, hasMap := niche[lang]; hasMap {
|
||||
m[specialOverrideExt] = struct{}{}
|
||||
} else {
|
||||
niche[lang] = map[string]struct{}{specialOverrideExt: {}}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return niche
|
||||
}()
|
||||
|
||||
var unsupportedByEnryNameToExtensionMap = reverseMap(unsupportedByEnryExtensionToNameMap)
|
||||
|
||||
var unsupportedByEnryAliasMap = map[string]string{
|
||||
// Pkl Configuration Language (https://pkl-lang.org/)
|
||||
"pkl": "Pkl",
|
||||
// Magik Language
|
||||
"magik": "Magik",
|
||||
}
|
||||
// unsupportedByEnryAliasMap maps alias -> language name for languages
|
||||
// not tracked by go-enry.
|
||||
var unsupportedByEnryAliasMap = func() map[string]string {
|
||||
out := map[string]string{}
|
||||
for _, lang := range unsupportedByEnryExtensionToNameMap {
|
||||
out[convertToAliasKey(lang)] = lang
|
||||
}
|
||||
return out
|
||||
}()
|
||||
|
||||
func reverseMap(m map[string]string) map[string]string {
|
||||
n := make(map[string]string, len(m))
|
||||
|
||||
@ -1,9 +1,12 @@
|
||||
package languages
|
||||
|
||||
import (
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/go-enry/go-enry/v2" //nolint:depguard - This package is allowed to use enry
|
||||
enrydata "github.com/go-enry/go-enry/v2/data" //nolint:depguard - This package is allowed to use enry
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@ -42,7 +45,7 @@ var nonAmbiguousExtensionsCheck = map[string]string{
|
||||
|
||||
func TestGetLanguageByAlias_UnsupportedLanguages(t *testing.T) {
|
||||
for alias, name := range unsupportedByEnryAliasMap {
|
||||
resName, _ := GetLanguageByAlias(alias)
|
||||
resName, _ := GetLanguageByNameOrAlias(alias)
|
||||
require.Equal(t, name, resName,
|
||||
"maybe a typo in `unsupportedByEnryAliasMap`?")
|
||||
}
|
||||
@ -50,7 +53,7 @@ func TestGetLanguageByAlias_UnsupportedLanguages(t *testing.T) {
|
||||
|
||||
func TestGetLanguageByAlias_NonAmbiguousLanguages(t *testing.T) {
|
||||
for _, language := range nonAmbiguousExtensionsCheck {
|
||||
_, ok := GetLanguageByAlias(language)
|
||||
_, ok := GetLanguageByNameOrAlias(language)
|
||||
require.True(t, ok,
|
||||
"unable to find language %s in go-enry", language)
|
||||
}
|
||||
@ -113,3 +116,68 @@ func TestGetLanguagesByExtension_BinaryExtensions(t *testing.T) {
|
||||
filename)
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtensionsConsistency(t *testing.T) {
|
||||
for ext, overrideLang := range overrideAmbiguousExtensionsMap {
|
||||
filepath := "foo" + ext
|
||||
enryLangsForExt := enry.GetLanguagesByExtension(filepath, nil, nil)
|
||||
require.Containsf(t, enryLangsForExt, overrideLang, "overrideAmbiguousExtensionsMap maps extension %q to language %q but "+
|
||||
"that mapping is not present in enry's list %v", ext, overrideLang, enryLangsForExt)
|
||||
require.Greaterf(t, len(enryLangsForExt), 1, "overrideAmbiguousExtensionsMap states that"+
|
||||
"%q extension is ambiguous, but only found langs: %v", ext, enryLangsForExt)
|
||||
|
||||
candidates, isLikelyBinary := getLanguagesByExtension(filepath)
|
||||
require.False(t, isLikelyBinary, "ambiguous files are all source code")
|
||||
require.True(t, len(candidates) == 1, "getLanguagesByExtension should respect overrideAmbiguousExtensionsMap")
|
||||
|
||||
shouldBeIgnoredLangsForExt := slices.DeleteFunc(enryLangsForExt, func(s string) bool {
|
||||
return s == overrideLang
|
||||
})
|
||||
for _, shouldBeIgnoredLang := range shouldBeIgnoredLangsForExt {
|
||||
ignoredExts, found := nicheExtensionUsages[shouldBeIgnoredLang]
|
||||
require.Truef(t, found, "expected lang: %q to have an entry in nicheExtensionUsages for consistency with GetLanguagesByExtension", shouldBeIgnoredLang)
|
||||
require.Truef(t, len(ignoredExts) >= 1, "sets in nicheExtensionUsages must be non-empty")
|
||||
|
||||
nonNicheExts := GetLanguageExtensions(shouldBeIgnoredLang)
|
||||
for ignoredExt, _ := range ignoredExts {
|
||||
require.Falsef(t, slices.Contains(nonNicheExts, ignoredExt),
|
||||
"GetLanguageExtensions should not return %q for lang %q for consistency with GetLanguagesByExtension",
|
||||
ignoredExt, shouldBeIgnoredLang)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtensionsConsistency2(t *testing.T) {
|
||||
for lang, _ := range enrydata.ExtensionsByLanguage {
|
||||
for _, ext := range GetLanguageExtensions(lang) {
|
||||
if strings.Count(ext, ".") > 1 {
|
||||
// Ignore unusual edge cases like .coffee.md for Literate CoffeeScript
|
||||
continue
|
||||
}
|
||||
langsByExt, isLikelyBinary := getLanguagesByExtension("foo" + ext)
|
||||
if !isLikelyBinary {
|
||||
require.Truef(t, slices.Contains(langsByExt, lang),
|
||||
"expected getLanguagesByExtension result %v to contain %q (extension: %q)", langsByExt, lang, ext)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO(id: remove-pkl-special-case) Linguist v7.30.0 adds support for Pkl,
|
||||
// so when we upgrade to a matching go-enry version, we can remove special
|
||||
// cases for Pkl.
|
||||
func TestUnsupportedByEnry(t *testing.T) {
|
||||
for lang := range unsupportedByEnryNameToExtensionMap {
|
||||
_, found := enrydata.ExtensionsByLanguage[lang]
|
||||
require.False(t, found, "looks like language %q is supported by enry; remove it from unsupportedByEnryNameToExtensionMap")
|
||||
}
|
||||
for _, lang := range unsupportedByEnryAliasMap {
|
||||
_, found := enrydata.ExtensionsByLanguage[lang]
|
||||
require.False(t, found, "looks like language %q is supported by enry; remove it from unsupportedByEnryAliasMap")
|
||||
}
|
||||
for _, lang := range unsupportedByEnryExtensionToNameMap {
|
||||
_, found := enrydata.ExtensionsByLanguage[lang]
|
||||
require.False(t, found, "looks like language %q is supported by enry; remove it from unsupportedByEnryExtensionToNameMap")
|
||||
}
|
||||
}
|
||||
|
||||
@ -4,7 +4,7 @@ import (
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/go-enry/go-enry/v2" //nolint:depguard - This package is allowed to use enry.
|
||||
)
|
||||
|
||||
// Make sure all names are lowercase here, since they are normalized
|
||||
@ -62,52 +62,57 @@ func GetMostLikelyLanguage(path, contents string) (lang string, found bool) {
|
||||
// for simple `.h` files with just comments and macros, they may
|
||||
// be valid C, C++ or any of their derivative languages (e.g. Objective-C).
|
||||
func GetLanguages(path string, getContent func() ([]byte, error)) ([]string, error) {
|
||||
langs := enry.GetLanguagesByFilename(path, nil, nil)
|
||||
if len(langs) == 1 {
|
||||
return langs, nil
|
||||
}
|
||||
newLangs, isLikelyBinaryFile := getLanguagesByExtension(path)
|
||||
if isLikelyBinaryFile {
|
||||
return nil, nil
|
||||
}
|
||||
switch len(newLangs) {
|
||||
case 0:
|
||||
break
|
||||
case 1:
|
||||
return newLangs, nil
|
||||
default:
|
||||
langs = newLangs
|
||||
}
|
||||
if getContent == nil {
|
||||
return langs, nil
|
||||
}
|
||||
content, err := getContent()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(content) == 0 {
|
||||
return langs, nil
|
||||
}
|
||||
if enry.IsBinary(content) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// enry doesn't expose a way to call GetLanguages with a specific set of
|
||||
// strategies, so just hand-roll that code here.
|
||||
var languages = langs
|
||||
for _, strategy := range []enry.Strategy{enry.GetLanguagesByModeline, getLanguagesByShebang, enry.GetLanguagesByContent, enry.GetLanguagesByClassifier} {
|
||||
candidates := strategy(path, content, languages)
|
||||
switch len(candidates) {
|
||||
case 0:
|
||||
continue
|
||||
case 1:
|
||||
return candidates, nil
|
||||
default:
|
||||
languages = candidates
|
||||
impl := func() ([]string, error) {
|
||||
langs := enry.GetLanguagesByFilename(path, nil, nil)
|
||||
if len(langs) == 1 {
|
||||
return langs, nil
|
||||
}
|
||||
newLangs, isLikelyBinaryFile := getLanguagesByExtension(path)
|
||||
if isLikelyBinaryFile {
|
||||
return nil, nil
|
||||
}
|
||||
switch len(newLangs) {
|
||||
case 0:
|
||||
break
|
||||
case 1:
|
||||
return newLangs, nil
|
||||
default:
|
||||
langs = newLangs
|
||||
}
|
||||
if getContent == nil {
|
||||
return langs, nil
|
||||
}
|
||||
content, err := getContent()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(content) == 0 {
|
||||
return langs, nil
|
||||
}
|
||||
if enry.IsBinary(content) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// enry doesn't expose a way to call GetLanguages with a specific set of
|
||||
// strategies, so just hand-roll that code here.
|
||||
var languages = langs
|
||||
for _, strategy := range []enry.Strategy{enry.GetLanguagesByModeline, getLanguagesByShebang, enry.GetLanguagesByContent, enry.GetLanguagesByClassifier} {
|
||||
candidates := strategy(path, content, languages)
|
||||
switch len(candidates) {
|
||||
case 0:
|
||||
continue
|
||||
case 1:
|
||||
return candidates, nil
|
||||
default:
|
||||
languages = candidates
|
||||
}
|
||||
}
|
||||
|
||||
return languages, nil
|
||||
}
|
||||
|
||||
return languages, nil
|
||||
langs, err := impl()
|
||||
return slices.Clone(langs), err
|
||||
}
|
||||
|
||||
// getLanguagesByShebang is a replacement for enry.GetLanguagesByShebang.
|
||||
@ -125,5 +130,5 @@ func getLanguagesByShebang(path string, content []byte, candidates []string) []s
|
||||
return []string{"Raku"}
|
||||
}
|
||||
}
|
||||
return languages
|
||||
return slices.Clone(languages)
|
||||
}
|
||||
|
||||
@ -3,7 +3,7 @@ package languages
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/go-enry/go-enry/v2" //nolint:depguard - This package is allowed to use enry
|
||||
"github.com/stretchr/testify/require"
|
||||
"pgregory.net/rapid"
|
||||
)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user