diff --git a/doc/code_navigation/references/inference_configuration.md b/doc/code_navigation/references/inference_configuration.md index b0c3ba542f0..8db7ecbee4d 100644 --- a/doc/code_navigation/references/inference_configuration.md +++ b/doc/code_navigation/references/inference_configuration.md @@ -60,10 +60,10 @@ local snek_recognizer = recognizer.new_path_recognizer { pattern.new_path_basename("Snek.module"), -- Ignore any files in test or vendor directories - pattern.new_path_exclude(pattern.new_path_combine { + pattern.new_path_exclude( pattern.new_path_segment("test"), - pattern.new_path_segment("vendor"), - }), + pattern.new_path_segment("vendor") + ), }, -- Called with list of matching Snek.module files @@ -94,37 +94,88 @@ return require("sg.autoindex.config").new({ There are a number of specific and general-purpose Lua libraries made accessible via the built-in `require`. +The type signatures for the functions below use the following syntax: + +- `(A1, ..., An) -> R`: Function type with arguments of type `A1, ..., An` and return type `R`. +- `array[A]`: Table with indexes 1 to N of elements of type `A`. +- `table[K, V]`: Table with keys of type `K` and values of type `V`. +- `A | B`: Union type (includes values of type `A` and type `B`). +- `A...`: Variadic (0 or more values of type `A`, not wrapped in a table). +- `"mystring"`: Literal string type with only `"mystring"` as the allowed value. +- `{K1: V1, K2: V2, ...}`: Heterogeneous table (object) with a key of type `K1` mapping to a value of type `V1`, and so on. +- `void`: No value is returned from the function. + ### `sg.autoindex.recognizer` This auto-indexing-specific library defines the following two functions. -- `new_path_recognizer` creates a `recognizer` from a config object containing `patterns` and `generate` fields. See the [example](#example) above for basic usage. +- `new_path_recognizer` creates a `recognizer` from a config object containing `patterns` and `generate` fields.
See the [example](#example) above for basic usage. + - Type: + ``` + ({ + "patterns": array[pattern], + "patterns_for_content": array[pattern], + "generate": (registration_api, paths: array[string], contents_by_path: table[string, string]) -> array[index_job], + }) -> recognizer + ``` + where `index_job` is an object with the following shape: + ``` + index_job = { + "indexer": string, -- Docker image for the indexer + "root": string, -- working directory for invoking the indexer + "steps": array[{ -- preparatory steps to run before invoking the indexer (e.g. installing dependencies) + "root": string, -- working directory for this step + "image": string, -- Docker image to use for preparatory step + "commands": array[string], -- list of commands to run inside the Docker image + }], + "indexer_args": array[string], -- command-line arguments to be passed to the indexer + "outfile": string, -- path to the index generated by the indexer + } + ``` - `new_fallback_recognizer` creates a `recognizer` from an ordered list of `recognizer`s. Each `recognizer` is called sequentially, until one of them emits non-empty results. + - Type: `(array[recognizer]) -> recognizer` + +The `registration_api` object has the following API: +- `register` queues a `recognizer` to be run at a later stage. + This makes it possible to add more recognizers dynamically, + for example based on whether specific configuration files were found or not. + - Type: `(recognizer) -> void` ### `sg.autoindex.patterns` This auto-indexing-specific library defines the following four path pattern constructors. -- `new_path_literal(pattern)` creates a `pattern` that matches an exact filepath. -- `new_path_segment(pattern)` creates a `pattern` that matches a directory name. -- `new_path_basename(pattern)` creates a `pattern` that matches a basename exactly. -- `new_path_extension(ext_no_dot)` creates a `pattern` that matches files with a given extension.
+- `new_path_literal(fullpath)` creates a `pattern` that matches an exact filepath. + - Type: `(string) -> pattern` +- `new_path_segment(segment)` creates a `pattern` that matches a directory name. + - Type: `(string) -> pattern` +- `new_path_basename(basename)` creates a `pattern` that matches a basename exactly. + - Type: `(string) -> pattern` +- `new_path_extension(ext_no_leading_dot)` creates a `pattern` that matches files with a given extension. + - Type: `(string) -> pattern` This library also defines the following two pattern collection constructors. - `new_path_combine(patterns)` creates a pattern collection object (to be used with [recognizers](#sg-autoindex-recognizers)) from the given set of path `pattern`s. + - Type: `((pattern | array[pattern])...) -> pattern` - `new_path_exclude(patterns)` creates a new _inverted_ pattern collection object. Paths matching these `pattern`s are filtered out from the set of matching filepaths given to a recognizer's `generate` function. + - Type: `((pattern | array[pattern])...) -> pattern` -### `paths` +### `path` -This library defines the following five path utility functions: +This library defines the following utility functions: -- `ancestors(path)` returns a path's parent, grandparent, etc as a list. -- `basename(path)` returns the basename of the given path. -- `dirname(path)` returns the dirname of the given path. -- `join(paths)` returns a filepath created by joining the given path segments via filepath separator. -- `split(path)` split a path into an ordered sequence of path segments. +- `ancestors(path)` returns a list `{dirname(path), dirname(dirname(path)), ...}`. The last element in the list will be an empty string. + - Type: `(string) -> array[string]` +- `basename(path)` returns the basename of the given path as defined by Go's [filepath.Base](https://pkg.go.dev/path/filepath#Base). 
+ - Type: `(string) -> string` +- `dirname(path)` returns the dirname of the given path as defined by Go's [filepath.Dir](https://pkg.go.dev/path/filepath#Dir), except that it (1) returns an empty path instead of `"."` if the path is empty and (2) removes a leading `/` if present. + - Type: `(string) -> string` +- `join(path1, path2)` returns a filepath created by joining the two given path segments with the filepath separator. + - Type: `(string, string) -> string` +- `split(path)` is a convenience function that returns `dirname(path), basename(path)`. + - Type: `(string) -> string, string` ### `json` diff --git a/internal/codeintel/autoindexing/internal/inference/libs/patterns.go b/internal/codeintel/autoindexing/internal/inference/libs/patterns.go index 03f00da0015..412af5a127e 100644 --- a/internal/codeintel/autoindexing/internal/inference/libs/patterns.go +++ b/internal/codeintel/autoindexing/internal/inference/libs/patterns.go @@ -31,6 +31,7 @@ func (api patternAPI) LuaAPI() map[string]lua.LGFunction { } return map[string]lua.LGFunction{ + // type: (string, array[string]) -> pattern "backdoor": util.WrapLuaFunction(func(state *lua.LState) error { glob := state.CheckString(1) pathspecTable := state.CheckTable(2) @@ -48,7 +49,9 @@ func (api patternAPI) LuaAPI() map[string]lua.LGFunction { state.Push(luar.New(state, luatypes.NewPattern(glob, pathspecs))) return nil }), + // type: ((pattern | array[pattern])...) -> pattern "path_combine": util.WrapLuaFunction(newPathPatternCombineConstructor(luatypes.NewCombinedPattern)), + // type: ((pattern | array[pattern])...)
-> pattern "path_exclude": util.WrapLuaFunction(newPathPatternCombineConstructor(luatypes.NewExcludePattern)), } } diff --git a/internal/codeintel/autoindexing/internal/inference/libs/recognizers.go b/internal/codeintel/autoindexing/internal/inference/libs/recognizers.go index a8672155f25..201d01d9748 100644 --- a/internal/codeintel/autoindexing/internal/inference/libs/recognizers.go +++ b/internal/codeintel/autoindexing/internal/inference/libs/recognizers.go @@ -14,12 +14,18 @@ type recognizerAPI struct{} func (api recognizerAPI) LuaAPI() map[string]lua.LGFunction { return map[string]lua.LGFunction{ + // type: ({ + // "patterns": array[pattern], + // "patterns_for_content": array[pattern], + // "generate": (registration_api, paths: array[string], contents_by_path: table[string, string]) -> void, + // "hints": (registration_api, paths: array[string]) -> void + // }) -> recognizer "path_recognizer": util.WrapLuaFunction(func(state *lua.LState) error { recognizer, err := luatypes.RecognizerFromTable(state.CheckTable(1)) state.Push(luar.New(state, recognizer)) return err }), - + // type: (array[recognizer]) -> recognizer "fallback_recognizer": util.WrapLuaFunction(func(state *lua.LState) error { recognizers, err := util.MapSlice(state.CheckTable(1), func(value lua.LValue) (*luatypes.Recognizer, error) { return util.TypecheckUserData[*luatypes.Recognizer](value, "*Recognizer") diff --git a/internal/codeintel/autoindexing/internal/inference/lua/patterns.lua b/internal/codeintel/autoindexing/internal/inference/lua/patterns.lua index 9ba1f9b07b2..fe0b080cb8c 100644 --- a/internal/codeintel/autoindexing/internal/inference/lua/patterns.lua +++ b/internal/codeintel/autoindexing/internal/inference/lua/patterns.lua @@ -1,43 +1,50 @@ -local patterns = require "internal_patterns" +local pattern_lib = require "internal_patterns" local M = {} +-- type: (string, array[string]) -> pattern local new_pattern = function(glob, pathspecs) - return patterns.backdoor(glob, pathspecs) + 
return pattern_lib.backdoor(glob, pathspecs) end -- glob: /BUILD.bazel -- pathspec: BUILD.bazel -M.new_path_literal = function(pattern) - return new_pattern("/" .. pattern, {pattern}) +-- type: (string) -> pattern +M.new_path_literal = function(globlike) + return new_pattern("/" .. globlike, {globlike}) end -- glob: web/ -- pathspec: web/* (root) -- pathspec: */web/* (non-root) -M.new_path_segment = function(pattern) - return new_pattern(pattern .. "/", {pattern .. "/*", "*/" .. pattern .. "/*"}) +-- type: (string) -> pattern +M.new_path_segment = function(globlike) + return new_pattern(globlike .. "/", {globlike .. "/*", "*/" .. globlike .. "/*"}) end -- glob: gen.go -- pathspec: gen.go (root) -- pathspec: */gen.go (non-root) -M.new_path_basename = function(pattern) - return new_pattern(pattern, {pattern, "*/" .. pattern}) +-- type: (string) -> pattern +M.new_path_basename = function(globlike) + return new_pattern(globlike, {globlike, "*/" .. globlike}) end -- glob: *.md -- pathspec: *.md -M.new_path_extension = function(pattern) - return new_pattern("*." .. pattern, {"*." .. pattern}) +-- type: (string) -> pattern +M.new_path_extension = function(globlike) + return new_pattern("*." .. globlike, {"*." .. globlike}) end -M.new_path_combine = function(pattern) - return patterns.path_combine(pattern) +-- type: ((pattern | array[pattern])...) -> pattern; every argument is forwarded to pattern_lib.path_combine +M.new_path_combine = function(...) + return pattern_lib.path_combine(...) end -M.new_path_exclude = function(pattern) - return patterns.path_exclude(pattern) +-- type: ((pattern | array[pattern])...)
-> pattern; every argument is forwarded to pattern_lib.path_exclude +M.new_path_exclude = function(...) + return pattern_lib.path_exclude(...) end return M diff --git a/internal/codeintel/autoindexing/internal/inference/lua/recognizer.lua b/internal/codeintel/autoindexing/internal/inference/lua/recognizer.lua index 9e00f878d02..ce00fd5561d 100644 --- a/internal/codeintel/autoindexing/internal/inference/lua/recognizer.lua +++ b/internal/codeintel/autoindexing/internal/inference/lua/recognizer.lua @@ -30,10 +30,17 @@ local normalize = function(config) return config end +-- type: ({ +-- "patterns": array[pattern], +-- "patterns_for_content": array[pattern], +-- "generate": (registration_api, paths: array[string], contents_by_path: table[string, string]) -> void, +-- "hints": (registration_api, paths: array[string]) -> void +-- }) -> recognizer M.new_path_recognizer = function(config) return recognizers.path_recognizer(normalize(config)) end +-- type: (array[recognizer]) -> recognizer M.new_fallback_recognizer = function(config) return recognizers.fallback_recognizer(config) end diff --git a/internal/codeintel/autoindexing/internal/inference/service.go b/internal/codeintel/autoindexing/internal/inference/service.go index cccad98ea14..051ea191523 100644 --- a/internal/codeintel/autoindexing/internal/inference/service.go +++ b/internal/codeintel/autoindexing/internal/inference/service.go @@ -452,6 +452,11 @@ type registrationAPI struct { recognizers []*luatypes.Recognizer } +// Register adds another recognizer to be run at a later point. +// +// WARNING: This function is exposed directly to Lua through the 'api' parameter +// of the generate(..) function, so changing the signature may break existing +// auto-indexing scripts.
func (api *registrationAPI) Register(recognizer *luatypes.Recognizer) { api.recognizers = append(api.recognizers, recognizer) } diff --git a/internal/luasandbox/libs/paths.go b/internal/luasandbox/libs/paths.go index b3ea7e2fddf..355c6c085a0 100644 --- a/internal/luasandbox/libs/paths.go +++ b/internal/luasandbox/libs/paths.go @@ -15,21 +15,22 @@ type pathAPI struct{} func (api pathAPI) LuaAPI() map[string]lua.LGFunction { return map[string]lua.LGFunction{ + // type: (string) -> array[string] "ancestors": util.WrapLuaFunction(func(state *lua.LState) error { state.Push(luar.New(state, ancestorDirs(state.CheckString(1)))) return nil }), - + // type: (string) -> string "basename": util.WrapLuaFunction(func(state *lua.LState) error { state.Push(luar.New(state, filepath.Base(state.CheckString(1)))) return nil }), - + // type: (string) -> string "dirname": util.WrapLuaFunction(func(state *lua.LState) error { state.Push(luar.New(state, dirWithoutDot(state.CheckString(1)))) return nil }), - + // type: (string, string) -> string "join": util.WrapLuaFunction(func(state *lua.LState) error { state.Push(luar.New(state, filepath.Join(state.CheckString(1), state.CheckString(2)))) return nil diff --git a/internal/luasandbox/lua/path.lua b/internal/luasandbox/lua/path.lua index eba19763504..55809da876a 100644 --- a/internal/luasandbox/lua/path.lua +++ b/internal/luasandbox/lua/path.lua @@ -2,6 +2,7 @@ local internal_path = require "internal_path" local M = {} +-- type: (string) -> array[string] M.ancestors = function(path) local ax = internal_path.ancestors(path) @@ -13,18 +14,22 @@ M.ancestors = function(path) return t end +-- type: (string) -> string M.basename = function(path) return internal_path.basename(path) end +-- type: (string) -> string M.dirname = function(path) return internal_path.dirname(path) end +-- type: (string, string) -> string M.join = function(p1, p2) return internal_path.join(p1, p2) end +-- type: (string) -> string, string M.split = function(path) return
M.dirname(path), M.basename(path) end