Add MATLAB search-based code navigation (#56800)

* Add MATLAB tree-sitter grammar
* MATLAB scip-syntax based highlighting
* MATLAB scip-syntax based local code navigation
This commit is contained in:
Auguste Rame 2023-10-06 17:54:52 -04:00 committed by GitHub
parent db2196e503
commit 628bf10d01
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 558 additions and 55 deletions

View File

@ -1,5 +1,5 @@
{
"checksum": "edfef804723dc794cca785f7de2dc8def338c0fa9829ef5b2afab75ca692c755",
"checksum": "6b6ba3c51455d45ee7ca8cffdf32b99d0347fd1e79ac8571c5345a9a6e3d0932",
"crates": {
"addr2line 0.20.0": {
"name": "addr2line",
@ -9719,6 +9719,10 @@
"id": "tree-sitter-kotlin 0.2.11",
"target": "tree_sitter_kotlin"
},
{
"id": "tree-sitter-matlab 1.0.2",
"target": "tree_sitter_matlab"
},
{
"id": "tree-sitter-nickel 0.0.1",
"target": "tree_sitter_nickel"
@ -12926,6 +12930,74 @@
},
"license": "MIT"
},
"tree-sitter-matlab 1.0.2": {
"name": "tree-sitter-matlab",
"version": "1.0.2",
"repository": {
"Git": {
"remote": "https://github.com/acristoffers/tree-sitter-matlab",
"commitish": {
"Rev": "6071891a8c39600203eba20513666cf93b4d650a"
}
}
},
"targets": [
{
"Library": {
"crate_name": "tree_sitter_matlab",
"crate_root": "bindings/rust/lib.rs",
"srcs": [
"**/*.rs"
]
}
},
{
"BuildScript": {
"crate_name": "build_script_build",
"crate_root": "bindings/rust/build.rs",
"srcs": [
"**/*.rs"
]
}
}
],
"library_target_name": "tree_sitter_matlab",
"common_attrs": {
"compile_data_glob": [
"**"
],
"deps": {
"common": [
{
"id": "tree-sitter 0.20.10",
"target": "tree_sitter"
},
{
"id": "tree-sitter-matlab 1.0.2",
"target": "build_script_build"
}
],
"selects": {}
},
"edition": "2018",
"version": "1.0.2"
},
"build_script_attrs": {
"data_glob": [
"**"
],
"deps": {
"common": [
{
"id": "cc 1.0.83",
"target": "cc"
}
],
"selects": {}
}
},
"license": "MIT"
},
"tree-sitter-nickel 0.0.1": {
"name": "tree-sitter-nickel",
"version": "0.0.1",

View File

@ -1773,6 +1773,7 @@ dependencies = [
"tree-sitter-javascript",
"tree-sitter-jsonnet",
"tree-sitter-kotlin",
"tree-sitter-matlab",
"tree-sitter-nickel",
"tree-sitter-perl",
"tree-sitter-pod",
@ -2373,6 +2374,15 @@ dependencies = [
"tree-sitter",
]
[[package]]
name = "tree-sitter-matlab"
version = "1.0.2"
source = "git+https://github.com/acristoffers/tree-sitter-matlab?rev=6071891a8c39600203eba20513666cf93b4d650a#6071891a8c39600203eba20513666cf93b4d650a"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-nickel"
version = "0.0.1"

View File

@ -0,0 +1,26 @@
; Local code navigation query for MATLAB.
; Captures: @scope opens a scope, @definition.* marks a definition,
; @reference marks a usage.
; Assignment targets and names listed after `global` / `persistent` define
; variables. "oldest_is_definition" makes a later assignment to a name that
; already has a definition count as a reference to it instead of a new local.
(assignment left: (identifier) @definition.var (#set! "reassignment_behavior" "oldest_is_definition"))
(global_operator (identifier) @definition.var (#set! "reassignment_behavior" "oldest_is_definition"))
(persistent_operator (identifier) @definition.var (#set! "reassignment_behavior" "oldest_is_definition"))
; Every function definition opens a scope.
(function_definition) @scope
; The function's own name.
(function_definition
name: (identifier) @definition.function
)
; Input parameters.
(function_arguments
(identifier) @definition.term
)
; Output variables: either the `[a, b] = ...` form or a single identifier.
(function_output
[
(multioutput_variable
(identifier) @definition.term
)
(identifier) @definition.term
]
)
; A class definition is a scope and its name defines a type.
(class_definition name: (identifier) @definition.type) @scope
; Anonymous functions (`@(x, y) ...`) open a scope; their arguments are definitions.
(lambda) @scope
(lambda (arguments (identifier) @definition.term))
; Every identifier is captured as a reference.
(identifier) @reference

View File

@ -284,11 +284,13 @@ mod locals {
create_locals_configuration!(go, BundledParser::Go, "go");
create_locals_configuration!(perl, BundledParser::Perl, "perl");
create_locals_configuration!(matlab, BundledParser::Matlab, "matlab");
/// Returns the [`LocalConfiguration`] registered for `parser`.
///
/// Only Go, Perl and MATLAB have one; every other parser yields `None`.
pub fn get_local_configuration(parser: BundledParser) -> Option<&'static LocalConfiguration> {
    let configuration = match parser {
        BundledParser::Go => go(),
        BundledParser::Perl => perl(),
        BundledParser::Matlab => matlab(),
        // No locals configuration exists for the remaining parsers.
        _ => return None,
    };
    Some(configuration)
}

View File

@ -14,7 +14,7 @@ use crate::languages::LocalConfiguration;
pub struct Scope<'a> {
pub scope: Node<'a>,
pub range: PackedRange,
pub definitions: HashMap<&'a str, Definition<'a>>,
pub lvalues: HashMap<&'a str, LValue<'a>>,
pub references: HashMap<&'a str, Vec<Reference<'a>>>,
pub children: Vec<Scope<'a>>,
}
@ -44,7 +44,7 @@ impl<'a> Scope<'a> {
Self {
scope,
range: scope.into(),
definitions: HashMap::default(),
lvalues: HashMap::default(),
references: HashMap::default(),
children: vec![],
}
@ -62,27 +62,27 @@ impl<'a> Scope<'a> {
}
}
pub fn insert_definition(&mut self, definition: Definition<'a>) {
pub fn insert_lvalue(&mut self, lvalue: LValue<'a>) {
// TODO: Probably should assert that this is the root node?
if definition.scope_modifier == ScopeModifier::Global {
self.definitions.insert(definition.identifier, definition);
if lvalue.scope_modifier == ScopeModifier::Global {
self.lvalues.insert(lvalue.identifier, lvalue);
return;
}
if let Some(child) = self
.children
.iter_mut()
.find(|child| child.range.contains(&definition.range))
.find(|child| child.range.contains(&lvalue.range))
{
child.insert_definition(definition)
child.insert_lvalue(lvalue)
} else {
self.definitions.insert(definition.identifier, definition);
self.lvalues.insert(lvalue.identifier, lvalue);
}
}
pub fn insert_reference(&mut self, reference: Reference<'a>) {
if let Some(definition) = self.definitions.get(&reference.identifier) {
if definition.node.id() == reference.node.id() {
if let Some(lvalue) = self.lvalues.get(&reference.identifier) {
if lvalue.node.id() == reference.node.id() {
return;
}
}
@ -91,9 +91,18 @@ impl<'a> Scope<'a> {
&(reference.range.start_line, reference.range.start_col),
|r| (r.range.start_line, r.range.start_col),
) {
Ok(_) => {
// self.children[idx].insert_reference(reference);
todo!("I'm not sure what to do yet, think more now");
Ok(idx) => {
let child = &self.children[idx];
if child.range.end_line == reference.range.end_line
&& child.range.end_col == reference.range.end_col
{
eprintln!(
"Two or more scopes with identical ranges ({:#?}) detected while performing heuristic local code navigation indexing. This is likely an issue with a tree-sitter query. This will be ignored.", reference.range
);
return;
}
self.children[idx].insert_reference(reference);
}
Err(idx) => match idx {
0 => self
@ -125,7 +134,7 @@ impl<'a> Scope<'a> {
let mut empty_children = vec![];
for (idx, child) in self.children.iter().enumerate() {
if child.definitions.is_empty() {
if child.lvalues.is_empty() {
empty_children.push(idx);
}
}
@ -146,32 +155,79 @@ impl<'a> Scope<'a> {
pub fn into_occurrences(&mut self, hint: usize) -> Vec<Occurrence> {
let mut occs = Vec::with_capacity(hint);
self.rec_into_occurrences(&mut 0, &mut occs);
let mut declarations_above = vec![];
self.rec_into_occurrences(&mut 0, &mut occs, &mut declarations_above);
occs
}
fn rec_into_occurrences(&self, id: &mut usize, occurrences: &mut Vec<Occurrence>) {
fn rec_into_occurrences(
&self,
id: &mut usize,
occurrences: &mut Vec<Occurrence>,
declarations_above: &mut Vec<HashMap<&'a str, usize>>,
) {
let mut our_declarations_above = HashMap::<&str, usize>::default();
// TODO: I'm a little sad about this.
// We could probably make this a runtime option, where `self` has a `sorted` value
// that decides whether we need to or not. But on a huge file, this made no difference.
let mut values = self.definitions.values().collect::<Vec<_>>();
let mut values = self.lvalues.values().collect::<Vec<_>>();
values.sort_by_key(|d| &d.range);
for definition in values {
for lvalue in values {
*id += 1;
let symbol = format_symbol(Symbol::new_local(*id));
let symbol_roles = scip::types::SymbolRole::Definition.value();
let symbol = match lvalue.reassignment_behavior {
ReassignmentBehavior::NewestIsDefinition => {
let symbol = format_symbol(Symbol::new_local(*id));
our_declarations_above.insert(lvalue.identifier, *id);
let symbol_roles = scip::types::SymbolRole::Definition.value();
occurrences.push(scip::types::Occurrence {
range: definition.node.to_scip_range(),
symbol: symbol.clone(),
symbol_roles,
// syntax_kind: todo!(),
..Default::default()
});
occurrences.push(scip::types::Occurrence {
range: lvalue.node.to_scip_range(),
symbol: symbol.clone(),
symbol_roles,
..Default::default()
});
if let Some(references) = self.references.get(definition.identifier) {
symbol
}
ReassignmentBehavior::OldestIsDefinition => {
if let Some(above) = declarations_above
.into_iter()
.rev()
.find(|x| x.contains_key(lvalue.identifier))
{
let symbol = format_symbol(Symbol::new_local(
*above.get(lvalue.identifier).unwrap(),
));
occurrences.push(scip::types::Occurrence {
range: lvalue.node.to_scip_range(),
symbol: symbol.clone(),
..Default::default()
});
continue;
} else {
let symbol = format_symbol(Symbol::new_local(*id));
our_declarations_above.insert(lvalue.identifier, *id);
let symbol_roles = scip::types::SymbolRole::Definition.value();
occurrences.push(scip::types::Occurrence {
range: lvalue.node.to_scip_range(),
symbol: symbol.clone(),
symbol_roles,
..Default::default()
});
symbol
}
}
};
if let Some(references) = self.references.get(lvalue.identifier) {
for reference in references {
occurrences.push(scip::types::Occurrence {
range: reference.node.to_scip_range(),
@ -183,22 +239,27 @@ impl<'a> Scope<'a> {
self.children
.iter()
.for_each(|c| c.occurrences_for_children(definition, symbol.as_str(), occurrences));
.for_each(|c| c.occurrences_for_children(lvalue, symbol.as_str(), occurrences));
}
declarations_above.push(our_declarations_above);
self.children
.iter()
.for_each(|c| c.rec_into_occurrences(id, occurrences));
.for_each(|c| c.rec_into_occurrences(id, occurrences, declarations_above));
declarations_above.pop();
}
fn occurrences_for_children(
self: &Scope<'a>,
def: &Definition<'a>,
def: &LValue<'a>,
symbol: &str,
occurrences: &mut Vec<Occurrence>,
) {
if self.definitions.contains_key(def.identifier) {
return;
if let Some(def) = self.lvalues.get(def.identifier) {
match def.reassignment_behavior {
ReassignmentBehavior::NewestIsDefinition => return,
ReassignmentBehavior::OldestIsDefinition => {}
}
}
if let Some(references) = self.references.get(def.identifier) {
@ -217,12 +278,8 @@ impl<'a> Scope<'a> {
}
#[allow(dead_code)]
fn find_scopes_with(
&'a self,
scopes: &mut Vec<&Scope<'a>>,
// predicate: impl Fn(&Scope<'a>) -> bool,
) {
if self.definitions.is_empty() {
fn find_scopes_with(&'a self, scopes: &mut Vec<&Scope<'a>>) {
if self.lvalues.is_empty() {
scopes.push(self);
}
@ -251,13 +308,31 @@ pub enum ScopeModifier {
Global,
}
/// Defines how a repeated assignment to an already-defined name is treated.
/// Useful for languages that use the same syntax for defining a variable
/// and setting it, like Python.
///
/// Chosen per query pattern through the `reassignment_behavior` property
/// (`"newest_is_definition"` or `"oldest_is_definition"`); when a pattern
/// does not set it, the default (`NewestIsDefinition`) is used.
#[derive(Debug, Default, PartialEq, Eq)]
pub enum ReassignmentBehavior {
/// A repeated assignment is a new definition with its own local symbol.
///
/// a = 10
/// ^ local 1
/// a = 10
/// ^ local 2
#[default]
NewestIsDefinition,
/// A later assignment to a name that already has a definition is emitted
/// as a reference to the existing local rather than as a new definition.
///
/// a = 10
/// ^ local 1
/// a = 10
/// ^ local 1 (reference)
OldestIsDefinition,
}
#[derive(Debug)]
pub struct Definition<'a> {
pub struct LValue<'a> {
pub group: &'a str,
pub identifier: &'a str,
pub node: Node<'a>,
pub range: PackedRange,
pub scope_modifier: ScopeModifier,
pub reassignment_behavior: ReassignmentBehavior,
}
#[derive(Debug)]
@ -279,16 +354,17 @@ pub fn parse_tree<'a>(
let capture_names = config.query.capture_names();
let mut scopes = vec![];
let mut definitions = vec![];
let mut lvalues = vec![];
let mut references = vec![];
for m in cursor.matches(&config.query, root_node, source_bytes) {
let mut node = None;
let mut scope = None;
let mut definition = None;
let mut lvalue = None;
let mut reference = None;
let mut scope_modifier = None;
let mut reassignment_behavior = None;
for capture in m.captures {
let capture_name = match capture_names.get(capture.index as usize) {
@ -298,11 +374,15 @@ pub fn parse_tree<'a>(
node = Some(capture.node);
// TODO: Change all captures to lvalue in later PR
// I don't want to do this now as @definition.[...]
// is the standard capture we use throughout our codebase
// beyond just locals, so I want to keep things consistent
if capture_name.starts_with("definition") {
assert!(definition.is_none(), "only one definition per match");
definition = Some(capture_name);
assert!(lvalue.is_none(), "only one definition per match");
lvalue = Some(capture_name);
// Handle scope modifiers
// Handle modifiers
let properties = config.query.property_settings(m.pattern_index);
for prop in properties {
if &(*prop.key) == "scope" {
@ -310,9 +390,19 @@ pub fn parse_tree<'a>(
Some("global") => scope_modifier = Some(ScopeModifier::Global),
Some("parent") => scope_modifier = Some(ScopeModifier::Parent),
Some("local") => scope_modifier = Some(ScopeModifier::Local),
// TODO: Should probably error instead
Some(other) => panic!("unknown scope-testing: {}", other),
None => {}
Some(_) | None => unreachable!(),
}
} else if &(*prop.key) == "reassignment_behavior" {
match prop.value.as_deref() {
Some("newest_is_definition") => {
reassignment_behavior =
Some(ReassignmentBehavior::NewestIsDefinition)
}
Some("oldest_is_definition") => {
reassignment_behavior =
Some(ReassignmentBehavior::OldestIsDefinition)
}
Some(_) | None => unreachable!(),
}
}
}
@ -334,19 +424,22 @@ pub fn parse_tree<'a>(
None => continue,
};
if let Some(group) = definition {
if let Some(group) = lvalue {
let identifier = match node.utf8_text(source_bytes) {
Ok(identifier) => identifier,
Err(_) => continue,
};
let scope_modifier = scope_modifier.unwrap_or_default();
definitions.push(Definition {
let reassignment_behavior = reassignment_behavior.unwrap_or_default();
lvalues.push(LValue {
range: node.into(),
group,
identifier,
node,
scope_modifier,
reassignment_behavior,
});
} else if let Some(group) = reference {
let identifier = match node.utf8_text(source_bytes) {
@ -381,15 +474,15 @@ pub fn parse_tree<'a>(
)
});
let capacity = definitions.len() + references.len();
let capacity = lvalues.len() + references.len();
// Add all the scopes to our tree
while let Some(m) = scopes.pop() {
root.insert_scope(m);
}
while let Some(m) = definitions.pop() {
root.insert_definition(m);
while let Some(m) = lvalues.pop() {
root.insert_lvalue(m);
}
root.clean_empty_scopes();
@ -492,4 +585,16 @@ mod test {
Ok(())
}
/// Snapshot test for MATLAB local code navigation: parses
/// `testdata/locals.m` with the MATLAB locals configuration and compares the
/// dumped document against the stored `insta` snapshot.
#[test]
fn test_can_do_matlab() -> Result<()> {
// `unwrap` is intentional: a missing MATLAB configuration must fail the test.
let mut config = crate::languages::get_local_configuration(BundledParser::Matlab).unwrap();
// The fixture is embedded at compile time, so the test does no file I/O at runtime.
let source_code = include_str!("../testdata/locals.m");
let doc = parse_file_for_lang(&mut config, source_code)?;
// Keep the binding name `dumped`: the snapshot header records the asserted expression.
let dumped = snapshot_syntax_document(&doc, source_code);
insta::assert_snapshot!(dumped);
Ok(())
}
}

View File

@ -0,0 +1,141 @@
---
source: crates/scip-syntax/src/locals.rs
expression: dumped
---
a = 1;
//^ definition local 1
function myFunc()
// ^^^^^^ definition local 10
e = 5;
// ^ definition local 11
f = 6;
// ^ definition local 12
end
function myNestedFunc()
// ^^^^^^^^^^^^ definition local 13
g = 7;
// ^ definition local 14
function nestedChildFunc()
// ^^^^^^^^^^^^^^^ definition local 15
h = 8;
// ^ definition local 16
end
end
global i j
// ^ definition local 2
// ^ definition local 3
i = 9;
//^ reference local 2
j = 10;
//^ reference local 3
function myPersistFunc()
// ^^^^^^^^^^^^^ definition local 17
persistent k
// ^ definition local 18
if isempty(k)
// ^ reference local 18
k = 11;
// ^ reference local 18
end
end
function myScopeFunc()
// ^^^^^^^^^^^ definition local 19
m = 12;
// ^ definition local 20
n = 13;
// ^ definition local 21
global n
// ^ reference local 21
o = 14;
// ^ definition local 22
persistent o
// ^ reference local 22
end
function [a, b] = myFunction()
// ^ definition local 23
// ^ definition local 24
// ^^^^^^^^^^ definition local 25
a = 1;
// ^ reference local 23
b = [2, 3];
// ^ reference local 24
end
classdef MyClass
// ^^^^^^^ definition local 4
properties
Prop1
end
methods
function obj = MyClass(prop1)
// ^^^ definition local 26
// ^^^^^^^ definition local 27
// ^^^^^ definition local 28
obj.Prop1 = prop1;
// ^^^ reference local 26
// ^^^^^ reference local 28
end
function result = method1(obj)
// ^^^^^^ definition local 29
// ^^^^^^^ definition local 30
// ^^^ definition local 31
result = obj.Prop1;
// ^^^^^^ reference local 29
// ^^^ reference local 31
end
end
end
myObject = MyClass(5);
//^^^^^^^^ definition local 5
// ^^^^^^^ reference local 4
result = myObject.method1();
//^^^^^^ definition local 6
// ^^^^^^^^ reference local 5
addTwoNumbers = @(x, y) x + y;
//^^^^^^^^^^^^^ definition local 7
// ^ definition local 32
// ^ definition local 33
// ^ reference local 32
// ^ reference local 33
% TODO handle clear
% See https://github.com/sourcegraph/sourcegraph/issues/57399
slay = 12 % definition is here
//^^^^ definition local 8
clear slay
slay = 10 % and now it's here!
//^^^^ reference local 8
pog1 = 10
//^^^^ definition local 9
pog1 = 20
//^^^^ reference local 9
function f()
// ^ definition local 34
if false
pog2 = 1;
// ^^^^ definition local 35
else
% TODO: this should also be marked as a definition
pog2 = 2;
// ^^^^ reference local 35
end
disp(pog2);
// ^^^^ reference local 35
end

View File

@ -0,0 +1,81 @@
a = 1;
function myFunc()
e = 5;
f = 6;
end
function myNestedFunc()
g = 7;
function nestedChildFunc()
h = 8;
end
end
global i j
i = 9;
j = 10;
function myPersistFunc()
persistent k
if isempty(k)
k = 11;
end
end
function myScopeFunc()
m = 12;
n = 13;
global n
o = 14;
persistent o
end
function [a, b] = myFunction()
a = 1;
b = [2, 3];
end
classdef MyClass
properties
Prop1
end
methods
function obj = MyClass(prop1)
obj.Prop1 = prop1;
end
function result = method1(obj)
result = obj.Prop1;
end
end
end
myObject = MyClass(5);
result = myObject.method1();
addTwoNumbers = @(x, y) x + y;
% TODO handle clear
% See https://github.com/sourcegraph/sourcegraph/issues/57399
slay = 12 % definition is here
clear slay
slay = 10 % and now it's here!
pog1 = 10
pog1 = 20
function f()
if false
pog2 = 1;
else
% TODO: this should also be marked as a definition
pog2 = 2;
end
disp(pog2);
end

View File

@ -29,6 +29,7 @@ tree-sitter-typescript = "0.20.2"
tree-sitter-jsonnet = { git = "https://github.com/sourcegraph/tree-sitter-jsonnet", rev = "009e6f06266f46ae07077dd6c8026ded56ab7dd8" }
tree-sitter-kotlin = { git = "https://github.com/fwcd/tree-sitter-kotlin", rev = "100d79fd96b56a1b99099a8d2f3c114b8687acfb" }
tree-sitter-matlab = { git = "https://github.com/acristoffers/tree-sitter-matlab", rev = "6071891a8c39600203eba20513666cf93b4d650a" }
tree-sitter-nickel = { git = "https://github.com/nickel-lang/tree-sitter-nickel", rev = "d6c7eeb751038f934b5b1aa7ff236376d0235c56" }
tree-sitter-perl = { git = "https://github.com/sourcegraph/tree-sitter-perl", rev = "e1b4844afd17b7dc019a436b1ac890568d79a1f2" }
tree-sitter-pod = { git = "https://github.com/sourcegraph/tree-sitter-pod", rev = "f422a0dca6847c692e811f06fd92c6a75d647222" }

View File

@ -0,0 +1,57 @@
; Syntax highlighting query for MATLAB.
; Basic node kinds.
(comment) @comment
(identifier) @identifier
"function" @keyword.function
(return_statement) @keyword.return
(string) @string
(escape_sequence) @string.escape
; Operators. The `global` and `persistent` declaration nodes are captured
; as operators as well.
[
(global_operator)
(persistent_operator)
"+"
".+"
"-"
".-"
"*"
".*"
"/"
"./"
"\\"
".\\"
"^"
".^"
"|"
"&"
"&&"
"||"
"<"
"<="
"=="
"~="
">="
">"
"@"
"?"
"~"
] @operator
; Boolean literals.
[
"true"
"false"
] @boolean
(number) @number
; Control-flow keywords; `continue` and `break` are matched as statement nodes.
[
"end"
"if"
"else"
"for"
"case"
"switch"
"otherwise"
(continue_statement)
(break_statement)
] @keyword

View File

@ -151,6 +151,7 @@ lazy_static::lazy_static! {
Javascript,
Jsonnet,
Kotlin,
Matlab,
Nickel,
Perl,
Pod,

View File

@ -11,6 +11,7 @@ pub enum BundledParser {
Javascript,
Jsonnet,
Kotlin,
Matlab,
Nickel,
Perl,
Pod,
@ -38,6 +39,7 @@ impl BundledParser {
BundledParser::Javascript => tree_sitter_javascript::language(),
BundledParser::Jsonnet => tree_sitter_jsonnet::language(),
BundledParser::Kotlin => tree_sitter_kotlin::language(),
BundledParser::Matlab => tree_sitter_matlab::language(),
BundledParser::Nickel => tree_sitter_nickel::language(),
BundledParser::Perl => tree_sitter_perl::language(),
BundledParser::Pod => tree_sitter_pod::language(),
@ -63,6 +65,7 @@ impl BundledParser {
"javascript" => Some(BundledParser::Javascript),
"jsonnet" => Some(BundledParser::Jsonnet),
"kotlin" => Some(BundledParser::Kotlin),
"matlab" => Some(BundledParser::Matlab),
"nickel" => Some(BundledParser::Nickel),
"perl" => Some(BundledParser::Perl),
"pod" => Some(BundledParser::Pod),
@ -89,6 +92,7 @@ impl BundledParser {
BundledParser::Javascript => "javascript",
BundledParser::Jsonnet => "jsonnet",
BundledParser::Kotlin => "kotlin",
BundledParser::Matlab => "matlab",
BundledParser::Nickel => "nickel",
BundledParser::Perl => "perl",
BundledParser::Pod => "pod",
@ -114,6 +118,7 @@ impl BundledParser {
"java" => Some(BundledParser::Java),
"js" => Some(BundledParser::Javascript),
"jsonnet" => Some(BundledParser::Jsonnet),
"m" => Some(BundledParser::Matlab),
"kt" => Some(BundledParser::Kotlin),
"ncl" => Some(BundledParser::Nickel),
"pl" => Some(BundledParser::Perl),

View File

@ -11,6 +11,7 @@ var treesitterSupportedFiletypes = map[string]struct{}{
"jsonnet": {},
"jsx": {},
"kotlin": {},
"matlab": {},
"nickel": {},
"perl": {},
"python": {},

View File

@ -133,7 +133,8 @@ var baseEngineConfig = syntaxEngineConfig{
Default: EngineTreeSitter,
Overrides: map[string]EngineType{
// Languages enabled for advanced syntax features
"perl": EngineScipSyntax,
"perl": EngineScipSyntax,
"matlab": EngineScipSyntax,
},
}