mirror of
https://github.com/sourcegraph/sourcegraph.git
synced 2026-02-06 13:11:49 +00:00
Syntactic Indexing: add TAR archive indexing mode to scip-syntax CLI (#63097)
Fixes GRAPH-651
Fixes GRAPH-650

New features:
- `index tar <input>` indexes a TAR archive: `<input>` is either the path to a `.tar` file, or the literal string `-` to read the archive from `stdin`
- **BREAKING: Instead of flags controlling the type of input source, we now have subcommands: `index files`, `index workspace`, `index tar`**

Refactoring:
- Tests were improved, with helper functions broken down into composable pieces, and producing 1 snapshot per test, making them easier to manage
- Closures were removed from the indexing code and replaced with functions
- Calls to `.unwrap` were replaced with better error handling code
- Path canonicalisation was replaced with path cleanup + absolutisation, to avoid following symlinks (which is what `canonicalize` would do)

Most of the refactoring was triggered by the changes required to add more tests.

## Test plan
- New integration tests

---------

Co-authored-by: Christoph Hegemann <christoph.hegemann@sourcegraph.com>
This commit is contained in:
parent
c157fa82ff
commit
dfa60d6c9b
1130
docker-images/syntax-highlighter/Cargo.Bazel.lock
generated
1130
docker-images/syntax-highlighter/Cargo.Bazel.lock
generated
File diff suppressed because it is too large
Load Diff
169
docker-images/syntax-highlighter/Cargo.lock
generated
169
docker-images/syntax-highlighter/Cargo.lock
generated
@ -625,23 +625,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.2"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f"
|
||||
checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
|
||||
dependencies = [
|
||||
"errno-dragonfly",
|
||||
"libc",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "errno-dragonfly"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -656,9 +645,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.0.0"
|
||||
version = "2.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764"
|
||||
checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a"
|
||||
|
||||
[[package]]
|
||||
name = "fd-lock"
|
||||
@ -685,6 +674,18 @@ dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "filetime"
|
||||
version = "0.2.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"redox_syscall 0.4.1",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.27"
|
||||
@ -1068,9 +1069,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
version = "0.2.155"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
||||
|
||||
[[package]]
|
||||
name = "line-wrap"
|
||||
@ -1089,9 +1090,9 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.5"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
|
||||
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
@ -1350,6 +1351,12 @@ version = "1.0.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c"
|
||||
|
||||
[[package]]
|
||||
name = "path-clean"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef"
|
||||
|
||||
[[package]]
|
||||
name = "pear"
|
||||
version = "0.2.7"
|
||||
@ -1644,6 +1651,15 @@ dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_users"
|
||||
version = "0.4.3"
|
||||
@ -1815,15 +1831,15 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.8"
|
||||
version = "0.38.34"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f"
|
||||
checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
|
||||
dependencies = [
|
||||
"bitflags 2.4.0",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -1898,6 +1914,7 @@ dependencies = [
|
||||
"insta",
|
||||
"lazy_static",
|
||||
"paste",
|
||||
"path-clean",
|
||||
"predicates",
|
||||
"protobuf",
|
||||
"scip",
|
||||
@ -1905,6 +1922,8 @@ dependencies = [
|
||||
"serde_json",
|
||||
"string-interner",
|
||||
"syntax-analysis",
|
||||
"tar",
|
||||
"tempfile",
|
||||
"tree-sitter-all-languages",
|
||||
"walkdir",
|
||||
]
|
||||
@ -2161,16 +2180,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369"
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.8.0"
|
||||
name = "tar"
|
||||
version = "0.4.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb94d2f3cc536af71caac6b6fcebf65860b347e7ce0cc9ebe8f70d3e521054ef"
|
||||
checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb"
|
||||
dependencies = [
|
||||
"filetime",
|
||||
"libc",
|
||||
"xattr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempfile"
|
||||
version = "3.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"fastrand",
|
||||
"redox_syscall 0.3.5",
|
||||
"rustix",
|
||||
"windows-sys 0.48.0",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2892,6 +2921,15 @@ dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.42.2"
|
||||
@ -2922,6 +2960,22 @@ dependencies = [
|
||||
"windows_x86_64_msvc 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
|
||||
dependencies = [
|
||||
"windows_aarch64_gnullvm 0.52.5",
|
||||
"windows_aarch64_msvc 0.52.5",
|
||||
"windows_i686_gnu 0.52.5",
|
||||
"windows_i686_gnullvm",
|
||||
"windows_i686_msvc 0.52.5",
|
||||
"windows_x86_64_gnu 0.52.5",
|
||||
"windows_x86_64_gnullvm 0.52.5",
|
||||
"windows_x86_64_msvc 0.52.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.42.2"
|
||||
@ -2934,6 +2988,12 @@ version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_gnullvm"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.42.2"
|
||||
@ -2946,6 +3006,12 @@ version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_aarch64_msvc"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.42.2"
|
||||
@ -2958,6 +3024,18 @@ version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnu"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_gnullvm"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.42.2"
|
||||
@ -2970,6 +3048,12 @@ version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
|
||||
|
||||
[[package]]
|
||||
name = "windows_i686_msvc"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.42.2"
|
||||
@ -2982,6 +3066,12 @@ version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnu"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.42.2"
|
||||
@ -2994,6 +3084,12 @@ version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_gnullvm"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.42.2"
|
||||
@ -3006,6 +3102,12 @@ version = "0.48.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
|
||||
|
||||
[[package]]
|
||||
name = "windows_x86_64_msvc"
|
||||
version = "0.52.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.5.14"
|
||||
@ -3024,6 +3126,17 @@ dependencies = [
|
||||
"tap",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "xattr"
|
||||
version = "1.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yaml-rust"
|
||||
version = "0.4.5"
|
||||
|
||||
@ -59,6 +59,7 @@ syntect = { git = "https://github.com/sourcegraph/syntect", rev = "7e02c5b4085e6
|
||||
tree-sitter = "0.20.9"
|
||||
tree-sitter-highlight = "0.20.1"
|
||||
walkdir = "2"
|
||||
path-clean = "1"
|
||||
|
||||
scip = "0.3.2"
|
||||
protobuf = "3"
|
||||
|
||||
@ -94,6 +94,7 @@ rust_test(
|
||||
tags = [TAG_PLATFORM_GRAPH],
|
||||
deps = all_crate_deps(
|
||||
normal = True,
|
||||
normal_dev = True,
|
||||
) + [
|
||||
":scip-syntax",
|
||||
":scip_syntax_lib",
|
||||
|
||||
@ -23,6 +23,11 @@ serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
string-interner = { workspace = true }
|
||||
walkdir = { workspace = true }
|
||||
path-clean = { workspace = true }
|
||||
|
||||
syntax-analysis = { path = "../syntax-analysis" }
|
||||
tree-sitter-all-languages = { path = "../tree-sitter-all-languages" }
|
||||
tar = "0.4.40"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile="3.10.1"
|
||||
|
||||
@ -1,7 +1,13 @@
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::{
|
||||
env,
|
||||
fs::File,
|
||||
io::{self, prelude::*},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use clap::ValueEnum;
|
||||
use path_clean;
|
||||
use scip::{types::Document, write_message_to_file};
|
||||
use syntax_analysis::{get_globals, get_locals};
|
||||
use tree_sitter_all_languages::ParserId;
|
||||
@ -20,7 +26,7 @@ pub struct IndexOptions {
|
||||
pub fail_fast: bool,
|
||||
}
|
||||
|
||||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum)]
|
||||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ValueEnum, Debug)]
|
||||
pub enum AnalysisMode {
|
||||
/// Only extract occurrences of local definitions
|
||||
Locals,
|
||||
@ -39,12 +45,32 @@ impl AnalysisMode {
|
||||
}
|
||||
}
|
||||
|
||||
/// Where the TAR data for [`IndexMode::TarArchive`] is read from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TarMode {
    /// Data is streamed from STDIN
    Stdin,

    /// Data is read from a .tar file
    File { location: PathBuf },
}

/// Selects which inputs `index_command` processes.
///
/// Derives `Debug`/`Clone`/`PartialEq`/`Eq` so that values can be logged and
/// compared, in line with `AnalysisMode` in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexMode {
    /// Index only this list of files, without checking file extensions
    Files { list: Vec<String> },
    /// Discover all files that can be handled by the chosen language
    /// in the passed location (which has to be a directory)
    Workspace { location: PathBuf },

    /// Discover all files that can be handled by the chosen language
    /// in either a .tar file, or from STDIN to which TAR data is streamed
    TarArchive { input: TarMode },
}
|
||||
|
||||
fn make_absolute(cwd: &Path, path: &Path) -> PathBuf {
|
||||
if path.is_absolute() {
|
||||
path.to_owned()
|
||||
} else {
|
||||
path_clean::clean(cwd.join(path))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_command(
|
||||
@ -55,20 +81,17 @@ pub fn index_command(
|
||||
evaluate_against: Option<PathBuf>,
|
||||
options: IndexOptions,
|
||||
) -> Result<()> {
|
||||
let p = ParserId::from_name(&language).unwrap();
|
||||
let project_root = {
|
||||
match index_mode {
|
||||
IndexMode::Files { .. } => project_root,
|
||||
IndexMode::Workspace { ref location } => location.clone(),
|
||||
}
|
||||
};
|
||||
let parser_id = ParserId::from_name(&language)
|
||||
.context(format!("No parser found for language {language}"))?;
|
||||
|
||||
let canonical_project_root = project_root.canonicalize().with_context(|| {
|
||||
format!(
|
||||
"Failed to canonicalize project root: {}",
|
||||
project_root.display()
|
||||
)
|
||||
})?;
|
||||
let cwd = env::current_dir().context("Failed to get the current working directory")?;
|
||||
let absolute_project_root = make_absolute(
|
||||
&cwd,
|
||||
match &index_mode {
|
||||
IndexMode::Workspace { location } => location,
|
||||
_ => &project_root,
|
||||
},
|
||||
);
|
||||
|
||||
let mut index = scip::types::Index {
|
||||
metadata: Some(scip::types::Metadata {
|
||||
@ -79,68 +102,44 @@ pub fn index_command(
|
||||
..Default::default()
|
||||
})
|
||||
.into(),
|
||||
project_root: format!("file://{}", canonical_project_root.display()),
|
||||
project_root: format!("file://{}", absolute_project_root.display()),
|
||||
..Default::default()
|
||||
})
|
||||
.into(),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let mut index_file = |filepath: &Path| -> Result<()> {
|
||||
let contents = std::fs::read_to_string(filepath)
|
||||
.with_context(|| format!("Failed to read file at {}", filepath.display()))?;
|
||||
let filepath = if filepath.is_absolute() {
|
||||
filepath.to_owned()
|
||||
} else {
|
||||
filepath.canonicalize().with_context(|| {
|
||||
format!("Failed to canonicalize file path: {}", filepath.display())
|
||||
})?
|
||||
};
|
||||
let relative_path = filepath
|
||||
.strip_prefix(canonical_project_root.clone())
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to strip project root prefix: root={} file={}",
|
||||
canonical_project_root.display(),
|
||||
filepath.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
match index_content(&contents, p, &options) {
|
||||
Ok(mut document) => {
|
||||
document.relative_path = relative_path.display().to_string();
|
||||
index.documents.push(document);
|
||||
Ok(())
|
||||
}
|
||||
Err(error) => {
|
||||
if options.fail_fast {
|
||||
Err(anyhow!(
|
||||
"Failed to index {}: {:?}",
|
||||
filepath.display(),
|
||||
error
|
||||
))
|
||||
} else {
|
||||
eprintln!("Failed to index {}: {:?}", filepath.display(), error);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
let extensions = ParserId::language_extensions(&parser_id);
|
||||
|
||||
match index_mode {
|
||||
IndexMode::Files { list } => {
|
||||
let bar = create_progress_bar(list.len() as u64);
|
||||
for filename in list {
|
||||
let filepath = PathBuf::from(filename).canonicalize().unwrap();
|
||||
bar.set_message(filepath.display().to_string());
|
||||
index_file(&filepath)?;
|
||||
bar.set_message(filename.clone());
|
||||
let filepath = make_absolute(&cwd, &PathBuf::from(filename));
|
||||
let document = index_file(&filepath, parser_id, &absolute_project_root, &options)?;
|
||||
index.documents.push(document);
|
||||
bar.inc(1);
|
||||
}
|
||||
|
||||
bar.finish();
|
||||
}
|
||||
IndexMode::TarArchive { input } => match input {
|
||||
TarMode::File { location } => {
|
||||
let mut ar = tar::Archive::new(File::open(location)?);
|
||||
let entries = ar.entries()?;
|
||||
let documents = index_tar_entries(entries, parser_id, &options)?;
|
||||
index.documents.extend(documents);
|
||||
}
|
||||
TarMode::Stdin => {
|
||||
let stdin = io::stdin();
|
||||
let mut ar: tar::Archive<_> = tar::Archive::new(stdin);
|
||||
let entries = ar.entries()?;
|
||||
let documents = index_tar_entries(entries, parser_id, &options)?;
|
||||
index.documents.extend(documents);
|
||||
}
|
||||
},
|
||||
IndexMode::Workspace { location } => {
|
||||
let extensions = ParserId::language_extensions(&p);
|
||||
let bar = create_spinner();
|
||||
|
||||
for entry in walkdir::WalkDir::new(location) {
|
||||
@ -153,17 +152,21 @@ pub fn index_command(
|
||||
};
|
||||
if extensions.contains(extension) {
|
||||
bar.set_message(entry.path().display().to_string());
|
||||
index_file(&entry.into_path())?;
|
||||
let document = index_file(
|
||||
&entry.into_path(),
|
||||
parser_id,
|
||||
&absolute_project_root,
|
||||
&options,
|
||||
)?;
|
||||
index.documents.push(document);
|
||||
bar.tick();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
|
||||
eprintln!(
|
||||
"Writing index for {} documents into {}",
|
||||
"\nWriting index for {} documents into {}",
|
||||
index.documents.len(),
|
||||
out.display()
|
||||
);
|
||||
@ -179,11 +182,102 @@ pub fn index_command(
|
||||
.write_summary(&mut std::io::stdout(), Default::default())?
|
||||
}
|
||||
|
||||
write_message_to_file(out.clone(), index)
|
||||
write_message_to_file(&out, index)
|
||||
.map_err(|err| anyhow!("{err:?}"))
|
||||
.with_context(|| format!("When writing index to {}", out.display()))
|
||||
}
|
||||
|
||||
fn index_file(
|
||||
filepath: &Path,
|
||||
parser_id: ParserId,
|
||||
absolute_project_root: &Path,
|
||||
options: &IndexOptions,
|
||||
) -> Result<Document> {
|
||||
let contents = std::fs::read_to_string(filepath)
|
||||
.with_context(|| format!("Failed to read file at {}", filepath.display()))?;
|
||||
|
||||
let relative_path = filepath
|
||||
.strip_prefix(absolute_project_root)
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"Failed to strip project root prefix: root={} file={}",
|
||||
absolute_project_root.display(),
|
||||
filepath.display()
|
||||
)
|
||||
})?;
|
||||
|
||||
match index_content(&contents, parser_id, options) {
|
||||
Ok(mut document) => {
|
||||
document.relative_path = relative_path.display().to_string();
|
||||
Ok(document)
|
||||
}
|
||||
Err(error) => {
|
||||
bail!("Failed to index {}: {:?}", filepath.display(), error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn index_tar_entries<R: Read>(
|
||||
entries: tar::Entries<'_, R>,
|
||||
parser: ParserId,
|
||||
options: &IndexOptions,
|
||||
) -> anyhow::Result<Vec<Document>> {
|
||||
let extensions = ParserId::language_extensions(&parser);
|
||||
let mut contents = String::new();
|
||||
let mut documents: Vec<Document> = vec![];
|
||||
let mut progress = 0;
|
||||
let spinner = create_spinner();
|
||||
for entry in entries {
|
||||
let mut e = entry?;
|
||||
let path = PathBuf::from(e.path()?);
|
||||
|
||||
if matches!(path.extension().and_then(|e| e.to_str()), Some(ext) if extensions.contains(ext))
|
||||
{
|
||||
match e.read_to_string(&mut contents) {
|
||||
Ok(size) => {
|
||||
match index_content(&contents, parser, options) {
|
||||
Ok(mut document) => {
|
||||
document.relative_path = path.display().to_string();
|
||||
documents.push(document);
|
||||
}
|
||||
Err(error) => {
|
||||
if options.fail_fast {
|
||||
anyhow::bail!("Failed to index {}: {:?}", path.display(), error);
|
||||
} else {
|
||||
eprintln!("Failed to index {}: {:?}", path.display(), error);
|
||||
}
|
||||
}
|
||||
}
|
||||
if size > 0 {
|
||||
contents.clear();
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
if options.fail_fast {
|
||||
anyhow::bail!(
|
||||
"Failed to read contents of path {}: {:?}",
|
||||
path.display(),
|
||||
error
|
||||
)
|
||||
} else {
|
||||
eprintln!(
|
||||
"Failed to read contents of path {}: {:?}",
|
||||
path.display(),
|
||||
error
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
progress += 1;
|
||||
spinner.set_message(format!("[{}]: {}", progress, path.display()));
|
||||
spinner.tick();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(documents)
|
||||
}
|
||||
|
||||
fn index_content(contents: &str, parser: ParserId, options: &IndexOptions) -> Result<Document> {
|
||||
let mut document: Document;
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
use std::{path::PathBuf, process};
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use scip_syntax::index::{index_command, AnalysisMode, IndexMode, IndexOptions};
|
||||
use scip_syntax::index::{index_command, AnalysisMode, IndexMode, IndexOptions, TarMode};
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(author, version, about, long_about = None)]
|
||||
@ -11,44 +11,78 @@ struct Cli {
|
||||
command: Commands,
|
||||
}
|
||||
|
||||
#[derive(Parser, Clone, Debug)]
|
||||
struct IndexCommandOptions {
|
||||
/// Which language parser to use to process the files
|
||||
#[arg(short, long)]
|
||||
language: String,
|
||||
|
||||
/// Path where the SCIP index will be written
|
||||
#[arg(short, long, default_value = "./index.scip")]
|
||||
out: String,
|
||||
|
||||
/// Analysis mode
|
||||
#[arg(short, long, default_value = "full")]
|
||||
mode: AnalysisMode,
|
||||
|
||||
/// Fail on first error
|
||||
#[arg(long, default_value_t = false)]
|
||||
fail_fast: bool,
|
||||
|
||||
/// Project root to write to SCIP index
|
||||
#[arg(short, long, default_value = "./")]
|
||||
project_root: String,
|
||||
|
||||
/// Evaluate the build index against an index from a file
|
||||
#[arg(long)]
|
||||
evaluate: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Subcommand, Debug)]
|
||||
enum IndexCommand {
|
||||
/// Index a folder, automatically detecting files
|
||||
/// to be processed by the chosen language
|
||||
Workspace {
|
||||
/// Folder to index - will be chosen as project root,
|
||||
/// and files will be discovered according to
|
||||
/// configured extensions for the selected language
|
||||
/// Has to be absolute path.
|
||||
dir: String,
|
||||
|
||||
#[command(flatten)]
|
||||
options: IndexCommandOptions,
|
||||
},
|
||||
|
||||
/// Index a list of files
|
||||
Files {
|
||||
/// List of files to analyse
|
||||
filenames: Vec<String>,
|
||||
|
||||
#[command(flatten)]
|
||||
options: IndexCommandOptions,
|
||||
},
|
||||
|
||||
/// Index a .tar archive, either from a file or streaming from STDIN
|
||||
Tar {
|
||||
/// Either a path to .tar file, or "-" to read .tar data from STDIN
|
||||
tar: String,
|
||||
|
||||
#[command(flatten)]
|
||||
options: IndexCommandOptions,
|
||||
},
|
||||
}
|
||||
#[derive(Parser, Debug)]
|
||||
struct IndexCommandParser {
|
||||
#[structopt(subcommand)]
|
||||
index_command: IndexCommand,
|
||||
}
|
||||
|
||||
#[derive(Subcommand)]
|
||||
enum Commands {
|
||||
/// Index source files using Tree Sitter parser for a given language
|
||||
/// and produce a SCIP file
|
||||
Index {
|
||||
/// Which language parser to use to process the files
|
||||
#[arg(short, long)]
|
||||
language: String,
|
||||
|
||||
/// Path where the SCIP index will be written
|
||||
#[arg(short, long, default_value = "./index.scip")]
|
||||
out: String,
|
||||
|
||||
/// Folder to index - will be chosen as project root,
|
||||
/// and files will be discovered according to
|
||||
/// configured extensions for the selected language
|
||||
#[arg(long)]
|
||||
workspace: Option<String>,
|
||||
|
||||
/// List of files to analyse
|
||||
filenames: Vec<String>,
|
||||
|
||||
/// Analysis mode
|
||||
#[arg(short, long, default_value = "full")]
|
||||
mode: AnalysisMode,
|
||||
|
||||
/// Fail on first error
|
||||
#[arg(long, default_value_t = false)]
|
||||
fail_fast: bool,
|
||||
|
||||
/// Project root to write to SCIP index
|
||||
#[arg(short, long, default_value = "./")]
|
||||
project_root: String,
|
||||
|
||||
/// Evaluate the build index against an index from a file
|
||||
#[arg(long)]
|
||||
evaluate: Option<String>,
|
||||
},
|
||||
#[clap(name = "index")]
|
||||
Index(IndexCommandParser),
|
||||
|
||||
/// Fuzzily evaluate candidate SCIP index against known ground truth
|
||||
ScipEvaluate {
|
||||
@ -99,49 +133,44 @@ pub fn main() -> anyhow::Result<()> {
|
||||
let cli = Cli::parse();
|
||||
|
||||
match cli.command {
|
||||
Commands::Index {
|
||||
language,
|
||||
out,
|
||||
filenames,
|
||||
workspace,
|
||||
mode,
|
||||
fail_fast,
|
||||
project_root,
|
||||
evaluate,
|
||||
} => {
|
||||
let index_mode = {
|
||||
match workspace {
|
||||
None => {
|
||||
if filenames.is_empty() {
|
||||
eprintln!("either specify --workspace or provide a list of files");
|
||||
process::exit(1)
|
||||
}
|
||||
IndexMode::Files { list: filenames }
|
||||
Commands::Index(index1) => {
|
||||
let result = match index1.index_command {
|
||||
IndexCommand::Files { filenames, options } => {
|
||||
if filenames.is_empty() {
|
||||
eprintln!("List of files cannot be empty");
|
||||
process::exit(1)
|
||||
}
|
||||
Some(location) => {
|
||||
if !filenames.is_empty() {
|
||||
eprintln!("--workspace option cannot be combined with a list of files");
|
||||
process::exit(1)
|
||||
} else {
|
||||
IndexMode::Workspace {
|
||||
location: location.into(),
|
||||
}
|
||||
}
|
||||
run_index_command(options, IndexMode::Files { list: filenames })
|
||||
}
|
||||
IndexCommand::Workspace { dir, options } => run_index_command(
|
||||
options,
|
||||
IndexMode::Workspace {
|
||||
location: dir.into(),
|
||||
},
|
||||
),
|
||||
|
||||
IndexCommand::Tar { tar, options } => {
|
||||
if tar == "-" {
|
||||
run_index_command(
|
||||
options,
|
||||
IndexMode::TarArchive {
|
||||
input: scip_syntax::index::TarMode::Stdin,
|
||||
},
|
||||
)
|
||||
} else {
|
||||
run_index_command(
|
||||
options,
|
||||
IndexMode::TarArchive {
|
||||
input: TarMode::File {
|
||||
location: PathBuf::from(tar),
|
||||
},
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
index_command(
|
||||
language,
|
||||
index_mode,
|
||||
PathBuf::from(out),
|
||||
PathBuf::from(project_root),
|
||||
evaluate.map(PathBuf::from),
|
||||
IndexOptions {
|
||||
analysis_mode: mode,
|
||||
fail_fast,
|
||||
},
|
||||
)?
|
||||
result.unwrap()
|
||||
}
|
||||
|
||||
Commands::ScipEvaluate {
|
||||
@ -166,3 +195,17 @@ pub fn main() -> anyhow::Result<()> {
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_index_command(options: IndexCommandOptions, mode: IndexMode) -> anyhow::Result<()> {
|
||||
index_command(
|
||||
options.language,
|
||||
mode,
|
||||
PathBuf::from(options.out),
|
||||
PathBuf::from(options.project_root),
|
||||
options.evaluate.map(PathBuf::from),
|
||||
IndexOptions {
|
||||
analysis_mode: options.mode,
|
||||
fail_fast: options.fail_fast,
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
4
docker-images/syntax-highlighter/crates/scip-syntax/testdata/package-info.java
vendored
Normal file
4
docker-images/syntax-highlighter/crates/scip-syntax/testdata/package-info.java
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
@Deprecated
|
||||
package foo.bar;
|
||||
|
||||
class Baz {}
|
||||
@ -1,11 +1,13 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
env::temp_dir,
|
||||
collections::{HashMap, HashSet},
|
||||
io::Write,
|
||||
path::{Path, PathBuf},
|
||||
process::Command,
|
||||
process::{Command, Stdio},
|
||||
};
|
||||
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use assert_cmd::{cargo::cargo_bin, prelude::*};
|
||||
use scip::types::Document;
|
||||
use scip_syntax::{
|
||||
evaluate::Evaluator,
|
||||
index::{index_command, AnalysisMode, IndexMode, IndexOptions},
|
||||
@ -37,6 +39,7 @@ lazy_static::lazy_static! {
|
||||
}
|
||||
|
||||
use syntax_analysis::snapshot::{dump_document_with_config, EmitSymbol, SnapshotOptions};
|
||||
use tar::{Builder, Header};
|
||||
|
||||
fn snapshot_syntax_document(doc: &scip::types::Document, source: &str) -> String {
|
||||
dump_document_with_config(
|
||||
@ -54,7 +57,7 @@ fn snapshot_syntax_document(doc: &scip::types::Document, source: &str) -> String
|
||||
fn java_e2e_evaluation() {
|
||||
let dir = BASE.join("testdata/java");
|
||||
|
||||
let out_dir = temp_dir();
|
||||
let out_dir = tempdir();
|
||||
|
||||
let candidate = out_dir.join("index-tree-sitter.scip");
|
||||
|
||||
@ -94,55 +97,273 @@ fn java_e2e_evaluation() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn java_e2e_indexing() {
|
||||
let out_dir = temp_dir();
|
||||
let setup = HashMap::from([(
|
||||
PathBuf::from("globals.java"),
|
||||
include_str!("../testdata/globals.java").to_string(),
|
||||
)]);
|
||||
fn java_files_indexing() {
|
||||
let out_dir = tempdir();
|
||||
let setup = indexing_data();
|
||||
|
||||
run_index(&out_dir, &setup, vec!["--language", "java"]);
|
||||
let mut cmd = command("index");
|
||||
let output_location = out_dir.join("index.scip");
|
||||
let paths = extract_paths(&setup);
|
||||
|
||||
let index = read_index_from_file(&out_dir.join("index.scip")).unwrap();
|
||||
prepare(&out_dir, &setup).unwrap();
|
||||
|
||||
for doc in &index.documents {
|
||||
let path = &doc.relative_path;
|
||||
let dumped = snapshot_syntax_document(doc, setup.get(&PathBuf::from(&path)).expect("??"));
|
||||
cmd.args(vec![
|
||||
"files",
|
||||
"--language",
|
||||
"java",
|
||||
"--out",
|
||||
output_location.to_str().unwrap(),
|
||||
])
|
||||
.current_dir(&out_dir)
|
||||
.args(paths)
|
||||
.assert()
|
||||
.success();
|
||||
|
||||
insta::assert_snapshot!(path.clone(), dumped);
|
||||
}
|
||||
let index = read_index_from_file(&output_location).unwrap();
|
||||
|
||||
assert_eq!(extract_paths(&setup), extract_indexed_paths(&index));
|
||||
|
||||
let index_snapshot = snapshot_from_files(&index.documents, &out_dir);
|
||||
|
||||
insta::assert_snapshot!(index_snapshot);
|
||||
}
|
||||
|
||||
fn prepare(temp: &Path, files: &HashMap<PathBuf, String>) {
|
||||
/// End-to-end check of the `index workspace` subcommand.
///
/// Writes the fixture files into a fresh temporary directory, indexes that
/// directory as a workspace, then verifies that exactly the fixture paths were
/// indexed and snapshots the resulting documents (sources are re-read from disk).
#[test]
fn java_workspace_indexing() {
    let setup = indexing_data();
    let out_dir = tempdir();
    let output_location = out_dir.join("index.scip");

    prepare(&out_dir, &setup).unwrap();

    let workspace_arg = out_dir.to_str().unwrap();
    let out_arg = output_location.to_str().unwrap();

    let mut cmd = command("index");
    cmd.arg("workspace")
        .arg(workspace_arg)
        .args(["--language", "java"])
        .args(["--out", out_arg])
        .assert()
        .success();

    let index = read_index_from_file(&output_location).unwrap();

    // Every fixture file must show up in the index, and nothing else.
    let expected_paths = extract_paths(&setup);
    let indexed_paths = extract_indexed_paths(&index);
    assert_eq!(expected_paths, indexed_paths);

    let index_snapshot = snapshot_from_files(&index.documents, &out_dir);

    insta::assert_snapshot!(index_snapshot);
}
|
||||
|
||||
/// End-to-end check of the `index tar <file>` subcommand.
///
/// Packs the fixture files into an in-memory TAR archive, writes it to
/// `test.tar` in a temporary directory, indexes that archive, then verifies the
/// indexed paths and snapshots the documents against the in-memory sources.
#[test]
fn java_tar_file_indexing() {
    let setup = indexing_data();
    let data = create_tar(&setup).unwrap();

    let out_dir = tempdir();
    let tar_file = out_dir.join("test.tar");
    let output_location = out_dir.join("index.scip");

    write_file_bytes(&tar_file, &data).unwrap();

    let tar_arg = tar_file.to_str().unwrap();
    let out_arg = output_location.to_str().unwrap();

    let mut cmd = command("index");
    cmd.arg("tar")
        .arg(tar_arg)
        .args(["--language", "java"])
        .args(["--out", out_arg])
        .assert()
        .success();

    let index = read_index_from_file(&output_location).unwrap();

    // Every archived file must show up in the index, and nothing else.
    let expected_paths = extract_paths(&setup);
    let indexed_paths = extract_indexed_paths(&index);
    assert_eq!(expected_paths, indexed_paths);

    let index_snapshot = snapshot_from_data(&index.documents, &setup);

    insta::assert_snapshot!(index_snapshot);
}
|
||||
|
||||
/// End-to-end check of the `index tar -` subcommand (archive read from stdin).
///
/// Packs the fixture files into an in-memory TAR archive and pipes it to the
/// CLI's standard input. The archive is deliberately NOT written to disk, so
/// the test can only pass if the indexer really consumes stdin.
#[test]
fn java_tar_stream_indexing() {
    let setup = indexing_data();
    let data = create_tar(&setup).unwrap();

    let out_dir = tempdir();
    let output_location = out_dir.join("index.scip");

    let mut spawned = command("index")
        .args([
            "tar",
            "-",
            "--language",
            "java",
            "--out",
            output_location.to_str().unwrap(),
        ])
        .stdin(Stdio::piped())
        .spawn()
        .unwrap();

    {
        // Scope the handle so it is dropped (closing the pipe) before we wait:
        // the child only sees end-of-archive once stdin is closed.
        let mut stdin = spawned.stdin.take().unwrap();
        stdin.write_all(&data).unwrap();
    }

    let exit_status = spawned.wait().unwrap();

    assert!(
        exit_status.success(),
        "indexing a tar stream from stdin failed with {exit_status}"
    );

    let index = read_index_from_file(&output_location).unwrap();

    // Every archived file must show up in the index, and nothing else.
    assert_eq!(extract_paths(&setup), extract_indexed_paths(&index));

    let index_snapshot = snapshot_from_data(&index.documents, &setup);

    insta::assert_snapshot!(index_snapshot);
}
|
||||
|
||||
fn prepare(temp: &Path, files: &HashMap<PathBuf, String>) -> Result<()> {
|
||||
for (path, contents) in files.iter() {
|
||||
let file_path = temp.join(path);
|
||||
write_file(&file_path, contents);
|
||||
write_file_string(&file_path, contents)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn run_index(location: &PathBuf, files: &HashMap<PathBuf, String>, extra_arguments: Vec<&str>) {
|
||||
prepare(location, files);
|
||||
|
||||
let mut base_args = vec!["index"];
|
||||
base_args.extend(extra_arguments);
|
||||
|
||||
fn command(sub: &str) -> Command {
|
||||
let mut cmd = Command::new(BINARY_LOCATION.to_str().unwrap());
|
||||
|
||||
cmd.args(base_args);
|
||||
cmd.arg(sub);
|
||||
|
||||
for (path, _) in files.iter() {
|
||||
cmd.arg(path.to_str().unwrap());
|
||||
}
|
||||
|
||||
cmd.current_dir(location);
|
||||
|
||||
cmd.assert().success();
|
||||
cmd
|
||||
}
|
||||
|
||||
fn write_file(path: &PathBuf, contents: &String) {
|
||||
fn write_file_string(path: &PathBuf, contents: &String) -> Result<()> {
|
||||
write_file_bytes(path, contents.as_bytes())
|
||||
}
|
||||
|
||||
fn write_file_bytes(path: &PathBuf, contents: &[u8]) -> Result<()> {
|
||||
use std::io::Write;
|
||||
|
||||
let output = std::fs::File::create(path).unwrap();
|
||||
let Some(parent) = path.parent() else {
|
||||
bail!("failed to find parent dir for {}", path.display())
|
||||
};
|
||||
|
||||
std::fs::create_dir_all(parent)
|
||||
.with_context(|| anyhow!("Failed to create all parent folders for {}", path.display()))?;
|
||||
|
||||
let output = std::fs::File::create(path)
|
||||
.with_context(|| anyhow!("Failed to open file {} for writing", path.to_str().unwrap()))?;
|
||||
let mut writer = std::io::BufWriter::new(output);
|
||||
writer.write_all(contents.as_bytes()).unwrap();
|
||||
writer.write_all(contents)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn tempdir() -> PathBuf {
|
||||
tempfile::tempdir().unwrap().into_path()
|
||||
}
|
||||
|
||||
fn create_tar(files: &HashMap<PathBuf, String>) -> Result<Vec<u8>, std::io::Error> {
|
||||
let mut ar = Builder::new(Vec::new());
|
||||
|
||||
for (path, text) in files.iter() {
|
||||
let mut header = Header::new_gnu();
|
||||
let bytes = text.as_bytes();
|
||||
|
||||
header
|
||||
.set_path(path.to_str().unwrap())
|
||||
.expect("Failed to set path for archive entry");
|
||||
header.set_size(bytes.len() as u64);
|
||||
header.set_cksum();
|
||||
ar.append(&header, bytes).unwrap();
|
||||
}
|
||||
|
||||
ar.into_inner()
|
||||
}
|
||||
|
||||
fn indexing_data() -> HashMap<PathBuf, String> {
|
||||
HashMap::from([
|
||||
(
|
||||
PathBuf::from("src/main/java/globals.java"),
|
||||
include_str!("../testdata/globals.java").to_string(),
|
||||
),
|
||||
(
|
||||
PathBuf::from("package-info.java"),
|
||||
include_str!("../testdata/package-info.java").to_string(),
|
||||
),
|
||||
])
|
||||
}
|
||||
|
||||
/// Collects the keys of the fixture map as a set of strings.
///
/// Panics if any path is not valid UTF-8.
fn extract_paths(setup: &HashMap<PathBuf, String>) -> HashSet<String> {
    let mut paths = HashSet::with_capacity(setup.len());

    for path in setup.keys() {
        paths.insert(path.to_str().unwrap().to_string());
    }

    paths
}
|
||||
|
||||
fn extract_indexed_paths(index: &scip::types::Index) -> HashSet<String> {
|
||||
index
|
||||
.documents
|
||||
.iter()
|
||||
.map(|pb| pb.relative_path.clone())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn snapshot_from_files(docs: &[Document], project_root: &Path) -> String {
|
||||
let mut str = String::new();
|
||||
let mut docs = docs.to_owned();
|
||||
docs.sort_by_key(|doc| doc.relative_path.clone());
|
||||
|
||||
for doc in docs {
|
||||
let path = project_root.join(doc.relative_path.clone());
|
||||
let contents = std::fs::read_to_string(path.clone())
|
||||
.with_context(|| anyhow!("Failed to read path {}", path.display()))
|
||||
.unwrap();
|
||||
|
||||
str.push_str(&format_snapshot_document(&doc, &contents));
|
||||
}
|
||||
|
||||
str
|
||||
}
|
||||
|
||||
fn format_snapshot_document(doc: &scip::types::Document, contents: &str) -> String {
|
||||
let mut str = String::new();
|
||||
str.push_str(format!("//----FILE={}\n", doc.relative_path).as_str());
|
||||
str.push_str(&snapshot_syntax_document(doc, contents));
|
||||
str.push_str("\n\n");
|
||||
|
||||
str
|
||||
}
|
||||
|
||||
fn snapshot_from_data(docs: &[Document], data: &HashMap<PathBuf, String>) -> String {
|
||||
let mut str = String::new();
|
||||
let mut docs = docs.to_owned();
|
||||
docs.sort_by_key(|doc| doc.relative_path.clone());
|
||||
|
||||
for doc in docs {
|
||||
let contents = data
|
||||
.get(&PathBuf::from(&doc.relative_path))
|
||||
.context(format!("Failed to find {} in data", &doc.relative_path))
|
||||
.unwrap();
|
||||
|
||||
str.push_str(&format_snapshot_document(&doc, contents));
|
||||
}
|
||||
|
||||
str
|
||||
}
|
||||
|
||||
@ -1,7 +1,17 @@
|
||||
---
|
||||
source: crates/scip-syntax/tests/integration_test.rs
|
||||
expression: dumped
|
||||
expression: index_snapshot
|
||||
---
|
||||
//----FILE=package-info.java
|
||||
@Deprecated
|
||||
package foo.bar;
|
||||
// ^^^^^^^ definition(Package) scip-ctags `foo.bar`/
|
||||
|
||||
class Baz {}
|
||||
// ^^^ definition scip-ctags `foo.bar`/Baz#
|
||||
|
||||
|
||||
//----FILE=src/main/java/globals.java
|
||||
package MyPackage;
|
||||
// ^^^^^^^^^ definition(Package) scip-ctags MyPackage/
|
||||
|
||||
@ -72,3 +82,5 @@ expression: dumped
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,86 @@
|
||||
---
|
||||
source: crates/scip-syntax/tests/integration_test.rs
|
||||
expression: index_snapshot
|
||||
---
|
||||
//----FILE=package-info.java
|
||||
@Deprecated
|
||||
package foo.bar;
|
||||
// ^^^^^^^ definition(Package) scip-ctags `foo.bar`/
|
||||
|
||||
class Baz {}
|
||||
// ^^^ definition scip-ctags `foo.bar`/Baz#
|
||||
|
||||
|
||||
//----FILE=src/main/java/globals.java
|
||||
package MyPackage;
|
||||
// ^^^^^^^^^ definition(Package) scip-ctags MyPackage/
|
||||
|
||||
public class globals {
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#
|
||||
private static int field1;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field1.
|
||||
protected static int field2;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field2.
|
||||
public static int field3;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field3.
|
||||
private int field4;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field4.
|
||||
protected int field5;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field5.
|
||||
public int field6;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field6.
|
||||
|
||||
private static void method1() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method1().
|
||||
protected static void method2() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method2().
|
||||
public static void method3() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method3().
|
||||
private void method4() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method4().
|
||||
protected void method5() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method5().
|
||||
public void method6() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method6().
|
||||
|
||||
public static final String COOLEST_STRING = "probably this one";
|
||||
// ^^^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#COOLEST_STRING.
|
||||
|
||||
public class ClassInAClass {
|
||||
// ^^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#
|
||||
boolean classy = true;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#classy.
|
||||
|
||||
public static enum Enum {
|
||||
// ^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#
|
||||
these,
|
||||
// ^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#these.
|
||||
should,
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#should.
|
||||
be,
|
||||
// ^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#be.
|
||||
recognized,
|
||||
// ^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#recognized.
|
||||
as,
|
||||
// ^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#as.
|
||||
terms
|
||||
// ^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#terms.
|
||||
}
|
||||
|
||||
public interface Goated {
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Goated#
|
||||
boolean withTheSauce();
|
||||
// ^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Goated#withTheSauce().
|
||||
}
|
||||
|
||||
public void myCoolMethod() {
|
||||
// ^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#myCoolMethod().
|
||||
class WhatIsGoingOn {}
|
||||
boolean iThinkThisIsAllowedButWeDontReallyCare = true;
|
||||
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ definition local 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,86 @@
|
||||
---
|
||||
source: crates/scip-syntax/tests/integration_test.rs
|
||||
expression: index_snapshot
|
||||
---
|
||||
//----FILE=package-info.java
|
||||
@Deprecated
|
||||
package foo.bar;
|
||||
// ^^^^^^^ definition(Package) scip-ctags `foo.bar`/
|
||||
|
||||
class Baz {}
|
||||
// ^^^ definition scip-ctags `foo.bar`/Baz#
|
||||
|
||||
|
||||
//----FILE=src/main/java/globals.java
|
||||
package MyPackage;
|
||||
// ^^^^^^^^^ definition(Package) scip-ctags MyPackage/
|
||||
|
||||
public class globals {
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#
|
||||
private static int field1;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field1.
|
||||
protected static int field2;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field2.
|
||||
public static int field3;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field3.
|
||||
private int field4;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field4.
|
||||
protected int field5;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field5.
|
||||
public int field6;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field6.
|
||||
|
||||
private static void method1() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method1().
|
||||
protected static void method2() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method2().
|
||||
public static void method3() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method3().
|
||||
private void method4() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method4().
|
||||
protected void method5() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method5().
|
||||
public void method6() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method6().
|
||||
|
||||
public static final String COOLEST_STRING = "probably this one";
|
||||
// ^^^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#COOLEST_STRING.
|
||||
|
||||
public class ClassInAClass {
|
||||
// ^^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#
|
||||
boolean classy = true;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#classy.
|
||||
|
||||
public static enum Enum {
|
||||
// ^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#
|
||||
these,
|
||||
// ^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#these.
|
||||
should,
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#should.
|
||||
be,
|
||||
// ^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#be.
|
||||
recognized,
|
||||
// ^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#recognized.
|
||||
as,
|
||||
// ^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#as.
|
||||
terms
|
||||
// ^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#terms.
|
||||
}
|
||||
|
||||
public interface Goated {
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Goated#
|
||||
boolean withTheSauce();
|
||||
// ^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Goated#withTheSauce().
|
||||
}
|
||||
|
||||
public void myCoolMethod() {
|
||||
// ^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#myCoolMethod().
|
||||
class WhatIsGoingOn {}
|
||||
boolean iThinkThisIsAllowedButWeDontReallyCare = true;
|
||||
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ definition local 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,86 @@
|
||||
---
|
||||
source: crates/scip-syntax/tests/integration_test.rs
|
||||
expression: index_snapshot
|
||||
---
|
||||
//----FILE=package-info.java
|
||||
@Deprecated
|
||||
package foo.bar;
|
||||
// ^^^^^^^ definition(Package) scip-ctags `foo.bar`/
|
||||
|
||||
class Baz {}
|
||||
// ^^^ definition scip-ctags `foo.bar`/Baz#
|
||||
|
||||
|
||||
//----FILE=src/main/java/globals.java
|
||||
package MyPackage;
|
||||
// ^^^^^^^^^ definition(Package) scip-ctags MyPackage/
|
||||
|
||||
public class globals {
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#
|
||||
private static int field1;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field1.
|
||||
protected static int field2;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field2.
|
||||
public static int field3;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field3.
|
||||
private int field4;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field4.
|
||||
protected int field5;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field5.
|
||||
public int field6;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#field6.
|
||||
|
||||
private static void method1() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method1().
|
||||
protected static void method2() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method2().
|
||||
public static void method3() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method3().
|
||||
private void method4() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method4().
|
||||
protected void method5() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method5().
|
||||
public void method6() {}
|
||||
// ^^^^^^^ definition scip-ctags MyPackage/globals#method6().
|
||||
|
||||
public static final String COOLEST_STRING = "probably this one";
|
||||
// ^^^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#COOLEST_STRING.
|
||||
|
||||
public class ClassInAClass {
|
||||
// ^^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#
|
||||
boolean classy = true;
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#classy.
|
||||
|
||||
public static enum Enum {
|
||||
// ^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#
|
||||
these,
|
||||
// ^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#these.
|
||||
should,
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#should.
|
||||
be,
|
||||
// ^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#be.
|
||||
recognized,
|
||||
// ^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#recognized.
|
||||
as,
|
||||
// ^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#as.
|
||||
terms
|
||||
// ^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Enum#terms.
|
||||
}
|
||||
|
||||
public interface Goated {
|
||||
// ^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Goated#
|
||||
boolean withTheSauce();
|
||||
// ^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#Goated#withTheSauce().
|
||||
}
|
||||
|
||||
public void myCoolMethod() {
|
||||
// ^^^^^^^^^^^^ definition scip-ctags MyPackage/globals#ClassInAClass#myCoolMethod().
|
||||
class WhatIsGoingOn {}
|
||||
boolean iThinkThisIsAllowedButWeDontReallyCare = true;
|
||||
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ definition local 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user