Fixes for Scheme and Haskell tree-sitter support

This commit is contained in:
Dhanji R. Prasanna
2025-11-05 14:33:12 +11:00
parent 4327c839a9
commit 53c8245942
3 changed files with 126 additions and 62 deletions

107
Cargo.lock generated
View File

@@ -318,13 +318,14 @@ dependencies = [
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.0.106" version = "1.2.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" checksum = "37521ac7aabe3d13122dc382493e20c9416f299d2ccd5b3a5340a2570cdeb0f3"
dependencies = [ dependencies = [
"find-msvc-tools",
"jobserver", "jobserver",
"libc", "libc",
"once_cell", "shlex",
] ]
[[package]] [[package]]
@@ -1135,6 +1136,12 @@ dependencies = [
"simd-adler32", "simd-adler32",
] ]
[[package]]
name = "find-msvc-tools"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.1.5" version = "1.1.5"
@@ -1386,6 +1393,7 @@ dependencies = [
"serde_json", "serde_json",
"serde_yaml", "serde_yaml",
"shellexpand", "shellexpand",
"streaming-iterator",
"thiserror 1.0.69", "thiserror 1.0.69",
"tokio", "tokio",
"tokio-stream", "tokio-stream",
@@ -1395,11 +1403,12 @@ dependencies = [
"tree-sitter-c", "tree-sitter-c",
"tree-sitter-cpp", "tree-sitter-cpp",
"tree-sitter-go", "tree-sitter-go",
"tree-sitter-haskell",
"tree-sitter-java", "tree-sitter-java",
"tree-sitter-javascript", "tree-sitter-javascript",
"tree-sitter-kotlin",
"tree-sitter-python", "tree-sitter-python",
"tree-sitter-rust", "tree-sitter-rust",
"tree-sitter-scheme",
"tree-sitter-typescript", "tree-sitter-typescript",
"uuid", "uuid",
"walkdir", "walkdir",
@@ -3211,6 +3220,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "streaming-iterator"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520"
[[package]] [[package]]
name = "strict" name = "strict"
version = "0.2.0" version = "0.2.0"
@@ -3634,102 +3649,120 @@ dependencies = [
[[package]] [[package]]
name = "tree-sitter" name = "tree-sitter"
version = "0.22.6" version = "0.24.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75"
dependencies = [ dependencies = [
"cc", "cc",
"regex", "regex",
"regex-syntax",
"streaming-iterator",
"tree-sitter-language",
] ]
[[package]] [[package]]
name = "tree-sitter-c" name = "tree-sitter-c"
version = "0.21.4" version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f956d5351d62652864a4ff3ae861747e7a1940dc96c9998ae400ac0d3ce30427" checksum = "afd2b1bf1585dc2ef6d69e87d01db8adb059006649dd5f96f31aa789ee6e9c71"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter-language",
] ]
[[package]] [[package]]
name = "tree-sitter-cpp" name = "tree-sitter-cpp"
version = "0.21.0" version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e058d4b9cefb54a8f322b31a1bd3cd306919b70b729523473b5aad8d315a8897" checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter-language",
] ]
[[package]] [[package]]
name = "tree-sitter-go" name = "tree-sitter-go"
version = "0.21.2" version = "0.23.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8d702a98d3c7e70e466456e58ff2b1ac550bf1e29b97e5770676d2fdabec00d" checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter-language",
]
[[package]]
name = "tree-sitter-haskell"
version = "0.23.1"
source = "git+https://github.com/tree-sitter/tree-sitter-haskell#0975ef72fc3c47b530309ca93937d7d143523628"
dependencies = [
"cc",
"tree-sitter-language",
] ]
[[package]] [[package]]
name = "tree-sitter-java" name = "tree-sitter-java"
version = "0.21.0" version = "0.23.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33bc21adf831a773c075d9d00107ab43965e6a6ea7607b47fd9ec6f3db4b481b" checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter-language",
] ]
[[package]] [[package]]
name = "tree-sitter-javascript" name = "tree-sitter-javascript"
version = "0.21.4" version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8710a71bc6779e33811a8067bdda3ed08bed1733296ff915e44faf60f8c533d7" checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter-language",
] ]
[[package]] [[package]]
name = "tree-sitter-kotlin" name = "tree-sitter-language"
version = "0.3.8" version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54ff60aeb036f5762515ceb31404512ea4f9599764bcd3857074bb82867bdd34" checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8"
dependencies = [
"cc",
"tree-sitter",
]
[[package]] [[package]]
name = "tree-sitter-python" name = "tree-sitter-python"
version = "0.21.0" version = "0.23.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4066c6cf678f962f8c2c4561f205945c84834cce73d981e71392624fdc390a9" checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter-language",
] ]
[[package]] [[package]]
name = "tree-sitter-rust" name = "tree-sitter-rust"
version = "0.21.2" version = "0.23.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "277690f420bf90741dea984f3da038ace46c4fe6047cba57a66822226cde1c93" checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter-language",
]
[[package]]
name = "tree-sitter-scheme"
version = "0.24.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a7e7f156bdf38145f26705d1733185698845307d3e9d9c071ecce4375575131"
dependencies = [
"cc",
"tree-sitter-language",
] ]
[[package]] [[package]]
name = "tree-sitter-typescript" name = "tree-sitter-typescript"
version = "0.21.2" version = "0.23.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecb35d98a688378e56c18c9c159824fd16f730ccbea19aacf4f206e5d5438ed9" checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff"
dependencies = [ dependencies = [
"cc", "cc",
"tree-sitter", "tree-sitter-language",
] ]
[[package]] [[package]]

View File

@@ -28,14 +28,17 @@ shellexpand = "3.1"
serde_yaml = "0.9" serde_yaml = "0.9"
# tree-sitter for embedded code search # tree-sitter for embedded code search
tree-sitter = "0.22" tree-sitter = "0.24"
tree-sitter-rust = "0.21" tree-sitter-rust = "0.23"
tree-sitter-python = "0.21" tree-sitter-python = "0.23"
tree-sitter-javascript = "0.21" tree-sitter-javascript = "0.23"
tree-sitter-typescript = "0.21" tree-sitter-typescript = "0.23"
tree-sitter-go = "0.21" tree-sitter-go = "0.23"
tree-sitter-java = "0.21" tree-sitter-java = "0.23"
tree-sitter-c = "0.21" tree-sitter-c = "0.23"
tree-sitter-cpp = "0.21" tree-sitter-cpp = "0.23"
tree-sitter-kotlin = "0.3" # tree-sitter-kotlin = "0.3" # Temporarily disabled - incompatible with tree-sitter 0.24
tree-sitter-haskell = { git = "https://github.com/tree-sitter/tree-sitter-haskell" }
tree-sitter-scheme = "0.24"
streaming-iterator = "0.1"
walkdir = "2.4" walkdir = "2.4"

View File

@@ -4,6 +4,7 @@ use std::collections::HashMap;
use std::fs; use std::fs;
use std::path::Path; use std::path::Path;
use tree_sitter::{Language, Parser, Query, QueryCursor}; use tree_sitter::{Language, Parser, Query, QueryCursor};
use streaming_iterator::StreamingIterator;
use walkdir::WalkDir; use walkdir::WalkDir;
pub struct TreeSitterSearcher { pub struct TreeSitterSearcher {
@@ -19,7 +20,7 @@ impl TreeSitterSearcher {
// Initialize Rust // Initialize Rust
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_rust::language().into(); let language: Language = tree_sitter_rust::LANGUAGE.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set Rust language: {}", e))?; .map_err(|e| anyhow!("Failed to set Rust language: {}", e))?;
@@ -30,7 +31,7 @@ impl TreeSitterSearcher {
// Initialize Python // Initialize Python
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_python::language().into(); let language: Language = tree_sitter_python::LANGUAGE.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set Python language: {}", e))?; .map_err(|e| anyhow!("Failed to set Python language: {}", e))?;
@@ -41,7 +42,7 @@ impl TreeSitterSearcher {
// Initialize JavaScript // Initialize JavaScript
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_javascript::language().into(); let language: Language = tree_sitter_javascript::LANGUAGE.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set JavaScript language: {}", e))?; .map_err(|e| anyhow!("Failed to set JavaScript language: {}", e))?;
@@ -59,7 +60,7 @@ impl TreeSitterSearcher {
// Initialize TypeScript // Initialize TypeScript
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_typescript::language_typescript().into(); let language: Language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set TypeScript language: {}", e))?; .map_err(|e| anyhow!("Failed to set TypeScript language: {}", e))?;
@@ -77,7 +78,7 @@ impl TreeSitterSearcher {
// Initialize Go // Initialize Go
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_go::language().into(); let language: Language = tree_sitter_go::LANGUAGE.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set Go language: {}", e))?; .map_err(|e| anyhow!("Failed to set Go language: {}", e))?;
@@ -88,7 +89,7 @@ impl TreeSitterSearcher {
// Initialize Java // Initialize Java
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_java::language().into(); let language: Language = tree_sitter_java::LANGUAGE.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set Java language: {}", e))?; .map_err(|e| anyhow!("Failed to set Java language: {}", e))?;
@@ -99,7 +100,7 @@ impl TreeSitterSearcher {
// Initialize C // Initialize C
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_c::language().into(); let language: Language = tree_sitter_c::LANGUAGE.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set C language: {}", e))?; .map_err(|e| anyhow!("Failed to set C language: {}", e))?;
@@ -110,7 +111,7 @@ impl TreeSitterSearcher {
// Initialize C++ // Initialize C++
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_cpp::language().into(); let language: Language = tree_sitter_cpp::LANGUAGE.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set C++ language: {}", e))?; .map_err(|e| anyhow!("Failed to set C++ language: {}", e))?;
@@ -118,15 +119,37 @@ impl TreeSitterSearcher {
languages.insert("cpp".to_string(), language); languages.insert("cpp".to_string(), language);
} }
// Initialize Kotlin // // Initialize Kotlin - Temporarily disabled due to tree-sitter version incompatibility
// {
// let mut parser = Parser::new();
// let language: Language = tree_sitter_kotlin::language();
// parser
// .set_language(&language)
// .map_err(|e| anyhow!("Failed to set Kotlin language: {}", e))?;
// parsers.insert("kotlin".to_string(), parser);
// languages.insert("kotlin".to_string(), language);
// }
// Initialize Haskell
{ {
let mut parser = Parser::new(); let mut parser = Parser::new();
let language: Language = tree_sitter_kotlin::language().into(); let language: Language = tree_sitter_haskell::LANGUAGE.into();
parser parser
.set_language(&language) .set_language(&language)
.map_err(|e| anyhow!("Failed to set Kotlin language: {}", e))?; .map_err(|e| anyhow!("Failed to set Haskell language: {}", e))?;
parsers.insert("kotlin".to_string(), parser); parsers.insert("haskell".to_string(), parser);
languages.insert("kotlin".to_string(), language); languages.insert("haskell".to_string(), language);
}
// Initialize Scheme
{
let mut parser = Parser::new();
let language: Language = tree_sitter_scheme::LANGUAGE.into();
parser
.set_language(&language)
.map_err(|e| anyhow!("Failed to set Scheme language: {}", e))?;
parsers.insert("scheme".to_string(), parser);
languages.insert("scheme".to_string(), language);
} }
if parsers.is_empty() { if parsers.is_empty() {
@@ -232,13 +255,14 @@ impl TreeSitterSearcher {
if let Ok(source_code) = fs::read_to_string(path) { if let Ok(source_code) = fs::read_to_string(path) {
if let Some(tree) = parser.parse(&source_code, None) { if let Some(tree) = parser.parse(&source_code, None) {
let mut cursor = QueryCursor::new(); let mut cursor = QueryCursor::new();
let query_matches = cursor.matches( let mut query_matches = cursor.matches(
&query, &query,
tree.root_node(), tree.root_node(),
source_code.as_bytes(), source_code.as_bytes(),
); );
for query_match in query_matches { query_matches.advance();
while let Some(query_match) = query_matches.get() {
if matches.len() >= max_matches { if matches.len() >= max_matches {
break; break;
} }
@@ -284,6 +308,8 @@ impl TreeSitterSearcher {
captures: captures_map, captures: captures_map,
context, context,
}); });
query_matches.advance();
} }
} }
} }
@@ -311,6 +337,8 @@ impl TreeSitterSearcher {
("c", Some("c" | "h")) => true, ("c", Some("c" | "h")) => true,
("cpp", Some("cpp" | "cc" | "cxx" | "hpp" | "hxx" | "h")) => true, ("cpp", Some("cpp" | "cc" | "cxx" | "hpp" | "hxx" | "h")) => true,
("kotlin", Some("kt" | "kts")) => true, ("kotlin", Some("kt" | "kts")) => true,
("haskell", Some("hs" | "lhs")) => true,
("scheme", Some("scm" | "ss" | "sld" | "sls")) => true,
_ => false, _ => false,
} }
} }