From 53c824594202dfac496eb053fb1b0cacbe1a812a Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Wed, 5 Nov 2025 14:33:12 +1100 Subject: [PATCH] fixes for scheme+haskell --- Cargo.lock | 107 ++++++++++++++------- crates/g3-core/Cargo.toml | 23 +++-- crates/g3-core/src/code_search/searcher.rs | 58 ++++++++--- 3 files changed, 126 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5d08e32..98e726e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -318,13 +318,14 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.106" +version = "1.2.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" +checksum = "37521ac7aabe3d13122dc382493e20c9416f299d2ccd5b3a5340a2570cdeb0f3" dependencies = [ + "find-msvc-tools", "jobserver", "libc", - "once_cell", + "shlex", ] [[package]] @@ -1135,6 +1136,12 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + [[package]] name = "flate2" version = "1.1.5" @@ -1386,6 +1393,7 @@ dependencies = [ "serde_json", "serde_yaml", "shellexpand", + "streaming-iterator", "thiserror 1.0.69", "tokio", "tokio-stream", @@ -1395,11 +1403,12 @@ dependencies = [ "tree-sitter-c", "tree-sitter-cpp", "tree-sitter-go", + "tree-sitter-haskell", "tree-sitter-java", "tree-sitter-javascript", - "tree-sitter-kotlin", "tree-sitter-python", "tree-sitter-rust", + "tree-sitter-scheme", "tree-sitter-typescript", "uuid", "walkdir", @@ -3211,6 +3220,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + [[package]] name = "strict" version = "0.2.0" @@ -3634,102 +3649,120 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.22.6" +version = "0.24.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca" +checksum = "a5387dffa7ffc7d2dae12b50c6f7aab8ff79d6210147c6613561fc3d474c6f75" dependencies = [ "cc", "regex", + "regex-syntax", + "streaming-iterator", + "tree-sitter-language", ] [[package]] name = "tree-sitter-c" -version = "0.21.4" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f956d5351d62652864a4ff3ae861747e7a1940dc96c9998ae400ac0d3ce30427" +checksum = "afd2b1bf1585dc2ef6d69e87d01db8adb059006649dd5f96f31aa789ee6e9c71" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] name = "tree-sitter-cpp" -version = "0.21.0" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e058d4b9cefb54a8f322b31a1bd3cd306919b70b729523473b5aad8d315a8897" +checksum = "df2196ea9d47b4ab4a31b9297eaa5a5d19a0b121dceb9f118f6790ad0ab94743" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] name = "tree-sitter-go" -version = "0.21.2" +version = "0.23.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8d702a98d3c7e70e466456e58ff2b1ac550bf1e29b97e5770676d2fdabec00d" +checksum = "b13d476345220dbe600147dd444165c5791bf85ef53e28acbedd46112ee18431" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-haskell" +version = "0.23.1" +source = "git+https://github.com/tree-sitter/tree-sitter-haskell#0975ef72fc3c47b530309ca93937d7d143523628" +dependencies = [ + "cc", + "tree-sitter-language", ] [[package]] name = "tree-sitter-java" -version = "0.21.0" +version = "0.23.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33bc21adf831a773c075d9d00107ab43965e6a6ea7607b47fd9ec6f3db4b481b" +checksum = "0aa6cbcdc8c679b214e616fd3300da67da0e492e066df01bcf5a5921a71e90d6" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] name = "tree-sitter-javascript" -version = "0.21.4" +version = "0.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8710a71bc6779e33811a8067bdda3ed08bed1733296ff915e44faf60f8c533d7" +checksum = "bf40bf599e0416c16c125c3cec10ee5ddc7d1bb8b0c60fa5c4de249ad34dc1b1" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] -name = "tree-sitter-kotlin" -version = "0.3.8" +name = "tree-sitter-language" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54ff60aeb036f5762515ceb31404512ea4f9599764bcd3857074bb82867bdd34" -dependencies = [ - "cc", - "tree-sitter", -] +checksum = "c4013970217383f67b18aef68f6fb2e8d409bc5755227092d32efb0422ba24b8" [[package]] name = "tree-sitter-python" -version = "0.21.0" +version = "0.23.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4066c6cf678f962f8c2c4561f205945c84834cce73d981e71392624fdc390a9" +checksum = "3d065aaa27f3aaceaf60c1f0e0ac09e1cb9eb8ed28e7bcdaa52129cffc7f4b04" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] name = "tree-sitter-rust" -version = "0.21.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "277690f420bf90741dea984f3da038ace46c4fe6047cba57a66822226cde1c93" +checksum = "ca8ccb3e3a3495c8a943f6c3fd24c3804c471fd7f4f16087623c7fa4c0068e8a" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-scheme" +version = "0.24.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7e7f156bdf38145f26705d1733185698845307d3e9d9c071ecce4375575131" +dependencies = [ + "cc", + "tree-sitter-language", ] [[package]] name = "tree-sitter-typescript" -version = "0.21.2" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ecb35d98a688378e56c18c9c159824fd16f730ccbea19aacf4f206e5d5438ed9" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" dependencies = [ "cc", - "tree-sitter", + "tree-sitter-language", ] [[package]] diff --git a/crates/g3-core/Cargo.toml b/crates/g3-core/Cargo.toml index 37e4274..994442a 100644 --- a/crates/g3-core/Cargo.toml +++ b/crates/g3-core/Cargo.toml @@ -28,14 +28,17 @@ shellexpand = "3.1" serde_yaml = "0.9" # tree-sitter for embedded code search -tree-sitter = "0.22" -tree-sitter-rust = "0.21" -tree-sitter-python = "0.21" -tree-sitter-javascript = "0.21" -tree-sitter-typescript = "0.21" -tree-sitter-go = "0.21" -tree-sitter-java = "0.21" -tree-sitter-c = "0.21" -tree-sitter-cpp = "0.21" -tree-sitter-kotlin = "0.3" +tree-sitter = "0.24" +tree-sitter-rust = "0.23" +tree-sitter-python = "0.23" +tree-sitter-javascript = "0.23" +tree-sitter-typescript = "0.23" +tree-sitter-go = "0.23" +tree-sitter-java = "0.23" +tree-sitter-c = "0.23" +tree-sitter-cpp = "0.23" +# tree-sitter-kotlin = "0.3" # Temporarily disabled - incompatible with tree-sitter 0.24 +tree-sitter-haskell = { git = "https://github.com/tree-sitter/tree-sitter-haskell" } +tree-sitter-scheme = "0.24" +streaming-iterator = "0.1" walkdir = "2.4" diff --git a/crates/g3-core/src/code_search/searcher.rs b/crates/g3-core/src/code_search/searcher.rs index 12bdfde..6e2f913 100644 --- a/crates/g3-core/src/code_search/searcher.rs +++ b/crates/g3-core/src/code_search/searcher.rs @@ -4,6 +4,7 @@ use std::collections::HashMap; use std::fs; use std::path::Path; use tree_sitter::{Language, Parser, Query, QueryCursor}; +use streaming_iterator::StreamingIterator; use walkdir::WalkDir; pub struct TreeSitterSearcher { @@ -19,7 +20,7 @@ impl TreeSitterSearcher { // Initialize Rust { let mut parser = Parser::new(); - let language: Language = tree_sitter_rust::language().into(); + let language: Language = tree_sitter_rust::LANGUAGE.into(); parser .set_language(&language) .map_err(|e| anyhow!("Failed to set Rust language: {}", e))?; @@ -30,7 +31,7 @@ impl TreeSitterSearcher { // Initialize Python { let mut parser = Parser::new(); - let language: Language = tree_sitter_python::language().into(); + let language: Language = tree_sitter_python::LANGUAGE.into(); parser .set_language(&language) .map_err(|e| anyhow!("Failed to set Python language: {}", e))?; @@ -41,7 +42,7 @@ impl TreeSitterSearcher { // Initialize JavaScript { let mut parser = Parser::new(); - let language: Language = tree_sitter_javascript::language().into(); + let language: Language = tree_sitter_javascript::LANGUAGE.into(); parser .set_language(&language) .map_err(|e| anyhow!("Failed to set JavaScript language: {}", e))?; @@ -59,7 +60,7 @@ impl TreeSitterSearcher { // Initialize TypeScript { let mut parser = Parser::new(); - let language: Language = tree_sitter_typescript::language_typescript().into(); + let language: Language = tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into(); parser .set_language(&language) .map_err(|e| anyhow!("Failed to set TypeScript language: {}", e))?; @@ -77,7 +78,7 @@ impl TreeSitterSearcher { // Initialize Go { let mut parser = Parser::new(); - let language: Language = tree_sitter_go::language().into(); + let language: Language = tree_sitter_go::LANGUAGE.into(); parser .set_language(&language) .map_err(|e| anyhow!("Failed to set Go language: {}", e))?; @@ -88,7 +89,7 @@ impl TreeSitterSearcher { // Initialize Java { let mut parser = Parser::new(); - let language: Language = tree_sitter_java::language().into(); + let language: Language = tree_sitter_java::LANGUAGE.into(); parser .set_language(&language) .map_err(|e| anyhow!("Failed to set Java language: {}", e))?; @@ -99,7 +100,7 @@ impl TreeSitterSearcher { // Initialize C { let mut parser = Parser::new(); - let language: Language = tree_sitter_c::language().into(); + let language: Language = tree_sitter_c::LANGUAGE.into(); parser .set_language(&language) .map_err(|e| anyhow!("Failed to set C language: {}", e))?; @@ -110,7 +111,7 @@ impl TreeSitterSearcher { // Initialize C++ { let mut parser = Parser::new(); - let language: Language = tree_sitter_cpp::language().into(); + let language: Language = tree_sitter_cpp::LANGUAGE.into(); parser .set_language(&language) .map_err(|e| anyhow!("Failed to set C++ language: {}", e))?; @@ -118,15 +119,37 @@ impl TreeSitterSearcher { languages.insert("cpp".to_string(), language); } - // Initialize Kotlin + // // Initialize Kotlin - Temporarily disabled due to tree-sitter version incompatibility + // { + // let mut parser = Parser::new(); + // let language: Language = tree_sitter_kotlin::language(); + // parser + // .set_language(&language) + // .map_err(|e| anyhow!("Failed to set Kotlin language: {}", e))?; + // parsers.insert("kotlin".to_string(), parser); + // languages.insert("kotlin".to_string(), language); + // } + + // Initialize Haskell { let mut parser = Parser::new(); - let language: Language = tree_sitter_kotlin::language().into(); + let language: Language = tree_sitter_haskell::LANGUAGE.into(); parser .set_language(&language) - .map_err(|e| anyhow!("Failed to set Kotlin language: {}", e))?; - parsers.insert("kotlin".to_string(), parser); - languages.insert("kotlin".to_string(), language); + .map_err(|e| anyhow!("Failed to set Haskell language: {}", e))?; + parsers.insert("haskell".to_string(), parser); + languages.insert("haskell".to_string(), language); + } + + // Initialize Scheme + { + let mut parser = Parser::new(); + let language: Language = tree_sitter_scheme::LANGUAGE.into(); + parser + .set_language(&language) + .map_err(|e| anyhow!("Failed to set Scheme language: {}", e))?; + parsers.insert("scheme".to_string(), parser); + languages.insert("scheme".to_string(), language); } if parsers.is_empty() { @@ -232,13 +255,14 @@ impl TreeSitterSearcher { if let Ok(source_code) = fs::read_to_string(path) { if let Some(tree) = parser.parse(&source_code, None) { let mut cursor = QueryCursor::new(); - let query_matches = cursor.matches( + let mut query_matches = cursor.matches( &query, tree.root_node(), source_code.as_bytes(), ); - for query_match in query_matches { + query_matches.advance(); + while let Some(query_match) = query_matches.get() { if matches.len() >= max_matches { break; } @@ -284,6 +308,8 @@ impl TreeSitterSearcher { captures: captures_map, context, }); + + query_matches.advance(); } } } @@ -311,6 +337,8 @@ impl TreeSitterSearcher { ("c", Some("c" | "h")) => true, ("cpp", Some("cpp" | "cc" | "cxx" | "hpp" | "hxx" | "h")) => true, ("kotlin", Some("kt" | "kts")) => true, + ("haskell", Some("hs" | "lhs")) => true, + ("scheme", Some("scm" | "ss" | "sld" | "sls")) => true, _ => false, } }