From fa38439a06a68c0a811926a75f0147d7f83c4210 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Wed, 5 Nov 2025 14:07:50 +1100 Subject: [PATCH] adding more languages to tree-sitter (java, go, cpp,..) --- Cargo.lock | 44 +++++++++ README.md | 2 +- crates/g3-core/Cargo.toml | 4 + crates/g3-core/src/code_search/searcher.rs | 48 ++++++++++ crates/g3-core/src/lib.rs | 8 +- crates/g3-core/tests/code_search_test.rs | 103 +++++++++++++++++++++ 6 files changed, 204 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 27026d8..88b2ac1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1392,6 +1392,10 @@ dependencies = [ "tokio-util", "tracing", "tree-sitter", + "tree-sitter-c", + "tree-sitter-cpp", + "tree-sitter-go", + "tree-sitter-java", "tree-sitter-javascript", "tree-sitter-python", "tree-sitter-rust", @@ -3637,6 +3641,46 @@ dependencies = [ "regex", ] +[[package]] +name = "tree-sitter-c" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f956d5351d62652864a4ff3ae861747e7a1940dc96c9998ae400ac0d3ce30427" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-cpp" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e058d4b9cefb54a8f322b31a1bd3cd306919b70b729523473b5aad8d315a8897" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-go" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8d702a98d3c7e70e466456e58ff2b1ac550bf1e29b97e5770676d2fdabec00d" +dependencies = [ + "cc", + "tree-sitter", +] + +[[package]] +name = "tree-sitter-java" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33bc21adf831a773c075d9d00107ab43965e6a6ea7607b47fd9ec6f3db4b481b" +dependencies = [ + "cc", + "tree-sitter", +] + [[package]] name = "tree-sitter-javascript" version = "0.21.4" diff --git a/README.md b/README.md index 541595b..f31ff22 100644 --- a/README.md +++ b/README.md @@ -94,7 +94,7 @@ These commands give you fine-grained control over context management, allowing y - Screenshot capture and window management - OCR text extraction from images and screen regions - Window listing and identification -- **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript) - see [Code Search Guide](docs/CODE_SEARCH.md) +- **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript, Go, Java, C, C++) - see [Code Search Guide](docs/CODE_SEARCH.md) - **Final Output**: Formatted result presentation ### Provider Flexibility diff --git a/crates/g3-core/Cargo.toml b/crates/g3-core/Cargo.toml index f4845e4..8d02b63 100644 --- a/crates/g3-core/Cargo.toml +++ b/crates/g3-core/Cargo.toml @@ -33,4 +33,8 @@ tree-sitter-rust = "0.21" tree-sitter-python = "0.21" tree-sitter-javascript = "0.21" tree-sitter-typescript = "0.21" +tree-sitter-go = "0.21" +tree-sitter-java = "0.21" +tree-sitter-c = "0.21" +tree-sitter-cpp = "0.21" walkdir = "2.4" diff --git a/crates/g3-core/src/code_search/searcher.rs b/crates/g3-core/src/code_search/searcher.rs index 45d0c5f..9727e26 100644 --- a/crates/g3-core/src/code_search/searcher.rs +++ b/crates/g3-core/src/code_search/searcher.rs @@ -74,6 +74,50 @@ impl TreeSitterSearcher { languages.insert("ts".to_string(), language.clone()); } + // Initialize Go + { + let mut parser = Parser::new(); + let language: Language = tree_sitter_go::language().into(); + parser + .set_language(&language) + .map_err(|e| anyhow!("Failed to set Go language: {}", e))?; + parsers.insert("go".to_string(), parser); + languages.insert("go".to_string(), language); + } + + // Initialize Java + { + let mut parser = Parser::new(); + let language: Language = tree_sitter_java::language().into(); + parser + .set_language(&language) + .map_err(|e| anyhow!("Failed to set Java language: {}", e))?; + parsers.insert("java".to_string(), parser); + languages.insert("java".to_string(), language); + } + + // Initialize C + { + let mut parser = Parser::new(); + let language: Language = tree_sitter_c::language().into(); + parser + .set_language(&language) + .map_err(|e| anyhow!("Failed to set C language: {}", e))?; + parsers.insert("c".to_string(), parser); + languages.insert("c".to_string(), language); + } + + // Initialize C++ + { + let mut parser = Parser::new(); + let language: Language = tree_sitter_cpp::language().into(); + parser + .set_language(&language) + .map_err(|e| anyhow!("Failed to set C++ language: {}", e))?; + parsers.insert("cpp".to_string(), parser); + languages.insert("cpp".to_string(), language); + } + if parsers.is_empty() { return Err(anyhow!( "No language parsers available. Enable at least one language feature." @@ -251,6 +295,10 @@ impl TreeSitterSearcher { ("python", Some("py")) => true, ("javascript" | "js", Some("js" | "jsx" | "mjs")) => true, ("typescript" | "ts", Some("ts" | "tsx")) => true, + ("go", Some("go")) => true, + ("java", Some("java")) => true, + ("c", Some("c" | "h")) => true, + ("cpp", Some("cpp" | "cc" | "cxx" | "hpp" | "hxx" | "h")) => true, _ => false, } } diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index ddb949f..0771bd1 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -1093,7 +1093,7 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request: For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\". If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir. -For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense. +For working with code, prioritize use of code_search tool over read_file, first. Additional examples for the 'code_search' tool: - Find functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_functions\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"paths\": [\"src/\"]}]}} @@ -1113,7 +1113,7 @@ Do not explain what you're going to do - just do it by calling the tools. # Task Management -Use todo_read and todo_write for tasks with 2+ steps, multiple files/components, or uncertain scope. +Use todo_read and todo_write for all but the simplest of tasks. Workflow: - Start: read → write checklist @@ -1908,7 +1908,7 @@ Template: // Add code_search tool tools.push(Tool { name: "code_search".to_string(), - description: "Batch syntax-aware code searches using embedded tree-sitter. Supports up to 20 searches in parallel for Rust, Python, JavaScript, and TypeScript. Uses tree-sitter query syntax (S-expressions).".to_string(), + description: "Batch syntax-aware code searches using embedded tree-sitter. Supports up to 20 searches in parallel for Rust, Python, JavaScript, TypeScript, Go, Java, C, and C++. Uses tree-sitter query syntax (S-expressions).".to_string(), input_schema: json!({ "type": "object", "properties": { @@ -1920,7 +1920,7 @@ Template: "properties": { "name": { "type": "string", "description": "Label for this search." }, "query": { "type": "string", "description": "tree-sitter query in S-expression format (e.g., \"(function_item name: (identifier) @name)\")"}, - "language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript"], "description": "Programming language to search." }, + "language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript", "go", "java", "c", "cpp"], "description": "Programming language to search." }, "paths": { "type": "array", "items": { "type": "string" }, "description": "Paths/dirs to search. Defaults to current dir if empty." }, "context_lines": { "type": "integer", "minimum": 0, "maximum": 20, "default": 0, "description": "Lines of context to include around each match." } }, diff --git a/crates/g3-core/tests/code_search_test.rs b/crates/g3-core/tests/code_search_test.rs index 1bfad0f..c5d1627 100644 --- a/crates/g3-core/tests/code_search_test.rs +++ b/crates/g3-core/tests/code_search_test.rs @@ -410,3 +410,106 @@ class MyClass { // Cleanup fs::remove_dir_all(&test_dir).ok(); } + +#[tokio::test] +async fn test_go_search() { + let request = CodeSearchRequest { + searches: vec![SearchSpec { + name: "go_functions".to_string(), + query: "(function_declaration name: (identifier) @name)".to_string(), + language: "go".to_string(), + paths: vec!["examples/test_code".to_string()], + context_lines: 0, + }], + max_concurrency: 4, + max_matches_per_search: 500, + }; + + let response = execute_code_search(request).await.unwrap(); + assert_eq!(response.searches.len(), 1); + assert!(response.searches[0].matches.len() > 0); + + // Should find main and greet functions + let names: Vec<&str> = response.searches[0].matches.iter() + .filter_map(|m| m.captures.get("name").map(|s| s.as_str())) + .collect(); + assert!(names.contains(&"main")); + assert!(names.contains(&"greet")); +} + +#[tokio::test] +async fn test_java_search() { + let request = CodeSearchRequest { + searches: vec![SearchSpec { + name: "java_classes".to_string(), + query: "(class_declaration name: (identifier) @name)".to_string(), + language: "java".to_string(), + paths: vec!["examples/test_code".to_string()], + context_lines: 0, + }], + max_concurrency: 4, + max_matches_per_search: 500, + }; + + let response = execute_code_search(request).await.unwrap(); + assert_eq!(response.searches.len(), 1); + assert!(response.searches[0].matches.len() > 0); + + // Should find Example class + let names: Vec<&str> = response.searches[0].matches.iter() + .filter_map(|m| m.captures.get("name").map(|s| s.as_str())) + .collect(); + assert!(names.contains(&"Example")); +} + +#[tokio::test] +async fn test_c_search() { + let request = CodeSearchRequest { + searches: vec![SearchSpec { + name: "c_functions".to_string(), + query: "(function_definition declarator: (function_declarator declarator: (identifier) @name))".to_string(), + language: "c".to_string(), + paths: vec!["examples/test_code".to_string()], + context_lines: 0, + }], + max_concurrency: 4, + max_matches_per_search: 500, + }; + + let response = execute_code_search(request).await.unwrap(); + assert_eq!(response.searches.len(), 1); + assert!(response.searches[0].matches.len() > 0); + + // Should find greet, add, and main functions + let names: Vec<&str> = response.searches[0].matches.iter() + .filter_map(|m| m.captures.get("name").map(|s| s.as_str())) + .collect(); + assert!(names.contains(&"greet")); + assert!(names.contains(&"add")); + assert!(names.contains(&"main")); +} + +#[tokio::test] +async fn test_cpp_search() { + let request = CodeSearchRequest { + searches: vec![SearchSpec { + name: "cpp_classes".to_string(), + query: "(class_specifier name: (type_identifier) @name)".to_string(), + language: "cpp".to_string(), + paths: vec!["examples/test_code".to_string()], + context_lines: 0, + }], + max_concurrency: 4, + max_matches_per_search: 500, + }; + + let response = execute_code_search(request).await.unwrap(); + assert_eq!(response.searches.len(), 1); + assert!(response.searches[0].matches.len() > 0); + + // Should find Person class + let names: Vec<&str> = response.searches[0].matches.iter() + .filter_map(|m| m.captures.get("name").map(|s| s.as_str())) + .collect(); + assert!(names.contains(&"Person")); +}