Add more languages to tree-sitter code search (Java, Go, C, C++)

This commit is contained in:
Dhanji R. Prasanna
2025-11-05 14:07:50 +11:00
parent f25a3d5e06
commit fa38439a06
6 changed files with 204 additions and 5 deletions

44
Cargo.lock generated
View File

@@ -1392,6 +1392,10 @@ dependencies = [
"tokio-util",
"tracing",
"tree-sitter",
"tree-sitter-c",
"tree-sitter-cpp",
"tree-sitter-go",
"tree-sitter-java",
"tree-sitter-javascript",
"tree-sitter-python",
"tree-sitter-rust",
@@ -3637,6 +3641,46 @@ dependencies = [
"regex",
]
[[package]]
name = "tree-sitter-c"
version = "0.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f956d5351d62652864a4ff3ae861747e7a1940dc96c9998ae400ac0d3ce30427"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-cpp"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e058d4b9cefb54a8f322b31a1bd3cd306919b70b729523473b5aad8d315a8897"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-go"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8d702a98d3c7e70e466456e58ff2b1ac550bf1e29b97e5770676d2fdabec00d"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-java"
version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33bc21adf831a773c075d9d00107ab43965e6a6ea7607b47fd9ec6f3db4b481b"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-javascript"
version = "0.21.4"

View File

@@ -94,7 +94,7 @@ These commands give you fine-grained control over context management, allowing y
- Screenshot capture and window management
- OCR text extraction from images and screen regions
- Window listing and identification
- **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript) - see [Code Search Guide](docs/CODE_SEARCH.md)
- **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript, Go, Java, C, C++) - see [Code Search Guide](docs/CODE_SEARCH.md)
- **Final Output**: Formatted result presentation
### Provider Flexibility

View File

@@ -33,4 +33,8 @@ tree-sitter-rust = "0.21"
tree-sitter-python = "0.21"
tree-sitter-javascript = "0.21"
tree-sitter-typescript = "0.21"
tree-sitter-go = "0.21"
tree-sitter-java = "0.21"
tree-sitter-c = "0.21"
tree-sitter-cpp = "0.21"
walkdir = "2.4"

View File

@@ -74,6 +74,50 @@ impl TreeSitterSearcher {
languages.insert("ts".to_string(), language.clone());
}
// Initialize Go
{
let mut parser = Parser::new();
let language: Language = tree_sitter_go::language().into();
parser
.set_language(&language)
.map_err(|e| anyhow!("Failed to set Go language: {}", e))?;
parsers.insert("go".to_string(), parser);
languages.insert("go".to_string(), language);
}
// Initialize Java
{
let mut parser = Parser::new();
let language: Language = tree_sitter_java::language().into();
parser
.set_language(&language)
.map_err(|e| anyhow!("Failed to set Java language: {}", e))?;
parsers.insert("java".to_string(), parser);
languages.insert("java".to_string(), language);
}
// Initialize C
{
let mut parser = Parser::new();
let language: Language = tree_sitter_c::language().into();
parser
.set_language(&language)
.map_err(|e| anyhow!("Failed to set C language: {}", e))?;
parsers.insert("c".to_string(), parser);
languages.insert("c".to_string(), language);
}
// Initialize C++
{
let mut parser = Parser::new();
let language: Language = tree_sitter_cpp::language().into();
parser
.set_language(&language)
.map_err(|e| anyhow!("Failed to set C++ language: {}", e))?;
parsers.insert("cpp".to_string(), parser);
languages.insert("cpp".to_string(), language);
}
if parsers.is_empty() {
return Err(anyhow!(
"No language parsers available. Enable at least one language feature."
@@ -251,6 +295,10 @@ impl TreeSitterSearcher {
("python", Some("py")) => true,
("javascript" | "js", Some("js" | "jsx" | "mjs")) => true,
("typescript" | "ts", Some("ts" | "tsx")) => true,
("go", Some("go")) => true,
("java", Some("java")) => true,
("c", Some("c" | "h")) => true,
("cpp", Some("cpp" | "cc" | "cxx" | "hpp" | "hxx" | "h")) => true,
_ => false,
}
}

View File

@@ -1093,7 +1093,7 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request:
For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\".
If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense.
For working with code, prioritize use of code_search tool over read_file, first.
Additional examples for the 'code_search' tool:
- Find functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_functions\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"paths\": [\"src/\"]}]}}
@@ -1113,7 +1113,7 @@ Do not explain what you're going to do - just do it by calling the tools.
# Task Management
Use todo_read and todo_write for tasks with 2+ steps, multiple files/components, or uncertain scope.
Use todo_read and todo_write for all but the simplest of tasks.
Workflow:
- Start: read → write checklist
@@ -1908,7 +1908,7 @@ Template:
// Add code_search tool
tools.push(Tool {
name: "code_search".to_string(),
description: "Batch syntax-aware code searches using embedded tree-sitter. Supports up to 20 searches in parallel for Rust, Python, JavaScript, and TypeScript. Uses tree-sitter query syntax (S-expressions).".to_string(),
description: "Batch syntax-aware code searches using embedded tree-sitter. Supports up to 20 searches in parallel for Rust, Python, JavaScript, TypeScript, Go, Java, C, and C++. Uses tree-sitter query syntax (S-expressions).".to_string(),
input_schema: json!({
"type": "object",
"properties": {
@@ -1920,7 +1920,7 @@ Template:
"properties": {
"name": { "type": "string", "description": "Label for this search." },
"query": { "type": "string", "description": "tree-sitter query in S-expression format (e.g., \"(function_item name: (identifier) @name)\")"},
"language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript"], "description": "Programming language to search." },
"language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript", "go", "java", "c", "cpp"], "description": "Programming language to search." },
"paths": { "type": "array", "items": { "type": "string" }, "description": "Paths/dirs to search. Defaults to current dir if empty." },
"context_lines": { "type": "integer", "minimum": 0, "maximum": 20, "default": 0, "description": "Lines of context to include around each match." }
},

View File

@@ -410,3 +410,106 @@ class MyClass {
// Cleanup
fs::remove_dir_all(&test_dir).ok();
}
/// Runs a single Go tree-sitter query over `examples/test_code` and checks
/// that the `@name` captures include the `main` and `greet` functions.
///
/// NOTE(review): relies on Go fixture files living under `examples/test_code`
/// relative to the directory the test is run from; the fixtures are not
/// visible from this test.
#[tokio::test]
async fn test_go_search() {
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
            name: "go_functions".to_string(),
            query: "(function_declaration name: (identifier) @name)".to_string(),
            language: "go".to_string(),
            paths: vec!["examples/test_code".to_string()],
            context_lines: 0,
        }],
        max_concurrency: 4,
        max_matches_per_search: 500,
    };

    let response = execute_code_search(request)
        .await
        .expect("Go code search should succeed");
    assert_eq!(response.searches.len(), 1);

    // Bind the single result once instead of re-indexing for every assertion.
    let matches = &response.searches[0].matches;
    assert!(
        !matches.is_empty(),
        "expected at least one Go function_declaration match"
    );

    // Collect every captured function name so failures can report what was found.
    let names: Vec<&str> = matches
        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    for expected in ["main", "greet"] {
        assert!(
            names.contains(&expected),
            "missing Go function `{}`; found {:?}",
            expected,
            names
        );
    }
}
/// Runs a single Java tree-sitter query over `examples/test_code` and checks
/// that the `@name` captures include the `Example` class.
///
/// NOTE(review): relies on Java fixture files living under
/// `examples/test_code` relative to the directory the test is run from; the
/// fixtures are not visible from this test.
#[tokio::test]
async fn test_java_search() {
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
            name: "java_classes".to_string(),
            query: "(class_declaration name: (identifier) @name)".to_string(),
            language: "java".to_string(),
            paths: vec!["examples/test_code".to_string()],
            context_lines: 0,
        }],
        max_concurrency: 4,
        max_matches_per_search: 500,
    };

    let response = execute_code_search(request)
        .await
        .expect("Java code search should succeed");
    assert_eq!(response.searches.len(), 1);

    // Bind the single result once instead of re-indexing for every assertion.
    let matches = &response.searches[0].matches;
    assert!(
        !matches.is_empty(),
        "expected at least one Java class_declaration match"
    );

    // Collect every captured class name so a failure can report what was found.
    let names: Vec<&str> = matches
        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    assert!(
        names.contains(&"Example"),
        "missing Java class `Example`; found {:?}",
        names
    );
}
/// Runs a single C tree-sitter query over `examples/test_code` and checks
/// that the `@name` captures include the `greet`, `add`, and `main` functions.
///
/// NOTE(review): relies on C fixture files living under `examples/test_code`
/// relative to the directory the test is run from; the fixtures are not
/// visible from this test.
#[tokio::test]
async fn test_c_search() {
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
            name: "c_functions".to_string(),
            query: "(function_definition declarator: (function_declarator declarator: (identifier) @name))".to_string(),
            language: "c".to_string(),
            paths: vec!["examples/test_code".to_string()],
            context_lines: 0,
        }],
        max_concurrency: 4,
        max_matches_per_search: 500,
    };

    let response = execute_code_search(request)
        .await
        .expect("C code search should succeed");
    assert_eq!(response.searches.len(), 1);

    // Bind the single result once instead of re-indexing for every assertion.
    let matches = &response.searches[0].matches;
    assert!(
        !matches.is_empty(),
        "expected at least one C function_definition match"
    );

    // Collect every captured function name so failures can report what was found.
    let names: Vec<&str> = matches
        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    for expected in ["greet", "add", "main"] {
        assert!(
            names.contains(&expected),
            "missing C function `{}`; found {:?}",
            expected,
            names
        );
    }
}
/// Runs a single C++ tree-sitter query over `examples/test_code` and checks
/// that the `@name` captures include the `Person` class.
///
/// NOTE(review): relies on C++ fixture files living under
/// `examples/test_code` relative to the directory the test is run from; the
/// fixtures are not visible from this test.
#[tokio::test]
async fn test_cpp_search() {
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
            name: "cpp_classes".to_string(),
            query: "(class_specifier name: (type_identifier) @name)".to_string(),
            language: "cpp".to_string(),
            paths: vec!["examples/test_code".to_string()],
            context_lines: 0,
        }],
        max_concurrency: 4,
        max_matches_per_search: 500,
    };

    let response = execute_code_search(request)
        .await
        .expect("C++ code search should succeed");
    assert_eq!(response.searches.len(), 1);

    // Bind the single result once instead of re-indexing for every assertion.
    let matches = &response.searches[0].matches;
    assert!(
        !matches.is_empty(),
        "expected at least one C++ class_specifier match"
    );

    // Collect every captured class name so a failure can report what was found.
    let names: Vec<&str> = matches
        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    assert!(
        names.contains(&"Person"),
        "missing C++ class `Person`; found {:?}",
        names
    );
}