Add more languages to tree-sitter code search (Go, Java, C, C++)
This commit is contained in:
44
Cargo.lock
generated
44
Cargo.lock
generated
@@ -1392,6 +1392,10 @@ dependencies = [
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
"tree-sitter",
|
||||
"tree-sitter-c",
|
||||
"tree-sitter-cpp",
|
||||
"tree-sitter-go",
|
||||
"tree-sitter-java",
|
||||
"tree-sitter-javascript",
|
||||
"tree-sitter-python",
|
||||
"tree-sitter-rust",
|
||||
@@ -3637,6 +3641,46 @@ dependencies = [
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-c"
|
||||
version = "0.21.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f956d5351d62652864a4ff3ae861747e7a1940dc96c9998ae400ac0d3ce30427"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-cpp"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e058d4b9cefb54a8f322b31a1bd3cd306919b70b729523473b5aad8d315a8897"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-go"
|
||||
version = "0.21.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b8d702a98d3c7e70e466456e58ff2b1ac550bf1e29b97e5770676d2fdabec00d"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-java"
|
||||
version = "0.21.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "33bc21adf831a773c075d9d00107ab43965e6a6ea7607b47fd9ec6f3db4b481b"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-javascript"
|
||||
version = "0.21.4"
|
||||
|
||||
@@ -94,7 +94,7 @@ These commands give you fine-grained control over context management, allowing y
|
||||
- Screenshot capture and window management
|
||||
- OCR text extraction from images and screen regions
|
||||
- Window listing and identification
|
||||
- **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript) - see [Code Search Guide](docs/CODE_SEARCH.md)
|
||||
- **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript, Go, Java, C, C++) - see [Code Search Guide](docs/CODE_SEARCH.md)
|
||||
- **Final Output**: Formatted result presentation
|
||||
|
||||
### Provider Flexibility
|
||||
|
||||
@@ -33,4 +33,8 @@ tree-sitter-rust = "0.21"
|
||||
tree-sitter-python = "0.21"
|
||||
tree-sitter-javascript = "0.21"
|
||||
tree-sitter-typescript = "0.21"
|
||||
tree-sitter-go = "0.21"
|
||||
tree-sitter-java = "0.21"
|
||||
tree-sitter-c = "0.21"
|
||||
tree-sitter-cpp = "0.21"
|
||||
walkdir = "2.4"
|
||||
|
||||
@@ -74,6 +74,50 @@ impl TreeSitterSearcher {
|
||||
languages.insert("ts".to_string(), language.clone());
|
||||
}
|
||||
|
||||
// Initialize Go
|
||||
{
|
||||
let mut parser = Parser::new();
|
||||
let language: Language = tree_sitter_go::language().into();
|
||||
parser
|
||||
.set_language(&language)
|
||||
.map_err(|e| anyhow!("Failed to set Go language: {}", e))?;
|
||||
parsers.insert("go".to_string(), parser);
|
||||
languages.insert("go".to_string(), language);
|
||||
}
|
||||
|
||||
// Initialize Java
|
||||
{
|
||||
let mut parser = Parser::new();
|
||||
let language: Language = tree_sitter_java::language().into();
|
||||
parser
|
||||
.set_language(&language)
|
||||
.map_err(|e| anyhow!("Failed to set Java language: {}", e))?;
|
||||
parsers.insert("java".to_string(), parser);
|
||||
languages.insert("java".to_string(), language);
|
||||
}
|
||||
|
||||
// Initialize C
|
||||
{
|
||||
let mut parser = Parser::new();
|
||||
let language: Language = tree_sitter_c::language().into();
|
||||
parser
|
||||
.set_language(&language)
|
||||
.map_err(|e| anyhow!("Failed to set C language: {}", e))?;
|
||||
parsers.insert("c".to_string(), parser);
|
||||
languages.insert("c".to_string(), language);
|
||||
}
|
||||
|
||||
// Initialize C++
|
||||
{
|
||||
let mut parser = Parser::new();
|
||||
let language: Language = tree_sitter_cpp::language().into();
|
||||
parser
|
||||
.set_language(&language)
|
||||
.map_err(|e| anyhow!("Failed to set C++ language: {}", e))?;
|
||||
parsers.insert("cpp".to_string(), parser);
|
||||
languages.insert("cpp".to_string(), language);
|
||||
}
|
||||
|
||||
if parsers.is_empty() {
|
||||
return Err(anyhow!(
|
||||
"No language parsers available. Enable at least one language feature."
|
||||
@@ -251,6 +295,10 @@ impl TreeSitterSearcher {
|
||||
("python", Some("py")) => true,
|
||||
("javascript" | "js", Some("js" | "jsx" | "mjs")) => true,
|
||||
("typescript" | "ts", Some("ts" | "tsx")) => true,
|
||||
("go", Some("go")) => true,
|
||||
("java", Some("java")) => true,
|
||||
("c", Some("c" | "h")) => true,
|
||||
("cpp", Some("cpp" | "cc" | "cxx" | "hpp" | "hxx" | "h")) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1093,7 +1093,7 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request:
|
||||
For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\".
|
||||
If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
|
||||
|
||||
For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense.
|
||||
For working with code, prioritize use of code_search tool over read_file, first.
|
||||
|
||||
Additional examples for the 'code_search' tool:
|
||||
- Find functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_functions\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"paths\": [\"src/\"]}]}}
|
||||
@@ -1113,7 +1113,7 @@ Do not explain what you're going to do - just do it by calling the tools.
|
||||
|
||||
# Task Management
|
||||
|
||||
Use todo_read and todo_write for tasks with 2+ steps, multiple files/components, or uncertain scope.
|
||||
Use todo_read and todo_write for all but the simplest of tasks.
|
||||
|
||||
Workflow:
|
||||
- Start: read → write checklist
|
||||
@@ -1908,7 +1908,7 @@ Template:
|
||||
// Add code_search tool
|
||||
tools.push(Tool {
|
||||
name: "code_search".to_string(),
|
||||
description: "Batch syntax-aware code searches using embedded tree-sitter. Supports up to 20 searches in parallel for Rust, Python, JavaScript, and TypeScript. Uses tree-sitter query syntax (S-expressions).".to_string(),
|
||||
description: "Batch syntax-aware code searches using embedded tree-sitter. Supports up to 20 searches in parallel for Rust, Python, JavaScript, TypeScript, Go, Java, C, and C++. Uses tree-sitter query syntax (S-expressions).".to_string(),
|
||||
input_schema: json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@@ -1920,7 +1920,7 @@ Template:
|
||||
"properties": {
|
||||
"name": { "type": "string", "description": "Label for this search." },
|
||||
"query": { "type": "string", "description": "tree-sitter query in S-expression format (e.g., \"(function_item name: (identifier) @name)\")"},
|
||||
"language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript"], "description": "Programming language to search." },
|
||||
"language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript", "go", "java", "c", "cpp"], "description": "Programming language to search." },
|
||||
"paths": { "type": "array", "items": { "type": "string" }, "description": "Paths/dirs to search. Defaults to current dir if empty." },
|
||||
"context_lines": { "type": "integer", "minimum": 0, "maximum": 20, "default": 0, "description": "Lines of context to include around each match." }
|
||||
},
|
||||
|
||||
@@ -410,3 +410,106 @@ class MyClass {
|
||||
// Cleanup
|
||||
fs::remove_dir_all(&test_dir).ok();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_go_search() {
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
name: "go_functions".to_string(),
|
||||
query: "(function_declaration name: (identifier) @name)".to_string(),
|
||||
language: "go".to_string(),
|
||||
paths: vec!["examples/test_code".to_string()],
|
||||
context_lines: 0,
|
||||
}],
|
||||
max_concurrency: 4,
|
||||
max_matches_per_search: 500,
|
||||
};
|
||||
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
assert!(response.searches[0].matches.len() > 0);
|
||||
|
||||
// Should find main and greet functions
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"main"));
|
||||
assert!(names.contains(&"greet"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_java_search() {
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
name: "java_classes".to_string(),
|
||||
query: "(class_declaration name: (identifier) @name)".to_string(),
|
||||
language: "java".to_string(),
|
||||
paths: vec!["examples/test_code".to_string()],
|
||||
context_lines: 0,
|
||||
}],
|
||||
max_concurrency: 4,
|
||||
max_matches_per_search: 500,
|
||||
};
|
||||
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
assert!(response.searches[0].matches.len() > 0);
|
||||
|
||||
// Should find Example class
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"Example"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_c_search() {
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
name: "c_functions".to_string(),
|
||||
query: "(function_definition declarator: (function_declarator declarator: (identifier) @name))".to_string(),
|
||||
language: "c".to_string(),
|
||||
paths: vec!["examples/test_code".to_string()],
|
||||
context_lines: 0,
|
||||
}],
|
||||
max_concurrency: 4,
|
||||
max_matches_per_search: 500,
|
||||
};
|
||||
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
assert!(response.searches[0].matches.len() > 0);
|
||||
|
||||
// Should find greet, add, and main functions
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"greet"));
|
||||
assert!(names.contains(&"add"));
|
||||
assert!(names.contains(&"main"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_cpp_search() {
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
name: "cpp_classes".to_string(),
|
||||
query: "(class_specifier name: (type_identifier) @name)".to_string(),
|
||||
language: "cpp".to_string(),
|
||||
paths: vec!["examples/test_code".to_string()],
|
||||
context_lines: 0,
|
||||
}],
|
||||
max_concurrency: 4,
|
||||
max_matches_per_search: 500,
|
||||
};
|
||||
|
||||
let response = execute_code_search(request).await.unwrap();
|
||||
assert_eq!(response.searches.len(), 1);
|
||||
assert!(response.searches[0].matches.len() > 0);
|
||||
|
||||
// Should find Person class
|
||||
let names: Vec<&str> = response.searches[0].matches.iter()
|
||||
.filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
|
||||
.collect();
|
||||
assert!(names.contains(&"Person"));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user