From ad9ba5e5d8f5549bba118e872760b34235167803 Mon Sep 17 00:00:00 2001 From: Jochen Date: Sat, 1 Nov 2025 14:59:55 +1100 Subject: [PATCH 1/3] added ast-grep use g3 tool use of ast-grep command with batching for faster code exploration. --- Cargo.lock | 20 + crates/g3-core/Cargo.toml | 1 + crates/g3-core/src/code_search.rs | 787 ++++++++++++++++++++++++++++++ crates/g3-core/src/lib.rs | 112 ++++- 4 files changed, 919 insertions(+), 1 deletion(-) create mode 100644 crates/g3-core/src/code_search.rs diff --git a/Cargo.lock b/Cargo.lock index 05e1252..4431c1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1391,6 +1391,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "serde_yaml", "shellexpand", "thiserror 1.0.69", "tokio", @@ -3078,6 +3079,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha2" version = "0.10.9" @@ -3667,6 +3681,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "url" version = "2.5.7" diff --git a/crates/g3-core/Cargo.toml b/crates/g3-core/Cargo.toml index 0e86851..67f9a2e 100644 --- a/crates/g3-core/Cargo.toml +++ b/crates/g3-core/Cargo.toml @@ -25,3 +25,4 @@ chrono = { version = "0.4", features = ["serde"] } rand = "0.8" regex = "1.0" shellexpand = "3.1" +serde_yaml = "0.9" diff --git a/crates/g3-core/src/code_search.rs b/crates/g3-core/src/code_search.rs new file mode 100644 index 0000000..1e4d38f --- /dev/null +++ b/crates/g3-core/src/code_search.rs @@ -0,0 +1,787 @@ +//! Code search functionality using ast-grep for syntax-aware semantic searches + +use anyhow::{anyhow, Result}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; +use std::process::Stdio; +use std::time::{Duration, Instant}; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::process::Command; +use tokio::sync::Semaphore; +use tracing::{debug, error, info, warn}; + +/// Maximum number of searches allowed per request +const MAX_SEARCHES: usize = 20; + +/// Default timeout for individual searches in seconds +const DEFAULT_TIMEOUT_SECS: u64 = 60; + +/// Default maximum concurrency +const DEFAULT_MAX_CONCURRENCY: usize = 4; + +/// Default maximum matches per search +const DEFAULT_MAX_MATCHES: usize = 500; + +/// Search specification for a single ast-grep search +#[derive(Debug, Clone, Deserialize)] +pub struct SearchSpec { + pub name: String, + pub mode: SearchMode, + + // Pattern mode fields + pub pattern: Option, + pub language: Option, + + // YAML mode fields + pub rule_yaml: Option, + + // Common fields + pub paths: Option>, + pub globs: Option>, + pub json_style: Option, + pub context: Option, + pub threads: Option, + pub include_metadata: Option, + pub no_ignore: Option>, + pub severity: Option>, + pub timeout_secs: Option, +} + +/// Search mode: pattern or yaml +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum SearchMode { + Pattern, + Yaml, +} + +/// JSON output style +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum JsonStyle { + Pretty, + Stream, + Compact, +} + +impl Default for JsonStyle { + fn default() -> Self { + JsonStyle::Stream + } +} + +/// No-ignore types +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum NoIgnoreType { + Hidden, + Dot, + Exclude, + Global, + Parent, + Vcs, +} + +/// Severity levels for YAML rules +#[derive(Debug, Clone, Deserialize, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum SeverityLevel { + Error, + Warning, + Info, + Hint, + Off, +} + +/// Request structure for code search +#[derive(Debug, Deserialize)] +pub struct CodeSearchRequest { + pub searches: Vec, + pub max_concurrency: Option, + pub max_matches_per_search: Option, +} + +/// Result of a single search +#[derive(Debug, Serialize)] +pub struct SearchResult { + pub name: String, + pub mode: String, + pub status: String, + pub cmd: Vec, + pub match_count: Option, + pub truncated: Option, + pub matches: Option>, + pub stderr: Option, + pub exit_code: Option, + pub duration_ms: u64, +} + +/// Summary of all searches +#[derive(Debug, Serialize)] +pub struct SearchSummary { + pub completed: usize, + pub total: usize, + pub total_matches: usize, + pub duration_ms: u64, +} + +/// Complete response structure +#[derive(Debug, Serialize)] +pub struct CodeSearchResponse { + pub summary: SearchSummary, + pub searches: Vec, +} + +/// YAML rule structure for validation +#[derive(Debug, Deserialize)] +struct YamlRule { + pub id: String, + pub language: String, + pub rule: Value, +} + +/// Execute a batch of code searches using ast-grep +pub async fn execute_code_search(request: CodeSearchRequest) -> Result { + let start_time = Instant::now(); + + // Validate request + if request.searches.is_empty() { + return Err(anyhow!("No searches specified")); + } + + if request.searches.len() > MAX_SEARCHES { + return Err(anyhow!( + "Too many searches: {} (max: {})", + request.searches.len(), + MAX_SEARCHES + )); + } + + // Check if ast-grep is available + check_ast_grep_available().await?; + + let max_concurrency = request.max_concurrency.unwrap_or(DEFAULT_MAX_CONCURRENCY); + let max_matches = request.max_matches_per_search.unwrap_or(DEFAULT_MAX_MATCHES); + + // Create semaphore for concurrency control + let semaphore = std::sync::Arc::new(Semaphore::new(max_concurrency)); + + // Execute searches concurrently + let mut tasks = Vec::new(); + + for search in request.searches { + let sem = semaphore.clone(); + let task = tokio::spawn(async move { + let _permit = sem.acquire().await.unwrap(); + execute_single_search(search, max_matches).await + }); + tasks.push(task); + } + + // Wait for all searches to complete + let mut results = Vec::new(); + let mut total_matches = 0; + let mut completed = 0; + + for task in tasks { + match task.await { + Ok(result) => { + if result.status == "ok" { + completed += 1; + if let Some(count) = result.match_count { + total_matches += count; + } + } + results.push(result); + } + Err(e) => { + error!("Task join error: {}", e); + // Create an error result + results.push(SearchResult { + name: "unknown".to_string(), + mode: "unknown".to_string(), + status: "error".to_string(), + cmd: vec![], + match_count: None, + truncated: None, + matches: None, + stderr: Some(format!("Task execution error: {}", e)), + exit_code: None, + duration_ms: 0, + }); + } + } + } + + let total_duration = start_time.elapsed(); + + Ok(CodeSearchResponse { + summary: SearchSummary { + completed, + total: results.len(), + total_matches, + duration_ms: total_duration.as_millis() as u64, + }, + searches: results, + }) +} + +/// Execute a single search +async fn execute_single_search(search: SearchSpec, max_matches: usize) -> SearchResult { + let start_time = Instant::now(); + let timeout_secs = search.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS); + + // Validate the search specification + if let Err(e) = validate_search_spec(&search) { + return SearchResult { + name: search.name, + mode: format!("{:?}", search.mode).to_lowercase(), + status: "error".to_string(), + cmd: vec![], + match_count: None, + truncated: None, + matches: None, + stderr: Some(format!("Validation error: {}", e)), + exit_code: None, + duration_ms: start_time.elapsed().as_millis() as u64, + }; + } + + // Build command + let cmd_args = match build_ast_grep_command(&search) { + Ok(args) => args, + Err(e) => { + return SearchResult { + name: search.name, + mode: format!("{:?}", search.mode).to_lowercase(), + status: "error".to_string(), + cmd: vec![], + match_count: None, + truncated: None, + matches: None, + stderr: Some(format!("Command build error: {}", e)), + exit_code: None, + duration_ms: start_time.elapsed().as_millis() as u64, + }; + } + }; + + debug!("Executing ast-grep command: {:?}", cmd_args); + + // Execute with timeout + let timeout_duration = Duration::from_secs(timeout_secs); + + match tokio::time::timeout(timeout_duration, run_ast_grep_command(&cmd_args)).await { + Ok(Ok((stdout, stderr, exit_code))) => { + let duration_ms = start_time.elapsed().as_millis() as u64; + + if exit_code == 0 { + // Parse JSON output + match parse_ast_grep_output(&stdout, max_matches) { + Ok((matches, truncated)) => { + SearchResult { + name: search.name, + mode: format!("{:?}", search.mode).to_lowercase(), + status: "ok".to_string(), + cmd: cmd_args, + match_count: Some(matches.len()), + truncated: Some(truncated), + matches: Some(matches), + stderr: if stderr.is_empty() { None } else { Some(stderr) }, + exit_code: None, + duration_ms, + } + } + Err(e) => { + SearchResult { + name: search.name, + mode: format!("{:?}", search.mode).to_lowercase(), + status: "error".to_string(), + cmd: cmd_args, + match_count: None, + truncated: None, + matches: None, + stderr: Some(format!("JSON parse error: {}\nRaw output: {}", e, stdout)), + exit_code: Some(exit_code), + duration_ms, + } + } + } + } else { + SearchResult { + name: search.name, + mode: format!("{:?}", search.mode).to_lowercase(), + status: "error".to_string(), + cmd: cmd_args, + match_count: None, + truncated: None, + matches: None, + stderr: Some(stderr), + exit_code: Some(exit_code), + duration_ms, + } + } + } + Ok(Err(e)) => { + SearchResult { + name: search.name, + mode: format!("{:?}", search.mode).to_lowercase(), + status: "error".to_string(), + cmd: cmd_args, + match_count: None, + truncated: None, + matches: None, + stderr: Some(format!("Execution error: {}", e)), + exit_code: None, + duration_ms: start_time.elapsed().as_millis() as u64, + } + } + Err(_) => { + SearchResult { + name: search.name, + mode: format!("{:?}", search.mode).to_lowercase(), + status: "timeout".to_string(), + cmd: cmd_args, + match_count: None, + truncated: None, + matches: None, + stderr: Some(format!("Search timed out after {} seconds", timeout_secs)), + exit_code: None, + duration_ms: start_time.elapsed().as_millis() as u64, + } + } + } +} + +/// Validate a search specification +fn validate_search_spec(search: &SearchSpec) -> Result<()> { + match search.mode { + SearchMode::Pattern => { + if search.pattern.is_none() || search.pattern.as_ref().unwrap().is_empty() { + return Err(anyhow!("Pattern mode requires non-empty 'pattern' field")); + } + } + SearchMode::Yaml => { + let rule_yaml = search.rule_yaml.as_ref() + .ok_or_else(|| anyhow!("YAML mode requires 'rule_yaml' field"))?; + + if rule_yaml.is_empty() { + return Err(anyhow!("YAML mode requires non-empty 'rule_yaml' field")); + } + + // Parse and validate YAML structure + let parsed: YamlRule = serde_yaml::from_str(rule_yaml) + .map_err(|e| anyhow!("Invalid YAML rule: {}", e))?; + + if parsed.id.is_empty() { + return Err(anyhow!("YAML rule must have non-empty 'id' field")); + } + + if parsed.language.is_empty() { + return Err(anyhow!("YAML rule must have non-empty 'language' field")); + } + + // Validate language is supported (basic check) + validate_language(&parsed.language)?; + } + } + + // Validate context range + if let Some(context) = search.context { + if context > 20 { + return Err(anyhow!("Context lines cannot exceed 20")); + } + } + + Ok(()) +} + +/// Validate that a language is supported by ast-grep +fn validate_language(language: &str) -> Result<()> { + let supported_languages = [ + "rust", "javascript", "typescript", "python", "java", "c", "cpp", "csharp", + "go", "html", "css", "json", "yaml", "xml", "bash", "kotlin", "swift", + "php", "ruby", "scala", "dart", "lua", "r", "sql", "dockerfile", + "Rust", "JavaScript", "TypeScript", "Python", "Java", "C", "Cpp", "CSharp", + "Go", "Html", "Css", "Json", "Yaml", "Xml", "Bash", "Kotlin", "Swift", + "Php", "Ruby", "Scala", "Dart", "Lua", "R", "Sql", "Dockerfile" + ]; + + if !supported_languages.contains(&language) { + warn!("Language '{}' may not be supported by ast-grep", language); + } + + Ok(()) +} + +/// Build ast-grep command arguments +fn build_ast_grep_command(search: &SearchSpec) -> Result> { + let mut args = vec!["ast-grep".to_string()]; + + match search.mode { + SearchMode::Pattern => { + args.push("run".to_string()); + + // Add pattern + args.push("-p".to_string()); + args.push(search.pattern.as_ref().unwrap().clone()); + + // Add language if specified + if let Some(ref lang) = search.language { + args.push("-l".to_string()); + args.push(lang.clone()); + } + } + SearchMode::Yaml => { + args.push("scan".to_string()); + + // Add inline rules + args.push("--inline-rules".to_string()); + args.push(search.rule_yaml.as_ref().unwrap().clone()); + + // Add include-metadata if requested + if search.include_metadata.unwrap_or(false) { + args.push("--include-metadata".to_string()); + } + + // Add severity overrides + if let Some(ref severity_map) = search.severity { + for (rule_id, severity) in severity_map { + match severity { + SeverityLevel::Error => { + args.push("--error".to_string()); + args.push(rule_id.clone()); + } + SeverityLevel::Warning => { + args.push("--warning".to_string()); + args.push(rule_id.clone()); + } + SeverityLevel::Info => { + args.push("--info".to_string()); + args.push(rule_id.clone()); + } + SeverityLevel::Hint => { + args.push("--hint".to_string()); + args.push(rule_id.clone()); + } + SeverityLevel::Off => { + args.push("--off".to_string()); + args.push(rule_id.clone()); + } + } + } + } + } + } + + // Add common arguments + + // Add globs if specified + if let Some(ref globs) = search.globs { + if !globs.is_empty() { + args.push("--globs".to_string()); + args.push(globs.join(",")); + } + } + + // Add context + if let Some(context) = search.context { + args.push("-C".to_string()); + args.push(context.to_string()); + } + + // Add threads + if let Some(threads) = search.threads { + args.push("-j".to_string()); + args.push(threads.to_string()); + } + + // Add JSON output style + let json_style = search.json_style.as_ref().unwrap_or(&JsonStyle::Stream); + let json_arg = match json_style { + JsonStyle::Pretty => "--json=pretty", + JsonStyle::Stream => "--json=stream", + JsonStyle::Compact => "--json=compact", + }; + args.push(json_arg.to_string()); + + // Add no-ignore options + if let Some(ref no_ignore_list) = search.no_ignore { + for no_ignore_type in no_ignore_list { + let flag = match no_ignore_type { + NoIgnoreType::Hidden => "--no-ignore=hidden", + NoIgnoreType::Dot => "--no-ignore=dot", + NoIgnoreType::Exclude => "--no-ignore=exclude", + NoIgnoreType::Global => "--no-ignore=global", + NoIgnoreType::Parent => "--no-ignore=parent", + NoIgnoreType::Vcs => "--no-ignore=vcs", + }; + args.push(flag.to_string()); + } + } + + // Add paths (default to current directory if none specified) + if let Some(ref paths) = search.paths { + if !paths.is_empty() { + args.extend(paths.clone()); + } else { + args.push(".".to_string()); + } + } else { + args.push(".".to_string()); + } + + Ok(args) +} + +/// Run ast-grep command and capture output +async fn run_ast_grep_command(args: &[String]) -> Result<(String, String, i32)> { + let mut cmd = Command::new(&args[0]); + cmd.args(&args[1..]); + cmd.stdout(Stdio::piped()); + cmd.stderr(Stdio::piped()); + + debug!("Running command: {:?}", args); + + let mut child = cmd.spawn() + .map_err(|e| anyhow!("Failed to spawn ast-grep process: {}", e))?; + + let stdout = child.stdout.take().unwrap(); + let stderr = child.stderr.take().unwrap(); + + let stdout_reader = BufReader::new(stdout); + let stderr_reader = BufReader::new(stderr); + + let stdout_task = tokio::spawn(async move { + let mut lines = stdout_reader.lines(); + let mut output = String::new(); + while let Ok(Some(line)) = lines.next_line().await { + if !output.is_empty() { + output.push('\n'); + } + output.push_str(&line); + } + output + }); + + let stderr_task = tokio::spawn(async move { + let mut lines = stderr_reader.lines(); + let mut output = String::new(); + while let Ok(Some(line)) = lines.next_line().await { + if !output.is_empty() { + output.push('\n'); + } + output.push_str(&line); + } + output + }); + + let status = child.wait().await + .map_err(|e| anyhow!("Failed to wait for ast-grep process: {}", e))?; + + let stdout_output = stdout_task.await + .map_err(|e| anyhow!("Failed to read stdout: {}", e))?; + let stderr_output = stderr_task.await + .map_err(|e| anyhow!("Failed to read stderr: {}", e))?; + + let exit_code = status.code().unwrap_or(-1); + + Ok((stdout_output, stderr_output, exit_code)) +} + +/// Parse ast-grep JSON output +fn parse_ast_grep_output(output: &str, max_matches: usize) -> Result<(Vec, bool)> { + if output.trim().is_empty() { + return Ok((vec![], false)); + } + + let mut matches = Vec::new(); + let mut truncated = false; + + // Handle stream format (line-delimited JSON) + for line in output.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + match serde_json::from_str::(line) { + Ok(match_obj) => { + if matches.len() >= max_matches { + truncated = true; + break; + } + matches.push(match_obj); + } + Err(e) => { + debug!("Failed to parse JSON line '{}': {}", line, e); + // Try to parse the entire output as a single JSON array + match serde_json::from_str::>(output) { + Ok(array_matches) => { + let take_count = array_matches.len().min(max_matches); + let total_count = array_matches.len(); + matches = array_matches.into_iter().take(take_count).collect(); + truncated = take_count < total_count; + break; + } + Err(e2) => { + return Err(anyhow!( + "Failed to parse ast-grep output as line-delimited JSON or JSON array. Line error: {}, Array error: {}", + e, e2 + )); + } + } + } + } + } + + Ok((matches, truncated)) +} + +/// Check if ast-grep is available and provide installation hints if not +async fn check_ast_grep_available() -> Result<()> { + match Command::new("ast-grep") + .arg("--version") + .output() + .await + { + Ok(output) => { + if output.status.success() { + let version = String::from_utf8_lossy(&output.stdout); + info!("Found ast-grep: {}", version.trim()); + Ok(()) + } else { + Err(anyhow!("ast-grep command failed: {}", String::from_utf8_lossy(&output.stderr))) + } + } + Err(_) => { + Err(anyhow!( + "ast-grep not found. Please install it using one of these methods:\n\n\ + • Homebrew (macOS): brew install ast-grep\n\ + • MacPorts (macOS): sudo port install ast-grep\n\ + • Nix: nix-env -iA nixpkgs.ast-grep\n\ + • Cargo: cargo install ast-grep\n\ + • npm: npm install -g @ast-grep/cli\n\ + • pip: pip install ast-grep\n\n\ + For more installation options, visit: https://ast-grep.github.io/guide/quick-start.html" + )) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_validate_pattern_search() { + let search = SearchSpec { + name: "test".to_string(), + mode: SearchMode::Pattern, + pattern: Some("fn $NAME() {}".to_string()), + language: Some("rust".to_string()), + rule_yaml: None, + paths: None, + globs: None, + json_style: None, + context: None, + threads: None, + include_metadata: None, + no_ignore: None, + severity: None, + timeout_secs: None, + }; + + assert!(validate_search_spec(&search).is_ok()); + } + + #[test] + fn test_validate_yaml_search() { + let yaml_rule = r#" +id: test-rule +language: Rust +rule: + pattern: "fn $NAME() {}" +"#; + + let search = SearchSpec { + name: "test".to_string(), + mode: SearchMode::Yaml, + pattern: None, + language: None, + rule_yaml: Some(yaml_rule.to_string()), + paths: None, + globs: None, + json_style: None, + context: None, + threads: None, + include_metadata: None, + no_ignore: None, + severity: None, + timeout_secs: None, + }; + + assert!(validate_search_spec(&search).is_ok()); + } + + #[test] + fn test_build_pattern_command() { + let search = SearchSpec { + name: "test".to_string(), + mode: SearchMode::Pattern, + pattern: Some("fn $NAME() {}".to_string()), + language: Some("rust".to_string()), + rule_yaml: None, + paths: Some(vec!["src/".to_string()]), + globs: None, + json_style: Some(JsonStyle::Stream), + context: Some(2), + threads: Some(4), + include_metadata: None, + no_ignore: None, + severity: None, + timeout_secs: None, + }; + + let cmd = build_ast_grep_command(&search).unwrap(); + + assert_eq!(cmd[0], "ast-grep"); + assert_eq!(cmd[1], "run"); + assert!(cmd.contains(&"-p".to_string())); + assert!(cmd.contains(&"fn $NAME() {}".to_string())); + assert!(cmd.contains(&"-l".to_string())); + assert!(cmd.contains(&"rust".to_string())); + assert!(cmd.contains(&"--json=stream".to_string())); + assert!(cmd.contains(&"-C".to_string())); + assert!(cmd.contains(&"2".to_string())); + assert!(cmd.contains(&"-j".to_string())); + assert!(cmd.contains(&"4".to_string())); + assert!(cmd.contains(&"src/".to_string())); + } + + #[test] + fn test_parse_stream_json() { + let output = r#"{"file":"test.rs","text":"fn hello() {}"} +{"file":"test2.rs","text":"fn world() {}"}"#; + + let (matches, truncated) = parse_ast_grep_output(output, 10).unwrap(); + + assert_eq!(matches.len(), 2); + assert!(!truncated); + assert_eq!(matches[0]["file"], "test.rs"); + assert_eq!(matches[1]["file"], "test2.rs"); + } + + #[test] + fn test_parse_truncated_output() { + let output = r#"{"file":"test1.rs","text":"fn a() {}"} +{"file":"test2.rs","text":"fn b() {}"} +{"file":"test3.rs","text":"fn c() {}"}"#; + + let (matches, truncated) = parse_ast_grep_output(output, 2).unwrap(); + + assert_eq!(matches.len(), 2); + assert!(truncated); + } +} diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 1b8b12e..e8f7569 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -2,6 +2,7 @@ pub mod error_handling; pub mod project; pub mod task_result; pub mod ui_writer; +pub mod code_search; pub use task_result::TaskResult; #[cfg(test)] @@ -1184,10 +1185,26 @@ The tool will execute immediately and you'll receive the result (success or erro # Instructions 1. Analyze the request and break down into smaller tasks if appropriate -2. Execute ONE tool at a time +2. Execute ONE tool at a time. An exception exists for when you're writing files. See below. 3. STOP when the original request was satisfied 4. Call the final_output tool when done +Exception to using ONE tool at a time: +If all you’re doing is WRITING files, and you don’t need to do anything else between each step. +You can issue MULTIPLE write_file tool calls in a request, however you may ONLY make a SINGLE write_file call for any file in that request. +For example you may call: +[START OF REQUEST] +write_file(\"helper.rs\", \"...\") +write_file(\"file2.txt\", \"...\") +[DONE] + +But NOT: +[START OF REQUEST] +write_file(\"helper.rs\", \"...\") +write_file(\"file2.txt\", \"...\") +write_file(\"helper.rs\", \"...\") +[DONE] + # Task Management Use todo_read and todo_write for tasks with 3+ steps, multiple files/components, or uncertain scope. @@ -1860,6 +1877,64 @@ Template: }), }, ]; + + // Add code_search tool + tools.push(Tool { + name: "code_search".to_string(), + description: "Batch syntax-aware searches via ast-grep. Supports up to 20 pattern or YAML-rule searches in parallel; returns JSON matches (stream-collated).".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "searches": { + "type": "array", + "maxItems": 20, + "items": { + "type": "object", + "properties": { + "name": { "type": "string", "description": "Label for this search." }, + "mode": { + "type": "string", + "enum": ["pattern", "yaml"], + "description": "`pattern` uses `ast-grep run`; `yaml` uses `ast-grep scan --inline-rules`." + }, + // pattern mode (fast one-off) + "pattern": { "type": "string", "description": "ast-grep pattern code (e.g., \"async fn $NAME($$$ARGS) { $$$ }\")"}, + "language": { "type": "string", "description": "Optional language for pattern mode; ast-grep may infer from file extensions if omitted." }, + // yaml mode (full rule object) + "rule_yaml": { "type": "string", "description": "A full YAML rule object text. Must include `id`, `language`, and `rule`." }, + // targeting + "paths": { "type": "array", "items": { "type": "string" }, "description": "Paths/dirs to search. Defaults to current dir if empty." }, + "globs": { "type": "array", "items": { "type": "string" }, "description": "Optional include/exclude globs for CLI --globs." }, + // result formatting & performance knobs + "json_style": { "type": "string", "enum": ["pretty","stream","compact"], "default": "stream", "description": "Use stream for large codebases." }, + "context": { "type": "integer", "minimum": 0, "maximum": 20, "default": 0, "description": "CLI -C context lines in text output; also affects JSON `lines` field." }, + "threads": { "type": "integer", "minimum": 1, "description": "Optional override for ast-grep -j (per process)." }, + "include_metadata": { "type": "boolean", "default": false, "description": "If yaml mode and rule has metadata, add --include-metadata." }, + // robustness + "no_ignore": { + "type": "array", + "items": { "type": "string", "enum": ["hidden","dot","exclude","global","parent","vcs"] }, + "description": "Forwarded to --no-ignore to bypass ignore files/hidden." + }, + // severity overrides for yaml mode + "severity": { + "type": "object", + "additionalProperties": { "type": "string", "enum": ["error","warning","info","hint","off"] }, + "description": "Optional map -> passed via --error/--warning/--info/--hint/--off." + }, + // per-search timeout seconds (default 60) + "timeout_secs": { "type": "integer", "minimum": 1, "default": 60 } + }, + "required": ["name","mode"] + } + }, + // global concurrency & truncation + "max_concurrency": { "type": "integer", "minimum": 1, "default": 4 }, + "max_matches_per_search": { "type": "integer", "minimum": 1, "default": 500 } + }, + "required": ["searches"] + }), + }); // Add WebDriver tools if enabled if enable_webdriver { @@ -4437,6 +4512,41 @@ Template: Ok("❌ Computer control not enabled. Set computer_control.enabled = true in config.".to_string()) } } + "code_search" => { + debug!("Processing code_search tool call"); + + // Parse the request + let request: crate::code_search::CodeSearchRequest = match serde_json::from_value(tool_call.args.clone()) { + Ok(req) => req, + Err(e) => { + return Ok(format!("❌ Invalid code_search arguments: {}", e)); + } + }; + + // Execute the code search + match crate::code_search::execute_code_search(request).await { + Ok(response) => { + // Serialize the response to JSON + match serde_json::to_string_pretty(&response) { + Ok(json_output) => { + Ok(format!("✅ Code search completed\n{}", json_output)) + } + Err(e) => { + Ok(format!("❌ Failed to serialize response: {}", e)) + } + } + } + Err(e) => { + // Check if it's an ast-grep not found error and provide helpful message + let error_msg = e.to_string(); + if error_msg.contains("ast-grep not found") { + Ok(format!("❌ {}", error_msg)) + } else { + Ok(format!("❌ Code search failed: {}", error_msg)) + } + } + } + } _ => { warn!("Unknown tool: {}", tool_call.tool); Ok(format!("❓ Unknown tool: {}", tool_call.tool)) From 982c0bbfb3d4e1bade8588d43218cf694a552974 Mon Sep 17 00:00:00 2001 From: Jochen Date: Sat, 1 Nov 2025 15:52:08 +1100 Subject: [PATCH 2/3] amend instructions for tool use --- crates/g3-core/src/code_search.rs | 4 ++-- crates/g3-core/src/lib.rs | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/crates/g3-core/src/code_search.rs b/crates/g3-core/src/code_search.rs index 1e4d38f..e4f7153 100644 --- a/crates/g3-core/src/code_search.rs +++ b/crates/g3-core/src/code_search.rs @@ -169,7 +169,7 @@ pub async fn execute_code_search(request: CodeSearchRequest) -> Result Search }; debug!("Executing ast-grep command: {:?}", cmd_args); - + // Execute with timeout let timeout_duration = Duration::from_secs(timeout_secs); diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index e8f7569..2e33655 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -1154,6 +1154,8 @@ The tool will execute immediately and you'll receive the result (success or erro # Available Tools +Short description for providers without native calling specs: + - **shell**: Execute shell commands - Format: {\"tool\": \"shell\", \"args\": {\"command\": \"your_command_here\"} - Example: {\"tool\": \"shell\", \"args\": {\"command\": \"ls ~/Downloads\"} @@ -1182,6 +1184,16 @@ The tool will execute immediately and you'll receive the result (success or erro - Format: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Task 1\\n- [ ] Task 2\"}} - Example: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Implement feature\\n - [ ] Write tests\\n - [ ] Run tests\"}} +- **code_search**: Batch syntax-aware searches via ast-grep. Supports up to 20 pattern or YAML-rule searches in parallel. + - Format: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"search_label\", \"mode\": \"pattern|yaml\", ...}], \"max_concurrency\": 4, \"max_matches_per_search\": 500}} + - Example for pattern mode: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_functions\", \"mode\": \"pattern\", \"pattern\": \"fn $NAME($$$ARGS) { $$$ }\", \"language\": \"rust\", \"paths\": [\"src/\"]}]}} + - Example for YAML mode: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_async\", \"mode\": \"yaml\", \"rule_yaml\": \"id: async-fn\nlanguage: Rust\nrule:\n pattern: async fn $NAME($$$) { $$$ }\"}]}} + - Example for multiple searches: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"mode\": \"pattern\", \"pattern\": \"fn $NAME\", \"language\": \"rust\"}, {\"name\": \"structs\", \"mode\": \"pattern\", \"pattern\": \"struct $NAME\", \"language\": \"rust\"}]}} + - Example for passing optional args like \"context\": {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"mode\": \"pattern\", \"context\": 3, \"pattern\": \"fn $NAME\", \"language\": \"rust\"}]} + - Common optional args for searches: + - \"context\": 3 (show surrounding lines), + - \"json_style\": \"stream\" (for large results) + # Instructions 1. Analyze the request and break down into smaller tasks if appropriate @@ -1189,6 +1201,8 @@ The tool will execute immediately and you'll receive the result (success or erro 3. STOP when the original request was satisfied 4. Call the final_output tool when done +For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense. + Exception to using ONE tool at a time: If all you’re doing is WRITING files, and you don’t need to do anything else between each step. You can issue MULTIPLE write_file tool calls in a request, however you may ONLY make a SINGLE write_file call for any file in that request. From a1ad94ed752002388f1af5072487459377d61d91 Mon Sep 17 00:00:00 2001 From: Jochen Date: Sun, 2 Nov 2025 21:02:43 +1100 Subject: [PATCH 3/3] Added comment & example for native flow detailed examples for using code_search tool for native tool use. --- crates/g3-core/src/lib.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 2e33655..ed7fc7f 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -1105,6 +1105,18 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request: For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\". If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir. +For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense. + +Additional examples for the 'code_search' tool: + - Example for pattern mode: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_functions\", \"mode\": \"pattern\", \"pattern\": \"fn $NAME($$$ARGS) { $$$ }\", \"language\": \"rust\", \"paths\": [\"src/\"]}]}} + - Example for YAML mode: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_async\", \"mode\": \"yaml\", \"rule_yaml\": \"id: async-fn\nlanguage: Rust\nrule:\n pattern: async fn $NAME($$$) { $$$ }\"}]}} + - Example for multiple searches: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"mode\": \"pattern\", \"pattern\": \"fn $NAME\", \"language\": \"rust\"}, {\"name\": \"structs\", \"mode\": \"pattern\", \"pattern\": \"struct $NAME\", \"language\": \"rust\"}]}} + - Example for passing optional args like \"context\": {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"mode\": \"pattern\", \"context\": 3, \"pattern\": \"fn $NAME\", \"language\": \"rust\"}]} + - Common optional args for searches: + - \"context\": 3 (show surrounding lines), + - \"json_style\": \"stream\" (for large results) + + IMPORTANT: If the user asks you to just respond with text (like \"just say hello\" or \"tell me about X\"), do NOT use tools. Simply respond with the requested text directly. Only use tools when you need to execute commands or complete tasks that require action. When taking screenshots of specific windows (like \"my Safari window\" or \"my terminal\"), ALWAYS use list_windows first to identify the correct window ID, then use take_screenshot with the window_id parameter.