diff --git a/crates/g3-core/src/context_window.rs b/crates/g3-core/src/context_window.rs new file mode 100644 index 0000000..a16f194 --- /dev/null +++ b/crates/g3-core/src/context_window.rs @@ -0,0 +1,828 @@ +//! Context window management for conversation history and token tracking. +//! +//! This module handles: +//! - Token counting and usage tracking +//! - Conversation history management +//! - Context thinning (reducing context size by saving large tool results to disk) +//! - Summarization triggers + +use g3_providers::{Message, MessageRole, Usage}; +use serde::{Deserialize, Serialize}; +use tracing::{debug, warn}; + +use crate::paths::get_thinned_dir; +use crate::ToolCall; + +/// Scope for context thinning operations +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ThinScope { + /// Process only the first third of messages (incremental thinning) + FirstThird, + /// Process all messages (aggressive thinning, aka "skinnify") + All, +} + +impl ThinScope { + fn label(&self) -> &'static str { + match self { + ThinScope::FirstThird => "thinned", + ThinScope::All => "skinnified", + } + } + + fn emoji(&self) -> &'static str { + match self { + ThinScope::FirstThird => "🥒", + ThinScope::All => "🦴", + } + } + + fn file_prefix(&self) -> &'static str { + match self { + ThinScope::FirstThird => "leaned", + ThinScope::All => "skinny", + } + } + + fn error_action(&self) -> &'static str { + match self { + ThinScope::FirstThird => "thinning", + ThinScope::All => "skinnifying", + } + } +} + +/// Represents a modification to be applied to a message +#[derive(Debug)] +enum ThinModification { + /// Replace the entire message content + ReplaceContent { index: usize, new_content: String, chars_saved: usize }, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ContextWindow { + pub used_tokens: u32, + pub total_tokens: u32, + pub cumulative_tokens: u32, // Track cumulative tokens across all interactions + pub conversation_history: Vec, + pub last_thinning_percentage: u32, // Track the last percentage at which we thinned +} + +impl ContextWindow { + pub fn new(total_tokens: u32) -> Self { + Self { + used_tokens: 0, + total_tokens, + cumulative_tokens: 0, + conversation_history: Vec::new(), + last_thinning_percentage: 0, + } + } + + pub fn add_message(&mut self, message: Message) { + self.add_message_with_tokens(message, None); + } + + /// Add a message with optional token count from the provider + pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option) { + // Skip messages with empty content to avoid API errors + if message.content.trim().is_empty() { + warn!("Skipping empty message to avoid API error"); + return; + } + + // Use provided token count if available, otherwise estimate + let token_count = tokens.unwrap_or_else(|| Self::estimate_tokens(&message.content)); + self.used_tokens += token_count; + self.cumulative_tokens += token_count; + self.conversation_history.push(message); + + debug!( + "Added message with {} tokens (used: {}/{}, cumulative: {})", + token_count, self.used_tokens, self.total_tokens, self.cumulative_tokens + ); + } + + /// Update token usage from provider response + /// NOTE: This only updates cumulative_tokens (total API usage tracking). + /// It does NOT update used_tokens because: + /// 1. prompt_tokens represents the ENTIRE context sent to API (already tracked via add_message) + /// 2. completion_tokens will be tracked when the assistant message is added via add_message + /// Adding total_tokens here would cause double/triple counting and break the 80% threshold check. + pub fn update_usage_from_response(&mut self, usage: &Usage) { + // Only update cumulative tokens for API usage tracking + // Do NOT update used_tokens - that's tracked via add_message to avoid double counting + self.cumulative_tokens += usage.total_tokens; + + debug!( + "Updated cumulative tokens: {} (used: {}/{}, cumulative: {})", + usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens + ); + } + + /// More accurate token estimation + pub fn estimate_tokens(text: &str) -> u32 { + // Better heuristic: + // - Average English text: ~4 characters per token + // - Code/JSON: ~3 characters per token (more symbols) + // - Add 10% buffer for safety + let base_estimate = if text.contains("{") || text.contains("```") || text.contains("fn ") { + (text.len() as f32 / 3.0).ceil() as u32 // Code/JSON + } else { + (text.len() as f32 / 4.0).ceil() as u32 // Regular text + }; + (base_estimate as f32 * 1.1).ceil() as u32 // Add 10% buffer + } + + pub fn update_usage(&mut self, usage: &Usage) { + // Deprecated: Use update_usage_from_response instead + self.update_usage_from_response(usage); + } + + /// Update cumulative token usage (for streaming) when no provider usage data is available + /// NOTE: This only updates cumulative_tokens, not used_tokens. + /// The assistant message will be added via add_message which tracks used_tokens. + pub fn add_streaming_tokens(&mut self, new_tokens: u32) { + // Only update cumulative tokens - used_tokens is tracked via add_message + self.cumulative_tokens += new_tokens; + debug!( + "Updated cumulative streaming tokens: {} (used: {}/{}, cumulative: {})", + new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens + ); + } + + pub fn percentage_used(&self) -> f32 { + if self.total_tokens == 0 { + 0.0 + } else { + (self.used_tokens as f32 / self.total_tokens as f32) * 100.0 + } + } + + /// Clear the conversation history while preserving system messages + /// Used by /clear command to start fresh + pub fn clear_conversation(&mut self) { + // Keep only system messages (system prompt, README, etc.) + let system_messages: Vec = self + .conversation_history + .iter() + .filter(|m| matches!(m.role, MessageRole::System)) + .cloned() + .collect(); + + self.conversation_history = system_messages; + self.used_tokens = self + .conversation_history + .iter() + .map(|m| Self::estimate_tokens(&m.content)) + .sum(); + self.last_thinning_percentage = 0; + } + + pub fn remaining_tokens(&self) -> u32 { + self.total_tokens.saturating_sub(self.used_tokens) + } + + /// Check if we should trigger summarization (at 80% capacity) + pub fn should_summarize(&self) -> bool { + // Trigger at 80% OR if we're getting close to absolute limits + // This prevents issues with models that have large contexts but still hit limits + let percentage_trigger = self.percentage_used() >= 80.0; + + // Also trigger if we're approaching common token limits + // Most models start having issues around 150k tokens + let absolute_trigger = self.used_tokens > 150_000; + + percentage_trigger || absolute_trigger + } + + /// Create a summary request prompt for the current conversation + pub fn create_summary_prompt(&self) -> String { + "Please provide a comprehensive summary of our conversation so far. Include: + +1. **Main Topic/Goal**: What is the primary task or objective being worked on? +2. **Key Decisions**: What important decisions have been made? +3. **Actions Taken**: What specific actions, commands, or code changes have been completed? +4. **Current State**: What is the current status of the work? +5. **Important Context**: Any critical information, file paths, configurations, or constraints that should be remembered? +6. **Pending Items**: What remains to be done or what was the user's last request? + +Format this as a detailed but concise summary that can be used to resume the conversation from scratch while maintaining full context.".to_string() + } + + /// Reset the context window with a summary + /// Preserves the original system prompt as the first message + pub fn reset_with_summary( + &mut self, + summary: String, + latest_user_message: Option, + ) -> usize { + // Calculate chars saved (old history minus new summary) + let old_chars: usize = self + .conversation_history + .iter() + .map(|m| m.content.len()) + .sum(); + + // Preserve the original system prompt (first message) and optionally the README (second message) + let original_system_prompt = self.conversation_history.first().cloned(); + let readme_message = self.conversation_history.get(1).and_then(|msg| { + if matches!(msg.role, MessageRole::System) + && (msg.content.contains("Project README") + || msg.content.contains("Agent Configuration")) + { + Some(msg.clone()) + } else { + None + } + }); + + // Clear the conversation history + self.conversation_history.clear(); + self.used_tokens = 0; + + // Re-add the original system prompt first (critical invariant) + if let Some(system_prompt) = original_system_prompt { + self.add_message(system_prompt); + } + + // Re-add the README message if it existed + if let Some(readme) = readme_message { + self.add_message(readme); + } + + // Add the summary as a system message + let summary_message = Message::new( + MessageRole::System, + format!("Previous conversation summary:\n\n{}", summary), + ); + self.add_message(summary_message); + + // Add the latest user message if provided + if let Some(user_msg) = latest_user_message { + self.add_message(Message::new(MessageRole::User, user_msg)); + } + + let new_chars: usize = self + .conversation_history + .iter() + .map(|m| m.content.len()) + .sum(); + old_chars.saturating_sub(new_chars) + } + + /// Check if we should trigger context thinning + /// Triggers at 50%, 60%, 70%, and 80% thresholds + pub fn should_thin(&self) -> bool { + let current_percentage = self.percentage_used() as u32; + + // Check if we've crossed a new 10% threshold starting at 50% + if current_percentage >= 50 { + let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10% + if current_threshold > self.last_thinning_percentage && current_threshold <= 80 { + return true; + } + } + + false + } + + /// Perform context thinning: scan messages and replace large tool results with file references. + /// + /// # Arguments + /// * `session_id` - If provided, thinned content is saved to .g3/session//thinned/ + /// * `scope` - Controls which messages to process (first third or all) + /// + /// # Returns + /// A tuple of (summary message, chars saved) + pub fn thin_context_with_scope( + &mut self, + session_id: Option<&str>, + scope: ThinScope, + ) -> (String, usize) { + let current_percentage = self.percentage_used() as u32; + + // Only update last_thinning_percentage for incremental thinning + if scope == ThinScope::FirstThird { + let current_threshold = (current_percentage / 10) * 10; + self.last_thinning_percentage = current_threshold; + } + + // Calculate message range based on scope + let total_messages = self.conversation_history.len(); + let end_index = match scope { + ThinScope::FirstThird => (total_messages / 3).max(1), + ThinScope::All => total_messages, + }; + + // Determine output directory: use session dir if available, otherwise ~/tmp + let tmp_dir = match Self::resolve_thinned_dir(session_id, scope) { + Ok(dir) => dir, + Err(msg) => return (msg, 0), + }; + + // Collect modifications to apply (avoids borrow checker issues) + let modifications = self.collect_thin_modifications(end_index, &tmp_dir, scope.file_prefix()); + + // Count results + let mut leaned_count = 0; + let mut tool_call_leaned_count = 0; + let mut chars_saved = 0; + + // Apply modifications + for modification in &modifications { + match modification { + ThinModification::ReplaceContent { index, new_content, chars_saved: saved } => { + if let Some(msg) = self.conversation_history.get_mut(*index) { + // Determine if this was a tool result or tool call based on content + if msg.content.starts_with("Tool result:") { + leaned_count += 1; + } else { + tool_call_leaned_count += 1; + } + msg.content = new_content.clone(); + chars_saved += saved; + } + } + } + } + + // Recalculate token usage after thinning + self.recalculate_tokens(); + + // Build result message + self.build_thin_result_message( + scope, + current_percentage, + leaned_count, + tool_call_leaned_count, + chars_saved, + ) + } + + /// Collect all modifications needed for thinning without mutating + fn collect_thin_modifications( + &self, + end_index: usize, + tmp_dir: &str, + file_prefix: &str, + ) -> Vec { + let mut modifications = Vec::new(); + + for i in 0..end_index { + if let Some(message) = self.conversation_history.get(i) { + // Check if the previous message was a TODO tool call + let is_todo_result = self.is_todo_tool_result(i); + + // Process User messages that look like tool results + if matches!(message.role, MessageRole::User) + && message.content.starts_with("Tool result:") + && !is_todo_result + && message.content.len() > 500 + { + if let Some(modification) = Self::create_tool_result_modification( + &message.content, + i, + tmp_dir, + file_prefix, + ) { + modifications.push(modification); + } + } + + // Process Assistant messages that contain tool calls with large arguments + if matches!(message.role, MessageRole::Assistant) { + if let Some(modification) = Self::create_tool_call_modification( + &message.content, + i, + tmp_dir, + file_prefix, + ) { + modifications.push(modification); + } + } + } + } + + modifications + } + + /// Backward-compatible wrapper for thin_context (first third only) + pub fn thin_context(&mut self, session_id: Option<&str>) -> (String, usize) { + self.thin_context_with_scope(session_id, ThinScope::FirstThird) + } + + /// Backward-compatible wrapper for thin_context_all (entire history) + pub fn thin_context_all(&mut self, session_id: Option<&str>) -> (String, usize) { + self.thin_context_with_scope(session_id, ThinScope::All) + } + + /// Resolve the directory for storing thinned content + fn resolve_thinned_dir(session_id: Option<&str>, scope: ThinScope) -> Result { + if let Some(sid) = session_id { + let thinned_dir = get_thinned_dir(sid); + if let Err(e) = std::fs::create_dir_all(&thinned_dir) { + warn!("Failed to create thinned directory: {}", e); + return Err(format!( + "⚠️ Context {} failed: could not create thinned directory", + scope.error_action() + )); + } + Ok(thinned_dir.to_string_lossy().to_string()) + } else { + let fallback_dir = shellexpand::tilde("~/tmp").to_string(); + if let Err(e) = std::fs::create_dir_all(&fallback_dir) { + warn!("Failed to create ~/tmp directory: {}", e); + return Err(format!( + "⚠️ Context {} failed: could not create ~/tmp directory", + scope.error_action() + )); + } + Ok(fallback_dir) + } + } + + /// Check if message at index i is a result of a TODO tool call + fn is_todo_tool_result(&self, i: usize) -> bool { + if i == 0 { + return false; + } + + if let Some(prev_message) = self.conversation_history.get(i - 1) { + if matches!(prev_message.role, MessageRole::Assistant) { + return prev_message.content.contains(r#""tool":"todo_read""#) + || prev_message.content.contains(r#""tool":"todo_write""#) + || prev_message.content.contains(r#""tool": "todo_read""#) + || prev_message.content.contains(r#""tool": "todo_write""#); + } + } + false + } + + /// Create a modification for thinning a tool result message + fn create_tool_result_modification( + content: &str, + index: usize, + tmp_dir: &str, + file_prefix: &str, + ) -> Option { + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let filename = format!("{}_tool_result_{}_{}.txt", file_prefix, timestamp, index); + let file_path = format!("{}/{}", tmp_dir, filename); + + if let Err(e) = std::fs::write(&file_path, content) { + warn!("Failed to write thinned content to {}: {}", file_path, e); + return None; + } + + let original_len = content.len(); + let new_content = format!("Tool result saved to {}", file_path); + let chars_saved = original_len - new_content.len(); + + debug!( + "Thinned tool result {} ({} chars) to {}", + index, original_len, file_path + ); + + Some(ThinModification::ReplaceContent { + index, + new_content, + chars_saved, + }) + } + + /// Create a modification for thinning tool calls in an assistant message + fn create_tool_call_modification( + content: &str, + index: usize, + tmp_dir: &str, + file_prefix: &str, + ) -> Option { + // Look for JSON tool call patterns + let tool_call_start = content + .find(r#"{"tool":"#) + .or_else(|| content.find(r#"{ "tool":"#)) + .or_else(|| content.find(r#"{"tool" :"#)) + .or_else(|| content.find(r#"{ "tool" :"#))?; + + let json_portion = &content[tool_call_start..]; + let json_end = Self::find_json_end(json_portion)?; + let json_str = &json_portion[..=json_end]; + + let mut tool_call: ToolCall = serde_json::from_str(json_str).ok()?; + let mut modified = false; + let mut chars_saved = 0; + + // Handle write_file tool calls + if tool_call.tool == "write_file" { + if let Some((saved, new_args)) = + Self::thin_write_file_args(&tool_call.args, index, tmp_dir, file_prefix) + { + tool_call.args = new_args; + modified = true; + chars_saved += saved; + } + } + + // Handle str_replace tool calls + if tool_call.tool == "str_replace" { + if let Some((saved, new_args)) = + Self::thin_str_replace_args(&tool_call.args, index, tmp_dir, file_prefix) + { + tool_call.args = new_args; + modified = true; + chars_saved += saved; + } + } + + if !modified { + return None; + } + + // Reconstruct the message + let prefix = &content[..tool_call_start]; + let suffix = &content[tool_call_start + json_str.len()..]; + let new_json = serde_json::to_string(&tool_call).ok()?; + let new_content = format!("{}{}{}", prefix, new_json, suffix); + + Some(ThinModification::ReplaceContent { + index, + new_content, + chars_saved, + }) + } + + /// Thin write_file args by saving content to file + /// Returns (chars_saved, new_args) if thinned + fn thin_write_file_args( + args: &serde_json::Value, + index: usize, + tmp_dir: &str, + file_prefix: &str, + ) -> Option<(usize, serde_json::Value)> { + let args_obj = args.as_object()?; + let content_str = args_obj.get("content")?.as_str()?; + let content_len = content_str.len(); + + if content_len <= 500 { + return None; + } + + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let filename = format!("{}_write_file_content_{}_{}.txt", file_prefix, timestamp, index); + let file_path = format!("{}/{}", tmp_dir, filename); + + if std::fs::write(&file_path, content_str).is_err() { + return None; + } + + let mut new_args = args_obj.clone(); + new_args.insert( + "content".to_string(), + serde_json::Value::String(format!("", file_path)), + ); + + debug!( + "Thinned write_file content {} ({} chars) to {}", + index, content_len, file_path + ); + + Some((content_len, serde_json::Value::Object(new_args))) + } + + /// Thin str_replace args by saving diff to file + /// Returns (chars_saved, new_args) if thinned + fn thin_str_replace_args( + args: &serde_json::Value, + index: usize, + tmp_dir: &str, + file_prefix: &str, + ) -> Option<(usize, serde_json::Value)> { + let args_obj = args.as_object()?; + let diff_str = args_obj.get("diff")?.as_str()?; + let diff_len = diff_str.len(); + + if diff_len <= 500 { + return None; + } + + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let filename = format!("{}_str_replace_diff_{}_{}.txt", file_prefix, timestamp, index); + let file_path = format!("{}/{}", tmp_dir, filename); + + if std::fs::write(&file_path, diff_str).is_err() { + return None; + } + + let mut new_args = args_obj.clone(); + new_args.insert( + "diff".to_string(), + serde_json::Value::String(format!("", file_path)), + ); + + debug!( + "Thinned str_replace diff {} ({} chars) to {}", + index, diff_len, file_path + ); + + Some((diff_len, serde_json::Value::Object(new_args))) + } + + /// Build the result message for thinning operations + fn build_thin_result_message( + &self, + scope: ThinScope, + current_percentage: u32, + leaned_count: usize, + tool_call_leaned_count: usize, + chars_saved: usize, + ) -> (String, usize) { + let emoji = scope.emoji(); + let label = scope.label(); + let scope_desc = match scope { + ThinScope::FirstThird => "", + ThinScope::All => " across entire history", + }; + + if leaned_count > 0 && tool_call_leaned_count > 0 { + ( + format!( + "{} Context {} at {}%: {} tool results + {} tool calls{}, ~{} chars saved", + emoji, label, current_percentage, leaned_count, tool_call_leaned_count, scope_desc, chars_saved + ), + chars_saved, + ) + } else if leaned_count > 0 { + ( + format!( + "{} Context {} at {}%: {} tool results{}, ~{} chars saved", + emoji, label, current_percentage, leaned_count, scope_desc, chars_saved + ), + chars_saved, + ) + } else if tool_call_leaned_count > 0 { + ( + format!( + "{} Context {} at {}%: {} tool calls{}, ~{} chars saved", + emoji, label, current_percentage, tool_call_leaned_count, scope_desc, chars_saved + ), + chars_saved, + ) + } else { + ( + format!( + "ℹ Context {} triggered at {}% but no large tool results or tool calls found{}", + scope.error_action(), current_percentage, scope_desc + ), + 0, + ) + } + } + + /// Recalculate token usage based on current conversation history + fn recalculate_tokens(&mut self) { + let mut total = 0; + for message in &self.conversation_history { + total += Self::estimate_tokens(&message.content); + } + self.used_tokens = total; + + debug!("Recalculated tokens after thinning: {} tokens", total); + } + + /// Helper function to find the end of a JSON object + pub fn find_json_end(json_str: &str) -> Option { + let mut brace_count = 0; + let mut in_string = false; + let mut escape_next = false; + + for (i, ch) in json_str.char_indices() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' => escape_next = true, + '"' if !escape_next => in_string = !in_string, + '{' if !in_string => brace_count += 1, + '}' if !in_string => { + brace_count -= 1; + if brace_count == 0 { + return Some(i); + } + } + _ => {} + } + } + + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_context_window() { + let cw = ContextWindow::new(100_000); + assert_eq!(cw.used_tokens, 0); + assert_eq!(cw.total_tokens, 100_000); + assert_eq!(cw.cumulative_tokens, 0); + assert!(cw.conversation_history.is_empty()); + } + + #[test] + fn test_percentage_used() { + let mut cw = ContextWindow::new(100); + cw.used_tokens = 50; + assert!((cw.percentage_used() - 50.0).abs() < 0.01); + } + + #[test] + fn test_remaining_tokens() { + let mut cw = ContextWindow::new(100); + cw.used_tokens = 30; + assert_eq!(cw.remaining_tokens(), 70); + } + + #[test] + fn test_should_summarize_at_80_percent() { + let mut cw = ContextWindow::new(100); + cw.used_tokens = 79; + assert!(!cw.should_summarize()); + cw.used_tokens = 80; + assert!(cw.should_summarize()); + } + + #[test] + fn test_should_summarize_at_absolute_limit() { + let mut cw = ContextWindow::new(1_000_000); + cw.used_tokens = 150_001; + assert!(cw.should_summarize()); + } + + #[test] + fn test_should_thin_thresholds() { + let mut cw = ContextWindow::new(100); + + // Below 50% - should not thin + cw.used_tokens = 49; + assert!(!cw.should_thin()); + + // At 50% - should thin (first time) + cw.used_tokens = 50; + assert!(cw.should_thin()); + + // After thinning at 50%, shouldn't thin again until 60% + cw.last_thinning_percentage = 50; + cw.used_tokens = 55; + assert!(!cw.should_thin()); + + // At 60% - should thin again + cw.used_tokens = 60; + assert!(cw.should_thin()); + } + + #[test] + fn test_estimate_tokens_regular_text() { + let text = "Hello world, this is a test."; + let tokens = ContextWindow::estimate_tokens(text); + // ~28 chars / 4 * 1.1 = ~8 tokens + assert!(tokens > 0 && tokens < 20); + } + + #[test] + fn test_estimate_tokens_code() { + let code = "fn main() { println!(\"hello\"); }"; + let tokens = ContextWindow::estimate_tokens(code); + // Code uses 3 chars per token estimate + assert!(tokens > 0); + } + + #[test] + fn test_find_json_end() { + assert_eq!(ContextWindow::find_json_end("{}"), Some(1)); + assert_eq!(ContextWindow::find_json_end(r#"{"a": 1}"#), Some(7)); + assert_eq!(ContextWindow::find_json_end(r#"{"a": {"b": 2}}"#), Some(14)); + assert_eq!(ContextWindow::find_json_end("{incomplete"), None); + } + + #[test] + fn test_thin_scope_properties() { + assert_eq!(ThinScope::FirstThird.emoji(), "🥒"); + assert_eq!(ThinScope::All.emoji(), "🦴"); + assert_eq!(ThinScope::FirstThird.label(), "thinned"); + assert_eq!(ThinScope::All.label(), "skinnified"); + } +} diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 4a7adf3..7dee95d 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -1,3 +1,4 @@ +pub mod context_window; pub mod background_process; pub mod code_search; pub mod error_handling; @@ -8,6 +9,7 @@ pub mod retry; pub mod session_continuation; pub mod streaming_parser; pub mod task_result; +pub mod tool_definitions; pub mod ui_writer; pub mod utils; pub mod webdriver_session; @@ -17,6 +19,9 @@ pub use retry::{RetryConfig, RetryResult, execute_with_retry, retry_operation}; pub use feedback_extraction::{ExtractedFeedback, FeedbackSource, FeedbackExtractionConfig, extract_coach_feedback}; pub use session_continuation::{SessionContinuation, load_continuation, save_continuation, clear_continuation, has_valid_continuation, get_session_dir, load_context_from_session_log}; +// Re-export context window types +pub use context_window::{ContextWindow, ThinScope}; + // Export agent prompt generation for CLI use pub use prompts::get_agent_system_prompt; @@ -35,12 +40,11 @@ use anyhow::Result; use g3_computer_control::WebDriverController; use g3_config::Config; use g3_execution::CodeExecutor; -use g3_providers::{CacheControl, CompletionRequest, Message, MessageRole, ProviderRegistry, Tool}; +use g3_providers::{CacheControl, CompletionRequest, Message, MessageRole, ProviderRegistry}; use prompts::{get_system_prompt_for_native, SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE}; #[allow(unused_imports)] use regex::Regex; use serde::{Deserialize, Serialize}; -use serde_json::json; use std::io::Write; use std::time::{Duration, Instant}; use tokio_util::sync::CancellationToken; @@ -82,762 +86,6 @@ pub enum StreamState { // Re-export StreamingToolParser from its own module pub use streaming_parser::StreamingToolParser; - -#[derive(Debug, Clone)] -pub struct ContextWindow { - pub used_tokens: u32, - pub total_tokens: u32, - pub cumulative_tokens: u32, // Track cumulative tokens across all interactions - pub conversation_history: Vec, - pub last_thinning_percentage: u32, // Track the last percentage at which we thinned -} - -impl ContextWindow { - pub fn new(total_tokens: u32) -> Self { - Self { - used_tokens: 0, - total_tokens, - cumulative_tokens: 0, - conversation_history: Vec::new(), - last_thinning_percentage: 0, - } - } - - pub fn add_message(&mut self, message: Message) { - self.add_message_with_tokens(message, None); - } - - /// Add a message with optional token count from the provider - pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option) { - // Skip messages with empty content to avoid API errors - if message.content.trim().is_empty() { - warn!("Skipping empty message to avoid API error"); - return; - } - - // Use provided token count if available, otherwise estimate - let token_count = tokens.unwrap_or_else(|| Self::estimate_tokens(&message.content)); - self.used_tokens += token_count; - self.cumulative_tokens += token_count; - self.conversation_history.push(message); - - debug!( - "Added message with {} tokens (used: {}/{}, cumulative: {})", - token_count, self.used_tokens, self.total_tokens, self.cumulative_tokens - ); - } - - /// Update token usage from provider response - /// NOTE: This only updates cumulative_tokens (total API usage tracking). - /// It does NOT update used_tokens because: - /// 1. prompt_tokens represents the ENTIRE context sent to API (already tracked via add_message) - /// 2. completion_tokens will be tracked when the assistant message is added via add_message - /// Adding total_tokens here would cause double/triple counting and break the 80% threshold check. - pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) { - // Only update cumulative tokens for API usage tracking - // Do NOT update used_tokens - that's tracked via add_message to avoid double counting - self.cumulative_tokens += usage.total_tokens; - - debug!( - "Updated cumulative tokens: {} (used: {}/{}, cumulative: {})", - usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens - ); - } - - /// More accurate token estimation - fn estimate_tokens(text: &str) -> u32 { - // Better heuristic: - // - Average English text: ~4 characters per token - // - Code/JSON: ~3 characters per token (more symbols) - // - Add 10% buffer for safety - let base_estimate = if text.contains("{") || text.contains("```") || text.contains("fn ") { - (text.len() as f32 / 3.0).ceil() as u32 // Code/JSON - } else { - (text.len() as f32 / 4.0).ceil() as u32 // Regular text - }; - (base_estimate as f32 * 1.1).ceil() as u32 // Add 10% buffer - } - - pub fn update_usage(&mut self, usage: &g3_providers::Usage) { - // Deprecated: Use update_usage_from_response instead - self.update_usage_from_response(usage); - } - - /// Update cumulative token usage (for streaming) when no provider usage data is available - /// NOTE: This only updates cumulative_tokens, not used_tokens. - /// The assistant message will be added via add_message which tracks used_tokens. - pub fn add_streaming_tokens(&mut self, new_tokens: u32) { - // Only update cumulative tokens - used_tokens is tracked via add_message - self.cumulative_tokens += new_tokens; - debug!( - "Updated cumulative streaming tokens: {} (used: {}/{}, cumulative: {})", - new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens - ); - } - - pub fn percentage_used(&self) -> f32 { - if self.total_tokens == 0 { - 0.0 - } else { - (self.used_tokens as f32 / self.total_tokens as f32) * 100.0 - } - } - - /// Clear the conversation history while preserving system messages - /// Used by /clear command to start fresh - pub fn clear_conversation(&mut self) { - // Keep only system messages (system prompt, README, etc.) - let system_messages: Vec = self.conversation_history - .iter() - .filter(|m| matches!(m.role, MessageRole::System)) - .cloned() - .collect(); - - self.conversation_history = system_messages; - self.used_tokens = self.conversation_history.iter() - .map(|m| Self::estimate_tokens(&m.content)) - .sum(); - self.last_thinning_percentage = 0; - } - - pub fn remaining_tokens(&self) -> u32 { - self.total_tokens.saturating_sub(self.used_tokens) - } - - /// Check if we should trigger summarization (at 80% capacity) - pub fn should_summarize(&self) -> bool { - // Trigger at 80% OR if we're getting close to absolute limits - // This prevents issues with models that have large contexts but still hit limits - let percentage_trigger = self.percentage_used() >= 80.0; - - // Also trigger if we're approaching common token limits - // Most models start having issues around 150k tokens - let absolute_trigger = self.used_tokens > 150_000; - - percentage_trigger || absolute_trigger - } - - /// Create a summary request prompt for the current conversation - pub fn create_summary_prompt(&self) -> String { - "Please provide a comprehensive summary of our conversation so far. Include: - -1. **Main Topic/Goal**: What is the primary task or objective being worked on? -2. **Key Decisions**: What important decisions have been made? -3. **Actions Taken**: What specific actions, commands, or code changes have been completed? -4. **Current State**: What is the current status of the work? -5. **Important Context**: Any critical information, file paths, configurations, or constraints that should be remembered? -6. **Pending Items**: What remains to be done or what was the user's last request? - -Format this as a detailed but concise summary that can be used to resume the conversation from scratch while maintaining full context.".to_string() - } - - /// Reset the context window with a summary - /// Preserves the original system prompt as the first message - pub fn reset_with_summary( - &mut self, - summary: String, - latest_user_message: Option, - ) -> usize { - // Calculate chars saved (old history minus new summary) - let old_chars: usize = self - .conversation_history - .iter() - .map(|m| m.content.len()) - .sum(); - - // Preserve the original system prompt (first message) and optionally the README (second message) - let original_system_prompt = self.conversation_history.first().cloned(); - let readme_message = self.conversation_history.get(1).and_then(|msg| { - if matches!(msg.role, MessageRole::System) && - (msg.content.contains("Project README") || msg.content.contains("Agent Configuration")) { - Some(msg.clone()) - } else { - None - } - }); - - // Clear the conversation history - self.conversation_history.clear(); - self.used_tokens = 0; - - // Re-add the original system prompt first (critical invariant) - if let Some(system_prompt) = original_system_prompt { - self.add_message(system_prompt); - } - - // Re-add the README message if it existed - if let Some(readme) = readme_message { - self.add_message(readme); - } - - // Add the summary as a system message - let summary_message = Message::new( - MessageRole::System, - format!("Previous conversation summary:\n\n{}", summary), - ); - self.add_message(summary_message); - - // Add the latest user message if provided - if let Some(user_msg) = latest_user_message { - self.add_message(Message::new(MessageRole::User, user_msg)); - } - - let new_chars: usize = self - .conversation_history - .iter() - .map(|m| m.content.len()) - .sum(); - old_chars.saturating_sub(new_chars) - } - - /// Check if we should trigger context thinning - /// Triggers at 50%, 60%, 70%, and 80% thresholds - pub fn should_thin(&self) -> bool { - let current_percentage = self.percentage_used() as u32; - - // Check if we've crossed a new 10% threshold starting at 50% - if current_percentage >= 50 { - let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10% - if current_threshold > self.last_thinning_percentage && current_threshold <= 80 { - return true; - } - } - - false - } - - /// Perform context thinning: scan first third of conversation and replace large tool results - /// Returns a summary message about what was thinned - /// If session_id is provided, thinned content is saved to .g3/session//thinned/ - pub fn thin_context(&mut self, session_id: Option<&str>) -> (String, usize) { - let current_percentage = self.percentage_used() as u32; - let current_threshold = (current_percentage / 10) * 10; - - // Update the last thinning percentage - self.last_thinning_percentage = current_threshold; - - // Calculate the first third of the conversation - let total_messages = self.conversation_history.len(); - let first_third_end = (total_messages / 3).max(1); - - let mut leaned_count = 0; - let mut tool_call_leaned_count = 0; - let mut chars_saved = 0; - - // Determine output directory: use session dir if available, otherwise ~/tmp - let tmp_dir = if let Some(sid) = session_id { - let thinned_dir = get_thinned_dir(sid); - if let Err(e) = std::fs::create_dir_all(&thinned_dir) { - warn!("Failed to create thinned directory: {}", e); - return ( - "⚠️ Context thinning failed: could not create thinned directory".to_string(), - 0, - ); - } - thinned_dir.to_string_lossy().to_string() - } else { - let fallback_dir = shellexpand::tilde("~/tmp").to_string(); - if let Err(e) = std::fs::create_dir_all(&fallback_dir) { - warn!("Failed to create ~/tmp directory: {}", e); - return ( - "⚠️ Context thinning failed: could not create ~/tmp directory".to_string(), - 0, - ); - } - fallback_dir - }; - - // Scan the first third of messages - for i in 0..first_third_end { - // Check if the previous message was a TODO tool call (before getting mutable reference) - let is_todo_result = if i > 0 { - if let Some(prev_message) = self.conversation_history.get(i - 1) { - if matches!(prev_message.role, MessageRole::Assistant) { - prev_message.content.contains(r#""tool":"todo_read""#) - || prev_message.content.contains(r#""tool":"todo_write""#) - || prev_message.content.contains(r#""tool": "todo_read""#) - || prev_message.content.contains(r#""tool": "todo_write""#) - } else { - false - } - } else { - false - } - } else { - false - }; - - if let Some(message) = self.conversation_history.get_mut(i) { - // Process User messages that look like tool results - if matches!(message.role, MessageRole::User) - && message.content.starts_with("Tool result:") - { - let content_len = message.content.len(); - - // Only thin if the content is greater than 500 chars and not a TODO tool result - if !is_todo_result && content_len > 500 { - // Generate a unique filename based on timestamp and index - let timestamp = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - let filename = format!("leaned_tool_result_{}_{}.txt", timestamp, i); - let file_path = format!("{}/{}", tmp_dir, filename); - - // Write the content to file - if let Err(e) = std::fs::write(&file_path, &message.content) { - warn!("Failed to write thinned content to {}: {}", file_path, e); - continue; - } - - // Replace the message content with a note - let original_len = message.content.len(); - message.content = format!("Tool result saved to {}", file_path); - - leaned_count += 1; - chars_saved += original_len - message.content.len(); - - debug!( - "Thinned tool result {} ({} chars) to {}", - i, original_len, file_path - ); - } - } - - // Process Assistant messages that contain tool calls with large arguments - if matches!(message.role, MessageRole::Assistant) { - // Try to parse the message content as JSON to find tool calls - let content = &message.content; - - // Look for JSON tool call patterns - if let Some(tool_call_start) = content - .find(r#"{"tool":"#) - .or_else(|| content.find(r#"{ "tool":"#)) - .or_else(|| content.find(r#"{"tool" :"#)) - .or_else(|| content.find(r#"{ "tool" :"#)) - { - // Try to extract and parse the JSON tool call - let json_portion = &content[tool_call_start..]; - - // Find the end of the JSON object - if let Some(json_end) = Self::find_json_end(json_portion) { - let json_str = &json_portion[..=json_end]; - - // Try to parse as ToolCall - if let Ok(mut tool_call) = serde_json::from_str::(json_str) { - let mut modified = false; - - // Handle write_file tool calls - if tool_call.tool == "write_file" { - if let Some(args_obj) = tool_call.args.as_object_mut() { - // Extract content to avoid borrow issues - let content_info = args_obj - .get("content") - .and_then(|v| v.as_str()) - .map(|s| (s.to_string(), s.len())); - - if let Some((content_str, content_len)) = content_info { - // Only thin if content is greater than 500 chars - if content_len > 500 { - let timestamp = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - let filename = format!( - "leaned_write_file_content_{}_{}.txt", - timestamp, i - ); - let file_path = format!("{}/{}", tmp_dir, filename); - - if std::fs::write(&file_path, &content_str).is_ok() - { - args_obj.insert( - "content".to_string(), - serde_json::Value::String(format!( - "", - file_path - )), - ); - modified = true; - chars_saved += content_len; - tool_call_leaned_count += 1; - debug!("Thinned write_file content {} ({} chars) to {}", i, content_len, file_path); - } - } - } - } - } - - // Handle str_replace tool calls - if tool_call.tool == "str_replace" { - if let Some(args_obj) = tool_call.args.as_object_mut() { - // Extract diff to avoid borrow issues - let diff_info = args_obj - .get("diff") - .and_then(|v| v.as_str()) - .map(|s| (s.to_string(), s.len())); - - if let Some((diff_str, diff_len)) = diff_info { - // Only thin if diff is greater than 500 chars - if diff_len > 500 { - let timestamp = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - let filename = format!( - "leaned_str_replace_diff_{}_{}.txt", - timestamp, i - ); - let file_path = format!("{}/{}", tmp_dir, filename); - - if std::fs::write(&file_path, &diff_str).is_ok() { - args_obj.insert( - "diff".to_string(), - serde_json::Value::String(format!( - "", - file_path - )), - ); - modified = true; - chars_saved += diff_len; - tool_call_leaned_count += 1; - debug!("Thinned str_replace diff {} ({} chars) to {}", i, diff_len, file_path); - } - } - } - } - } - - // If we modified the tool call, reconstruct the message - if modified { - let prefix = &content[..tool_call_start]; - let suffix = &content[tool_call_start + json_str.len()..]; - - // Serialize the modified tool call - if let Ok(new_json) = serde_json::to_string(&tool_call) { - message.content = - format!("{}{}{}", prefix, new_json, suffix); - } - } - } - } - } - } - } - } - - // Recalculate token usage after thinning - self.recalculate_tokens(); - - if leaned_count > 0 { - if tool_call_leaned_count > 0 { - (format!("🥒 Context thinned at {}%: {} tool results + {} tool calls, ~{} chars saved", - current_threshold, leaned_count, tool_call_leaned_count, chars_saved), chars_saved) - } else { - ( - format!( - "🥒 Context thinned at {}%: {} tool results, ~{} chars saved", - current_threshold, leaned_count, chars_saved - ), - chars_saved, - ) - } - } else if tool_call_leaned_count > 0 { - ( - format!( - "🥒 Context thinned at {}%: {} tool calls, ~{} chars saved", - current_threshold, tool_call_leaned_count, chars_saved - ), - chars_saved, - ) - } else { - (format!("ℹ Context thinning triggered at {}% but no large tool results or tool calls found in first third", - current_threshold), 0) - } - } - - /// Perform context thinning on the ENTIRE conversation history (not just first third) - /// This is the "skinnify" variant that processes all messages - /// Returns a summary message about what was thinned - /// If session_id is provided, thinned content is saved to .g3/session//thinned/ - pub fn thin_context_all(&mut self, session_id: Option<&str>) -> (String, usize) { - let current_percentage = self.percentage_used() as u32; - - // Calculate the total messages - process ALL of them - let total_messages = self.conversation_history.len(); - - let mut leaned_count = 0; - let mut tool_call_leaned_count = 0; - let mut chars_saved = 0; - - // Determine output directory: use session dir if available, otherwise ~/tmp - let tmp_dir = if let Some(sid) = session_id { - let thinned_dir = get_thinned_dir(sid); - if let Err(e) = std::fs::create_dir_all(&thinned_dir) { - warn!("Failed to create thinned directory: {}", e); - return ( - "⚠️ Context skinnifying failed: could not create thinned directory".to_string(), - 0, - ); - } - thinned_dir.to_string_lossy().to_string() - } else { - let fallback_dir = shellexpand::tilde("~/tmp").to_string(); - if let Err(e) = std::fs::create_dir_all(&fallback_dir) { - warn!("Failed to create ~/tmp directory: {}", e); - return ( - "⚠️ Context skinnifying failed: could not create ~/tmp directory".to_string(), - 0, - ); - } - fallback_dir - }; - - // Scan ALL messages (not just first third) - for i in 0..total_messages { - // Check if the previous message was a TODO tool call (before getting mutable reference) - let is_todo_result = if i > 0 { - if let Some(prev_message) = self.conversation_history.get(i - 1) { - if matches!(prev_message.role, MessageRole::Assistant) { - prev_message.content.contains(r#""tool":"todo_read""#) - || prev_message.content.contains(r#""tool":"todo_write""#) - || prev_message.content.contains(r#""tool": "todo_read""#) - || prev_message.content.contains(r#""tool": "todo_write""#) - } else { - false - } - } else { - false - } - } else { - false - }; - - if let Some(message) = self.conversation_history.get_mut(i) { - // Process User messages that look like tool results - if matches!(message.role, MessageRole::User) - && message.content.starts_with("Tool result:") - { - let content_len = message.content.len(); - - // Only thin if the content is greater than 500 chars and not a TODO tool result - if !is_todo_result && content_len > 500 { - // Generate a unique filename based on timestamp and index - let timestamp = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - let filename = format!("skinny_tool_result_{}_{}.txt", timestamp, i); - let file_path = format!("{}/{}", tmp_dir, filename); - - // Write the content to file - if let Err(e) = std::fs::write(&file_path, &message.content) { - warn!("Failed to write skinnified content to {}: {}", file_path, e); - continue; - } - - // Replace the message content with a note - let original_len = message.content.len(); - message.content = format!("Tool result saved to {}", file_path); - - leaned_count += 1; - chars_saved += original_len - message.content.len(); - - debug!( - "Skinnified tool result {} ({} chars) to {}", - i, original_len, file_path - ); - } - } - - // Process Assistant messages that contain tool calls with large arguments - if matches!(message.role, MessageRole::Assistant) { - // Try to parse the message content as JSON to find tool calls - let content = &message.content; - - // Look for JSON tool call patterns - if let Some(tool_call_start) = content - .find(r#"{"tool":"#) - .or_else(|| content.find(r#"{ "tool":"#)) - .or_else(|| content.find(r#"{"tool" :"#)) - .or_else(|| content.find(r#"{ "tool" :"#)) - { - // Try to extract and parse the JSON tool call - let json_portion = &content[tool_call_start..]; - - // Find the end of the JSON object - if let Some(json_end) = Self::find_json_end(json_portion) { - let json_str = &json_portion[..=json_end]; - - // Try to parse as ToolCall - if let Ok(mut tool_call) = serde_json::from_str::(json_str) { - let mut modified = false; - - // Handle write_file tool calls - if tool_call.tool == "write_file" { - if let Some(args_obj) = tool_call.args.as_object_mut() { - let content_info = args_obj - .get("content") - .and_then(|v| v.as_str()) - .map(|s| (s.to_string(), s.len())); - - if let Some((content_str, content_len)) = content_info { - if content_len > 500 { - let timestamp = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - let filename = format!( - "skinny_write_file_content_{}_{}.txt", - timestamp, i - ); - let file_path = format!("{}/{}", tmp_dir, filename); - - if std::fs::write(&file_path, &content_str).is_ok() { - args_obj.insert( - "content".to_string(), - serde_json::Value::String(format!( - "", - file_path - )), - ); - modified = true; - chars_saved += content_len; - tool_call_leaned_count += 1; - debug!("Skinnified write_file content {} ({} chars) to {}", i, content_len, file_path); - } - } - } - } - } - - // Handle str_replace tool calls - if tool_call.tool == "str_replace" { - if let Some(args_obj) = tool_call.args.as_object_mut() { - let diff_info = args_obj - .get("diff") - .and_then(|v| v.as_str()) - .map(|s| (s.to_string(), s.len())); - - if let Some((diff_str, diff_len)) = diff_info { - if diff_len > 500 { - let timestamp = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); - let filename = format!( - "skinny_str_replace_diff_{}_{}.txt", - timestamp, i - ); - let file_path = format!("{}/{}", tmp_dir, filename); - - if std::fs::write(&file_path, &diff_str).is_ok() { - args_obj.insert( - "diff".to_string(), - serde_json::Value::String(format!( - "", - file_path - )), - ); - modified = true; - chars_saved += diff_len; - tool_call_leaned_count += 1; - debug!("Skinnified str_replace diff {} ({} chars) to {}", i, diff_len, file_path); - } - } - } - } - } - - // If we modified the tool call, reconstruct the message - if modified { - let prefix = &content[..tool_call_start]; - let suffix = &content[tool_call_start + json_str.len()..]; - - // Serialize the modified tool call - if let Ok(new_json) = serde_json::to_string(&tool_call) { - message.content = - format!("{}{}{}", prefix, new_json, suffix); - } - } - } - } - } - } - } - } - - // Recalculate token usage after thinning - self.recalculate_tokens(); - - if leaned_count > 0 { - if tool_call_leaned_count > 0 { - (format!("🦴 Context skinnified at {}%: {} tool results + {} tool calls across entire history, ~{} chars saved", - current_percentage, leaned_count, tool_call_leaned_count, chars_saved), chars_saved) - } else { - ( - format!( - "🦴 Context skinnified at {}%: {} tool results across entire history, ~{} chars saved", - current_percentage, leaned_count, chars_saved - ), - chars_saved, - ) - } - } else if tool_call_leaned_count > 0 { - ( - format!( - "🦴 Context skinnified at {}%: {} tool calls across entire history, ~{} chars saved", - current_percentage, tool_call_leaned_count, chars_saved - ), - chars_saved, - ) - } else { - (format!("ℹ Context skinnifying triggered at {}% but no large tool results or tool calls found in entire history", - current_percentage), 0) - } - } - - /// Recalculate token usage based on current conversation history - fn recalculate_tokens(&mut self) { - let mut total = 0; - for message in &self.conversation_history { - total += Self::estimate_tokens(&message.content); - } - self.used_tokens = total; - - debug!("Recalculated tokens after thinning: {} tokens", total); - } - - /// Helper function to find the end of a JSON object - fn find_json_end(json_str: &str) -> Option { - let mut brace_count = 0; - let mut in_string = false; - let mut escape_next = false; - - for (i, ch) in json_str.char_indices() { - if escape_next { - escape_next = false; - continue; - } - - match ch { - '\\' => escape_next = true, - '"' if !escape_next => in_string = !in_string, - '{' if !in_string => brace_count += 1, - '}' if !in_string => { - brace_count -= 1; - if brace_count == 0 { - return Some(i); - } - } - _ => {} - } - } - - None - } -} - pub struct Agent { providers: ProviderRegistry, context_window: ContextWindow, @@ -1861,11 +1109,12 @@ impl Agent { let _has_native_tool_calling = provider.has_native_tool_calling(); let _supports_cache_control = provider.supports_cache_control(); let tools = if provider.has_native_tool_calling() { - Some(Self::create_tool_definitions( - self.config.webdriver.enabled, - self.config.macax.enabled, - self.config.computer_control.enabled, - )) + Some(tool_definitions::create_tool_definitions( + tool_definitions::ToolConfig::new( + self.config.webdriver.enabled, + self.config.macax.enabled, + self.config.computer_control.enabled, + ))) } else { None }; @@ -2776,616 +2025,6 @@ impl Agent { } /// Create tool definitions for native tool calling providers - fn create_tool_definitions( - enable_webdriver: bool, - enable_macax: bool, - enable_computer_control: bool, - ) -> Vec { - let mut tools = vec![ - Tool { - name: "shell".to_string(), - description: "Execute shell commands".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "The shell command to execute" - } - }, - "required": ["command"] - }), - }, - Tool { - name: "background_process".to_string(), - description: "Launch a long-running process in the background (e.g., game servers, dev servers). The process runs independently and logs are captured to a file. Use the regular 'shell' tool to read logs (cat/tail), check status (ps), or stop the process (kill). Returns the PID and log file path.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "name": { - "type": "string", - "description": "A unique name for this process (e.g., 'game_server', 'my_app'). Used to identify the process and its log file." - }, - "command": { - "type": "string", - "description": "The shell command to execute in the background" - }, - "working_dir": { - "type": "string", - "description": "Optional working directory. Defaults to current directory if not specified." - } - }, - "required": ["name", "command"] - }), - }, - Tool { - name: "read_file".to_string(), - description: "Read the contents of a file. For image files (png, jpg, jpeg, gif, bmp, tiff, webp), automatically extracts text using OCR. For text files, optionally read a specific character range.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "file_path": { - "type": "string", - "description": "The path to the file to read" - }, - "start": { - "type": "integer", - "description": "Starting character position (0-indexed, inclusive). If omitted, reads from beginning." - }, - "end": { - "type": "integer", - "description": "Ending character position (0-indexed, EXCLUSIVE). If omitted, reads to end of file." - } - }, - "required": ["file_path"] - }), - }, - Tool { - name: "read_image".to_string(), - description: "Read one or more image files and send them to the LLM for visual analysis. Supports PNG, JPEG, GIF, and WebP formats. Use this when you need to visually inspect images (e.g., find sprites, analyze UI, read diagrams). The images will be included in your next response for analysis.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "file_paths": { - "type": "array", - "items": { "type": "string" }, - "description": "Array of paths to image files to read" - } - }, - "required": ["file_paths"] - }), - }, - Tool { - name: "write_file".to_string(), - description: "Write content to a file (creates or overwrites). You MUST provide all arguments".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "file_path": { - "type": "string", - "description": "The path to the file to write" - }, - "content": { - "type": "string", - "description": "The content to write to the file" - } - }, - "required": ["file_path", "content"] - }), - }, - Tool { - name: "str_replace".to_string(), - description: "Apply a unified diff to a file. Supports multiple hunks and context lines. Optionally constrain the search to a [start, end) character range (0-indexed; end is EXCLUSIVE). Useful to disambiguate matches or limit scope in large files.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "file_path": { - "type": "string", - "description": "The path to the file to edit" - }, - "diff": { - "type": "string", - "description": "A unified diff showing what to replace. Supports @@ hunk headers, context lines, and multiple hunks (---/+++ headers optional for minimal diffs)." - }, - "start": { - "type": "integer", - "description": "Starting character position in the file (0-indexed, inclusive). If omitted, searches from beginning." - }, - "end": { - "type": "integer", - "description": "Ending character position in the file (0-indexed, EXCLUSIVE - character at this position is NOT included). If omitted, searches to end of file." - } - }, - "required": ["file_path", "diff"] - }), - }, - Tool { - name: "final_output".to_string(), - description: "Signal task completion with a detailed summary".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "summary": { - "type": "string", - "description": "A detailed summary in markdown of what was accomplished" - } - }, - "required": ["summary"] - }), - }, - Tool { - name: "take_screenshot".to_string(), - description: "Capture a screenshot of a specific application window. You MUST specify the window_id parameter with the application name (e.g., 'Safari', 'Terminal', 'Google Chrome'). The tool will automatically use the native screencapture command with the application's window ID for a clean capture. Use list_windows first to identify available windows.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Filename for the screenshot (e.g., 'safari.png'). If a relative path is provided, the screenshot will be saved to ~/tmp or $TMPDIR. Use an absolute path to save elsewhere." - }, - "window_id": { - "type": "string", - "description": "REQUIRED: Application name to capture (e.g., 'Safari', 'Terminal', 'Google Chrome'). The tool will capture the frontmost window of that application using its native window ID." - }, - "region": { - "type": "object", - "properties": { - "x": {"type": "integer"}, - "y": {"type": "integer"}, - "width": {"type": "integer"}, - "height": {"type": "integer"} - } - } - }, - "required": ["path", "window_id"] - }), - }, - Tool { - name: "extract_text".to_string(), - description: "Extract text from an image file using OCR. For extracting text from a specific window, use vision_find_text instead which automatically handles window capture.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to image file (optional if region is provided)" - }, - } - }), - }, - Tool { - name: "todo_read".to_string(), - description: "Read your current TODO list from todo.g3.md file in the session directory. Shows what tasks are planned and their status. Call this at the start of multi-step tasks to check for existing plans, and during execution to review progress before updating. TODO lists are scoped to the current session.".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "todo_write".to_string(), - description: "Create or update your TODO list in todo.g3.md file with a complete task plan. Use markdown checkboxes: - [ ] for pending, - [x] for complete. This tool replaces the entire file content, so always call todo_read first to preserve existing content. Essential for multi-step tasks. TODO lists are scoped to the current session.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "content": { - "type": "string", - "description": "The TODO list content to save. Use markdown checkbox format: - [ ] for incomplete tasks, - [x] for completed tasks. Support nested tasks with indentation." - } - }, - "required": ["content"] - }), - }, - Tool { - name: "code_coverage".to_string(), - description: "Generate a code coverage report for the entire workspace using cargo llvm-cov. This runs all tests with coverage instrumentation and returns a summary of coverage statistics. Requires llvm-tools-preview and cargo-llvm-cov to be installed (they will be auto-installed if missing).".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - ]; - - // Add code_search tool - tools.push(Tool { - name: "code_search".to_string(), - description: "Syntax-aware code search that understands code structure, not just text. Finds actual functions, classes, methods, and other code constructs - ignores matches in comments and strings. Much more accurate than grep for code searches. Supports batch searches (up to 20 parallel) with structured results and context lines. Languages: Rust, Python, JavaScript, TypeScript, Go, Java, C, C++, Kotlin. Uses tree-sitter query syntax.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "searches": { - "type": "array", - "maxItems": 20, - "items": { - "type": "object", - "properties": { - "name": { "type": "string", "description": "Label for this search." }, - "query": { "type": "string", "description": "tree-sitter query in S-expression format (e.g., \"(function_item name: (identifier) @name)\")"}, - "language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript", "go", "java", "c", "cpp", "kotlin"], "description": "Programming language to search." }, - "paths": { "type": "array", "items": { "type": "string" }, "description": "Paths/dirs to search. Defaults to current dir if empty." }, - "context_lines": { "type": "integer", "minimum": 0, "maximum": 20, "default": 0, "description": "Lines of context to include around each match." } - }, - "required": ["name", "query", "language"] - } - }, - "max_concurrency": { "type": "integer", "minimum": 1, "default": 4 }, - "max_matches_per_search": { "type": "integer", "minimum": 1, "default": 500 } - }, - "required": ["searches"] - }), - }); - - // Add WebDriver tools if enabled - if enable_webdriver { - tools.extend(vec![ - Tool { - name: "webdriver_start".to_string(), - description: "Start a Safari WebDriver session for browser automation. Must be called before any other webdriver tools. Requires Safari's 'Allow Remote Automation' to be enabled in Develop menu.".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "webdriver_navigate".to_string(), - description: "Navigate to a URL in the browser".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "url": { - "type": "string", - "description": "The URL to navigate to (must include protocol, e.g., https://)" - } - }, - "required": ["url"] - }), - }, - Tool { - name: "webdriver_get_url".to_string(), - description: "Get the current URL of the browser".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "webdriver_get_title".to_string(), - description: "Get the title of the current page".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "webdriver_find_element".to_string(), - description: "Find an element on the page by CSS selector and return its text content".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "selector": { - "type": "string", - "description": "CSS selector to find the element (e.g., 'h1', '.class-name', '#id')" - } - }, - "required": ["selector"] - }), - }, - Tool { - name: "webdriver_find_elements".to_string(), - description: "Find all elements matching a CSS selector and return their text content".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "selector": { - "type": "string", - "description": "CSS selector to find elements" - } - }, - "required": ["selector"] - }), - }, - Tool { - name: "webdriver_click".to_string(), - description: "Click an element on the page".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "selector": { - "type": "string", - "description": "CSS selector for the element to click" - } - }, - "required": ["selector"] - }), - }, - Tool { - name: "webdriver_send_keys".to_string(), - description: "Type text into an input element".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "selector": { - "type": "string", - "description": "CSS selector for the input element" - }, - "text": { - "type": "string", - "description": "Text to type into the element" - }, - "clear_first": { - "type": "boolean", - "description": "Whether to clear the element before typing (default: true)" - } - }, - "required": ["selector", "text"] - }), - }, - Tool { - name: "webdriver_execute_script".to_string(), - description: "Execute JavaScript code in the browser and return the result".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "script": { - "type": "string", - "description": "JavaScript code to execute (use 'return' to return a value)" - } - }, - "required": ["script"] - }), - }, - Tool { - name: "webdriver_get_page_source".to_string(), - description: "Get the rendered HTML source of the current page. Returns the current DOM state after JavaScript execution.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "max_length": { - "type": "integer", - "description": "Maximum length of HTML to return (default: 10000, use 0 for no truncation)" - }, - "save_to_file": { - "type": "string", - "description": "Optional file path to save the HTML instead of returning it inline" - } - }, - "required": [] - }), - }, - Tool { - name: "webdriver_screenshot".to_string(), - description: "Take a screenshot of the browser window".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path where to save the screenshot (e.g., '/tmp/screenshot.png')" - } - }, - "required": ["path"] - }), - }, - Tool { - name: "webdriver_back".to_string(), - description: "Navigate back in browser history".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "webdriver_forward".to_string(), - description: "Navigate forward in browser history".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "webdriver_refresh".to_string(), - description: "Refresh the current page".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "webdriver_quit".to_string(), - description: "Close the browser and end the WebDriver session".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - ]); - } - - // Add macOS Accessibility tools if enabled - if enable_macax { - tools.extend(vec![ - Tool { - name: "macax_list_apps".to_string(), - description: "List all running applications that can be controlled via macOS Accessibility API".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "macax_get_frontmost_app".to_string(), - description: "Get the name of the currently active (frontmost) application".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "macax_activate_app".to_string(), - description: "Bring an application to the front (activate it)".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "app_name": { - "type": "string", - "description": "Name of the application to activate (e.g., 'Safari', 'TextEdit')" - } - }, - "required": ["app_name"] - }), - }, - Tool { - name: "macax_press_key".to_string(), - description: "Press a keyboard key or shortcut in an application (e.g., Cmd+S to save)".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "app_name": { - "type": "string", - "description": "Name of the application" - }, - "key": { - "type": "string", - "description": "Key to press (e.g., 's', 'return', 'tab')" - }, - "modifiers": { - "type": "array", - "items": { - "type": "string" - }, - "description": "Modifier keys (e.g., ['command', 'shift'])" - } - }, - "required": ["app_name", "key"] - }), - }, - ]); - - // Add type_text tool for typing arbitrary text - tools.push(Tool { - name: "macax_type_text".to_string(), - description: "Type arbitrary text into the currently focused element in an application (supports unicode, emojis, etc.)".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "app_name": { - "type": "string", - "description": "Name of the application" - }, - "text": { - "type": "string", - "description": "Text to type (can include unicode, emojis, special characters)" - } - }, - "required": ["app_name", "text"] - }), - }); - } - - // Add extract_text_with_boxes tool (requires macax flag) - if enable_macax { - tools.push(Tool { - name: "extract_text_with_boxes".to_string(), - description: "Extract all text from an image file with bounding box coordinates for each text element. Returns JSON array with text, position (x, y), size (width, height), and confidence for each detected text. Uses Apple Vision Framework for precise sub-pixel accuracy.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "path": { - "type": "string", - "description": "Path to image file to extract text from" - }, - "app_name": { - "type": "string", - "description": "Optional: Name of application to screenshot first (e.g., 'Safari', 'Things3'). If provided, takes screenshot of app before extracting text." - } - }, - "required": ["path"] - }), - }); - } - - // Add vision-guided tools (requires computer control) - if enable_computer_control { - // Add vision-guided tools - tools.push(Tool { - name: "vision_find_text".to_string(), - description: "Find text in a specific application window and return its location with bounding box coordinates (x, y, width, height) and confidence score. Useful for locating UI elements. Uses Apple Vision Framework for precise sub-pixel accuracy.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "app_name": { - "type": "string", - "description": "Name of the application to search in (e.g., 'Things3', 'Safari', 'TextEdit')" - }, - "text": { - "type": "string", - "description": "The text to search for on screen" - } - }, - "required": ["app_name", "text"] - }), - }); - - tools.push(Tool { - name: "vision_click_text".to_string(), - description: "Find text in a specific application window and click on it (useful for clicking buttons, links, menu items)".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "app_name": { - "type": "string", - "description": "Name of the application (e.g., 'Things3', 'Safari', 'TextEdit')" - }, - "text": { - "type": "string", - "description": "The text to click on (e.g., 'Submit', 'OK', 'Cancel', '+')" - } - }, - "required": ["app_name", "text"] - }), - }); - - tools.push(Tool { - name: "vision_click_near_text".to_string(), - description: "Find text in a specific application window and click near it (useful for clicking text fields next to labels)".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "app_name": { - "type": "string", - "description": "Name of the application (e.g., 'Things3', 'Safari', 'TextEdit')" - }, - "text": { - "type": "string", - "description": "The label text to find (e.g., 'Name:', 'Email:', 'Task:')" - }, - "direction": { - "type": "string", - "enum": ["right", "below", "left", "above"], - "description": "Direction to click relative to the text (default: right)" - }, - "distance": { - "type": "integer", - "description": "Distance in pixels from the text (default: 50)" - } - }, - "required": ["app_name", "text"] - }), - }); - } - - tools - } /// Helper method to stream with retry logic async fn stream_with_retry( @@ -4172,11 +2811,12 @@ impl Agent { // Ensure tools are included for native providers in subsequent iterations let provider_for_tools = self.providers.get(None)?; if provider_for_tools.has_native_tool_calling() { - request.tools = Some(Self::create_tool_definitions( - self.config.webdriver.enabled, - self.config.macax.enabled, - self.config.computer_control.enabled, - )); + request.tools = Some(tool_definitions::create_tool_definitions( + tool_definitions::ToolConfig::new( + self.config.webdriver.enabled, + self.config.macax.enabled, + self.config.computer_control.enabled, + ))); } // DO NOT add final_display_content to full_response here! diff --git a/crates/g3-core/src/tool_definitions.rs b/crates/g3-core/src/tool_definitions.rs new file mode 100644 index 0000000..5c2c245 --- /dev/null +++ b/crates/g3-core/src/tool_definitions.rs @@ -0,0 +1,705 @@ +//! Tool definitions for the agent's available tools. +//! +//! This module contains the JSON schema definitions for all tools that can be +//! used by the agent when interacting with LLM providers that support native +//! tool calling. + +use g3_providers::Tool; +use serde_json::json; + +/// Configuration for which optional tool sets to enable +#[derive(Debug, Clone, Copy, Default)] +pub struct ToolConfig { + pub webdriver: bool, + pub macax: bool, + pub computer_control: bool, +} + +impl ToolConfig { + pub fn new(webdriver: bool, macax: bool, computer_control: bool) -> Self { + Self { + webdriver, + macax, + computer_control, + } + } +} + +/// Create tool definitions for native tool calling providers. +/// +/// Returns a vector of Tool definitions that describe the available tools +/// and their input schemas. +pub fn create_tool_definitions(config: ToolConfig) -> Vec { + let mut tools = create_core_tools(); + + if config.webdriver { + tools.extend(create_webdriver_tools()); + } + + if config.macax { + tools.extend(create_macax_tools()); + } + + if config.computer_control { + tools.extend(create_computer_control_tools()); + } + + tools +} + +/// Create the core tools that are always available +fn create_core_tools() -> Vec { + vec![ + Tool { + name: "shell".to_string(), + description: "Execute shell commands".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + }), + }, + Tool { + name: "background_process".to_string(), + description: "Launch a long-running process in the background (e.g., game servers, dev servers). The process runs independently and logs are captured to a file. Use the regular 'shell' tool to read logs (cat/tail), check status (ps), or stop the process (kill). Returns the PID and log file path.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "A unique name for this process (e.g., 'game_server', 'my_app'). Used to identify the process and its log file." + }, + "command": { + "type": "string", + "description": "The shell command to execute in the background" + }, + "working_dir": { + "type": "string", + "description": "Optional working directory. Defaults to current directory if not specified." + } + }, + "required": ["name", "command"] + }), + }, + Tool { + name: "read_file".to_string(), + description: "Read the contents of a file. For image files (png, jpg, jpeg, gif, bmp, tiff, webp), automatically extracts text using OCR. For text files, optionally read a specific character range.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "The path to the file to read" + }, + "start": { + "type": "integer", + "description": "Starting character position (0-indexed, inclusive). If omitted, reads from beginning." + }, + "end": { + "type": "integer", + "description": "Ending character position (0-indexed, EXCLUSIVE). If omitted, reads to end of file." + } + }, + "required": ["file_path"] + }), + }, + Tool { + name: "read_image".to_string(), + description: "Read one or more image files and send them to the LLM for visual analysis. Supports PNG, JPEG, GIF, and WebP formats. Use this when you need to visually inspect images (e.g., find sprites, analyze UI, read diagrams). The images will be included in your next response for analysis.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "file_paths": { + "type": "array", + "items": { "type": "string" }, + "description": "Array of paths to image files to read" + } + }, + "required": ["file_paths"] + }), + }, + Tool { + name: "write_file".to_string(), + description: "Write content to a file (creates or overwrites). You MUST provide all arguments".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "The path to the file to write" + }, + "content": { + "type": "string", + "description": "The content to write to the file" + } + }, + "required": ["file_path", "content"] + }), + }, + Tool { + name: "str_replace".to_string(), + description: "Apply a unified diff to a file. Supports multiple hunks and context lines. Optionally constrain the search to a [start, end) character range (0-indexed; end is EXCLUSIVE). Useful to disambiguate matches or limit scope in large files.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "file_path": { + "type": "string", + "description": "The path to the file to edit" + }, + "diff": { + "type": "string", + "description": "A unified diff showing what to replace. Supports @@ hunk headers, context lines, and multiple hunks (---/+++ headers optional for minimal diffs)." + }, + "start": { + "type": "integer", + "description": "Starting character position in the file (0-indexed, inclusive). If omitted, searches from beginning." + }, + "end": { + "type": "integer", + "description": "Ending character position in the file (0-indexed, EXCLUSIVE - character at this position is NOT included). If omitted, searches to end of file." + } + }, + "required": ["file_path", "diff"] + }), + }, + Tool { + name: "final_output".to_string(), + description: "Signal task completion with a detailed summary".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "summary": { + "type": "string", + "description": "A detailed summary in markdown of what was accomplished" + } + }, + "required": ["summary"] + }), + }, + Tool { + name: "take_screenshot".to_string(), + description: "Capture a screenshot of a specific application window. You MUST specify the window_id parameter with the application name (e.g., 'Safari', 'Terminal', 'Google Chrome'). The tool will automatically use the native screencapture command with the application's window ID for a clean capture. Use list_windows first to identify available windows.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Filename for the screenshot (e.g., 'safari.png'). If a relative path is provided, the screenshot will be saved to ~/tmp or $TMPDIR. Use an absolute path to save elsewhere." + }, + "window_id": { + "type": "string", + "description": "REQUIRED: Application name to capture (e.g., 'Safari', 'Terminal', 'Google Chrome'). The tool will capture the frontmost window of that application using its native window ID." + }, + "region": { + "type": "object", + "properties": { + "x": {"type": "integer"}, + "y": {"type": "integer"}, + "width": {"type": "integer"}, + "height": {"type": "integer"} + } + } + }, + "required": ["path", "window_id"] + }), + }, + Tool { + name: "extract_text".to_string(), + description: "Extract text from an image file using OCR. For extracting text from a specific window, use vision_find_text instead which automatically handles window capture.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Path to image file (optional if region is provided)" + }, + } + }), + }, + Tool { + name: "todo_read".to_string(), + description: "Read your current TODO list from todo.g3.md file in the session directory. Shows what tasks are planned and their status. Call this at the start of multi-step tasks to check for existing plans, and during execution to review progress before updating. TODO lists are scoped to the current session.".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "todo_write".to_string(), + description: "Create or update your TODO list in todo.g3.md file with a complete task plan. Use markdown checkboxes: - [ ] for pending, - [x] for complete. This tool replaces the entire file content, so always call todo_read first to preserve existing content. Essential for multi-step tasks. TODO lists are scoped to the current session.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "content": { + "type": "string", + "description": "The TODO list content to save. Use markdown checkbox format: - [ ] for incomplete tasks, - [x] for completed tasks. Support nested tasks with indentation." + } + }, + "required": ["content"] + }), + }, + Tool { + name: "code_coverage".to_string(), + description: "Generate a code coverage report for the entire workspace using cargo llvm-cov. This runs all tests with coverage instrumentation and returns a summary of coverage statistics. Requires llvm-tools-preview and cargo-llvm-cov to be installed (they will be auto-installed if missing).".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "code_search".to_string(), + description: "Syntax-aware code search that understands code structure, not just text. Finds actual functions, classes, methods, and other code constructs - ignores matches in comments and strings. Much more accurate than grep for code searches. Supports batch searches (up to 20 parallel) with structured results and context lines. Languages: Rust, Python, JavaScript, TypeScript, Go, Java, C, C++, Kotlin. Uses tree-sitter query syntax.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "searches": { + "type": "array", + "maxItems": 20, + "items": { + "type": "object", + "properties": { + "name": { "type": "string", "description": "Label for this search." }, + "query": { "type": "string", "description": "tree-sitter query in S-expression format (e.g., \"(function_item name: (identifier) @name)\")" }, + "language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript", "go", "java", "c", "cpp", "kotlin"], "description": "Programming language to search." }, + "paths": { "type": "array", "items": { "type": "string" }, "description": "Paths/dirs to search. Defaults to current dir if empty." }, + "context_lines": { "type": "integer", "minimum": 0, "maximum": 20, "default": 0, "description": "Lines of context to include around each match." } + }, + "required": ["name", "query", "language"] + } + }, + "max_concurrency": { "type": "integer", "minimum": 1, "default": 4 }, + "max_matches_per_search": { "type": "integer", "minimum": 1, "default": 500 } + }, + "required": ["searches"] + }), + }, + ] +} + +/// Create WebDriver browser automation tools +fn create_webdriver_tools() -> Vec { + vec![ + Tool { + name: "webdriver_start".to_string(), + description: "Start a Safari WebDriver session for browser automation. Must be called before any other webdriver tools. Requires Safari's 'Allow Remote Automation' to be enabled in Develop menu.".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "webdriver_navigate".to_string(), + description: "Navigate to a URL in the browser".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "url": { + "type": "string", + "description": "The URL to navigate to (must include protocol, e.g., https://)" + } + }, + "required": ["url"] + }), + }, + Tool { + name: "webdriver_get_url".to_string(), + description: "Get the current URL of the browser".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "webdriver_get_title".to_string(), + description: "Get the title of the current page".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "webdriver_find_element".to_string(), + description: "Find an element on the page by CSS selector and return its text content".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "selector": { + "type": "string", + "description": "CSS selector to find the element (e.g., 'h1', '.class-name', '#id')" + } + }, + "required": ["selector"] + }), + }, + Tool { + name: "webdriver_find_elements".to_string(), + description: "Find all elements matching a CSS selector and return their text content".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "selector": { + "type": "string", + "description": "CSS selector to find elements" + } + }, + "required": ["selector"] + }), + }, + Tool { + name: "webdriver_click".to_string(), + description: "Click an element on the page".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "selector": { + "type": "string", + "description": "CSS selector for the element to click" + } + }, + "required": ["selector"] + }), + }, + Tool { + name: "webdriver_send_keys".to_string(), + description: "Type text into an input element".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "selector": { + "type": "string", + "description": "CSS selector for the input element" + }, + "text": { + "type": "string", + "description": "Text to type into the element" + }, + "clear_first": { + "type": "boolean", + "description": "Whether to clear the element before typing (default: true)" + } + }, + "required": ["selector", "text"] + }), + }, + Tool { + name: "webdriver_execute_script".to_string(), + description: "Execute JavaScript code in the browser and return the result".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "script": { + "type": "string", + "description": "JavaScript code to execute (use 'return' to return a value)" + } + }, + "required": ["script"] + }), + }, + Tool { + name: "webdriver_get_page_source".to_string(), + description: "Get the rendered HTML source of the current page. Returns the current DOM state after JavaScript execution.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "max_length": { + "type": "integer", + "description": "Maximum length of HTML to return (default: 10000, use 0 for no truncation)" + }, + "save_to_file": { + "type": "string", + "description": "Optional file path to save the HTML instead of returning it inline" + } + }, + "required": [] + }), + }, + Tool { + name: "webdriver_screenshot".to_string(), + description: "Take a screenshot of the browser window".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Path where to save the screenshot (e.g., '/tmp/screenshot.png')" + } + }, + "required": ["path"] + }), + }, + Tool { + name: "webdriver_back".to_string(), + description: "Navigate back in browser history".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "webdriver_forward".to_string(), + description: "Navigate forward in browser history".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "webdriver_refresh".to_string(), + description: "Refresh the current page".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "webdriver_quit".to_string(), + description: "Close the browser and end the WebDriver session".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + ] +} + +/// Create macOS Accessibility tools +fn create_macax_tools() -> Vec { + vec![ + Tool { + name: "macax_list_apps".to_string(), + description: "List all running applications that can be controlled via macOS Accessibility API".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "macax_get_frontmost_app".to_string(), + description: "Get the name of the currently active (frontmost) application".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "macax_activate_app".to_string(), + description: "Bring an application to the front (activate it)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application to activate (e.g., 'Safari', 'TextEdit')" + } + }, + "required": ["app_name"] + }), + }, + Tool { + name: "macax_press_key".to_string(), + description: "Press a keyboard key or shortcut in an application (e.g., Cmd+S to save)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "key": { + "type": "string", + "description": "Key to press (e.g., 's', 'return', 'tab')" + }, + "modifiers": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Modifier keys (e.g., ['command', 'shift'])" + } + }, + "required": ["app_name", "key"] + }), + }, + Tool { + name: "macax_type_text".to_string(), + description: "Type arbitrary text into the currently focused element in an application (supports unicode, emojis, etc.)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "text": { + "type": "string", + "description": "Text to type (can include unicode, emojis, special characters)" + } + }, + "required": ["app_name", "text"] + }), + }, + Tool { + name: "extract_text_with_boxes".to_string(), + description: "Extract all text from an image file with bounding box coordinates for each text element. Returns JSON array with text, position (x, y), size (width, height), and confidence for each detected text. Uses Apple Vision Framework for precise sub-pixel accuracy.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Path to image file to extract text from" + }, + "app_name": { + "type": "string", + "description": "Optional: Name of application to screenshot first (e.g., 'Safari', 'Things3'). If provided, takes screenshot of app before extracting text." + } + }, + "required": ["path"] + }), + }, + ] +} + +/// Create computer control / vision-guided tools +fn create_computer_control_tools() -> Vec { + vec![ + Tool { + name: "vision_find_text".to_string(), + description: "Find text in a specific application window and return its location with bounding box coordinates (x, y, width, height) and confidence score. Useful for locating UI elements. Uses Apple Vision Framework for precise sub-pixel accuracy.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application to search in (e.g., 'Things3', 'Safari', 'TextEdit')" + }, + "text": { + "type": "string", + "description": "The text to search for on screen" + } + }, + "required": ["app_name", "text"] + }), + }, + Tool { + name: "vision_click_text".to_string(), + description: "Find text in a specific application window and click on it (useful for clicking buttons, links, menu items)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application (e.g., 'Things3', 'Safari', 'TextEdit')" + }, + "text": { + "type": "string", + "description": "The text to click on (e.g., 'Submit', 'OK', 'Cancel', '+')" + } + }, + "required": ["app_name", "text"] + }), + }, + Tool { + name: "vision_click_near_text".to_string(), + description: "Find text in a specific application window and click near it (useful for clicking text fields next to labels)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application (e.g., 'Things3', 'Safari', 'TextEdit')" + }, + "text": { + "type": "string", + "description": "The label text to find (e.g., 'Name:', 'Email:', 'Task:')" + }, + "direction": { + "type": "string", + "enum": ["right", "below", "left", "above"], + "description": "Direction to click relative to the text (default: right)" + }, + "distance": { + "type": "integer", + "description": "Distance in pixels from the text (default: 50)" + } + }, + "required": ["app_name", "text"] + }), + }, + ] +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_core_tools_count() { + let tools = create_core_tools(); + // Should have the core tools: shell, background_process, read_file, read_image, + // write_file, str_replace, final_output, take_screenshot, extract_text, + // todo_read, todo_write, code_coverage, code_search + assert_eq!(tools.len(), 13); + } + + #[test] + fn test_webdriver_tools_count() { + let tools = create_webdriver_tools(); + // 15 webdriver tools + assert_eq!(tools.len(), 15); + } + + #[test] + fn test_macax_tools_count() { + let tools = create_macax_tools(); + // 6 macax tools + assert_eq!(tools.len(), 6); + } + + #[test] + fn test_computer_control_tools_count() { + let tools = create_computer_control_tools(); + // 3 vision tools + assert_eq!(tools.len(), 3); + } + + #[test] + fn test_create_tool_definitions_core_only() { + let config = ToolConfig::default(); + let tools = create_tool_definitions(config); + assert_eq!(tools.len(), 13); + } + + #[test] + fn test_create_tool_definitions_all_enabled() { + let config = ToolConfig::new(true, true, true); + let tools = create_tool_definitions(config); + // 13 core + 15 webdriver + 6 macax + 3 computer_control = 37 + assert_eq!(tools.len(), 37); + } + + #[test] + fn test_tool_has_required_fields() { + let tools = create_core_tools(); + for tool in tools { + assert!(!tool.name.is_empty(), "Tool name should not be empty"); + assert!(!tool.description.is_empty(), "Tool description should not be empty"); + assert!(tool.input_schema.is_object(), "Tool input_schema should be an object"); + } + } +}