diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index f4f2c81..969e854 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -19,9 +19,9 @@ mod tilde_expansion_tests; #[cfg(test)] mod error_handling_test; use anyhow::Result; +use g3_computer_control::WebDriverController; use g3_config::Config; use g3_execution::CodeExecutor; -use g3_computer_control::WebDriverController; use g3_providers::{CompletionRequest, Message, MessageRole, ProviderRegistry, Tool}; #[allow(unused_imports)] use regex::Regex; @@ -405,12 +405,18 @@ Format this as a detailed but concise summary that can be used to resume the con } /// Reset the context window with a summary - pub fn reset_with_summary(&mut self, summary: String, latest_user_message: Option) -> usize { + pub fn reset_with_summary( + &mut self, + summary: String, + latest_user_message: Option, + ) -> usize { // Calculate chars saved (old history minus new summary) - let old_chars: usize = self.conversation_history.iter() + let old_chars: usize = self + .conversation_history + .iter() .map(|m| m.content.len()) .sum(); - + // Clear the conversation history self.conversation_history.clear(); self.used_tokens = 0; @@ -429,8 +435,10 @@ Format this as a detailed but concise summary that can be used to resume the con content: user_msg, }); } - - let new_chars: usize = self.conversation_history.iter() + + let new_chars: usize = self + .conversation_history + .iter() .map(|m| m.content.len()) .sum(); old_chars.saturating_sub(new_chars) @@ -440,7 +448,7 @@ Format this as a detailed but concise summary that can be used to resume the con /// Triggers at 50%, 60%, 70%, and 80% thresholds pub fn should_thin(&self) -> bool { let current_percentage = self.percentage_used() as u32; - + // Check if we've crossed a new 10% threshold starting at 50% if current_percentage >= 50 { let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10% @@ -448,7 +456,7 @@ Format this as a detailed but concise summary that can be used to resume the con return true; } } - + false } @@ -457,32 +465,37 @@ Format this as a detailed but concise summary that can be used to resume the con pub fn thin_context(&mut self) -> (String, usize) { let current_percentage = self.percentage_used() as u32; let current_threshold = (current_percentage / 10) * 10; - + // Update the last thinning percentage self.last_thinning_percentage = current_threshold; - + // Calculate the first third of the conversation let total_messages = self.conversation_history.len(); let first_third_end = (total_messages / 3).max(1); - + let mut leaned_count = 0; let mut tool_call_leaned_count = 0; let mut chars_saved = 0; - + // Create ~/tmp directory if it doesn't exist let tmp_dir = shellexpand::tilde("~/tmp").to_string(); if let Err(e) = std::fs::create_dir_all(&tmp_dir) { warn!("Failed to create ~/tmp directory: {}", e); - return ("āš ļø Context thinning failed: could not create ~/tmp directory".to_string(), 0); + return ( + "āš ļø Context thinning failed: could not create ~/tmp directory".to_string(), + 0, + ); } - + // Scan the first third of messages for i in 0..first_third_end { if let Some(message) = self.conversation_history.get_mut(i) { // Process User messages that look like tool results - if matches!(message.role, MessageRole::User) && message.content.starts_with("Tool result:") { + if matches!(message.role, MessageRole::User) + && message.content.starts_with("Tool result:") + { let content_len = message.content.len(); - + // Only thin if the content is greater than 500 chars if content_len > 500 { // Generate a unique filename based on timestamp and index @@ -492,54 +505,59 @@ Format this as a detailed but concise summary that can be used to resume the con .as_secs(); let filename = format!("leaned_tool_result_{}_{}.txt", timestamp, i); let file_path = format!("{}/{}", tmp_dir, filename); - + // Write the content to file if let Err(e) = std::fs::write(&file_path, &message.content) { warn!("Failed to write thinned content to {}: {}", file_path, e); continue; } - + // Replace the message content with a note let original_len = message.content.len(); message.content = format!("Tool result saved to {}", file_path); - + leaned_count += 1; chars_saved += original_len - message.content.len(); - - debug!("Thinned tool result {} ({} chars) to {}", i, original_len, file_path); + + debug!( + "Thinned tool result {} ({} chars) to {}", + i, original_len, file_path + ); } } - + // Process Assistant messages that contain tool calls with large arguments if matches!(message.role, MessageRole::Assistant) { // Try to parse the message content as JSON to find tool calls let content = &message.content; - + // Look for JSON tool call patterns - if let Some(tool_call_start) = content.find(r#"{"tool":"#) + if let Some(tool_call_start) = content + .find(r#"{"tool":"#) .or_else(|| content.find(r#"{ "tool":"#)) .or_else(|| content.find(r#"{"tool" :"#)) .or_else(|| content.find(r#"{ "tool" :"#)) { // Try to extract and parse the JSON tool call let json_portion = &content[tool_call_start..]; - + // Find the end of the JSON object if let Some(json_end) = Self::find_json_end(json_portion) { let json_str = &json_portion[..=json_end]; - + // Try to parse as ToolCall if let Ok(mut tool_call) = serde_json::from_str::(json_str) { let mut modified = false; - + // Handle write_file tool calls if tool_call.tool == "write_file" { if let Some(args_obj) = tool_call.args.as_object_mut() { // Extract content to avoid borrow issues - let content_info = args_obj.get("content") + let content_info = args_obj + .get("content") .and_then(|v| v.as_str()) .map(|s| (s.to_string(), s.len())); - + if let Some((content_str, content_len)) = content_info { // Only thin if content is greater than 500 chars if content_len > 500 { @@ -547,13 +565,20 @@ Format this as a detailed but concise summary that can be used to resume the con .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_secs(); - let filename = format!("leaned_write_file_content_{}_{}.txt", timestamp, i); + let filename = format!( + "leaned_write_file_content_{}_{}.txt", + timestamp, i + ); let file_path = format!("{}/{}", tmp_dir, filename); - - if std::fs::write(&file_path, &content_str).is_ok() { + + if std::fs::write(&file_path, &content_str).is_ok() + { args_obj.insert( "content".to_string(), - serde_json::Value::String(format!("", file_path)) + serde_json::Value::String(format!( + "", + file_path + )), ); modified = true; chars_saved += content_len; @@ -564,15 +589,16 @@ Format this as a detailed but concise summary that can be used to resume the con } } } - + // Handle str_replace tool calls if tool_call.tool == "str_replace" { if let Some(args_obj) = tool_call.args.as_object_mut() { // Extract diff to avoid borrow issues - let diff_info = args_obj.get("diff") + let diff_info = args_obj + .get("diff") .and_then(|v| v.as_str()) .map(|s| (s.to_string(), s.len())); - + if let Some((diff_str, diff_len)) = diff_info { // Only thin if diff is greater than 500 chars if diff_len > 500 { @@ -580,13 +606,19 @@ Format this as a detailed but concise summary that can be used to resume the con .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_secs(); - let filename = format!("leaned_str_replace_diff_{}_{}.txt", timestamp, i); + let filename = format!( + "leaned_str_replace_diff_{}_{}.txt", + timestamp, i + ); let file_path = format!("{}/{}", tmp_dir, filename); - + if std::fs::write(&file_path, &diff_str).is_ok() { args_obj.insert( "diff".to_string(), - serde_json::Value::String(format!("", file_path)) + serde_json::Value::String(format!( + "", + file_path + )), ); modified = true; chars_saved += diff_len; @@ -597,15 +629,16 @@ Format this as a detailed but concise summary that can be used to resume the con } } } - + // If we modified the tool call, reconstruct the message if modified { let prefix = &content[..tool_call_start]; let suffix = &content[tool_call_start + json_str.len()..]; - + // Serialize the modified tool call if let Ok(new_json) = serde_json::to_string(&tool_call) { - message.content = format!("{}{}{}", prefix, new_json, suffix); + message.content = + format!("{}{}{}", prefix, new_json, suffix); } } } @@ -614,27 +647,37 @@ Format this as a detailed but concise summary that can be used to resume the con } } } - + // Recalculate token usage after thinning self.recalculate_tokens(); - + if leaned_count > 0 { if tool_call_leaned_count > 0 { - (format!("šŸ„’ Context thinned at {}%: {} tool results + {} tool calls, ~{} chars saved", + (format!("šŸ„’ Context thinned at {}%: {} tool results + {} tool calls, ~{} chars saved", current_threshold, leaned_count, tool_call_leaned_count, chars_saved), chars_saved) } else { - (format!("šŸ„’ Context thinned at {}%: {} tool results, ~{} chars saved", - current_threshold, leaned_count, chars_saved), chars_saved) + ( + format!( + "šŸ„’ Context thinned at {}%: {} tool results, ~{} chars saved", + current_threshold, leaned_count, chars_saved + ), + chars_saved, + ) } } else if tool_call_leaned_count > 0 { - (format!("šŸ„’ Context thinned at {}%: {} tool calls, ~{} chars saved", - current_threshold, tool_call_leaned_count, chars_saved), chars_saved) + ( + format!( + "šŸ„’ Context thinned at {}%: {} tool calls, ~{} chars saved", + current_threshold, tool_call_leaned_count, chars_saved + ), + chars_saved, + ) } else { - (format!("ℹ Context thinning triggered at {}% but no large tool results or tool calls found in first third", + (format!("ℹ Context thinning triggered at {}% but no large tool results or tool calls found in first third", current_threshold), 0) } } - + /// Recalculate token usage based on current conversation history fn recalculate_tokens(&mut self) { let mut total = 0; @@ -642,22 +685,22 @@ Format this as a detailed but concise summary that can be used to resume the con total += Self::estimate_tokens(&message.content); } self.used_tokens = total; - + debug!("Recalculated tokens after thinning: {} tokens", total); } - + /// Helper function to find the end of a JSON object fn find_json_end(json_str: &str) -> Option { let mut brace_count = 0; let mut in_string = false; let mut escape_next = false; - + for (i, ch) in json_str.char_indices() { if escape_next { escape_next = false; continue; } - + match ch { '\\' => escape_next = true, '"' if !escape_next => in_string = !in_string, @@ -671,7 +714,7 @@ Format this as a detailed but concise summary that can be used to resume the con _ => {} } } - + None } } @@ -679,7 +722,7 @@ Format this as a detailed but concise summary that can be used to resume the con pub struct Agent { providers: ProviderRegistry, context_window: ContextWindow, - thinning_events: Vec, // chars saved per thinning event + thinning_events: Vec, // chars saved per thinning event summarization_events: Vec, // chars saved per summarization event first_token_times: Vec, // time to first token for each completion config: Config, @@ -690,9 +733,14 @@ pub struct Agent { quiet: bool, computer_controller: Option>, todo_content: std::sync::Arc>, - webdriver_session: std::sync::Arc>>>>, + webdriver_session: std::sync::Arc< + tokio::sync::RwLock< + Option>>, + >, + >, safaridriver_process: std::sync::Arc>>, - macax_controller: std::sync::Arc>>, + macax_controller: + std::sync::Arc>>, } impl Agent { @@ -923,10 +971,11 @@ impl Agent { webdriver_session: std::sync::Arc::new(tokio::sync::RwLock::new(None)), safaridriver_process: std::sync::Arc::new(tokio::sync::RwLock::new(None)), macax_controller: { - std::sync::Arc::new(tokio::sync::RwLock::new( - if macax_enabled { Some(g3_computer_control::MacAxController::new()?) } - else { None } - )) + std::sync::Arc::new(tokio::sync::RwLock::new(if macax_enabled { + Some(g3_computer_control::MacAxController::new()?) + } else { + None + })) }, }) } @@ -956,9 +1005,7 @@ impl Agent { config.agent.max_context_length as u32 } } - "openai" => { - 192000 - } + "openai" => 192000, "anthropic" => { // Claude models have large context windows 200000 // Default for Claude models @@ -1102,6 +1149,13 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request: 5. Call the final_output tool with a detailed summary when done. For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\". + +IMPORTANT: Use ripgrep (`rg`) when available for searching files and code - it respects .gitignore and is fast: + - List files by name: `rg --files | rg ` + - List files that contain a regex: `rg '' -l` + - Search with context: `rg -C 3 'pattern'` + - Always prefer ripgrep to grep, find, or ls -r to avoid large outputs from hidden files + If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir. IMPORTANT: If the user asks you to just respond with text (like \"just say hello\" or \"tell me about X\"), do NOT use tools. Simply respond with the requested text directly. Only use tools when you need to execute commands or complete tasks that require action. @@ -1239,7 +1293,11 @@ Template: // Check if provider supports native tool calling and add tools if so let provider = self.providers.get(None)?; let tools = if provider.has_native_tool_calling() { - Some(Self::create_tool_definitions(self.config.webdriver.enabled, self.config.macax.enabled, self.config.computer_control.enabled)) + Some(Self::create_tool_definitions( + self.config.webdriver.enabled, + self.config.macax.enabled, + self.config.computer_control.enabled, + )) } else { None }; @@ -1401,7 +1459,7 @@ Template: /// Returns Ok(true) if summarization was successful, Ok(false) if it failed pub async fn force_summarize(&mut self) -> Result { info!("Manual summarization triggered"); - + self.ui_writer.print_context_status(&format!( "\nšŸ—œļø Manual summarization requested (current usage: {}%)...", self.context_window.percentage_used() as u32 @@ -1422,8 +1480,7 @@ Template: let summary_messages = vec![ Message { role: MessageRole::System, - content: "You are a helpful assistant that creates concise summaries." - .to_string(), + content: "You are a helpful assistant that creates concise summaries.".to_string(), }, Message { role: MessageRole::User, @@ -1476,9 +1533,8 @@ Template: // Get the summary match provider.complete(summary_request).await { Ok(summary_response) => { - self.ui_writer.print_context_status( - "āœ… Context compacted successfully.\n", - ); + self.ui_writer + .print_context_status("āœ… Context compacted successfully.\n"); // Get the latest user message to preserve it let latest_user_msg = self @@ -1490,7 +1546,8 @@ Template: .map(|m| m.content.clone()); // Reset context with summary - let chars_saved = self.context_window + let chars_saved = self + .context_window .reset_with_summary(summary_response.content, latest_user_msg); self.summarization_events.push(chars_saved); @@ -1518,37 +1575,40 @@ Template: /// Returns Ok(true) if README was found and reloaded, Ok(false) if no README was present initially pub fn reload_readme(&mut self) -> Result { info!("Manual README reload triggered"); - + // Check if the first message in conversation history is a system message with README content let has_readme = self .context_window .conversation_history .first() - .map(|m| matches!(m.role, MessageRole::System) && - (m.content.contains("Project README") || m.content.contains("Agent Configuration"))) + .map(|m| { + matches!(m.role, MessageRole::System) + && (m.content.contains("Project README") + || m.content.contains("Agent Configuration")) + }) .unwrap_or(false); - + if !has_readme { return Ok(false); } - + // Try to load README.md and AGENTS.md let mut combined_content = String::new(); let mut found_any = false; - + if let Ok(agents_content) = std::fs::read_to_string("AGENTS.md") { combined_content.push_str("# Agent Configuration\n\n"); combined_content.push_str(&agents_content); combined_content.push_str("\n\n"); found_any = true; } - + if let Ok(readme_content) = std::fs::read_to_string("README.md") { combined_content.push_str("# Project README\n\n"); combined_content.push_str(&readme_content); found_any = true; } - + if found_any { // Replace the first message with the new content if let Some(first_msg) = self.context_window.conversation_history.first_mut() { @@ -1567,69 +1627,94 @@ Template: pub fn get_stats(&self) -> String { let mut stats = String::new(); use std::time::Duration; - + stats.push_str("\nšŸ“Š Context Window Statistics\n"); stats.push_str(&"=".repeat(60)); stats.push_str("\n\n"); - + // Context window usage stats.push_str("šŸ—‚ļø Context Window:\n"); - stats.push_str(&format!(" • Used Tokens: {:>10} / {}\n", - self.context_window.used_tokens, - self.context_window.total_tokens)); - stats.push_str(&format!(" • Usage Percentage: {:>10.1}%\n", - self.context_window.percentage_used())); - stats.push_str(&format!(" • Remaining Tokens: {:>10}\n", - self.context_window.remaining_tokens())); - stats.push_str(&format!(" • Cumulative Tokens: {:>10}\n", - self.context_window.cumulative_tokens)); - stats.push_str(&format!(" • Last Thinning: {:>10}%\n", - self.context_window.last_thinning_percentage)); + stats.push_str(&format!( + " • Used Tokens: {:>10} / {}\n", + self.context_window.used_tokens, self.context_window.total_tokens + )); + stats.push_str(&format!( + " • Usage Percentage: {:>10.1}%\n", + self.context_window.percentage_used() + )); + stats.push_str(&format!( + " • Remaining Tokens: {:>10}\n", + self.context_window.remaining_tokens() + )); + stats.push_str(&format!( + " • Cumulative Tokens: {:>10}\n", + self.context_window.cumulative_tokens + )); + stats.push_str(&format!( + " • Last Thinning: {:>10}%\n", + self.context_window.last_thinning_percentage + )); stats.push('\n'); - + // Context optimization metrics stats.push_str("šŸ—œļø Context Optimization:\n"); - stats.push_str(&format!(" • Thinning Events: {:>10}\n", - self.thinning_events.len())); + stats.push_str(&format!( + " • Thinning Events: {:>10}\n", + self.thinning_events.len() + )); if !self.thinning_events.is_empty() { let total_thinned: usize = self.thinning_events.iter().sum(); let avg_thinned = total_thinned / self.thinning_events.len(); stats.push_str(&format!(" • Total Chars Saved: {:>10}\n", total_thinned)); stats.push_str(&format!(" • Avg Chars/Event: {:>10}\n", avg_thinned)); } - - stats.push_str(&format!(" • Summarizations: {:>10}\n", - self.summarization_events.len())); + + stats.push_str(&format!( + " • Summarizations: {:>10}\n", + self.summarization_events.len() + )); if !self.summarization_events.is_empty() { let total_summarized: usize = self.summarization_events.iter().sum(); let avg_summarized = total_summarized / self.summarization_events.len(); - stats.push_str(&format!(" • Total Chars Saved: {:>10}\n", total_summarized)); + stats.push_str(&format!( + " • Total Chars Saved: {:>10}\n", + total_summarized + )); stats.push_str(&format!(" • Avg Chars/Event: {:>10}\n", avg_summarized)); } stats.push('\n'); - + // Performance metrics stats.push_str("⚔ Performance:\n"); if !self.first_token_times.is_empty() { - let avg_ttft = self.first_token_times.iter().sum::() / self.first_token_times.len() as u32; + let avg_ttft = self.first_token_times.iter().sum::() + / self.first_token_times.len() as u32; let mut sorted_times = self.first_token_times.clone(); sorted_times.sort(); let median_ttft = sorted_times[sorted_times.len() / 2]; - stats.push_str(&format!(" • Avg Time to First Token: {:>6.3}s\n", avg_ttft.as_secs_f64())); - stats.push_str(&format!(" • Median Time to First Token: {:>6.3}s\n", median_ttft.as_secs_f64())); + stats.push_str(&format!( + " • Avg Time to First Token: {:>6.3}s\n", + avg_ttft.as_secs_f64() + )); + stats.push_str(&format!( + " • Median Time to First Token: {:>6.3}s\n", + median_ttft.as_secs_f64() + )); } stats.push('\n'); - + // Conversation history stats.push_str("šŸ’¬ Conversation History:\n"); - stats.push_str(&format!(" • Total Messages: {:>10}\n", - self.context_window.conversation_history.len())); - + stats.push_str(&format!( + " • Total Messages: {:>10}\n", + self.context_window.conversation_history.len() + )); + // Count messages by role let mut system_count = 0; let mut user_count = 0; let mut assistant_count = 0; - + for msg in &self.context_window.conversation_history { match msg.role { MessageRole::System => system_count += 1, @@ -1637,48 +1722,64 @@ Template: MessageRole::Assistant => assistant_count += 1, } } - + stats.push_str(&format!(" • System Messages: {:>10}\n", system_count)); stats.push_str(&format!(" • User Messages: {:>10}\n", user_count)); - stats.push_str(&format!(" • Assistant Messages:{:>10}\n", assistant_count)); + stats.push_str(&format!( + " • Assistant Messages:{:>10}\n", + assistant_count + )); stats.push('\n'); - + // Tool call metrics stats.push_str("šŸ”§ Tool Call Metrics:\n"); - stats.push_str(&format!(" • Total Tool Calls: {:>10}\n", - self.tool_call_metrics.len())); - - let successful_calls = self.tool_call_metrics.iter() + stats.push_str(&format!( + " • Total Tool Calls: {:>10}\n", + self.tool_call_metrics.len() + )); + + let successful_calls = self + .tool_call_metrics + .iter() .filter(|(_, _, success)| *success) .count(); let failed_calls = self.tool_call_metrics.len() - successful_calls; - - stats.push_str(&format!(" • Successful: {:>10}\n", successful_calls)); + + stats.push_str(&format!( + " • Successful: {:>10}\n", + successful_calls + )); stats.push_str(&format!(" • Failed: {:>10}\n", failed_calls)); - + if !self.tool_call_metrics.is_empty() { - let total_duration: Duration = self.tool_call_metrics.iter() + let total_duration: Duration = self + .tool_call_metrics + .iter() .map(|(_, duration, _)| *duration) .sum(); let avg_duration = total_duration / self.tool_call_metrics.len() as u32; - - stats.push_str(&format!(" • Total Duration: {:>10.2}s\n", - total_duration.as_secs_f64())); - stats.push_str(&format!(" • Average Duration: {:>10.2}s\n", - avg_duration.as_secs_f64())); + + stats.push_str(&format!( + " • Total Duration: {:>10.2}s\n", + total_duration.as_secs_f64() + )); + stats.push_str(&format!( + " • Average Duration: {:>10.2}s\n", + avg_duration.as_secs_f64() + )); } stats.push('\n'); - + // Provider info stats.push_str("šŸ”Œ Provider:\n"); if let Ok((provider, model)) = self.get_provider_info() { stats.push_str(&format!(" • Provider: {}\n", provider)); stats.push_str(&format!(" • Model: {}\n", model)); } - + stats.push_str(&"=".repeat(60)); stats.push('\n'); - + stats } @@ -1700,7 +1801,11 @@ Template: } /// Create tool definitions for native tool calling providers - fn create_tool_definitions(enable_webdriver: bool, enable_macax: bool, enable_computer_control: bool) -> Vec { + fn create_tool_definitions( + enable_webdriver: bool, + enable_macax: bool, + enable_computer_control: bool, + ) -> Vec { let mut tools = vec![ Tool { name: "shell".to_string(), @@ -2106,7 +2211,7 @@ Template: }), }, ]); - + // Add type_text tool for typing arbitrary text tools.push(Tool { name: "macax_type_text".to_string(), @@ -2126,9 +2231,8 @@ Template: "required": ["app_name", "text"] }), }); - } - + // Add extract_text_with_boxes tool (requires macax flag) if enable_macax { tools.push(Tool { @@ -2150,7 +2254,7 @@ Template: }), }); } - + // Add vision-guided tools (requires computer control) if enable_computer_control { // Add vision-guided tools @@ -2172,7 +2276,7 @@ Template: "required": ["app_name", "text"] }), }); - + tools.push(Tool { name: "vision_click_text".to_string(), description: "Find text in a specific application window and click on it (useful for clicking buttons, links, menu items)".to_string(), @@ -2191,7 +2295,7 @@ Template: "required": ["app_name", "text"] }), }); - + tools.push(Tool { name: "vision_click_near_text".to_string(), description: "Find text in a specific application window and click near it (useful for clicking text fields next to labels)".to_string(), @@ -2220,7 +2324,7 @@ Template: }), }); } - + tools } @@ -2384,9 +2488,8 @@ Template: // Get the summary match provider.complete(summary_request).await { Ok(summary_response) => { - self.ui_writer.print_context_status( - "āœ… Context compacted successfully. Continuing...\n", - ); + self.ui_writer + .print_context_status("āœ… Context compacted successfully. Continuing...\n"); // Extract the latest user message from the request let latest_user_msg = request @@ -2397,13 +2500,14 @@ Template: .map(|m| m.content.clone()); // Reset context with summary - let chars_saved = self.context_window + let chars_saved = self + .context_window .reset_with_summary(summary_response.content, latest_user_msg); self.summarization_events.push(chars_saved); // Update the request with new context request.messages = self.context_window.conversation_history.clone(); - } + } Err(e) => { error!("Failed to create summary: {}", e); self.ui_writer.print_context_status("āš ļø Unable to create summary. Consider starting a new session if you continue to see errors.\n"); @@ -2549,10 +2653,11 @@ Template: // Handle completed tool calls if let Some(tool_call) = completed_tools.into_iter().next() { debug!("Processing completed tool call: {:?}", tool_call); - + // Check if we should thin the context BEFORE executing the tool if self.context_window.should_thin() { - let (thin_summary, chars_saved) = self.context_window.thin_context(); + let (thin_summary, chars_saved) = + self.context_window.thin_context(); self.thinning_events.push(chars_saved); // Print the thinning summary to the user self.ui_writer.print_context_thinning(&thin_summary); @@ -2679,39 +2784,46 @@ Template: // Check if UI wants full output (machine mode) or truncated (human mode) let wants_full = self.ui_writer.wants_full_output(); - + // Helper function to safely truncate strings at character boundaries - let truncate_line = |line: &str, max_width: usize, truncate: bool| -> String { - if !truncate { - // Machine mode - return full line - line.to_string() - } else if line.chars().count() <= max_width { - // Human mode - line fits within limit - line.to_string() - } else { - // Human mode - truncate long line - let truncated: String = line - .chars() - .take(max_width.saturating_sub(3)) - .collect(); - format!("{}...", truncated) - } - }; + let truncate_line = + |line: &str, max_width: usize, truncate: bool| -> String { + if !truncate { + // Machine mode - return full line + line.to_string() + } else if line.chars().count() <= max_width { + // Human mode - line fits within limit + line.to_string() + } else { + // Human mode - truncate long line + let truncated: String = line + .chars() + .take(max_width.saturating_sub(3)) + .collect(); + format!("{}...", truncated) + } + }; const MAX_LINES: usize = 5; const MAX_LINE_WIDTH: usize = 80; let output_len = output_lines.len(); - + // For todo tools, show all lines without truncation - let is_todo_tool = tool_call.tool == "todo_read" || tool_call.tool == "todo_write"; - let max_lines_to_show = if is_todo_tool || wants_full { output_len } else { MAX_LINES }; + let is_todo_tool = + tool_call.tool == "todo_read" || tool_call.tool == "todo_write"; + let max_lines_to_show = if is_todo_tool || wants_full { + output_len + } else { + MAX_LINES + }; for (idx, line) in output_lines.iter().enumerate() { if !is_todo_tool && !wants_full && idx >= max_lines_to_show { break; } // Clip line to max width - let clipped_line = truncate_line(line, MAX_LINE_WIDTH, !wants_full); + let clipped_line = + truncate_line(line, MAX_LINE_WIDTH, !wants_full); self.ui_writer.update_tool_output_line(&clipped_line); } @@ -2760,7 +2872,9 @@ Template: // Add the tool call and result to the context window using RAW unfiltered content // This ensures the log file contains the true raw content including JSON tool calls - let tool_message = if !full_response.contains(final_display_content) && !raw_content_for_log.trim().is_empty() { + let tool_message = if !full_response.contains(final_display_content) + && !raw_content_for_log.trim().is_empty() + { Message { role: MessageRole::Assistant, content: format!( @@ -2793,7 +2907,11 @@ Template: // Ensure tools are included for native providers in subsequent iterations if provider.has_native_tool_calling() { - request.tools = Some(Self::create_tool_definitions(self.config.webdriver.enabled, self.config.macax.enabled, self.config.computer_control.enabled)); + request.tools = Some(Self::create_tool_definitions( + self.config.webdriver.enabled, + self.config.macax.enabled, + self.config.computer_control.enabled, + )); } // Only add to full_response if we haven't already added it @@ -3097,7 +3215,7 @@ Template: .replace("", "") .replace("[/INST]", "") .replace("<>", ""); - + if !raw_clean.trim().is_empty() { let assistant_message = Message { role: MessageRole::Assistant, @@ -3629,7 +3747,10 @@ Template: .unwrap_or(0) as i32, }); - match controller.take_screenshot(path, region, Some(window_id)).await { + match controller + .take_screenshot(path, region, Some(window_id)) + .await + { Ok(_) => { // Get the actual path where the screenshot was saved let actual_path = if path.starts_with('/') { @@ -3656,10 +3777,12 @@ Template: } "extract_text" => { if let Some(controller) = &self.computer_controller { - let path = tool_call.args.get("path") + let path = tool_call + .args + .get("path") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing path argument"))?; - + // Extract text from image file only match controller.extract_text_from_image(path).await { Ok(text) => Ok(format!("āœ… Extracted text:\n{}", text)), @@ -3689,7 +3812,10 @@ Template: .unwrap_or(50_000); if max_chars > 0 && char_count > max_chars { - return Ok(format!("āŒ TODO list too large: {} chars (max: {})", char_count, max_chars)); + return Ok(format!( + "āŒ TODO list too large: {} chars (max: {})", + char_count, max_chars + )); } let mut todo = self.todo_content.write().await; @@ -3704,11 +3830,13 @@ Template: } "webdriver_start" => { debug!("Processing webdriver_start tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + // Check if session already exists let session_guard = self.webdriver_session.read().await; if session_guard.is_some() { @@ -3716,71 +3844,78 @@ Template: return Ok("āœ… WebDriver session already active".to_string()); } drop(session_guard); - + // Note: Safari Remote Automation must be enabled before using WebDriver. // Run this once: safaridriver --enable // Or enable manually: Safari → Develop → Allow Remote Automation - + // Start safaridriver process let port = self.config.webdriver.safari_port; info!("Starting safaridriver on port {}", port); - + let safaridriver_result = tokio::process::Command::new("safaridriver") .arg("--port") .arg(port.to_string()) .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::null()) .spawn(); - + let mut safaridriver_process = match safaridriver_result { Ok(process) => process, Err(e) => { return Ok(format!("āŒ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.", e)); } }; - + // Wait for safaridriver to start up info!("Waiting for safaridriver to start..."); tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await; - + // Connect to SafariDriver match g3_computer_control::SafariDriver::with_port(port).await { Ok(driver) => { let session = std::sync::Arc::new(tokio::sync::Mutex::new(driver)); *self.webdriver_session.write().await = Some(session); - + // Store the process handle *self.safaridriver_process.write().await = Some(safaridriver_process); - + info!("WebDriver session started successfully"); Ok("āœ… WebDriver session started successfully! Safari should open automatically.".to_string()) } Err(e) => { // Kill the safaridriver process if connection failed let _ = safaridriver_process.kill().await; - + Ok(format!("āŒ Failed to connect to SafariDriver: {}\n\nThis might be because:\n - Safari Remote Automation is not enabled (run: safaridriver --enable)\n - Port {} is already in use\n - Safari failed to start\n - Network connectivity issue\n\nTo enable Remote Automation:\n 1. Run: safaridriver --enable (requires password, one-time setup)\n 2. Or manually: Safari → Develop → Allow Remote Automation", e, port)) } } } "webdriver_navigate" => { debug!("Processing webdriver_navigate tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; drop(session_guard); let url = match tool_call.args.get("url").and_then(|v| v.as_str()) { Some(u) => u, None => return Ok("āŒ Missing url argument".to_string()), }; - + let mut driver = session.lock().await; match driver.navigate(url).await { Ok(_) => Ok(format!("āœ… Navigated to {}", url)), @@ -3789,17 +3924,24 @@ Template: } "webdriver_get_url" => { debug!("Processing webdriver_get_url tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let driver = session.lock().await; match driver.current_url().await { Ok(url) => Ok(format!("Current URL: {}", url)), @@ -3808,17 +3950,24 @@ Template: } "webdriver_get_title" => { debug!("Processing webdriver_get_title tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let driver = session.lock().await; match driver.title().await { Ok(title) => Ok(format!("Page title: {}", title)), @@ -3827,51 +3976,63 @@ Template: } "webdriver_find_element" => { debug!("Processing webdriver_find_element tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) { Some(s) => s, None => return Ok("āŒ Missing selector argument".to_string()), }; - + let mut driver = session.lock().await; match driver.find_element(selector).await { - Ok(elem) => { - match elem.text().await { - Ok(text) => Ok(format!("Element text: {}", text)), - Err(e) => Ok(format!("āŒ Failed to get element text: {}", e)), - } - } + Ok(elem) => match elem.text().await { + Ok(text) => Ok(format!("Element text: {}", text)), + Err(e) => Ok(format!("āŒ Failed to get element text: {}", e)), + }, Err(e) => Ok(format!("āŒ Failed to find element '{}': {}", selector, e)), } } "webdriver_find_elements" => { debug!("Processing webdriver_find_elements tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) { Some(s) => s, None => return Ok("āŒ Missing selector argument".to_string()), }; - + let mut driver = session.lock().await; match driver.find_elements(selector).await { Ok(elements) => { @@ -3882,67 +4043,85 @@ Template: Err(_) => results.push(format!("[{}]: ", i)), } } - Ok(format!("Found {} elements:\n{}", results.len(), results.join("\n"))) + Ok(format!( + "Found {} elements:\n{}", + results.len(), + results.join("\n") + )) } Err(e) => Ok(format!("āŒ Failed to find elements '{}': {}", selector, e)), } } "webdriver_click" => { debug!("Processing webdriver_click tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) { Some(s) => s, None => return Ok("āŒ Missing selector argument".to_string()), }; - + let mut driver = session.lock().await; match driver.find_element(selector).await { - Ok(mut elem) => { - match elem.click().await { - Ok(_) => Ok(format!("āœ… Clicked element '{}'", selector)), - Err(e) => Ok(format!("āŒ Failed to click element: {}", e)), - } - } + Ok(mut elem) => match elem.click().await { + Ok(_) => Ok(format!("āœ… Clicked element '{}'", selector)), + Err(e) => Ok(format!("āŒ Failed to click element: {}", e)), + }, Err(e) => Ok(format!("āŒ Failed to find element '{}': {}", selector, e)), } } "webdriver_send_keys" => { debug!("Processing webdriver_send_keys tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) { Some(s) => s, None => return Ok("āŒ Missing selector argument".to_string()), }; - + let text = match tool_call.args.get("text").and_then(|v| v.as_str()) { Some(t) => t, None => return Ok("āŒ Missing text argument".to_string()), }; - - let clear_first = tool_call.args.get("clear_first") + + let clear_first = tool_call + .args + .get("clear_first") .and_then(|v| v.as_bool()) .unwrap_or(true); - + let mut driver = session.lock().await; match driver.find_element(selector).await { Ok(mut elem) => { @@ -3961,22 +4140,29 @@ Template: } "webdriver_execute_script" => { debug!("Processing webdriver_execute_script tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let script = match tool_call.args.get("script").and_then(|v| v.as_str()) { Some(s) => s, None => return Ok("āŒ Missing script argument".to_string()), }; - + let mut driver = session.lock().await; match driver.execute_script(script, vec![]).await { Ok(result) => Ok(format!("Script result: {:?}", result)), @@ -3985,23 +4171,34 @@ Template: } "webdriver_get_page_source" => { debug!("Processing webdriver_get_page_source tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let driver = session.lock().await; match driver.page_source().await { Ok(source) => { // Truncate if too long if source.len() > 10000 { - Ok(format!("Page source ({} chars, truncated to 10000):\n{}...", source.len(), &source[..10000])) + Ok(format!( + "Page source ({} chars, truncated to 10000):\n{}...", + source.len(), + &source[..10000] + )) } else { Ok(format!("Page source ({} chars):\n{}", source.len(), source)) } @@ -4011,22 +4208,29 @@ Template: } "webdriver_screenshot" => { debug!("Processing webdriver_screenshot tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let path = match tool_call.args.get("path").and_then(|v| v.as_str()) { Some(p) => p, None => return Ok("āŒ Missing path argument".to_string()), }; - + let mut driver = session.lock().await; match driver.screenshot(path).await { Ok(_) => Ok(format!("āœ… Screenshot saved to {}", path)), @@ -4035,17 +4239,24 @@ Template: } "webdriver_back" => { debug!("Processing webdriver_back tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let mut driver = session.lock().await; match driver.back().await { Ok(_) => Ok("āœ… Navigated back".to_string()), @@ -4054,17 +4265,24 @@ Template: } "webdriver_forward" => { debug!("Processing webdriver_forward tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let mut driver = session.lock().await; match driver.forward().await { Ok(_) => Ok("āœ… Navigated forward".to_string()), @@ -4073,17 +4291,24 @@ Template: } "webdriver_refresh" => { debug!("Processing webdriver_refresh tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), - None => return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()), + None => { + return Ok( + "āŒ No active WebDriver session. Call webdriver_start first." + .to_string(), + ) + } }; - + let mut driver = session.lock().await; match driver.refresh().await { Ok(_) => Ok("āœ… Page refreshed".to_string()), @@ -4092,17 +4317,19 @@ Template: } "webdriver_quit" => { debug!("Processing webdriver_quit tool call"); - + if !self.config.webdriver.enabled { - return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + return Ok( + "āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string(), + ); } - + // Take the session let session = match self.webdriver_session.write().await.take() { Some(s) => s.clone(), None => return Ok("āŒ No active WebDriver session.".to_string()), }; - + // Quit the WebDriver session match std::sync::Arc::try_unwrap(session) { Ok(mutex) => { @@ -4110,17 +4337,20 @@ Template: match driver.quit().await { Ok(_) => { info!("WebDriver session closed successfully"); - + // Kill the safaridriver process - if let Some(mut process) = self.safaridriver_process.write().await.take() { + if let Some(mut process) = + self.safaridriver_process.write().await.take() + { if let Err(e) = process.kill().await { warn!("Failed to kill safaridriver process: {}", e); } else { info!("Safaridriver process terminated"); } } - - Ok("āœ… WebDriver session closed and safaridriver stopped".to_string()) + + Ok("āœ… WebDriver session closed and safaridriver stopped" + .to_string()) } Err(e) => Ok(format!("āŒ Failed to quit WebDriver: {}", e)), } @@ -4130,17 +4360,22 @@ Template: } "macax_list_apps" => { debug!("Processing macax_list_apps tool call"); - + if !self.config.macax.enabled { - return Ok("āŒ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + return Ok( + "āŒ macOS Accessibility is not enabled. Use --macax flag to enable." + .to_string(), + ); } - + let controller_guard = self.macax_controller.read().await; let controller = match controller_guard.as_ref() { Some(c) => c, - None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + None => { + return Ok("āŒ macOS Accessibility controller not initialized.".to_string()) + } }; - + match controller.list_applications() { Ok(apps) => { let app_list: Vec = apps.iter().map(|a| a.name.clone()).collect(); @@ -4151,17 +4386,22 @@ Template: } "macax_get_frontmost_app" => { debug!("Processing macax_get_frontmost_app tool call"); - + if !self.config.macax.enabled { - return Ok("āŒ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + return Ok( + "āŒ macOS Accessibility is not enabled. Use --macax flag to enable." + .to_string(), + ); } - + let controller_guard = self.macax_controller.read().await; let controller = match controller_guard.as_ref() { Some(c) => c, - None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + None => { + return Ok("āŒ macOS Accessibility controller not initialized.".to_string()) + } }; - + match controller.get_frontmost_app() { Ok(app) => Ok(format!("Frontmost application: {}", app.name)), Err(e) => Ok(format!("āŒ Failed to get frontmost app: {}", e)), @@ -4169,22 +4409,27 @@ Template: } "macax_activate_app" => { debug!("Processing macax_activate_app tool call"); - + if !self.config.macax.enabled { - return Ok("āŒ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + return Ok( + "āŒ macOS Accessibility is not enabled. Use --macax flag to enable." + .to_string(), + ); } - + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { Some(n) => n, None => return Ok("āŒ Missing app_name argument".to_string()), }; - + let controller_guard = self.macax_controller.read().await; let controller = match controller_guard.as_ref() { Some(c) => c, - None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + None => { + return Ok("āŒ macOS Accessibility controller not initialized.".to_string()) + } }; - + match controller.activate_app(app_name) { Ok(_) => Ok(format!("āœ… Activated application: {}", app_name)), Err(e) => Ok(format!("āŒ Failed to activate app: {}", e)), @@ -4192,34 +4437,39 @@ Template: } "macax_press_key" => { debug!("Processing macax_press_key tool call"); - + if !self.config.macax.enabled { - return Ok("āŒ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + return Ok( + "āŒ macOS Accessibility is not enabled. Use --macax flag to enable." + .to_string(), + ); } - + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { Some(n) => n, None => return Ok("āŒ Missing app_name argument".to_string()), }; - + let key = match tool_call.args.get("key").and_then(|v| v.as_str()) { Some(k) => k, None => return Ok("āŒ Missing key argument".to_string()), }; - - let modifiers_vec: Vec<&str> = tool_call.args.get("modifiers") + + let modifiers_vec: Vec<&str> = tool_call + .args + .get("modifiers") .and_then(|v| v.as_array()) - .map(|arr| arr.iter() - .filter_map(|v| v.as_str()) - .collect()) + .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect()) .unwrap_or_default(); - + let controller_guard = self.macax_controller.read().await; let controller = match controller_guard.as_ref() { Some(c) => c, - None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + None => { + return Ok("āŒ macOS Accessibility controller not initialized.".to_string()) + } }; - + match controller.press_key(app_name, key, modifiers_vec.clone()) { Ok(_) => { let modifier_str = if modifiers_vec.is_empty() { @@ -4234,27 +4484,32 @@ Template: } "macax_type_text" => { debug!("Processing macax_type_text tool call"); - + if !self.config.macax.enabled { - return Ok("āŒ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + return Ok( + "āŒ macOS Accessibility is not enabled. Use --macax flag to enable." + .to_string(), + ); } - + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { Some(n) => n, None => return Ok("āŒ Missing app_name argument".to_string()), }; - + let text = match tool_call.args.get("text").and_then(|v| v.as_str()) { Some(t) => t, None => return Ok("āŒ Missing text argument".to_string()), }; - + let controller_guard = self.macax_controller.read().await; let controller = match controller_guard.as_ref() { Some(c) => c, - None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + None => { + return Ok("āŒ macOS Accessibility controller not initialized.".to_string()) + } }; - + match controller.type_text(app_name, text) { Ok(_) => Ok(format!("āœ… Typed text into {}", app_name)), Err(e) => Ok(format!("āŒ Failed to type text: {}", e)), @@ -4262,16 +4517,20 @@ Template: } "vision_find_text" => { debug!("Processing vision_find_text tool call"); - + if let Some(controller) = &self.computer_controller { - let app_name = tool_call.args.get("app_name") + let app_name = tool_call + .args + .get("app_name") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?; - - let text = tool_call.args.get("text") + + let text = tool_call + .args + .get("text") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?; - + match controller.find_text_in_app(app_name, text).await { Ok(Some(location)) => { Ok(format!( @@ -4289,16 +4548,20 @@ Template: } "vision_click_text" => { debug!("Processing vision_click_text tool call"); - + if let Some(controller) = &self.computer_controller { - let app_name = tool_call.args.get("app_name") + let app_name = tool_call + .args + .get("app_name") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?; - - let text = tool_call.args.get("text") + + let text = tool_call + .args + .get("text") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?; - + match controller.find_text_in_app(app_name, text).await { Ok(Some(location)) => { // Click on center of text @@ -4307,29 +4570,35 @@ Template: // location.y is the TOP edge of the bounding box (highest Y value in NSScreen space) // location.width and location.height are already scaled to screen space // To get center: we need to add half the SCALED width and subtract half the SCALED height - + if location.width == 0 || location.height == 0 { - return Ok(format!("āŒ Invalid bounding box dimensions: width={}, height={}", location.width, location.height)); + return Ok(format!( + "āŒ Invalid bounding box dimensions: width={}, height={}", + location.width, location.height + )); } - + debug!("[vision_click_text] Location from find_text_in_app: x={}, y={}, width={}, height={}, text='{}'", location.x, location.y, location.width, location.height, location.text); - + // Calculate center using the SCALED dimensions // X: Use right edge instead of center (Vision OCR bounding box seems offset) // This gives us: left edge + full width = right edge // Y: top edge - half of scaled height (subtract because Y increases upward) - let click_x = location.x + location.width; // Right edge + let click_x = location.x + location.width; // Right edge let half_height = location.height / 2; let click_y = location.y - half_height; - + debug!("[vision_click_text] Click position calculation: x={} + {} = {} (right edge), y={} - {} = {}", location.x, location.width, click_x, location.y, half_height, click_y); debug!("[vision_click_text] This means: left_edge={}, center={}, right_edge={}", location.x, click_x, location.x + location.width); - + match controller.click_at(click_x, click_y, Some(app_name)) { - Ok(_) => Ok(format!("āœ… Clicked on '{}' in {} at ({}, {})", text, app_name, click_x, click_y)), + Ok(_) => Ok(format!( + "āœ… Clicked on '{}' in {} at ({}, {})", + text, app_name, click_x, click_y + )), Err(e) => Ok(format!("āŒ Failed to click: {}", e)), } } @@ -4342,27 +4611,38 @@ Template: } "extract_text_with_boxes" => { debug!("Processing extract_text_with_boxes tool call"); - + if !self.config.macax.enabled { - return Ok("āŒ extract_text_with_boxes requires --macax flag to be enabled".to_string()); + return Ok( + "āŒ extract_text_with_boxes requires --macax flag to be enabled" + .to_string(), + ); } - + if let Some(controller) = &self.computer_controller { - let path = tool_call.args.get("path") + let path = tool_call + .args + .get("path") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing path parameter"))?; - + // Optional: take screenshot of app first - let final_path = if let Some(app_name) = tool_call.args.get("app_name").and_then(|v| v.as_str()) { - let temp_path = format!("/tmp/g3_extract_boxes_{}.png", uuid::Uuid::new_v4()); - match controller.take_screenshot(&temp_path, None, Some(app_name)).await { + let final_path = if let Some(app_name) = + tool_call.args.get("app_name").and_then(|v| v.as_str()) + { + let temp_path = + format!("/tmp/g3_extract_boxes_{}.png", uuid::Uuid::new_v4()); + match controller + .take_screenshot(&temp_path, None, Some(app_name)) + .await + { Ok(_) => temp_path, Err(e) => return Ok(format!("āŒ Failed to take screenshot: {}", e)), } } else { path.to_string() }; - + // Extract text with locations match controller.extract_text_with_locations(&final_path).await { Ok(locations) => { @@ -4370,10 +4650,14 @@ Template: if final_path != path { let _ = std::fs::remove_file(&final_path); } - + // Return as JSON match serde_json::to_string_pretty(&locations) { - Ok(json) => Ok(format!("āœ… Extracted {} text elements:\n{}", locations.len(), json)), + Ok(json) => Ok(format!( + "āœ… Extracted {} text elements:\n{}", + locations.len(), + json + )), Err(e) => Ok(format!("āŒ Failed to serialize results: {}", e)), } } @@ -4385,37 +4669,61 @@ Template: } "vision_click_near_text" => { debug!("Processing vision_click_near_text tool call"); - + if let Some(controller) = &self.computer_controller { - let app_name = tool_call.args.get("app_name") + let app_name = tool_call + .args + .get("app_name") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?; - - let text = tool_call.args.get("text") + + let text = tool_call + .args + .get("text") .and_then(|v| v.as_str()) .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?; - - let direction = tool_call.args.get("direction") + + let direction = tool_call + .args + .get("direction") .and_then(|v| v.as_str()) .unwrap_or("right"); - - let distance = tool_call.args.get("distance") + + let distance = tool_call + .args + .get("distance") .and_then(|v| v.as_i64()) .unwrap_or(50) as i32; - + match controller.find_text_in_app(app_name, text).await { Ok(Some(location)) => { // Calculate click position based on direction // location.x is LEFT edge, location.y is TOP edge (in NSScreen space) let (click_x, click_y) = match direction { - "right" => (location.x + location.width + distance, location.y - (location.height / 2)), - "below" => (location.x + (location.width / 2), location.y - location.height - distance), - "left" => (location.x - distance, location.y - (location.height / 2)), - "above" => (location.x + (location.width / 2), location.y + distance), - _ => (location.x + location.width + distance, location.y - (location.height / 2)), + "right" => ( + location.x + location.width + distance, + location.y - (location.height / 2), + ), + "below" => ( + location.x + (location.width / 2), + location.y - location.height - distance, + ), + "left" => { + (location.x - distance, location.y - (location.height / 2)) + } + "above" => { + (location.x + (location.width / 2), location.y + distance) + } + _ => ( + location.x + location.width + distance, + location.y - (location.height / 2), + ), }; - debug!("[vision_click_near_text] Clicking {} of text at ({}, {})", direction, click_x, click_y); - + debug!( + "[vision_click_near_text] Clicking {} of text at ({}, {})", + direction, click_x, click_y + ); + match controller.click_at(click_x, click_y, Some(app_name)) { Ok(_) => Ok(format!( "āœ… Clicked {} of '{}' in {} at ({}, {})", @@ -4895,7 +5203,7 @@ impl Drop for Agent { .arg("-9") .arg(process.id().unwrap_or(0).to_string()) .output(); - + debug!("Attempted to clean up safaridriver process on Agent drop"); } }