From 595ad6ad218643bd9475f8eb93c42f5cd76755b4 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Sat, 3 Jan 2026 14:50:08 +1100 Subject: [PATCH] agent mode resumption --- crates/g3-cli/src/lib.rs | 37 +- crates/g3-core/src/lib.rs | 37 +- crates/g3-core/src/session_continuation.rs | 112 +++ crates/g3-core/src/tools/executor.rs | 54 ++ crates/g3-core/src/tools/file_ops.rs | 510 +++++++++++++ crates/g3-core/src/tools/macax.rs | 178 +++++ crates/g3-core/src/tools/misc.rs | 225 ++++++ crates/g3-core/src/tools/mod.rs | 22 + crates/g3-core/src/tools/shell.rs | 115 +++ crates/g3-core/src/tools/todo.rs | 195 +++++ crates/g3-core/src/tools/vision.rs | 275 +++++++ crates/g3-core/src/tools/webdriver.rs | 678 ++++++++++++++++++ .../tests/test_preflight_max_tokens.rs | 36 + .../tests/test_session_continuation.rs | 129 +++- 14 files changed, 2584 insertions(+), 19 deletions(-) create mode 100644 crates/g3-core/src/tools/executor.rs create mode 100644 crates/g3-core/src/tools/file_ops.rs create mode 100644 crates/g3-core/src/tools/macax.rs create mode 100644 crates/g3-core/src/tools/misc.rs create mode 100644 crates/g3-core/src/tools/mod.rs create mode 100644 crates/g3-core/src/tools/shell.rs create mode 100644 crates/g3-core/src/tools/todo.rs create mode 100644 crates/g3-core/src/tools/vision.rs create mode 100644 crates/g3-core/src/tools/webdriver.rs diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index e27667b..9212c51 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -666,6 +666,7 @@ async fn run_agent_mode( _quiet: bool, ) -> Result<()> { use g3_core::get_agent_system_prompt; + use g3_core::find_incomplete_agent_session; // Initialize logging use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter}; @@ -684,6 +685,36 @@ async fn run_agent_mode( // Determine workspace directory (current dir if not specified) let workspace_dir = workspace.unwrap_or_else(|| std::env::current_dir().unwrap_or_default()); + // 
Change to the workspace directory first so session scanning works correctly + std::env::set_current_dir(&workspace_dir)?; + + // Check for incomplete agent sessions before starting a new one + if let Ok(Some(incomplete_session)) = find_incomplete_agent_session(agent_name) { + output.print(&format!( + "\nšŸ”„ Found incomplete session for agent '{}'", + agent_name + )); + output.print(&format!( + " Session: {}", + incomplete_session.session_id + )); + output.print(&format!( + " Created: {}", + incomplete_session.created_at + )); + if let Some(ref todo) = incomplete_session.todo_snapshot { + // Show first few lines of TODO + let preview: String = todo.lines().take(5).collect::>().join("\n"); + output.print(&format!(" TODO preview:\n{}", preview)); + } + output.print(""); + output.print(" Resuming incomplete session..."); + output.print(""); + + // TODO: Actually resume the session - for now we just notify and continue + // In a future iteration, we could restore the context and continue + } + // Load agent prompt from agents/.md let agent_prompt_path = workspace_dir.join("agents").join(format!("{}.md", agent_name)); @@ -720,9 +751,6 @@ async fn run_agent_mode( output.print(&format!("šŸ¤– Running as agent: {}", agent_name)); output.print(&format!("šŸ“ Working directory: {:?}", workspace_dir)); - // Change to the workspace directory so all file operations happen there - std::env::set_current_dir(&workspace_dir)?; - // Load config let config = g3_config::Config::load(config_path)?; @@ -744,6 +772,9 @@ async fn run_agent_mode( readme_for_prompt, ).await?; + // Set agent mode for session tracking + agent.set_agent_mode(agent_name); + // The agent prompt should contain instructions to start working immediately // Send an initial message to trigger the agent let initial_task = "Begin your analysis and work on the current project. 
Follow your mission and workflow as specified in your instructions."; diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 17ad7d3..2b65637 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -10,6 +10,7 @@ pub mod session_continuation; pub mod streaming_parser; pub mod task_result; pub mod tool_definitions; +pub mod tools; pub mod ui_writer; pub mod utils; pub mod webdriver_session; @@ -17,7 +18,7 @@ pub mod webdriver_session; pub use task_result::TaskResult; pub use retry::{RetryConfig, RetryResult, execute_with_retry, retry_operation}; pub use feedback_extraction::{ExtractedFeedback, FeedbackSource, FeedbackExtractionConfig, extract_coach_feedback}; -pub use session_continuation::{SessionContinuation, load_continuation, save_continuation, clear_continuation, has_valid_continuation, get_session_dir, load_context_from_session_log}; +pub use session_continuation::{SessionContinuation, load_continuation, save_continuation, clear_continuation, has_valid_continuation, get_session_dir, load_context_from_session_log, find_incomplete_agent_session}; // Re-export context window types pub use context_window::{ContextWindow, ThinScope}; @@ -117,9 +118,17 @@ pub struct Agent { background_process_manager: std::sync::Arc, /// Pending images to attach to the next user message pending_images: Vec, + /// Whether this agent is running in agent mode (--agent flag) + is_agent_mode: bool, + /// Name of the agent if running in agent mode (e.g., "fowler", "pike") + agent_name: Option, } impl Agent { + /// Minimum tokens for summary requests to avoid API errors when context is nearly full. + /// This ensures max_tokens is never 0 even when context usage is 90%+. 
+ const SUMMARY_MIN_TOKENS: u32 = 1000; + pub async fn new(config: Config, ui_writer: W) -> Result { Self::new_with_mode(config, ui_writer, false, false).await } @@ -418,6 +427,8 @@ impl Agent { paths::get_logs_dir().join("background_processes") )), pending_images: Vec::new(), + is_agent_mode: false, + agent_name: None, }) } @@ -606,6 +617,9 @@ impl Agent { /// Calculate max_tokens for a summary request, ensuring it satisfies the thinking constraint. /// Applies fallback sequence: thinnify -> skinnify -> hard-coded minimum /// Returns (max_tokens, whether_fallback_was_used) + /// + /// IMPORTANT: Always returns at least SUMMARY_MIN_TOKENS to avoid API errors + /// when context is nearly full (90%+). fn calculate_summary_max_tokens( &mut self, provider_name: &str, @@ -621,7 +635,10 @@ impl Agent { let available = model_limit .saturating_sub(current_usage) .saturating_sub(buffer); - // Use the smaller of available tokens or configured max_tokens, + // Ensure we have at least a minimum floor for summary requests + // This prevents max_tokens=0 errors when context is 90%+ full + let available = available.max(Self::SUMMARY_MIN_TOKENS); + // Use the smaller of available tokens (with floor) or configured max_tokens, // but ensure we don't go below thinking budget floor for Anthropic let proposed_max_tokens = available.min(configured_max_tokens); let proposed_max_tokens = if provider_name == "anthropic" { @@ -1554,6 +1571,9 @@ impl Agent { _ => summary_max_tokens.min(5000), }; + // Ensure minimum floor as defense-in-depth (primary protection is in calculate_summary_max_tokens) + summary_max_tokens = summary_max_tokens.max(Self::SUMMARY_MIN_TOKENS); + debug!( "Requesting summary with max_tokens: {} (current usage: {} tokens)", summary_max_tokens, self.context_window.used_tokens @@ -1912,6 +1932,8 @@ impl Agent { .unwrap_or_else(|_| ".".to_string()); let continuation = SessionContinuation::new( + self.is_agent_mode, + self.agent_name.clone(), session_id, 
final_output_summary, session_log_path.to_string_lossy().to_string(), @@ -1927,6 +1949,14 @@ impl Agent { } } + /// Set agent mode information for session tracking + /// Called when running with --agent flag to enable agent-specific session resume + pub fn set_agent_mode(&mut self, agent_name: &str) { + self.is_agent_mode = true; + self.agent_name = Some(agent_name.to_string()); + debug!("Agent mode enabled for agent: {}", agent_name); + } + /// Clear session state and continuation artifacts (for /clear command) pub fn clear_session(&mut self) { use crate::session_continuation::clear_continuation; @@ -2158,6 +2188,9 @@ impl Agent { _ => summary_max_tokens.min(5000), }; + // Ensure minimum floor as defense-in-depth (primary protection is in calculate_summary_max_tokens) + summary_max_tokens = summary_max_tokens.max(Self::SUMMARY_MIN_TOKENS); + debug!( "Requesting summary with max_tokens: {} (current usage: {} tokens)", summary_max_tokens, self.context_window.used_tokens diff --git a/crates/g3-core/src/session_continuation.rs b/crates/g3-core/src/session_continuation.rs index 4cd41d5..7893dd2 100644 --- a/crates/g3-core/src/session_continuation.rs +++ b/crates/g3-core/src/session_continuation.rs @@ -24,6 +24,10 @@ const CONTINUATION_FILENAME: &str = "latest.json"; pub struct SessionContinuation { /// Version of the continuation format pub version: String, + /// Whether this session was running in agent mode + pub is_agent_mode: bool, + /// Name of the agent (e.g., "fowler", "pike") if in agent mode + pub agent_name: Option, /// Timestamp when the continuation was saved pub created_at: String, /// Original session ID @@ -43,6 +47,8 @@ pub struct SessionContinuation { impl SessionContinuation { /// Create a new session continuation artifact pub fn new( + is_agent_mode: bool, + agent_name: Option, session_id: String, final_output_summary: Option, session_log_path: String, @@ -52,6 +58,8 @@ impl SessionContinuation { ) -> Self { Self { version: 
CONTINUATION_VERSION.to_string(), + is_agent_mode, + agent_name, created_at: chrono::Utc::now().to_rfc3339(), session_id, final_output_summary, @@ -66,6 +74,14 @@ impl SessionContinuation { pub fn can_restore_full_context(&self) -> bool { self.context_percentage < 80.0 } + + /// Check if this session has incomplete TODO items + pub fn has_incomplete_todos(&self) -> bool { + match &self.todo_snapshot { + Some(todo) => todo.contains("- [ ]"), + None => false, + } + } } /// Get the path to the .g3 directory @@ -272,6 +288,76 @@ pub fn load_context_from_session_log(session_log_path: &Path) -> Result Result> { + let sessions_dir = get_sessions_dir(); + + if !sessions_dir.exists() { + debug!("Sessions directory does not exist: {:?}", sessions_dir); + return Ok(None); + } + + let current_dir = std::env::current_dir() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_default(); + + let mut candidates: Vec = Vec::new(); + + // Scan all session directories + for entry in std::fs::read_dir(&sessions_dir)? 
{ + let entry = entry?; + let path = entry.path(); + + if !path.is_dir() { + continue; + } + + // Check for latest.json in this session directory + let latest_path = path.join(CONTINUATION_FILENAME); + if !latest_path.exists() { + continue; + } + + // Try to load the continuation + let json = match std::fs::read_to_string(&latest_path) { + Ok(j) => j, + Err(_) => continue, + }; + + let continuation: SessionContinuation = match serde_json::from_str(&json) { + Ok(c) => c, + Err(_) => continue, // Skip sessions with old format + }; + + // Check if this is an agent mode session with matching name + if !continuation.is_agent_mode { + continue; + } + + if continuation.agent_name.as_deref() != Some(agent_name) { + continue; + } + + // Check if in same working directory + if continuation.working_directory != current_dir { + continue; + } + + // Check if has incomplete TODOs + if continuation.has_incomplete_todos() { + candidates.push(continuation); + } + } + + // Sort by created_at descending and return the most recent + candidates.sort_by(|a, b| b.created_at.cmp(&a.created_at)); + Ok(candidates.into_iter().next()) +} + #[cfg(test)] mod tests { use super::*; @@ -279,6 +365,8 @@ mod tests { #[test] fn test_session_continuation_creation() { let continuation = SessionContinuation::new( + false, + None, "test_session_123".to_string(), Some("Task completed successfully".to_string()), "/path/to/session.json".to_string(), @@ -295,6 +383,8 @@ mod tests { #[test] fn test_can_restore_full_context() { let mut continuation = SessionContinuation::new( + false, + None, "test".to_string(), None, "path".to_string(), @@ -311,4 +401,26 @@ mod tests { continuation.context_percentage = 95.0; assert!(!continuation.can_restore_full_context()); // 95% >= 80% } + + #[test] + fn test_has_incomplete_todos() { + let mut continuation = SessionContinuation::new( + true, + Some("fowler".to_string()), + "test".to_string(), + None, + "path".to_string(), + 50.0, + Some("- [x] Done\n- [ ] Not 
done".to_string()), + ".".to_string(), + ); + + assert!(continuation.has_incomplete_todos()); + + continuation.todo_snapshot = Some("- [x] All done".to_string()); + assert!(!continuation.has_incomplete_todos()); + + continuation.todo_snapshot = None; + assert!(!continuation.has_incomplete_todos()); + } } diff --git a/crates/g3-core/src/tools/executor.rs b/crates/g3-core/src/tools/executor.rs new file mode 100644 index 0000000..44b02c7 --- /dev/null +++ b/crates/g3-core/src/tools/executor.rs @@ -0,0 +1,54 @@ +//! Tool executor trait and context for tool execution. + +use anyhow::Result; +use std::sync::Arc; +use tokio::sync::RwLock; + +use crate::background_process::BackgroundProcessManager; +use crate::paths::{ensure_session_dir, get_session_todo_path, get_todo_path}; +use crate::ui_writer::UiWriter; +use crate::webdriver_session::WebDriverSession; +use crate::ToolCall; +use g3_config::Config; + +/// Context passed to tool executors containing shared state. +pub struct ToolContext<'a, W: UiWriter> { + pub config: &'a Config, + pub ui_writer: &'a W, + pub session_id: Option<&'a str>, + pub working_dir: Option<&'a str>, + pub computer_controller: Option<&'a Box>, + pub webdriver_session: &'a Arc>>>>, + pub webdriver_process: &'a Arc>>, + pub macax_controller: &'a Arc>>, + pub background_process_manager: &'a Arc, + pub todo_content: &'a Arc>, + pub pending_images: &'a mut Vec, + pub is_autonomous: bool, + pub requirements_sha: Option<&'a str>, +} + +impl<'a, W: UiWriter> ToolContext<'a, W> { + /// Get the path to the TODO file (session-scoped or workspace). + pub fn get_todo_path(&self) -> std::path::PathBuf { + if let Some(session_id) = self.session_id { + let _ = ensure_session_dir(session_id); + get_session_todo_path(session_id) + } else { + get_todo_path() + } + } +} + +/// Trait for tool executors. +/// Each tool category implements this trait. +pub trait ToolExecutor { + /// Execute a tool call and return the result. 
+ /// Returns None if this executor doesn't handle the given tool. + fn execute<'a>( + tool_call: &'a ToolCall, + ctx: &'a mut ToolContext<'_, W>, + ) -> impl std::future::Future>> + Send + 'a + where + W: 'a; +} diff --git a/crates/g3-core/src/tools/file_ops.rs b/crates/g3-core/src/tools/file_ops.rs new file mode 100644 index 0000000..328d01f --- /dev/null +++ b/crates/g3-core/src/tools/file_ops.rs @@ -0,0 +1,510 @@ +//! File operation tools: read_file, write_file, str_replace, read_image. + +use anyhow::Result; +use tracing::debug; + +use crate::ui_writer::UiWriter; +use crate::utils::apply_unified_diff_to_string; +use crate::ToolCall; + +use super::executor::ToolContext; + +/// Execute the `read_file` tool. +pub async fn execute_read_file( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing read_file tool call"); + + let file_path = match tool_call.args.get("file_path").and_then(|v| v.as_str()) { + Some(p) => p, + None => return Ok("āŒ Missing file_path argument".to_string()), + }; + + // Expand tilde (~) to home directory + let expanded_path = shellexpand::tilde(file_path); + let path_str = expanded_path.as_ref(); + + // Check if this is an image file + let is_image = path_str.to_lowercase().ends_with(".png") + || path_str.to_lowercase().ends_with(".jpg") + || path_str.to_lowercase().ends_with(".jpeg") + || path_str.to_lowercase().ends_with(".gif") + || path_str.to_lowercase().ends_with(".bmp") + || path_str.to_lowercase().ends_with(".tiff") + || path_str.to_lowercase().ends_with(".tif") + || path_str.to_lowercase().ends_with(".webp"); + + // If it's an image file, use OCR via extract_text + if is_image { + if let Some(controller) = ctx.computer_controller { + match controller.extract_text_from_image(path_str).await { + Ok(text) => { + return Ok(format!("šŸ“„ Image file (OCR extracted):\n{}", text)); + } + Err(e) => { + return Ok(format!( + "āŒ Failed to extract text from image '{}': {}", + path_str, e + )); + } + } + } 
else { + return Ok("āŒ Computer control not enabled. Cannot perform OCR on image files. Set computer_control.enabled = true in config.".to_string()); + } + } + + // Extract optional start and end positions + let start_char = tool_call + .args + .get("start") + .and_then(|v| v.as_u64()) + .map(|n| n as usize); + let end_char = tool_call + .args + .get("end") + .and_then(|v| v.as_u64()) + .map(|n| n as usize); + + debug!( + "Reading file: {}, start={:?}, end={:?}", + path_str, start_char, end_char + ); + + match std::fs::read_to_string(path_str) { + Ok(content) => { + // Validate and apply range if specified + let start = start_char.unwrap_or(0); + let end = end_char.unwrap_or(content.len()); + + // Validation + if start > content.len() { + return Ok(format!( + "āŒ Start position {} exceeds file length {}", + start, + content.len() + )); + } + if end > content.len() { + return Ok(format!( + "āŒ End position {} exceeds file length {}", + end, + content.len() + )); + } + if start > end { + return Ok(format!( + "āŒ Start position {} is greater than end position {}", + start, end + )); + } + + // Extract the requested portion, ensuring we're at char boundaries + let start_boundary = if start == 0 { + 0 + } else { + content + .char_indices() + .find(|(i, _)| *i >= start) + .map(|(i, _)| i) + .unwrap_or(start) + }; + let end_boundary = content + .char_indices() + .find(|(i, _)| *i >= end) + .map(|(i, _)| i) + .unwrap_or(content.len()); + + let partial_content = &content[start_boundary..end_boundary]; + let line_count = partial_content.lines().count(); + let total_lines = content.lines().count(); + + // Format output with range info if partial + if start_char.is_some() || end_char.is_some() { + Ok(format!( + "šŸ“„ File content (chars {}-{}, {} lines of {} total):\n{}", + start_boundary, end_boundary, line_count, total_lines, partial_content + )) + } else { + Ok(format!("šŸ“„ File content ({} lines):\n{}", line_count, content)) + } + } + Err(e) => Ok(format!("āŒ Failed 
to read file '{}': {}", path_str, e)), + } +} + +/// Execute the `read_image` tool. +pub async fn execute_read_image( + tool_call: &ToolCall, + ctx: &mut ToolContext<'_, W>, +) -> Result { + debug!("Processing read_image tool call"); + + // Get paths from file_paths array + let mut paths: Vec = Vec::new(); + + if let Some(file_paths) = tool_call.args.get("file_paths") { + if let Some(arr) = file_paths.as_array() { + for p in arr { + if let Some(s) = p.as_str() { + paths.push(s.to_string()); + } + } + } + } + + if paths.is_empty() { + return Ok("āŒ Missing or empty file_paths argument".to_string()); + } + + let mut results: Vec = Vec::new(); + let mut success_count = 0; + + // Print └─ and newline before images to break out of tool output box + println!("└─\n"); + + for path_str in &paths { + // Expand tilde (~) to home directory + let expanded_path = shellexpand::tilde(path_str); + let path = std::path::Path::new(expanded_path.as_ref()); + + // Check file exists + if !path.exists() { + results.push(format!("āŒ Image file not found: {}", path_str)); + continue; + } + + // Read the file first, then detect format from magic bytes + match std::fs::read(path) { + Ok(bytes) => { + // Detect media type from magic bytes (file signature) + let media_type = match g3_providers::ImageContent::media_type_from_bytes(&bytes) { + Some(mt) => mt, + None => { + // Fall back to extension-based detection + let ext = path.extension().and_then(|e| e.to_str()).unwrap_or(""); + match g3_providers::ImageContent::media_type_from_extension(ext) { + Some(mt) => mt, + None => { + results.push(format!( + "āŒ {}: Unsupported or unrecognized image format", + path_str + )); + continue; + } + } + } + }; + + let file_size = bytes.len(); + + // Try to get image dimensions + let dimensions = get_image_dimensions(&bytes, media_type); + + // Build info string + let dim_str = dimensions + .map(|(w, h)| format!("{}x{}", w, h)) + .unwrap_or_else(|| "unknown".to_string()); + + let size_str = if file_size 
>= 1024 * 1024 { + format!("{:.1} MB", file_size as f64 / (1024.0 * 1024.0)) + } else if file_size >= 1024 { + format!("{:.1} KB", file_size as f64 / 1024.0) + } else { + format!("{} bytes", file_size) + }; + + // Output imgcat inline image to terminal (height constrained) + print_imgcat(&bytes, path_str, &dim_str, media_type, &size_str, 5); + + // Store the image to be attached to the next user message + use base64::Engine; + let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes); + let image = g3_providers::ImageContent::new(media_type, encoded); + ctx.pending_images.push(image); + + success_count += 1; + } + Err(e) => { + results.push(format!("āŒ Failed to read '{}': {}", path_str, e)); + } + } + } + + // Print ā”Œā”€ to resume tool output box + print!("ā”Œā”€\n"); + + let summary = if success_count == paths.len() { + format!("{} image(s) read.", success_count) + } else { + format!("{}/{} image(s) read.", success_count, paths.len()) + }; + + // Only include error results if there are any + if results.is_empty() { + Ok(summary) + } else { + Ok(format!("{}\n{}", results.join("\n"), summary)) + } +} + +/// Execute the `write_file` tool. 
+pub async fn execute_write_file( + tool_call: &ToolCall, + _ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing write_file tool call"); + debug!("Raw tool_call.args: {:?}", tool_call.args); + + // Try multiple argument formats that different providers might use + let (path_str, content_str) = extract_path_and_content(&tool_call.args); + + debug!( + "Final extracted values: path_str={:?}, content_str_len={:?}", + path_str, + content_str.map(|c| c.len()) + ); + + if let (Some(path), Some(content)) = (path_str, content_str) { + // Expand tilde (~) to home directory + let expanded_path = shellexpand::tilde(path); + let path = expanded_path.as_ref(); + + debug!("Writing to file: {}", path); + + // Create parent directories if they don't exist + if let Some(parent) = std::path::Path::new(path).parent() { + if let Err(e) = std::fs::create_dir_all(parent) { + return Ok(format!( + "āŒ Failed to create parent directories for '{}': {}", + path, e + )); + } + } + + match std::fs::write(path, content) { + Ok(()) => { + let line_count = content.lines().count(); + let char_count = content.len(); + Ok(format!( + "āœ… Successfully wrote {} lines ({} characters)", + line_count, char_count + )) + } + Err(e) => Ok(format!("āŒ Failed to write to file '{}': {}", path, e)), + } + } else { + // Provide more detailed error information + let available_keys = if let Some(obj) = tool_call.args.as_object() { + obj.keys().collect::>() + } else { + vec![] + }; + + Ok(format!( + "āŒ Missing file_path or content argument. Available keys: {:?}. Expected formats: {{\"file_path\": \"...\", \"content\": \"...\"}}, {{\"path\": \"...\", \"content\": \"...\"}}, {{\"filename\": \"...\", \"text\": \"...\"}}, or {{\"file\": \"...\", \"data\": \"...\"}}", + available_keys + )) + } +} + +/// Execute the `str_replace` tool. 
+pub async fn execute_str_replace( + tool_call: &ToolCall, + _ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing str_replace tool call"); + + let args_obj = match tool_call.args.as_object() { + Some(obj) => obj, + None => return Ok("āŒ Invalid arguments: expected object".to_string()), + }; + + let file_path = match args_obj.get("file_path").and_then(|v| v.as_str()) { + Some(path) => { + let expanded_path = shellexpand::tilde(path); + expanded_path.into_owned() + } + None => return Ok("āŒ Missing or invalid file_path argument".to_string()), + }; + + let diff = match args_obj.get("diff").and_then(|v| v.as_str()) { + Some(d) => d, + None => return Ok("āŒ Missing or invalid diff argument".to_string()), + }; + + // Optional start and end character positions (0-indexed, end is EXCLUSIVE) + let start_char = args_obj + .get("start") + .and_then(|v| v.as_u64()) + .map(|n| n as usize); + let end_char = args_obj + .get("end") + .and_then(|v| v.as_u64()) + .map(|n| n as usize); + + debug!( + "str_replace: path={}, start={:?}, end={:?}", + file_path, start_char, end_char + ); + + // Read the existing file + let file_content = match std::fs::read_to_string(&file_path) { + Ok(content) => content, + Err(e) => return Ok(format!("āŒ Failed to read file '{}': {}", file_path, e)), + }; + + // Apply unified diff to content + let result = match apply_unified_diff_to_string(&file_content, diff, start_char, end_char) { + Ok(r) => r, + Err(e) => return Ok(format!("āŒ {}", e)), + }; + + // Write the result back to the file + match std::fs::write(&file_path, &result) { + Ok(()) => Ok("āœ… applied unified diff".to_string()), + Err(e) => Ok(format!("āŒ Failed to write to file '{}': {}", file_path, e)), + } +} + +// Helper functions + +/// Extract path and content from various argument formats. 
+fn extract_path_and_content(args: &serde_json::Value) -> (Option<&str>, Option<&str>) { + if let Some(args_obj) = args.as_object() { + // Format 1: Standard format with file_path and content + if let (Some(path_val), Some(content_val)) = + (args_obj.get("file_path"), args_obj.get("content")) + { + if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) { + return (Some(path), Some(content)); + } + } + // Format 2: Anthropic-style with path and content + if let (Some(path_val), Some(content_val)) = + (args_obj.get("path"), args_obj.get("content")) + { + if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) { + return (Some(path), Some(content)); + } + } + // Format 3: Alternative naming with filename and text + if let (Some(path_val), Some(content_val)) = + (args_obj.get("filename"), args_obj.get("text")) + { + if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) { + return (Some(path), Some(content)); + } + } + // Format 4: Alternative naming with file and data + if let (Some(path_val), Some(content_val)) = (args_obj.get("file"), args_obj.get("data")) { + if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) { + return (Some(path), Some(content)); + } + } + } else if let Some(args_array) = args.as_array() { + // Format 5: Args might be an array [path, content] + if args_array.len() >= 2 { + if let (Some(path), Some(content)) = (args_array[0].as_str(), args_array[1].as_str()) { + return (Some(path), Some(content)); + } + } + } + (None, None) +} + +/// Get image dimensions from raw bytes. 
+pub fn get_image_dimensions(bytes: &[u8], media_type: &str) -> Option<(u32, u32)> { + match media_type { + "image/png" => { + // PNG: width at bytes 16-19, height at bytes 20-23 (big-endian) + if bytes.len() >= 24 { + let width = u32::from_be_bytes([bytes[16], bytes[17], bytes[18], bytes[19]]); + let height = u32::from_be_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]); + Some((width, height)) + } else { + None + } + } + "image/jpeg" => { + // JPEG: Need to find SOF0/SOF2 marker (FF C0 or FF C2) + let mut i = 2; // Skip FF D8 + while i + 8 < bytes.len() { + if bytes[i] == 0xFF { + let marker = bytes[i + 1]; + // SOF0, SOF1, SOF2 markers contain dimensions + if marker == 0xC0 || marker == 0xC1 || marker == 0xC2 { + let height = u16::from_be_bytes([bytes[i + 5], bytes[i + 6]]) as u32; + let width = u16::from_be_bytes([bytes[i + 7], bytes[i + 8]]) as u32; + return Some((width, height)); + } + // Skip to next marker + if marker == 0xD8 + || marker == 0xD9 + || marker == 0x01 + || (0xD0..=0xD7).contains(&marker) + { + i += 2; + } else { + let len = u16::from_be_bytes([bytes[i + 2], bytes[i + 3]]) as usize; + i += 2 + len; + } + } else { + i += 1; + } + } + None + } + "image/gif" => { + // GIF: width at bytes 6-7, height at bytes 8-9 (little-endian) + if bytes.len() >= 10 { + let width = u16::from_le_bytes([bytes[6], bytes[7]]) as u32; + let height = u16::from_le_bytes([bytes[8], bytes[9]]) as u32; + Some((width, height)) + } else { + None + } + } + "image/webp" => { + // WebP VP8: dimensions at specific offsets (simplified) + if bytes.len() >= 30 && &bytes[12..16] == b"VP8 " { + let width = (u16::from_le_bytes([bytes[26], bytes[27]]) & 0x3FFF) as u32; + let height = (u16::from_le_bytes([bytes[28], bytes[29]]) & 0x3FFF) as u32; + Some((width, height)) + } else { + None + } + } + _ => None, + } +} + +/// Print image using iTerm2 imgcat protocol with info line. 
+pub fn print_imgcat( + bytes: &[u8], + name: &str, + dimensions: &str, + media_type: &str, + size: &str, + max_height: u32, +) { + use base64::Engine; + let encoded = base64::engine::general_purpose::STANDARD.encode(bytes); + // Extract just the filename from the path + let filename = std::path::Path::new(name) + .file_name() + .and_then(|f| f.to_str()) + .unwrap_or(name); + // iTerm2 inline image protocol (single space prefix) + print!( + " \x1b]1337;File=inline=1;height={};name={}:{}\x07\n", + max_height, name, encoded + ); + // Print dimmed info line with filename only (no │ prefix) + println!( + " \x1b[2m{} | {} | {} | {}\x1b[0m", + filename, dimensions, media_type, size + ); + // Blank line before next image (no │ prefix) + println!(); +} diff --git a/crates/g3-core/src/tools/macax.rs b/crates/g3-core/src/tools/macax.rs new file mode 100644 index 0000000..c0356c7 --- /dev/null +++ b/crates/g3-core/src/tools/macax.rs @@ -0,0 +1,178 @@ +//! macOS Accessibility API tools. + +use anyhow::Result; +use tracing::debug; + +use crate::ui_writer::UiWriter; +use crate::ToolCall; + +use super::executor::ToolContext; + +/// Execute the `macax_list_apps` tool. +pub async fn execute_macax_list_apps( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing macax_list_apps tool call"); + let _ = tool_call; // unused + + if !ctx.config.macax.enabled { + return Ok( + "āŒ macOS Accessibility is not enabled. 
Use --macax flag to enable.".to_string(), + ); + } + + let controller_guard = ctx.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.list_applications() { + Ok(apps) => { + let app_list: Vec = apps.iter().map(|a| a.name.clone()).collect(); + Ok(format!("Running applications:\n{}", app_list.join("\n"))) + } + Err(e) => Ok(format!("āŒ Failed to list applications: {}", e)), + } +} + +/// Execute the `macax_get_frontmost_app` tool. +pub async fn execute_macax_get_frontmost_app( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing macax_get_frontmost_app tool call"); + let _ = tool_call; // unused + + if !ctx.config.macax.enabled { + return Ok( + "āŒ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(), + ); + } + + let controller_guard = ctx.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.get_frontmost_app() { + Ok(app) => Ok(format!("Frontmost application: {}", app.name)), + Err(e) => Ok(format!("āŒ Failed to get frontmost app: {}", e)), + } +} + +/// Execute the `macax_activate_app` tool. +pub async fn execute_macax_activate_app( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing macax_activate_app tool call"); + + if !ctx.config.macax.enabled { + return Ok( + "āŒ macOS Accessibility is not enabled. 
Use --macax flag to enable.".to_string(), + ); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("āŒ Missing app_name argument".to_string()), + }; + + let controller_guard = ctx.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.activate_app(app_name) { + Ok(_) => Ok(format!("āœ… Activated application: {}", app_name)), + Err(e) => Ok(format!("āŒ Failed to activate app: {}", e)), + } +} + +/// Execute the `macax_press_key` tool. +pub async fn execute_macax_press_key( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing macax_press_key tool call"); + + if !ctx.config.macax.enabled { + return Ok( + "āŒ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(), + ); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("āŒ Missing app_name argument".to_string()), + }; + + let key = match tool_call.args.get("key").and_then(|v| v.as_str()) { + Some(k) => k, + None => return Ok("āŒ Missing key argument".to_string()), + }; + + let modifiers_vec: Vec<&str> = tool_call + .args + .get("modifiers") + .and_then(|v| v.as_array()) + .map(|arr| arr.iter().filter_map(|v| v.as_str()).collect()) + .unwrap_or_default(); + + let controller_guard = ctx.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.press_key(app_name, key, modifiers_vec.clone()) { + Ok(_) => { + let modifier_str = if modifiers_vec.is_empty() { + String::new() + } else { + format!(" with modifiers: {}", modifiers_vec.join("+")) + }; + Ok(format!("āœ… Pressed key: {}{}", key, modifier_str)) + } + 
Err(e) => Ok(format!("āŒ Failed to press key: {}", e)), + } +} + +/// Execute the `macax_type_text` tool. +pub async fn execute_macax_type_text( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing macax_type_text tool call"); + + if !ctx.config.macax.enabled { + return Ok( + "āŒ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(), + ); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("āŒ Missing app_name argument".to_string()), + }; + + let text = match tool_call.args.get("text").and_then(|v| v.as_str()) { + Some(t) => t, + None => return Ok("āŒ Missing text argument".to_string()), + }; + + let controller_guard = ctx.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("āŒ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.type_text(app_name, text) { + Ok(_) => Ok(format!("āœ… Typed text into {}", app_name)), + Err(e) => Ok(format!("āŒ Failed to type text: {}", e)), + } +} diff --git a/crates/g3-core/src/tools/misc.rs b/crates/g3-core/src/tools/misc.rs new file mode 100644 index 0000000..934a8f8 --- /dev/null +++ b/crates/g3-core/src/tools/misc.rs @@ -0,0 +1,225 @@ +//! Miscellaneous tools: final_output, take_screenshot, extract_text, code_coverage, code_search. + +use anyhow::Result; +use tracing::debug; + +use crate::ui_writer::UiWriter; +use crate::ToolCall; + +use super::executor::ToolContext; + +/// Execute the `final_output` tool. 
+pub async fn execute_final_output( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing final_output tool call"); + + let summary_str = tool_call.args.get("summary").and_then(|v| v.as_str()); + + // In autonomous mode, check for incomplete TODO items before allowing completion + if ctx.is_autonomous { + let todo_content = ctx.todo_content.read().await; + let has_incomplete_todos = todo_content + .lines() + .any(|line| line.trim().starts_with("- [ ]")); + drop(todo_content); + + if has_incomplete_todos { + return Ok( + "There are still incomplete TODO items. Please continue until \ + *ALL* TODO items in *ALL* phases are marked complete, and \ + *ONLY* then call `final_output`." + .to_string(), + ); + } + } + + // Return the summary or a default message + // Note: Session continuation saving is handled by the caller (Agent) + if let Some(summary) = summary_str { + Ok(summary.to_string()) + } else { + Ok("āœ… Turn completed".to_string()) + } +} + +/// Execute the `take_screenshot` tool. +pub async fn execute_take_screenshot( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing take_screenshot tool call"); + + let controller = match ctx.computer_controller { + Some(c) => c, + None => { + return Ok( + "āŒ Computer control not enabled. Set computer_control.enabled = true in config." + .to_string(), + ) + } + }; + + let path = tool_call + .args + .get("path") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing path argument"))?; + + // Extract window_id (app name) - REQUIRED + let window_id = tool_call + .args + .get("window_id") + .and_then(|v| v.as_str()) + .ok_or_else(|| { + anyhow::anyhow!( + "Missing window_id argument. You must specify which window to capture \ + (e.g., 'Safari', 'Terminal', 'Google Chrome')." 
+ ) + })?; + + // Extract region if provided + let region = tool_call + .args + .get("region") + .and_then(|v| v.as_object()) + .map(|region_obj| g3_computer_control::types::Rect { + x: region_obj.get("x").and_then(|v| v.as_i64()).unwrap_or(0) as i32, + y: region_obj.get("y").and_then(|v| v.as_i64()).unwrap_or(0) as i32, + width: region_obj + .get("width") + .and_then(|v| v.as_i64()) + .unwrap_or(0) as i32, + height: region_obj + .get("height") + .and_then(|v| v.as_i64()) + .unwrap_or(0) as i32, + }); + + match controller.take_screenshot(path, region, Some(window_id)).await { + Ok(_) => { + // Get the actual path where the screenshot was saved + let actual_path = if path.starts_with('/') { + path.to_string() + } else { + let temp_dir = std::env::var("TMPDIR") + .or_else(|_| std::env::var("HOME").map(|h| format!("{}/tmp", h))) + .unwrap_or_else(|_| "/tmp".to_string()); + format!("{}/{}", temp_dir.trim_end_matches('/'), path) + }; + + Ok(format!( + "āœ… Screenshot of {} saved to: {}", + window_id, actual_path + )) + } + Err(e) => Ok(format!("āŒ Failed to take screenshot: {}", e)), + } +} + +/// Execute the `extract_text` tool. +pub async fn execute_extract_text( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing extract_text tool call"); + + let controller = match ctx.computer_controller { + Some(c) => c, + None => { + return Ok( + "āŒ Computer control not enabled. Set computer_control.enabled = true in config." + .to_string(), + ) + } + }; + + let path = tool_call + .args + .get("path") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing path argument"))?; + + match controller.extract_text_from_image(path).await { + Ok(text) => Ok(format!("āœ… Extracted text:\n{}", text)), + Err(e) => Ok(format!("āŒ Failed to extract text: {}", e)), + } +} + +/// Execute the `code_coverage` tool. 
+pub async fn execute_code_coverage( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing code_coverage tool call"); + let _ = tool_call; // unused + + ctx.ui_writer + .print_context_status("šŸ” Generating code coverage report..."); + + // Ensure coverage tools are installed + match g3_execution::ensure_coverage_tools_installed() { + Ok(already_installed) => { + if !already_installed { + ctx.ui_writer + .print_context_status("āœ… Coverage tools installed successfully"); + } + } + Err(e) => { + return Ok(format!("āŒ Failed to install coverage tools: {}", e)); + } + } + + // Run cargo llvm-cov --workspace + let output = std::process::Command::new("cargo") + .args(["llvm-cov", "--workspace"]) + .current_dir(std::env::current_dir()?) + .output()?; + + if output.status.success() { + let stdout = String::from_utf8_lossy(&output.stdout); + let stderr = String::from_utf8_lossy(&output.stderr); + + let mut result = String::from("āœ… Code coverage report generated successfully\n\n"); + result.push_str("## Coverage Summary\n"); + result.push_str(&stdout); + if !stderr.is_empty() { + result.push_str("\n## Warnings\n"); + result.push_str(&stderr); + } + Ok(result) + } else { + let stderr = String::from_utf8_lossy(&output.stderr); + Ok(format!("āŒ Failed to generate coverage report:\n{}", stderr)) + } +} + +/// Execute the `code_search` tool. 
+pub async fn execute_code_search( + tool_call: &ToolCall, + _ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing code_search tool call"); + + // Parse the request + let request: crate::code_search::CodeSearchRequest = + match serde_json::from_value(tool_call.args.clone()) { + Ok(req) => req, + Err(e) => { + return Ok(format!("āŒ Invalid code_search arguments: {}", e)); + } + }; + + // Execute the code search + match crate::code_search::execute_code_search(request).await { + Ok(response) => { + // Serialize the response to JSON + match serde_json::to_string_pretty(&response) { + Ok(json_output) => Ok(format!("āœ… Code search completed\n{}", json_output)), + Err(e) => Ok(format!("āŒ Failed to serialize response: {}", e)), + } + } + Err(e) => Ok(format!("āŒ Code search failed: {}", e)), + } +} diff --git a/crates/g3-core/src/tools/mod.rs b/crates/g3-core/src/tools/mod.rs new file mode 100644 index 0000000..4aebf33 --- /dev/null +++ b/crates/g3-core/src/tools/mod.rs @@ -0,0 +1,22 @@ +//! Tool execution module for G3 agent. +//! +//! This module contains all tool implementations that the agent can execute. +//! Tools are organized by category: +//! - `shell` - Shell command execution and background processes +//! - `file_ops` - File reading, writing, and editing +//! - `todo` - TODO list management +//! - `webdriver` - Browser automation via WebDriver +//! - `macax` - macOS Accessibility API tools +//! - `vision` - Vision-based text finding and clicking +//! - `misc` - Other tools (screenshots, code search, etc.) + +pub mod executor; +pub mod file_ops; +pub mod macax; +pub mod misc; +pub mod shell; +pub mod todo; +pub mod vision; +pub mod webdriver; + +pub use executor::ToolExecutor; diff --git a/crates/g3-core/src/tools/shell.rs b/crates/g3-core/src/tools/shell.rs new file mode 100644 index 0000000..5ec93c6 --- /dev/null +++ b/crates/g3-core/src/tools/shell.rs @@ -0,0 +1,115 @@ +//! Shell command execution tools. 
+
+use anyhow::Result;
+use tracing::debug;
+
+use crate::ui_writer::UiWriter;
+use crate::utils::shell_escape_command;
+use crate::ToolCall;
+
+use super::executor::ToolContext;
+
+/// Execute the `shell` tool.
+///
+/// Runs the `command` argument through the bash executor in the context's
+/// working directory, streaming each output line to the UI writer as it
+/// arrives. Returns trimmed stdout on success (or a confirmation message when
+/// stdout is blank) and trimmed stderr on failure. All failures are reported in
+/// the returned text rather than as `Err`.
+pub async fn execute_shell<W: UiWriter>(
+    tool_call: &ToolCall,
+    ctx: &ToolContext<'_, W>,
+) -> Result<String> {
+    debug!("Processing shell tool call");
+
+    let command = match tool_call.args.get("command").and_then(|v| v.as_str()) {
+        Some(cmd) => cmd,
+        None => {
+            debug!("No command parameter found in args: {:?}", tool_call.args);
+            return Ok("❌ Missing command argument".to_string());
+        }
+    };
+
+    debug!("Command string: {}", command);
+    let escaped_command = shell_escape_command(command);
+
+    let executor = g3_execution::CodeExecutor::new();
+
+    /// Forwards every line produced by the command to the UI writer.
+    struct ToolOutputReceiver<'a, W: UiWriter> {
+        ui_writer: &'a W,
+    }
+
+    impl<'a, W: UiWriter> g3_execution::OutputReceiver for ToolOutputReceiver<'a, W> {
+        fn on_output_line(&self, line: &str) {
+            self.ui_writer.update_tool_output_line(line);
+        }
+    }
+
+    let receiver = ToolOutputReceiver {
+        ui_writer: ctx.ui_writer,
+    };
+
+    debug!(
+        "ABOUT TO CALL execute_bash_streaming_in_dir: escaped_command='{}', working_dir={:?}",
+        escaped_command, ctx.working_dir
+    );
+
+    let result = match executor
+        .execute_bash_streaming_in_dir(&escaped_command, &receiver, ctx.working_dir)
+        .await
+    {
+        Ok(result) => result,
+        Err(e) => return Ok(format!("❌ Execution error: {}", e)),
+    };
+
+    if !result.success {
+        return Ok(format!("❌ Command failed: {}", result.stderr.trim()));
+    }
+
+    // Test the trimmed output so that whitespace-only stdout yields the
+    // confirmation message instead of an empty tool result.
+    let stdout = result.stdout.trim();
+    Ok(if stdout.is_empty() {
+        "✅ Command executed successfully".to_string()
+    } else {
+        stdout.to_string()
+    })
+}
+
+/// Execute the `background_process` tool.
+pub async fn execute_background_process( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing background_process tool call"); + + let name = match tool_call.args.get("name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("āŒ Missing 'name' argument".to_string()), + }; + + let command = match tool_call.args.get("command").and_then(|v| v.as_str()) { + Some(c) => c, + None => return Ok("āŒ Missing 'command' argument".to_string()), + }; + + // Use provided working_dir, or fall back to context working_dir, or current dir + let work_dir = tool_call + .args + .get("working_dir") + .and_then(|v| v.as_str()) + .map(|s| std::path::PathBuf::from(shellexpand::tilde(s).as_ref())) + .or_else(|| ctx.working_dir.map(std::path::PathBuf::from)) + .unwrap_or_else(|| std::env::current_dir().unwrap_or_default()); + + match ctx.background_process_manager.start(name, command, &work_dir) { + Ok(info) => Ok(format!( + "āœ… Background process '{}' started\n\n\ + **PID:** {}\n\ + **Log file:** {}\n\ + **Working dir:** {}\n\n\ + To interact with this process, use the shell tool:\n\ + - View logs: `tail -100 {}`\n\ + - Follow logs: `tail -f {}` (blocks until Ctrl+C)\n\ + - Check status: `ps -p {}`\n\ + - Stop process: `kill {}`", + info.name, + info.pid, + info.log_file.display(), + info.working_dir.display(), + info.log_file.display(), + info.log_file.display(), + info.pid, + info.pid + )), + Err(e) => Ok(format!("āŒ Failed to start background process: {}", e)), + } +} diff --git a/crates/g3-core/src/tools/todo.rs b/crates/g3-core/src/tools/todo.rs new file mode 100644 index 0000000..67164c2 --- /dev/null +++ b/crates/g3-core/src/tools/todo.rs @@ -0,0 +1,195 @@ +//! TODO list management tools. + +use anyhow::Result; +use std::io::Write; +use tracing::debug; + +use crate::ui_writer::UiWriter; +use crate::ToolCall; + +use super::executor::ToolContext; + +/// Execute the `todo_read` tool. 
+pub async fn execute_todo_read( + tool_call: &ToolCall, + ctx: &mut ToolContext<'_, W>, +) -> Result { + debug!("Processing todo_read tool call"); + let _ = tool_call; // unused but kept for consistency + + let todo_path = ctx.get_todo_path(); + + if !todo_path.exists() { + // Also update in-memory content to stay in sync + let mut todo = ctx.todo_content.write().await; + *todo = String::new(); + return Ok("šŸ“ TODO list is empty (no todo.g3.md file found)".to_string()); + } + + match std::fs::read_to_string(&todo_path) { + Ok(content) => { + // Update in-memory content to stay in sync + let mut todo = ctx.todo_content.write().await; + *todo = content.clone(); + + // Check for staleness if enabled and we have a requirements SHA + if ctx.config.agent.check_todo_staleness { + if let Some(req_sha) = ctx.requirements_sha { + if let Some(staleness_result) = check_todo_staleness(&content, req_sha, ctx.ui_writer) { + return Ok(staleness_result); + } + } + } + + if content.trim().is_empty() { + Ok("šŸ“ TODO list is empty".to_string()) + } else { + for line in content.lines() { + ctx.ui_writer.print_tool_output_line(line); + } + Ok(format!("šŸ“ TODO list:\n{}", content)) + } + } + Err(e) => Ok(format!("āŒ Failed to read TODO.md: {}", e)), + } +} + +/// Execute the `todo_write` tool. 
+pub async fn execute_todo_write( + tool_call: &ToolCall, + ctx: &mut ToolContext<'_, W>, +) -> Result { + debug!("Processing todo_write tool call"); + + let content_str = match tool_call.args.get("content").and_then(|v| v.as_str()) { + Some(c) => c, + None => return Ok("āŒ Missing content argument".to_string()), + }; + + let char_count = content_str.chars().count(); + let max_chars = std::env::var("G3_TODO_MAX_CHARS") + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or(50_000); + + if max_chars > 0 && char_count > max_chars { + return Ok(format!( + "āŒ TODO list too large: {} chars (max: {})", + char_count, max_chars + )); + } + + // Check if all todos are completed (all checkboxes are checked) + let has_incomplete = content_str + .lines() + .any(|line| line.trim().starts_with("- [ ]")); + + // If all todos are complete, delete the file instead of writing + // EXCEPT in planner mode (G3_TODO_PATH is set) - preserve for rename to completed_todo_*.md + let in_planner_mode = std::env::var("G3_TODO_PATH").is_ok(); + let todo_path = ctx.get_todo_path(); + + if !in_planner_mode + && !has_incomplete + && (content_str.contains("- [x]") || content_str.contains("- [X]")) + { + if todo_path.exists() { + match std::fs::remove_file(&todo_path) { + Ok(_) => { + let mut todo = ctx.todo_content.write().await; + *todo = String::new(); + // Show the final completed TODOs before deletion + let mut result = + String::from("āœ… All TODOs completed! 
Removed todo.g3.md\n\nFinal status:\n"); + for line in content_str.lines() { + ctx.ui_writer.print_tool_output_line(line); + result.push_str(line); + result.push('\n'); + } + return Ok(result); + } + Err(e) => return Ok(format!("āŒ Failed to remove todo.g3.md: {}", e)), + } + } + } + + match std::fs::write(&todo_path, content_str) { + Ok(_) => { + // Also update in-memory content to stay in sync + let mut todo = ctx.todo_content.write().await; + *todo = content_str.to_string(); + // Print the TODO content to the console (inside the tool frame) + for line in content_str.lines() { + ctx.ui_writer.print_tool_output_line(line); + } + Ok(format!( + "āœ… TODO list updated ({} chars) and saved to todo.g3.md:\n{}", + char_count, content_str + )) + } + Err(e) => Ok(format!("āŒ Failed to write todo.g3.md: {}", e)), + } +} + +/// Check if the TODO list is stale (generated from a different requirements file). +/// Returns Some(message) if staleness was detected and handled, None otherwise. +fn check_todo_staleness( + content: &str, + req_sha: &str, + ui_writer: &W, +) -> Option { + // Parse the first line for the SHA header + let first_line = content.lines().next()?; + + if !first_line.starts_with("{{Based on the requirements file with SHA256:") { + return None; + } + + let parts: Vec<&str> = first_line.split("SHA256:").collect(); + if parts.len() <= 1 { + return None; + } + + let todo_sha = parts[1].trim().trim_end_matches("}}").trim(); + if todo_sha == req_sha { + return None; + } + + let warning = format!( + "āš ļø TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA: {}", + req_sha, todo_sha + ); + ui_writer.print_context_status(&warning); + + // Beep 6 times + print!("\x07\x07\x07\x07\x07\x07"); + let _ = std::io::stdout().flush(); + + let options = [ + "Ignore and Continue", + "Mark as Stale", + "Quit Application", + ]; + let choice = ui_writer.prompt_user_choice( + "Requirements have changed! 
What would you like to do?", + &options, + ); + + match choice { + 0 => { + // Ignore and Continue + ui_writer.print_context_status("āš ļø Ignoring staleness warning."); + None + } + 1 => { + // Mark as Stale + Some("āš ļø TODO list is stale (requirements changed). Please regenerate the TODO list to match the new requirements.".to_string()) + } + 2 => { + // Quit Application + ui_writer.print_context_status("āŒ Quitting application as requested."); + std::process::exit(0); + } + _ => None, + } +} diff --git a/crates/g3-core/src/tools/vision.rs b/crates/g3-core/src/tools/vision.rs new file mode 100644 index 0000000..15a3d23 --- /dev/null +++ b/crates/g3-core/src/tools/vision.rs @@ -0,0 +1,275 @@ +//! Vision-based tools: vision_find_text, vision_click_text, vision_click_near_text, extract_text_with_boxes. + +use anyhow::Result; +use tracing::debug; + +use crate::ui_writer::UiWriter; +use crate::ToolCall; + +use super::executor::ToolContext; + +/// Execute the `vision_find_text` tool. +pub async fn execute_vision_find_text( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing vision_find_text tool call"); + + let controller = match ctx.computer_controller { + Some(c) => c, + None => { + return Ok( + "āŒ Computer control not enabled. Set computer_control.enabled = true in config." 
+ .to_string(), + ) + } + }; + + let app_name = tool_call + .args + .get("app_name") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?; + + let text = tool_call + .args + .get("text") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?; + + match controller.find_text_in_app(app_name, text).await { + Ok(Some(location)) => Ok(format!( + "āœ… Found '{}' in {} at position ({}, {}) with size {}x{} (confidence: {:.0}%)", + location.text, + app_name, + location.x, + location.y, + location.width, + location.height, + location.confidence * 100.0 + )), + Ok(None) => Ok(format!("āŒ Could not find '{}' in {}", text, app_name)), + Err(e) => Ok(format!("āŒ Error finding text: {}", e)), + } +} + +/// Execute the `vision_click_text` tool. +pub async fn execute_vision_click_text( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing vision_click_text tool call"); + + let controller = match ctx.computer_controller { + Some(c) => c, + None => { + return Ok( + "āŒ Computer control not enabled. Set computer_control.enabled = true in config." 
+ .to_string(), + ) + } + }; + + let app_name = tool_call + .args + .get("app_name") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?; + + let text = tool_call + .args + .get("text") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?; + + match controller.find_text_in_app(app_name, text).await { + Ok(Some(location)) => { + // Click on center of text + // IMPORTANT: location coordinates are in NSScreen space (Y=0 at BOTTOM, increases UPWARD) + // location.x is the LEFT edge of the bounding box + // location.y is the TOP edge of the bounding box (highest Y value in NSScreen space) + // location.width and location.height are already scaled to screen space + // To get center: we need to add half the SCALED width and subtract half the SCALED height + + if location.width == 0 || location.height == 0 { + return Ok(format!( + "āŒ Invalid bounding box dimensions: width={}, height={}", + location.width, location.height + )); + } + + debug!( + "[vision_click_text] Location from find_text_in_app: x={}, y={}, width={}, height={}, text='{}'", + location.x, location.y, location.width, location.height, location.text + ); + + // Calculate center using the SCALED dimensions + // X: Use right edge instead of center (Vision OCR bounding box seems offset) + // This gives us: left edge + full width = right edge + // Y: top edge - half of scaled height (subtract because Y increases upward) + let click_x = location.x + location.width; // Right edge + let half_height = location.height / 2; + let click_y = location.y - half_height; + + debug!( + "[vision_click_text] Click position calculation: x={} + {} = {} (right edge), y={} - {} = {}", + location.x, location.width, click_x, location.y, half_height, click_y + ); + + match controller.click_at(click_x, click_y, Some(app_name)) { + Ok(_) => Ok(format!( + "āœ… Clicked on '{}' in {} at ({}, {})", + text, app_name, click_x, click_y + )), + Err(e) => 
Ok(format!("āŒ Failed to click: {}", e)), + } + } + Ok(None) => Ok(format!("āŒ Could not find '{}' in {}", text, app_name)), + Err(e) => Ok(format!("āŒ Error finding text: {}", e)), + } +} + +/// Execute the `vision_click_near_text` tool. +pub async fn execute_vision_click_near_text( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing vision_click_near_text tool call"); + + let controller = match ctx.computer_controller { + Some(c) => c, + None => { + return Ok( + "āŒ Computer control not enabled. Set computer_control.enabled = true in config." + .to_string(), + ) + } + }; + + let app_name = tool_call + .args + .get("app_name") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?; + + let text = tool_call + .args + .get("text") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?; + + let direction = tool_call + .args + .get("direction") + .and_then(|v| v.as_str()) + .unwrap_or("right"); + + let distance = tool_call + .args + .get("distance") + .and_then(|v| v.as_i64()) + .unwrap_or(50) as i32; + + match controller.find_text_in_app(app_name, text).await { + Ok(Some(location)) => { + // Calculate click position based on direction + // location.x is LEFT edge, location.y is TOP edge (in NSScreen space) + let (click_x, click_y) = match direction { + "right" => ( + location.x + location.width + distance, + location.y - (location.height / 2), + ), + "below" => ( + location.x + (location.width / 2), + location.y - location.height - distance, + ), + "left" => (location.x - distance, location.y - (location.height / 2)), + "above" => (location.x + (location.width / 2), location.y + distance), + _ => ( + location.x + location.width + distance, + location.y - (location.height / 2), + ), + }; + debug!( + "[vision_click_near_text] Clicking {} of text at ({}, {})", + direction, click_x, click_y + ); + + match controller.click_at(click_x, click_y, Some(app_name)) { 
+ Ok(_) => Ok(format!( + "āœ… Clicked {} of '{}' in {} at ({}, {})", + direction, text, app_name, click_x, click_y + )), + Err(e) => Ok(format!("āŒ Failed to click: {}", e)), + } + } + Ok(None) => Ok(format!("āŒ Could not find '{}' in {}", text, app_name)), + Err(e) => Ok(format!("āŒ Error finding text: {}", e)), + } +} + +/// Execute the `extract_text_with_boxes` tool. +pub async fn execute_extract_text_with_boxes( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing extract_text_with_boxes tool call"); + + if !ctx.config.macax.enabled { + return Ok( + "āŒ extract_text_with_boxes requires --macax flag to be enabled".to_string(), + ); + } + + let controller = match ctx.computer_controller { + Some(c) => c, + None => { + return Ok( + "āŒ Computer control not enabled. Set computer_control.enabled = true in config." + .to_string(), + ) + } + }; + + let path = tool_call + .args + .get("path") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("Missing path parameter"))?; + + // Optional: take screenshot of app first + let final_path = if let Some(app_name) = tool_call.args.get("app_name").and_then(|v| v.as_str()) + { + let temp_path = format!("/tmp/g3_extract_boxes_{}.png", uuid::Uuid::new_v4()); + match controller + .take_screenshot(&temp_path, None, Some(app_name)) + .await + { + Ok(_) => temp_path, + Err(e) => return Ok(format!("āŒ Failed to take screenshot: {}", e)), + } + } else { + path.to_string() + }; + + // Extract text with locations + match controller.extract_text_with_locations(&final_path).await { + Ok(locations) => { + // Clean up temp file if we created one + if final_path != path { + let _ = std::fs::remove_file(&final_path); + } + + // Return as JSON + match serde_json::to_string_pretty(&locations) { + Ok(json) => Ok(format!( + "āœ… Extracted {} text elements:\n{}", + locations.len(), + json + )), + Err(e) => Ok(format!("āŒ Failed to serialize results: {}", e)), + } + } + Err(e) => Ok(format!("āŒ 
Failed to extract text: {}", e)), + } +} diff --git a/crates/g3-core/src/tools/webdriver.rs b/crates/g3-core/src/tools/webdriver.rs new file mode 100644 index 0000000..e217ec2 --- /dev/null +++ b/crates/g3-core/src/tools/webdriver.rs @@ -0,0 +1,678 @@ +//! WebDriver browser automation tools. + +use anyhow::Result; +use g3_computer_control::WebDriverController; +use tracing::{debug, warn}; + +use crate::ui_writer::UiWriter; +use crate::webdriver_session::WebDriverSession; +use crate::ToolCall; + +use super::executor::ToolContext; + +/// Execute the `webdriver_start` tool. +pub async fn execute_webdriver_start( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_start tool call"); + let _ = tool_call; // unused + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + // Check if session already exists + let session_guard = ctx.webdriver_session.read().await; + if session_guard.is_some() { + drop(session_guard); + return Ok("āœ… WebDriver session already active".to_string()); + } + drop(session_guard); + + // Determine which browser to use based on config + use g3_config::WebDriverBrowser; + match &ctx.config.webdriver.browser { + WebDriverBrowser::Safari => start_safari_driver(ctx).await, + WebDriverBrowser::ChromeHeadless => start_chrome_driver(ctx).await, + } +} + +async fn start_safari_driver(ctx: &ToolContext<'_, W>) -> Result { + let port = ctx.config.webdriver.safari_port; + + let driver_result = tokio::process::Command::new("safaridriver") + .arg("--port") + .arg(port.to_string()) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn(); + + let mut webdriver_process = match driver_result { + Ok(process) => process, + Err(e) => { + return Ok(format!( + "āŒ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.", + e + )); + } + }; + + // Wait for safaridriver to start up + 
tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await; + + // Connect to SafariDriver + match g3_computer_control::SafariDriver::with_port(port).await { + Ok(driver) => { + let session = + std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Safari(driver))); + *ctx.webdriver_session.write().await = Some(session); + *ctx.webdriver_process.write().await = Some(webdriver_process); + + Ok( + "āœ… WebDriver session started successfully! Safari should open automatically." + .to_string(), + ) + } + Err(e) => { + let _ = webdriver_process.kill().await; + Ok(format!( + "āŒ Failed to connect to SafariDriver: {}\n\n\ + This might be because:\n \ + - Safari Remote Automation is not enabled (run: safaridriver --enable)\n \ + - Port {} is already in use\n \ + - Safari failed to start\n \ + - Network connectivity issue\n\n\ + To enable Remote Automation:\n \ + 1. Run: safaridriver --enable (requires password, one-time setup)\n \ + 2. Or manually: Safari → Develop → Allow Remote Automation", + e, port + )) + } + } +} + +async fn start_chrome_driver(ctx: &ToolContext<'_, W>) -> Result { + let port = ctx.config.webdriver.chrome_port; + + // Start chromedriver process + let driver_result = tokio::process::Command::new("chromedriver") + .arg(format!("--port={}", port)) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn(); + + let mut webdriver_process = match driver_result { + Ok(process) => process, + Err(e) => { + return Ok(format!( + "āŒ Failed to start chromedriver: {}\n\n\ + Make sure chromedriver is installed and in your PATH.\n\n\ + Install with:\n \ + - macOS: brew install chromedriver\n \ + - Linux: apt install chromium-chromedriver\n \ + - Or download from: https://chromedriver.chromium.org/downloads", + e + )); + } + }; + + // Wait for chromedriver to be ready with retry loop + let max_retries = 10; + let mut last_error = None; + + for attempt in 0..max_retries { + // Wait before each attempt (200ms between 
retries, total max ~2s) + tokio::time::sleep(tokio::time::Duration::from_millis(200)).await; + + // Try to connect to ChromeDriver in headless mode (with optional custom binary) + let driver_result = match &ctx.config.webdriver.chrome_binary { + Some(binary) => { + g3_computer_control::ChromeDriver::with_port_headless_and_binary(port, Some(binary)) + .await + } + None => g3_computer_control::ChromeDriver::with_port_headless(port).await, + }; + + match driver_result { + Ok(driver) => { + let session = + std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Chrome(driver))); + *ctx.webdriver_session.write().await = Some(session); + *ctx.webdriver_process.write().await = Some(webdriver_process); + + return Ok( + "āœ… WebDriver session started successfully! Chrome is running in headless mode (no visible window)." + .to_string(), + ); + } + Err(e) => { + last_error = Some(e); + if attempt < max_retries - 1 { + continue; + } + } + } + } + + // All retries failed + let _ = webdriver_process.kill().await; + let error_msg = last_error + .map(|e| e.to_string()) + .unwrap_or_else(|| "Unknown error".to_string()); + Ok(format!( + "āŒ Failed to connect to ChromeDriver after {} attempts: {}\n\n\ + This might be because:\n \ + - Chrome is not installed\n \ + - ChromeDriver version doesn't match Chrome version\n \ + - Port {} is already in use\n\n\ + Make sure Chrome and ChromeDriver are installed and compatible.", + max_retries, error_msg, port + )) +} + +/// Execute the `webdriver_navigate` tool. +pub async fn execute_webdriver_navigate( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_navigate tool call"); + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. 
Call webdriver_start first.".to_string()) + } + }; + drop(session_guard); + + let url = match tool_call.args.get("url").and_then(|v| v.as_str()) { + Some(u) => u, + None => return Ok("āŒ Missing url argument".to_string()), + }; + + let mut driver = session.lock().await; + match driver.navigate(url).await { + Ok(_) => Ok(format!("āœ… Navigated to {}", url)), + Err(e) => Ok(format!("āŒ Failed to navigate: {}", e)), + } +} + +/// Execute the `webdriver_get_url` tool. +pub async fn execute_webdriver_get_url( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_get_url tool call"); + let _ = tool_call; // unused + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()) + } + }; + + let driver = session.lock().await; + match driver.current_url().await { + Ok(url) => Ok(format!("Current URL: {}", url)), + Err(e) => Ok(format!("āŒ Failed to get URL: {}", e)), + } +} + +/// Execute the `webdriver_get_title` tool. +pub async fn execute_webdriver_get_title( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_get_title tool call"); + let _ = tool_call; // unused + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. 
Call webdriver_start first.".to_string()) + } + }; + + let driver = session.lock().await; + match driver.title().await { + Ok(title) => Ok(format!("Page title: {}", title)), + Err(e) => Ok(format!("āŒ Failed to get title: {}", e)), + } +} + +/// Execute the `webdriver_find_element` tool. +pub async fn execute_webdriver_find_element( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_find_element tool call"); + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()) + } + }; + + let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) { + Some(s) => s, + None => return Ok("āŒ Missing selector argument".to_string()), + }; + + let mut driver = session.lock().await; + match driver.find_element(selector).await { + Ok(elem) => match elem.text().await { + Ok(text) => Ok(format!("Element text: {}", text)), + Err(e) => Ok(format!("āŒ Failed to get element text: {}", e)), + }, + Err(e) => Ok(format!("āŒ Failed to find element '{}': {}", selector, e)), + } +} + +/// Execute the `webdriver_find_elements` tool. +pub async fn execute_webdriver_find_elements( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_find_elements tool call"); + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. 
Call webdriver_start first.".to_string()) + } + }; + + let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) { + Some(s) => s, + None => return Ok("āŒ Missing selector argument".to_string()), + }; + + let mut driver = session.lock().await; + match driver.find_elements(selector).await { + Ok(elements) => { + let mut results = Vec::new(); + for (i, elem) in elements.iter().enumerate() { + match elem.text().await { + Ok(text) => results.push(format!("[{}]: {}", i, text)), + Err(_) => results.push(format!("[{}]: ", i)), + } + } + Ok(format!( + "Found {} elements:\n{}", + results.len(), + results.join("\n") + )) + } + Err(e) => Ok(format!("āŒ Failed to find elements '{}': {}", selector, e)), + } +} + +/// Execute the `webdriver_click` tool. +pub async fn execute_webdriver_click( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_click tool call"); + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()) + } + }; + + let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) { + Some(s) => s, + None => return Ok("āŒ Missing selector argument".to_string()), + }; + + let mut driver = session.lock().await; + match driver.find_element(selector).await { + Ok(mut elem) => match elem.click().await { + Ok(_) => Ok(format!("āœ… Clicked element '{}'", selector)), + Err(e) => Ok(format!("āŒ Failed to click element: {}", e)), + }, + Err(e) => Ok(format!("āŒ Failed to find element '{}': {}", selector, e)), + } +} + +/// Execute the `webdriver_send_keys` tool. 
+pub async fn execute_webdriver_send_keys( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_send_keys tool call"); + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()) + } + }; + + let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) { + Some(s) => s, + None => return Ok("āŒ Missing selector argument".to_string()), + }; + + let text = match tool_call.args.get("text").and_then(|v| v.as_str()) { + Some(t) => t, + None => return Ok("āŒ Missing text argument".to_string()), + }; + + let clear_first = tool_call + .args + .get("clear_first") + .and_then(|v| v.as_bool()) + .unwrap_or(true); + + let mut driver = session.lock().await; + match driver.find_element(selector).await { + Ok(mut elem) => { + if clear_first { + if let Err(e) = elem.clear().await { + return Ok(format!("āŒ Failed to clear element: {}", e)); + } + } + match elem.send_keys(text).await { + Ok(_) => Ok(format!("āœ… Sent keys to element '{}'", selector)), + Err(e) => Ok(format!("āŒ Failed to send keys: {}", e)), + } + } + Err(e) => Ok(format!("āŒ Failed to find element '{}': {}", selector, e)), + } +} + +/// Execute the `webdriver_execute_script` tool. +pub async fn execute_webdriver_execute_script( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_execute_script tool call"); + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. 
Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()) + } + }; + + let script = match tool_call.args.get("script").and_then(|v| v.as_str()) { + Some(s) => s, + None => return Ok("āŒ Missing script argument".to_string()), + }; + + let mut driver = session.lock().await; + match driver.execute_script(script, vec![]).await { + Ok(result) => Ok(format!("Script result: {:?}", result)), + Err(e) => Ok(format!("āŒ Failed to execute script: {}", e)), + } +} + +/// Execute the `webdriver_get_page_source` tool. +pub async fn execute_webdriver_get_page_source( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_get_page_source tool call"); + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + // Extract optional parameters + let max_length = tool_call + .args + .get("max_length") + .and_then(|v| v.as_u64()) + .map(|n| n as usize) + .unwrap_or(10000); + + let save_to_file = tool_call.args.get("save_to_file").and_then(|v| v.as_str()); + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. 
Call webdriver_start first.".to_string()) + } + }; + + let driver = session.lock().await; + match driver.page_source().await { + Ok(source) => { + // If save_to_file is specified, write to file + if let Some(file_path) = save_to_file { + let expanded_path = shellexpand::tilde(file_path); + let path_str = expanded_path.as_ref(); + + // Create parent directories if needed + if let Some(parent) = std::path::Path::new(path_str).parent() { + if let Err(e) = std::fs::create_dir_all(parent) { + return Ok(format!("āŒ Failed to create directories: {}", e)); + } + } + + match std::fs::write(path_str, &source) { + Ok(_) => Ok(format!( + "āœ… Page source ({} chars) saved to: {}", + source.len(), + path_str + )), + Err(e) => Ok(format!("āŒ Failed to write file: {}", e)), + } + } else if max_length > 0 && source.len() > max_length { + // Truncate if max_length is set and source exceeds it + Ok(format!( + "Page source ({} chars, truncated to {}):\n{}...", + source.len(), + max_length, + &source[..max_length] + )) + } else { + // Return full source + Ok(format!("Page source ({} chars):\n{}", source.len(), source)) + } + } + Err(e) => Ok(format!("āŒ Failed to get page source: {}", e)), + } +} + +/// Execute the `webdriver_screenshot` tool. +pub async fn execute_webdriver_screenshot( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_screenshot tool call"); + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. 
Call webdriver_start first.".to_string()) + } + }; + + let path = match tool_call.args.get("path").and_then(|v| v.as_str()) { + Some(p) => p, + None => return Ok("āŒ Missing path argument".to_string()), + }; + + let mut driver = session.lock().await; + match driver.screenshot(path).await { + Ok(_) => Ok(format!("āœ… Screenshot saved to {}", path)), + Err(e) => Ok(format!("āŒ Failed to take screenshot: {}", e)), + } +} + +/// Execute the `webdriver_back` tool. +pub async fn execute_webdriver_back( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_back tool call"); + let _ = tool_call; // unused + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()) + } + }; + + let mut driver = session.lock().await; + match driver.back().await { + Ok(_) => Ok("āœ… Navigated back".to_string()), + Err(e) => Ok(format!("āŒ Failed to navigate back: {}", e)), + } +} + +/// Execute the `webdriver_forward` tool. +pub async fn execute_webdriver_forward( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_forward tool call"); + let _ = tool_call; // unused + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. 
Call webdriver_start first.".to_string()) + } + }; + + let mut driver = session.lock().await; + match driver.forward().await { + Ok(_) => Ok("āœ… Navigated forward".to_string()), + Err(e) => Ok(format!("āŒ Failed to navigate forward: {}", e)), + } +} + +/// Execute the `webdriver_refresh` tool. +pub async fn execute_webdriver_refresh( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_refresh tool call"); + let _ = tool_call; // unused + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. Use --webdriver flag to enable.".to_string()); + } + + let session_guard = ctx.webdriver_session.read().await; + let session = match session_guard.as_ref() { + Some(s) => s.clone(), + None => { + return Ok("āŒ No active WebDriver session. Call webdriver_start first.".to_string()) + } + }; + + let mut driver = session.lock().await; + match driver.refresh().await { + Ok(_) => Ok("āœ… Page refreshed".to_string()), + Err(e) => Ok(format!("āŒ Failed to refresh page: {}", e)), + } +} + +/// Execute the `webdriver_quit` tool. +pub async fn execute_webdriver_quit( + tool_call: &ToolCall, + ctx: &ToolContext<'_, W>, +) -> Result { + debug!("Processing webdriver_quit tool call"); + let _ = tool_call; // unused + + if !ctx.config.webdriver.enabled { + return Ok("āŒ WebDriver is not enabled. 
Use --webdriver flag to enable.".to_string()); + } + + // Take the session + let session = match ctx.webdriver_session.write().await.take() { + Some(s) => s.clone(), + None => return Ok("āŒ No active WebDriver session.".to_string()), + }; + + // Quit the WebDriver session + match std::sync::Arc::try_unwrap(session) { + Ok(mutex) => { + let driver = mutex.into_inner(); + match driver.quit().await { + Ok(_) => { + debug!("WebDriver session closed successfully"); + + // Kill the safaridriver process + if let Some(mut process) = ctx.webdriver_process.write().await.take() { + if let Err(e) = process.kill().await { + warn!("Failed to kill safaridriver process: {}", e); + } else { + debug!("Safaridriver process terminated"); + } + } + + Ok("āœ… WebDriver session closed and safaridriver stopped".to_string()) + } + Err(e) => Ok(format!("āŒ Failed to quit WebDriver: {}", e)), + } + } + Err(_) => Ok("āŒ Cannot quit: WebDriver session is still in use".to_string()), + } +} diff --git a/crates/g3-core/tests/test_preflight_max_tokens.rs b/crates/g3-core/tests/test_preflight_max_tokens.rs index c2d27ab..c4d94e1 100644 --- a/crates/g3-core/tests/test_preflight_max_tokens.rs +++ b/crates/g3-core/tests/test_preflight_max_tokens.rs @@ -189,3 +189,39 @@ fn test_error_message_content() { assert!(warning.contains("10000")); assert!(warning.contains("Context reduction needed")); } + +/// Test that SUMMARY_MIN_TOKENS floor prevents max_tokens=0 errors +/// This is the fix for the bug where context at 90%+ caused API errors +#[test] +fn test_summary_min_tokens_floor_prevents_zero() { + // The SUMMARY_MIN_TOKENS constant is 1000 + let summary_min_tokens = 1000u32; + + let mut context = ContextWindow::new(200000); + + // Simulate extremely full context - 98% used + context.used_tokens = 196000; + + let model_limit = context.total_tokens; + let current_usage = context.used_tokens; + let buffer = (model_limit / 40).clamp(1000, 10000); // 5000 + + // Without the floor, available would be 0 + 
let available_without_floor = model_limit + .saturating_sub(current_usage) + .saturating_sub(buffer); + assert_eq!(available_without_floor, 0, "Without floor, available should be 0"); + + // With the floor, available is at least SUMMARY_MIN_TOKENS + let available_with_floor = available_without_floor.max(summary_min_tokens); + assert_eq!(available_with_floor, 1000, "With floor, available should be 1000"); + + // Even after applying provider caps (which use .min()), the floor is preserved + let after_cap = available_with_floor.min(10_000); + assert_eq!(after_cap, 1000, "After cap, should still be 1000"); + + // And the final defense-in-depth .max() ensures it's never below the floor + let final_value = after_cap.max(summary_min_tokens); + assert!(final_value >= 1, "Final value must be >= 1 for API"); + assert_eq!(final_value, 1000, "Final value should be exactly 1000"); +} diff --git a/crates/g3-core/tests/test_session_continuation.rs b/crates/g3-core/tests/test_session_continuation.rs index cc2ff5a..80861c4 100644 --- a/crates/g3-core/tests/test_session_continuation.rs +++ b/crates/g3-core/tests/test_session_continuation.rs @@ -31,7 +31,7 @@ fn teardown_test_env(original_dir: std::path::PathBuf) { #[test] fn test_session_continuation_creation() { // This test doesn't need file system access - let continuation = SessionContinuation::new( + let continuation = SessionContinuation::new(false, None, "test_session_123".to_string(), Some("Task completed successfully".to_string()), "/path/to/session.json".to_string(), @@ -63,7 +63,7 @@ fn test_can_restore_full_context_threshold() { ]; for (percentage, expected) in test_cases { - let continuation = SessionContinuation::new( + let continuation = SessionContinuation::new(false, None, "test".to_string(), None, "path".to_string(), @@ -85,7 +85,7 @@ fn test_save_and_load_continuation() { let _lock = TEST_MUTEX.lock().unwrap(); let (temp_dir, original_dir) = setup_test_env(); - let original = SessionContinuation::new( + let 
original = SessionContinuation::new(false, None, "save_load_test".to_string(), Some("Test summary content".to_string()), "/logs/g3_session_save_load_test.json".to_string(), @@ -117,10 +117,111 @@ fn test_save_and_load_continuation() { teardown_test_env(original_dir); } +#[test] +fn test_find_incomplete_agent_session() { + use g3_core::session_continuation::find_incomplete_agent_session; + + let _lock = TEST_MUTEX.lock().unwrap(); + let (temp_dir, original_dir) = setup_test_env(); + + // Get the actual current directory (after set_current_dir in setup) + let current_working_dir = std::env::current_dir() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_default(); + + // Create an agent mode session with incomplete TODOs + let agent_session = SessionContinuation::new( + true, // is_agent_mode + Some("fowler".to_string()), // agent_name + "fowler_session_1".to_string(), + Some("Working on task".to_string()), + "/path/to/session.json".to_string(), + 50.0, + Some("- [x] Done\n- [ ] Not done yet".to_string()), // incomplete TODO + current_working_dir, // Use actual current dir + ); + save_continuation(&agent_session).expect("Failed to save agent session"); + + // Should find the incomplete session for "fowler" + let result = find_incomplete_agent_session("fowler").expect("Failed to search"); + assert!(result.is_some(), "Should find incomplete fowler session"); + let found = result.unwrap(); + assert_eq!(found.session_id, "fowler_session_1"); + assert_eq!(found.agent_name, Some("fowler".to_string())); + + // Should NOT find session for different agent + let result = find_incomplete_agent_session("pike").expect("Failed to search"); + assert!(result.is_none(), "Should not find session for pike"); + + teardown_test_env(original_dir); +} + +#[test] +fn test_find_incomplete_agent_session_ignores_complete_todos() { + use g3_core::session_continuation::find_incomplete_agent_session; + + let _lock = TEST_MUTEX.lock().unwrap(); + let (temp_dir, original_dir) = 
setup_test_env(); + + let current_working_dir = std::env::current_dir() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_default(); + + // Create an agent mode session with ALL TODOs complete + let complete_session = SessionContinuation::new( + true, + Some("fowler".to_string()), + "fowler_complete".to_string(), + Some("All done".to_string()), + "/path/to/session.json".to_string(), + 50.0, + Some("- [x] Task 1\n- [x] Task 2".to_string()), // all complete + current_working_dir, + ); + save_continuation(&complete_session).expect("Failed to save"); + + // Should NOT find session since all TODOs are complete + let result = find_incomplete_agent_session("fowler").expect("Failed to search"); + assert!(result.is_none(), "Should not find session with complete TODOs"); + + teardown_test_env(original_dir); +} + +#[test] +fn test_find_incomplete_agent_session_ignores_non_agent_mode() { + use g3_core::session_continuation::find_incomplete_agent_session; + + let _lock = TEST_MUTEX.lock().unwrap(); + let (temp_dir, original_dir) = setup_test_env(); + + let current_working_dir = std::env::current_dir() + .map(|p| p.to_string_lossy().to_string()) + .unwrap_or_default(); + + // Create a NON-agent mode session with incomplete TODOs + let non_agent_session = SessionContinuation::new( + false, // NOT agent mode + None, + "regular_session".to_string(), + None, + "/path/to/session.json".to_string(), + 50.0, + Some("- [ ] Incomplete task".to_string()), + current_working_dir, + ); + save_continuation(&non_agent_session).expect("Failed to save"); + + // Should NOT find session since it's not agent mode + let result = find_incomplete_agent_session("fowler").expect("Failed to search"); + assert!(result.is_none(), "Should not find non-agent-mode session"); + + teardown_test_env(original_dir); +} + #[test] fn test_load_continuation_when_none_exists() { let _lock = TEST_MUTEX.lock().unwrap(); - let (_temp_dir, original_dir) = setup_test_env(); + let (temp_dir, original_dir) = 
setup_test_env(); // No continuation should exist in a fresh temp directory let result = load_continuation().expect("load_continuation should not error"); @@ -132,10 +233,10 @@ fn test_load_continuation_when_none_exists() { #[test] fn test_clear_continuation() { let _lock = TEST_MUTEX.lock().unwrap(); - let (_temp_dir, original_dir) = setup_test_env(); + let (temp_dir, original_dir) = setup_test_env(); // Create and save a continuation - let continuation = SessionContinuation::new( + let continuation = SessionContinuation::new(false, None, "clear_test".to_string(), Some("Will be cleared".to_string()), "/path/to/session.json".to_string(), @@ -187,10 +288,10 @@ fn test_ensure_session_dir_creates_g3_directory() { #[test] fn test_has_valid_continuation_with_missing_session_log() { let _lock = TEST_MUTEX.lock().unwrap(); - let (_temp_dir, original_dir) = setup_test_env(); + let (temp_dir, original_dir) = setup_test_env(); // Create a continuation pointing to a non-existent session log - let continuation = SessionContinuation::new( + let continuation = SessionContinuation::new(false, None, "invalid_test".to_string(), Some("Summary".to_string()), "/nonexistent/path/session.json".to_string(), @@ -218,7 +319,7 @@ fn test_has_valid_continuation_with_existing_session_log() { fs::write(&session_log_path, "{}").expect("Failed to write session log"); // Create a continuation pointing to the existing session log - let continuation = SessionContinuation::new( + let continuation = SessionContinuation::new(false, None, "valid_test".to_string(), Some("Summary".to_string()), session_log_path.to_string_lossy().to_string(), @@ -237,9 +338,9 @@ fn test_has_valid_continuation_with_existing_session_log() { #[test] fn test_continuation_serialization_format() { let _lock = TEST_MUTEX.lock().unwrap(); - let (_temp_dir, original_dir) = setup_test_env(); + let (temp_dir, original_dir) = setup_test_env(); - let continuation = SessionContinuation::new( + let continuation = 
SessionContinuation::new(false, None, "format_test".to_string(), Some("Test summary".to_string()), "/path/to/session.json".to_string(), @@ -273,7 +374,7 @@ fn test_multiple_saves_update_symlink() { let (temp_dir, original_dir) = setup_test_env(); // Save first continuation - let first = SessionContinuation::new( + let first = SessionContinuation::new(false, None, "first_session".to_string(), Some("First summary".to_string()), "/path/first.json".to_string(), @@ -289,7 +390,7 @@ fn test_multiple_saves_update_symlink() { assert!(first_target.to_string_lossy().contains("first_session")); // Save second continuation (should update symlink) - let second = SessionContinuation::new( + let second = SessionContinuation::new(false, None, "second_session".to_string(), Some("Second summary".to_string()), "/path/second.json".to_string(), @@ -334,7 +435,7 @@ fn test_symlink_migration_from_old_directory() { .expect("Failed to write old latest.json"); // Save a new continuation - this should migrate the old directory to a symlink - let continuation = SessionContinuation::new( + let continuation = SessionContinuation::new(false, None, "new_session".to_string(), Some("New summary".to_string()), "/path/to/session.json".to_string(),