agent mode resumption

This commit is contained in:
Dhanji R. Prasanna
2026-01-03 14:50:08 +11:00
parent 016efc1db6
commit 595ad6ad21
14 changed files with 2584 additions and 19 deletions

View File

@@ -0,0 +1,54 @@
//! Tool executor trait and context for tool execution.
use anyhow::Result;
use std::sync::Arc;
use tokio::sync::RwLock;
use crate::background_process::BackgroundProcessManager;
use crate::paths::{ensure_session_dir, get_session_todo_path, get_todo_path};
use crate::ui_writer::UiWriter;
use crate::webdriver_session::WebDriverSession;
use crate::ToolCall;
use g3_config::Config;
/// Context passed to tool executors containing shared state.
///
/// Bundles borrowed references to the agent's configuration, UI writer and
/// shared handles so each tool function can take a single `ctx` argument.
/// All references live for `'a`; `pending_images` is the only field borrowed
/// mutably.
pub struct ToolContext<'a, W: UiWriter> {
/// Agent configuration; tools consult flags such as `macax.enabled` and
/// `agent.check_todo_staleness`.
pub config: &'a Config,
/// Sink used by tools to print status and tool-output lines.
pub ui_writer: &'a W,
/// Current session id; when present the TODO file is session-scoped.
pub session_id: Option<&'a str>,
/// Working directory handed to shell execution and used as the fallback
/// directory for background processes.
pub working_dir: Option<&'a str>,
/// Computer-control backend used for OCR and screenshots; tools report
/// "Computer control not enabled" when this is `None`.
pub computer_controller: Option<&'a Box<dyn g3_computer_control::ComputerController>>,
/// Shared WebDriver session handle.
/// NOTE(review): presumably consumed by the `webdriver` tools, which are not
/// visible here; confirm.
pub webdriver_session: &'a Arc<RwLock<Option<Arc<tokio::sync::Mutex<WebDriverSession>>>>>,
/// Shared handle to a spawned child process.
/// NOTE(review): presumably the WebDriver server process; confirm against the
/// `webdriver` tools.
pub webdriver_process: &'a Arc<RwLock<Option<tokio::process::Child>>>,
/// macOS Accessibility controller; `None` until it has been initialized.
pub macax_controller: &'a Arc<RwLock<Option<g3_computer_control::MacAxController>>>,
/// Manager used by the `background_process` tool to start named processes.
pub background_process_manager: &'a Arc<BackgroundProcessManager>,
/// In-memory copy of the TODO list, kept in sync with the TODO file by the
/// `todo_read` / `todo_write` tools.
pub todo_content: &'a Arc<RwLock<String>>,
/// Images queued by `read_image` to be attached to the next user message.
pub pending_images: &'a mut Vec<g3_providers::ImageContent>,
/// When true, `final_output` refuses to complete while unchecked TODO items
/// (`- [ ]`) remain.
pub is_autonomous: bool,
/// Requirements hash passed to the TODO staleness check in `todo_read`.
pub requirements_sha: Option<&'a str>,
}
impl<'a, W: UiWriter> ToolContext<'a, W> {
    /// Resolve the location of the TODO file.
    ///
    /// With a session id the session-scoped path is returned (after a
    /// best-effort attempt to create the session directory); without one the
    /// workspace-level path is returned.
    pub fn get_todo_path(&self) -> std::path::PathBuf {
        match self.session_id {
            Some(id) => {
                // Directory creation is best-effort: a failure here is ignored
                // and will surface later when the file itself is accessed.
                let _ = ensure_session_dir(id);
                get_session_todo_path(id)
            }
            None => get_todo_path(),
        }
    }
}
/// Trait for tool executors.
/// Each tool category implements this trait.
///
/// `execute` is an associated function (it takes no `self`), so implementors
/// act as stateless dispatchers: all state is reached through the
/// [`ToolContext`] argument.
pub trait ToolExecutor<W: UiWriter> {
/// Execute a tool call and return the result.
/// Returns None if this executor doesn't handle the given tool.
///
/// The returned future must be `Send` and may borrow from both `tool_call`
/// and `ctx` for the lifetime `'a`.
fn execute<'a>(
tool_call: &'a ToolCall,
ctx: &'a mut ToolContext<'_, W>,
) -> impl std::future::Future<Output = Option<Result<String>>> + Send + 'a
where
W: 'a;
}

View File

@@ -0,0 +1,510 @@
//! File operation tools: read_file, write_file, str_replace, read_image.
use anyhow::Result;
use tracing::debug;
use crate::ui_writer::UiWriter;
use crate::utils::apply_unified_diff_to_string;
use crate::ToolCall;
use super::executor::ToolContext;
/// Execute the `read_file` tool.
///
/// Reads the file named by the `file_path` argument (with `~` expanded) and
/// returns its contents, optionally restricted to the range given by the
/// `start` / `end` arguments. Paths ending in a known image suffix are not
/// read as text; they are handed to the computer controller for OCR instead.
///
/// `start` and `end` are byte offsets into the UTF-8 content (`end` is
/// exclusive). An offset that falls inside a multi-byte character is moved
/// forward to the next character boundary, so slicing never splits a
/// character.
///
/// Every failure (missing argument, OCR error, invalid range, I/O error) is
/// reported as an `Ok` string prefixed with `❌`; this function does not
/// return `Err`.
pub async fn execute_read_file<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    // Path suffixes (compared case-insensitively) that are routed to OCR.
    const IMAGE_SUFFIXES: [&str; 8] = [
        ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".tif", ".webp",
    ];

    debug!("Processing read_file tool call");
    let file_path = match tool_call.args.get("file_path").and_then(|v| v.as_str()) {
        Some(p) => p,
        None => return Ok("❌ Missing file_path argument".to_string()),
    };

    // Expand tilde (~) to home directory
    let expanded_path = shellexpand::tilde(file_path);
    let path_str: &str = expanded_path.as_ref();

    // Lowercase once instead of once per suffix.
    let lowered = path_str.to_lowercase();
    let is_image = IMAGE_SUFFIXES
        .iter()
        .any(|suffix| lowered.ends_with(*suffix));

    // Image files are never read as text: use OCR via the computer controller.
    if is_image {
        let Some(controller) = ctx.computer_controller else {
            return Ok("❌ Computer control not enabled. Cannot perform OCR on image files. Set computer_control.enabled = true in config.".to_string());
        };
        return match controller.extract_text_from_image(path_str).await {
            Ok(text) => Ok(format!("📄 Image file (OCR extracted):\n{}", text)),
            Err(e) => Ok(format!(
                "❌ Failed to extract text from image '{}': {}",
                path_str, e
            )),
        };
    }

    // Optional range arguments. Values that do not fit in `usize` saturate so
    // they are rejected by the length validation below instead of silently
    // wrapping on 32-bit targets.
    let offset_arg = |key: &str| {
        tool_call
            .args
            .get(key)
            .and_then(|v| v.as_u64())
            .map(|n| n.min(usize::MAX as u64) as usize)
    };
    let start_char = offset_arg("start");
    let end_char = offset_arg("end");
    debug!(
        "Reading file: {}, start={:?}, end={:?}",
        path_str, start_char, end_char
    );

    let content = match std::fs::read_to_string(path_str) {
        Ok(content) => content,
        Err(e) => return Ok(format!("❌ Failed to read file '{}': {}", path_str, e)),
    };

    let len = content.len();
    let start = start_char.unwrap_or(0);
    let end = end_char.unwrap_or(len);

    // Validation
    if start > len {
        return Ok(format!(
            "❌ Start position {} exceeds file length {}",
            start, len
        ));
    }
    if end > len {
        return Ok(format!(
            "❌ End position {} exceeds file length {}",
            end, len
        ));
    }
    if start > end {
        return Ok(format!(
            "❌ Start position {} is greater than end position {}",
            start, end
        ));
    }

    // Round each offset forward to the nearest char boundary. `len` is always
    // a boundary, so an offset inside the final multi-byte character resolves
    // to `len` rather than to an invalid slice index (which would panic).
    let next_boundary = |pos: usize| {
        (pos..=len)
            .find(|&i| content.is_char_boundary(i))
            .unwrap_or(len)
    };
    let start_boundary = next_boundary(start);
    let end_boundary = next_boundary(end);

    let partial_content = &content[start_boundary..end_boundary];
    let line_count = partial_content.lines().count();
    let total_lines = content.lines().count();

    // Format output with range info if partial
    if start_char.is_some() || end_char.is_some() {
        Ok(format!(
            "📄 File content (chars {}-{}, {} lines of {} total):\n{}",
            start_boundary, end_boundary, line_count, total_lines, partial_content
        ))
    } else {
        Ok(format!("📄 File content ({} lines):\n{}", line_count, content))
    }
}
/// Execute the `read_image` tool.
///
/// For every string in the `file_paths` array argument: expands `~`, reads
/// the file, determines its media type (magic bytes first, file extension as
/// fallback), prints an inline preview to the terminal and queues the
/// base64-encoded image on `ctx.pending_images`.
///
/// Returns a summary line with the number of images read, preceded by one
/// line per file that could not be processed.
pub async fn execute_read_image<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &mut ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing read_image tool call");

    // Collect the string entries of `file_paths`; non-string entries are skipped.
    let paths: Vec<String> = tool_call
        .args
        .get("file_paths")
        .and_then(|v| v.as_array())
        .map(|entries| {
            entries
                .iter()
                .filter_map(|entry| entry.as_str())
                .map(str::to_string)
                .collect()
        })
        .unwrap_or_default();
    if paths.is_empty() {
        return Ok("❌ Missing or empty file_paths argument".to_string());
    }

    let mut results: Vec<String> = Vec::new();
    let mut success_count = 0;

    // Print └─ and newline before images to break out of tool output box
    println!("└─\n");

    for path_str in &paths {
        // Expand tilde (~) to home directory
        let expanded_path = shellexpand::tilde(path_str);
        let path = std::path::Path::new(expanded_path.as_ref());

        if !path.exists() {
            results.push(format!("❌ Image file not found: {}", path_str));
            continue;
        }

        let bytes = match std::fs::read(path) {
            Ok(data) => data,
            Err(e) => {
                results.push(format!("❌ Failed to read '{}': {}", path_str, e));
                continue;
            }
        };

        // Prefer the file signature; fall back to the extension.
        let detected = g3_providers::ImageContent::media_type_from_bytes(&bytes).or_else(|| {
            let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
            g3_providers::ImageContent::media_type_from_extension(ext)
        });
        let Some(media_type) = detected else {
            results.push(format!(
                "{}: Unsupported or unrecognized image format",
                path_str
            ));
            continue;
        };

        // Human-readable dimensions and size for the info line.
        let dim_str = match get_image_dimensions(&bytes, media_type) {
            Some((w, h)) => format!("{}x{}", w, h),
            None => "unknown".to_string(),
        };
        let size_str = match bytes.len() {
            n if n >= 1024 * 1024 => format!("{:.1} MB", n as f64 / (1024.0 * 1024.0)),
            n if n >= 1024 => format!("{:.1} KB", n as f64 / 1024.0),
            n => format!("{} bytes", n),
        };

        // Output imgcat inline image to terminal (height constrained)
        print_imgcat(&bytes, path_str, &dim_str, media_type, &size_str, 5);

        // Store the image to be attached to the next user message
        use base64::Engine;
        let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes);
        ctx.pending_images
            .push(g3_providers::ImageContent::new(media_type, encoded));
        success_count += 1;
    }

    // Print ┌─ to resume tool output box
    print!("┌─\n");

    let summary = if success_count == paths.len() {
        format!("{} image(s) read.", success_count)
    } else {
        format!("{}/{} image(s) read.", success_count, paths.len())
    };

    // Per-file problems, if any, go before the summary.
    if results.is_empty() {
        return Ok(summary);
    }
    Ok(format!("{}\n{}", results.join("\n"), summary))
}
/// Execute the `write_file` tool.
///
/// Extracts a path and content from the arguments (several key spellings are
/// accepted, see `extract_path_and_content`), expands `~`, creates missing
/// parent directories and writes the content.
///
/// Returns a success message with the line count and the content's byte
/// length (reported as "characters"), or a `❌` message describing what went
/// wrong. When the arguments cannot be interpreted, the message lists the
/// keys that were actually supplied.
pub async fn execute_write_file<W: UiWriter>(
    tool_call: &ToolCall,
    _ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing write_file tool call");
    debug!("Raw tool_call.args: {:?}", tool_call.args);

    // Try multiple argument formats that different providers might use
    let (path_str, content_str) = extract_path_and_content(&tool_call.args);
    debug!(
        "Final extracted values: path_str={:?}, content_str_len={:?}",
        path_str,
        content_str.map(|c| c.len())
    );

    let (Some(raw_path), Some(content)) = (path_str, content_str) else {
        // Tell the caller which keys we actually saw to make the fix obvious.
        let available_keys = tool_call
            .args
            .as_object()
            .map(|obj| obj.keys().collect::<Vec<_>>())
            .unwrap_or_default();
        return Ok(format!(
            "❌ Missing file_path or content argument. Available keys: {:?}. Expected formats: {{\"file_path\": \"...\", \"content\": \"...\"}}, {{\"path\": \"...\", \"content\": \"...\"}}, {{\"filename\": \"...\", \"text\": \"...\"}}, or {{\"file\": \"...\", \"data\": \"...\"}}",
            available_keys
        ));
    };

    // Expand tilde (~) to home directory
    let expanded_path = shellexpand::tilde(raw_path);
    let target: &str = expanded_path.as_ref();
    debug!("Writing to file: {}", target);

    // Create parent directories if they don't exist
    if let Some(parent) = std::path::Path::new(target).parent() {
        if let Err(e) = std::fs::create_dir_all(parent) {
            return Ok(format!(
                "❌ Failed to create parent directories for '{}': {}",
                target, e
            ));
        }
    }

    if let Err(e) = std::fs::write(target, content) {
        return Ok(format!("❌ Failed to write to file '{}': {}", target, e));
    }

    let line_count = content.lines().count();
    let char_count = content.len();
    Ok(format!(
        "✅ Successfully wrote {} lines ({} characters)",
        line_count, char_count
    ))
}
/// Execute the `str_replace` tool.
///
/// Reads the file at `file_path` (with `~` expanded), applies the unified
/// diff given in `diff` via `apply_unified_diff_to_string`, optionally
/// limited to the `start`..`end` range (0-indexed, `end` exclusive), and
/// writes the result back to the same file.
///
/// All failures are returned as `Ok` strings; an error from the diff helper
/// is returned as its own display text without any prefix.
pub async fn execute_str_replace<W: UiWriter>(
    tool_call: &ToolCall,
    _ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing str_replace tool call");

    let Some(args_obj) = tool_call.args.as_object() else {
        return Ok("❌ Invalid arguments: expected object".to_string());
    };

    let Some(raw_path) = args_obj.get("file_path").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing or invalid file_path argument".to_string());
    };
    let file_path = shellexpand::tilde(raw_path).into_owned();

    let Some(diff) = args_obj.get("diff").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing or invalid diff argument".to_string());
    };

    // Optional start and end character positions (0-indexed, end is EXCLUSIVE)
    let position = |key: &str| {
        args_obj
            .get(key)
            .and_then(|v| v.as_u64())
            .map(|n| n as usize)
    };
    let start_char = position("start");
    let end_char = position("end");
    debug!(
        "str_replace: path={}, start={:?}, end={:?}",
        file_path, start_char, end_char
    );

    // Load the current contents.
    let file_content = match std::fs::read_to_string(&file_path) {
        Ok(text) => text,
        Err(e) => return Ok(format!("❌ Failed to read file '{}': {}", file_path, e)),
    };

    // Apply the diff in memory; nothing is written if this fails.
    let patched = match apply_unified_diff_to_string(&file_content, diff, start_char, end_char) {
        Ok(text) => text,
        Err(e) => return Ok(e.to_string()),
    };

    // Persist the patched contents.
    if let Err(e) = std::fs::write(&file_path, &patched) {
        return Ok(format!("❌ Failed to write to file '{}': {}", file_path, e));
    }
    Ok("✅ applied unified diff".to_string())
}
// Helper functions
/// Extract path and content from various argument formats.
fn extract_path_and_content(args: &serde_json::Value) -> (Option<&str>, Option<&str>) {
if let Some(args_obj) = args.as_object() {
// Format 1: Standard format with file_path and content
if let (Some(path_val), Some(content_val)) =
(args_obj.get("file_path"), args_obj.get("content"))
{
if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) {
return (Some(path), Some(content));
}
}
// Format 2: Anthropic-style with path and content
if let (Some(path_val), Some(content_val)) =
(args_obj.get("path"), args_obj.get("content"))
{
if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) {
return (Some(path), Some(content));
}
}
// Format 3: Alternative naming with filename and text
if let (Some(path_val), Some(content_val)) =
(args_obj.get("filename"), args_obj.get("text"))
{
if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) {
return (Some(path), Some(content));
}
}
// Format 4: Alternative naming with file and data
if let (Some(path_val), Some(content_val)) = (args_obj.get("file"), args_obj.get("data")) {
if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) {
return (Some(path), Some(content));
}
}
} else if let Some(args_array) = args.as_array() {
// Format 5: Args might be an array [path, content]
if args_array.len() >= 2 {
if let (Some(path), Some(content)) = (args_array[0].as_str(), args_array[1].as_str()) {
return (Some(path), Some(content));
}
}
}
(None, None)
}
/// Get image dimensions from raw bytes.
///
/// `media_type` selects the header layout to parse: `image/png`,
/// `image/jpeg`, `image/gif` or `image/webp`. Returns `Some((width, height))`
/// in pixels, or `None` when the media type is not one of those or the data
/// is too short / does not contain the expected header.
///
/// The container signature itself is not validated; callers are expected to
/// have determined `media_type` beforehand.
pub fn get_image_dimensions(bytes: &[u8], media_type: &str) -> Option<(u32, u32)> {
    match media_type {
        "image/png" => {
            // PNG: width at bytes 16-19, height at bytes 20-23 (big-endian)
            if bytes.len() < 24 {
                return None;
            }
            let width = u32::from_be_bytes([bytes[16], bytes[17], bytes[18], bytes[19]]);
            let height = u32::from_be_bytes([bytes[20], bytes[21], bytes[22], bytes[23]]);
            Some((width, height))
        }
        "image/jpeg" => jpeg_dimensions(bytes),
        "image/gif" => {
            // GIF: width at bytes 6-7, height at bytes 8-9 (little-endian)
            if bytes.len() < 10 {
                return None;
            }
            let width = u16::from_le_bytes([bytes[6], bytes[7]]) as u32;
            let height = u16::from_le_bytes([bytes[8], bytes[9]]) as u32;
            Some((width, height))
        }
        "image/webp" => webp_dimensions(bytes),
        _ => None,
    }
}

/// Walk the JPEG marker segments and read the size from the first frame header.
fn jpeg_dimensions(bytes: &[u8]) -> Option<(u32, u32)> {
    let mut i = 2; // Skip SOI (FF D8)
    // `i + 8` is the last byte read from a frame header, so it must be in range.
    while i + 8 < bytes.len() {
        if bytes[i] != 0xFF {
            i += 1;
            continue;
        }
        let marker = bytes[i + 1];
        match marker {
            // Fill byte: markers may be preceded by extra FF bytes, so the
            // real marker code follows.
            0xFF => i += 1,
            // Every SOFn frame header has the same layout
            // (length:2, precision:1, height:2, width:2). C4 (DHT), C8 (JPG)
            // and CC (DAC) share the numeric range but are not frame headers.
            0xC0..=0xCF if marker != 0xC4 && marker != 0xC8 && marker != 0xCC => {
                let height = u16::from_be_bytes([bytes[i + 5], bytes[i + 6]]) as u32;
                let width = u16::from_be_bytes([bytes[i + 7], bytes[i + 8]]) as u32;
                return Some((width, height));
            }
            // Stand-alone markers (TEM, RSTn, SOI, EOI) and byte stuffing
            // (FF 00) carry no length field.
            0x00 | 0x01 | 0xD0..=0xD9 => i += 2,
            // Any other segment: skip marker plus its big-endian length.
            _ => {
                let len = u16::from_be_bytes([bytes[i + 2], bytes[i + 3]]) as usize;
                i += 2 + len;
            }
        }
    }
    None
}

/// Read the size from the first chunk of a WebP file (lossy, lossless or extended).
fn webp_dimensions(bytes: &[u8]) -> Option<(u32, u32)> {
    if bytes.len() < 16 {
        return None;
    }
    // Bytes 12..16 hold the FourCC of the first chunk after the RIFF/WEBP header.
    let fourcc = &bytes[12..16];
    if fourcc == b"VP8 " && bytes.len() >= 30 {
        // Lossy: 14-bit width/height (upper 2 bits are scaling) at 26 and 28.
        let width = (u16::from_le_bytes([bytes[26], bytes[27]]) & 0x3FFF) as u32;
        let height = (u16::from_le_bytes([bytes[28], bytes[29]]) & 0x3FFF) as u32;
        Some((width, height))
    } else if fourcc == b"VP8L" && bytes.len() >= 25 && bytes[20] == 0x2F {
        // Lossless: after the 0x2F signature, 14 bits of (width - 1) followed
        // by 14 bits of (height - 1), packed little-endian.
        let bits = u32::from_le_bytes([bytes[21], bytes[22], bytes[23], bytes[24]]);
        Some(((bits & 0x3FFF) + 1, ((bits >> 14) & 0x3FFF) + 1))
    } else if fourcc == b"VP8X" && bytes.len() >= 30 {
        // Extended: 24-bit (canvas width - 1) at 24, (canvas height - 1) at 27.
        let width = u32::from_le_bytes([bytes[24], bytes[25], bytes[26], 0]) + 1;
        let height = u32::from_le_bytes([bytes[27], bytes[28], bytes[29], 0]) + 1;
        Some((width, height))
    } else {
        None
    }
}
/// Print image using iTerm2 imgcat protocol with info line.
///
/// Writes three things to stdout:
/// 1. the inline-image escape sequence carrying the base64-encoded `bytes`,
///    constrained to `max_height` rows;
/// 2. a dimmed info line `filename | dimensions | media_type | size`, where
///    `filename` is the last path component of `name`;
/// 3. a blank separator line.
///
/// `dimensions`, `media_type` and `size` are preformatted display strings.
pub fn print_imgcat(
    bytes: &[u8],
    name: &str,
    dimensions: &str,
    media_type: &str,
    size: &str,
    max_height: u32,
) {
    use base64::Engine;
    let encoded = base64::engine::general_purpose::STANDARD.encode(bytes);
    // The protocol requires the `name` value to be base64-encoded. Emitting the
    // raw path would also let a `;` or `:` in it terminate the argument list
    // early and corrupt the escape sequence.
    let encoded_name = base64::engine::general_purpose::STANDARD.encode(name);

    // Extract just the filename from the path
    let filename = std::path::Path::new(name)
        .file_name()
        .and_then(|f| f.to_str())
        .unwrap_or(name);

    // iTerm2 inline image protocol (single space prefix)
    print!(
        " \x1b]1337;File=inline=1;height={};name={}:{}\x07\n",
        max_height, encoded_name, encoded
    );
    // Print dimmed info line with filename only (no │ prefix)
    println!(
        " \x1b[2m{} | {} | {} | {}\x1b[0m",
        filename, dimensions, media_type, size
    );
    // Blank line before next image (no │ prefix)
    println!();
}

View File

@@ -0,0 +1,178 @@
//! macOS Accessibility API tools.
use anyhow::Result;
use tracing::debug;
use crate::ui_writer::UiWriter;
use crate::ToolCall;
use super::executor::ToolContext;
/// Execute the `macax_list_apps` tool.
///
/// Returns the names of the applications reported by the accessibility
/// controller, one per line. Takes no arguments. Reports a `❌` message when
/// the feature is disabled, the controller is not initialized, or the
/// listing fails.
pub async fn execute_macax_list_apps<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing macax_list_apps tool call");
    let _ = tool_call; // unused

    if !ctx.config.macax.enabled {
        return Ok(
            "❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
        );
    }

    let controller_guard = ctx.macax_controller.read().await;
    let Some(controller) = controller_guard.as_ref() else {
        return Ok("❌ macOS Accessibility controller not initialized.".to_string());
    };

    let apps = match controller.list_applications() {
        Ok(found) => found,
        Err(e) => return Ok(format!("❌ Failed to list applications: {}", e)),
    };
    let app_list: Vec<String> = apps.iter().map(|app| app.name.clone()).collect();
    Ok(format!("Running applications:\n{}", app_list.join("\n")))
}
/// Execute the `macax_get_frontmost_app` tool.
///
/// Returns the name of the frontmost application as reported by the
/// accessibility controller. Takes no arguments. Reports a `❌` message when
/// the feature is disabled, the controller is not initialized, or the query
/// fails.
pub async fn execute_macax_get_frontmost_app<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing macax_get_frontmost_app tool call");
    let _ = tool_call; // unused

    if !ctx.config.macax.enabled {
        return Ok(
            "❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
        );
    }

    let controller_guard = ctx.macax_controller.read().await;
    let Some(controller) = controller_guard.as_ref() else {
        return Ok("❌ macOS Accessibility controller not initialized.".to_string());
    };

    let message = match controller.get_frontmost_app() {
        Ok(app) => format!("Frontmost application: {}", app.name),
        Err(e) => format!("❌ Failed to get frontmost app: {}", e),
    };
    Ok(message)
}
/// Execute the `macax_activate_app` tool.
///
/// Asks the accessibility controller to activate the application named by
/// the required `app_name` argument. Reports a `❌` message when the feature
/// is disabled, the argument is missing, the controller is not initialized,
/// or activation fails.
pub async fn execute_macax_activate_app<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing macax_activate_app tool call");

    if !ctx.config.macax.enabled {
        return Ok(
            "❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
        );
    }

    let Some(app_name) = tool_call.args.get("app_name").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing app_name argument".to_string());
    };

    let controller_guard = ctx.macax_controller.read().await;
    let Some(controller) = controller_guard.as_ref() else {
        return Ok("❌ macOS Accessibility controller not initialized.".to_string());
    };

    if let Err(e) = controller.activate_app(app_name) {
        return Ok(format!("❌ Failed to activate app: {}", e));
    }
    Ok(format!("✅ Activated application: {}", app_name))
}
/// Execute the `macax_press_key` tool.
///
/// Sends the key named by `key` to the application named by `app_name`,
/// optionally combined with the string entries of the `modifiers` array
/// (non-string entries are ignored; a missing array means no modifiers).
/// The success message lists the modifiers joined with `+`.
pub async fn execute_macax_press_key<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing macax_press_key tool call");

    if !ctx.config.macax.enabled {
        return Ok(
            "❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
        );
    }

    let args = &tool_call.args;
    let Some(app_name) = args.get("app_name").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing app_name argument".to_string());
    };
    let Some(key) = args.get("key").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing key argument".to_string());
    };
    let modifiers_vec: Vec<&str> = match args.get("modifiers").and_then(|v| v.as_array()) {
        Some(entries) => entries.iter().filter_map(|m| m.as_str()).collect(),
        None => Vec::new(),
    };

    let controller_guard = ctx.macax_controller.read().await;
    let Some(controller) = controller_guard.as_ref() else {
        return Ok("❌ macOS Accessibility controller not initialized.".to_string());
    };

    // The controller takes the modifier list by value; keep our copy for the
    // success message.
    if let Err(e) = controller.press_key(app_name, key, modifiers_vec.clone()) {
        return Ok(format!("❌ Failed to press key: {}", e));
    }

    let modifier_str = if modifiers_vec.is_empty() {
        String::new()
    } else {
        format!(" with modifiers: {}", modifiers_vec.join("+"))
    };
    Ok(format!("✅ Pressed key: {}{}", key, modifier_str))
}
/// Execute the `macax_type_text` tool.
///
/// Types the string given in `text` into the application named by
/// `app_name` through the accessibility controller. Reports a `❌` message
/// when the feature is disabled, an argument is missing, the controller is
/// not initialized, or typing fails.
pub async fn execute_macax_type_text<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing macax_type_text tool call");

    if !ctx.config.macax.enabled {
        return Ok(
            "❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
        );
    }

    let args = &tool_call.args;
    let Some(app_name) = args.get("app_name").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing app_name argument".to_string());
    };
    let Some(text) = args.get("text").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing text argument".to_string());
    };

    let controller_guard = ctx.macax_controller.read().await;
    let Some(controller) = controller_guard.as_ref() else {
        return Ok("❌ macOS Accessibility controller not initialized.".to_string());
    };

    if let Err(e) = controller.type_text(app_name, text) {
        return Ok(format!("❌ Failed to type text: {}", e));
    }
    Ok(format!("✅ Typed text into {}", app_name))
}

View File

@@ -0,0 +1,225 @@
//! Miscellaneous tools: final_output, take_screenshot, extract_text, code_coverage, code_search.
use anyhow::Result;
use tracing::debug;
use crate::ui_writer::UiWriter;
use crate::ToolCall;
use super::executor::ToolContext;
/// Execute the `final_output` tool.
///
/// Returns the `summary` argument, or a default completion message when no
/// summary string was supplied. In autonomous mode the call is rejected with
/// an instruction to keep going while the in-memory TODO list still contains
/// an unchecked item (a line that, once trimmed, starts with `- [ ]`).
pub async fn execute_final_output<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing final_output tool call");

    let summary_str = tool_call.args.get("summary").and_then(|v| v.as_str());

    // In autonomous mode, check for incomplete TODO items before allowing completion
    if ctx.is_autonomous {
        let has_incomplete_todos = {
            // The read guard is released at the end of this block.
            let todo_content = ctx.todo_content.read().await;
            let found = todo_content
                .lines()
                .any(|line| line.trim().starts_with("- [ ]"));
            found
        };
        if has_incomplete_todos {
            return Ok(
                "There are still incomplete TODO items. Please continue until \
                 *ALL* TODO items in *ALL* phases are marked complete, and \
                 *ONLY* then call `final_output`."
                    .to_string(),
            );
        }
    }

    // Note: Session continuation saving is handled by the caller (Agent)
    Ok(summary_str
        .map(str::to_string)
        .unwrap_or_else(|| "✅ Turn completed".to_string()))
}
/// Execute the `take_screenshot` tool.
///
/// Captures the window identified by the required `window_id` argument and
/// saves it to the required `path`. An optional `region` object (`x`, `y`,
/// `width`, `height`; missing or non-integer members default to 0) restricts
/// the capture area.
///
/// # Errors
///
/// Unlike most tools, a missing `path` or `window_id` is returned as `Err`.
/// A disabled controller or a failed capture is reported as an `Ok` string
/// prefixed with `❌`.
pub async fn execute_take_screenshot<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing take_screenshot tool call");

    let Some(controller) = ctx.computer_controller else {
        return Ok(
            "❌ Computer control not enabled. Set computer_control.enabled = true in config."
                .to_string(),
        );
    };

    let args = &tool_call.args;
    let path = match args.get("path").and_then(|v| v.as_str()) {
        Some(p) => p,
        None => return Err(anyhow::anyhow!("Missing path argument")),
    };

    // Extract window_id (app name) - REQUIRED
    let window_id = match args.get("window_id").and_then(|v| v.as_str()) {
        Some(id) => id,
        None => {
            return Err(anyhow::anyhow!(
                "Missing window_id argument. You must specify which window to capture \
                 (e.g., 'Safari', 'Terminal', 'Google Chrome')."
            ))
        }
    };

    // Extract region if provided
    let region = args
        .get("region")
        .and_then(|v| v.as_object())
        .map(|region_obj| {
            let coord =
                |key: &str| region_obj.get(key).and_then(|v| v.as_i64()).unwrap_or(0) as i32;
            g3_computer_control::types::Rect {
                x: coord("x"),
                y: coord("y"),
                width: coord("width"),
                height: coord("height"),
            }
        });

    if let Err(e) = controller
        .take_screenshot(path, region, Some(window_id))
        .await
    {
        return Ok(format!("❌ Failed to take screenshot: {}", e));
    }

    // Report the location: absolute paths as given, relative ones under
    // $TMPDIR, else $HOME/tmp, else /tmp.
    let actual_path = if path.starts_with('/') {
        path.to_string()
    } else {
        let temp_dir = match std::env::var("TMPDIR") {
            Ok(dir) => dir,
            Err(_) => match std::env::var("HOME") {
                Ok(home) => format!("{}/tmp", home),
                Err(_) => "/tmp".to_string(),
            },
        };
        format!("{}/{}", temp_dir.trim_end_matches('/'), path)
    };

    Ok(format!(
        "✅ Screenshot of {} saved to: {}",
        window_id, actual_path
    ))
}
/// Execute the `extract_text` tool.
///
/// Runs the computer controller's image text extraction on the file given
/// by the required `path` argument and returns the extracted text.
///
/// # Errors
///
/// A missing `path` is returned as `Err`. A disabled controller or a failed
/// extraction is reported as an `Ok` string prefixed with `❌`.
pub async fn execute_extract_text<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing extract_text tool call");

    let Some(controller) = ctx.computer_controller else {
        return Ok(
            "❌ Computer control not enabled. Set computer_control.enabled = true in config."
                .to_string(),
        );
    };

    let path = match tool_call.args.get("path").and_then(|v| v.as_str()) {
        Some(p) => p,
        None => return Err(anyhow::anyhow!("Missing path argument")),
    };

    let message = match controller.extract_text_from_image(path).await {
        Ok(text) => format!("✅ Extracted text:\n{}", text),
        Err(e) => format!("❌ Failed to extract text: {}", e),
    };
    Ok(message)
}
/// Execute the `code_coverage` tool.
///
/// Makes sure the coverage tooling is installed, then runs
/// `cargo llvm-cov --workspace` in the process's current directory and
/// returns its stdout as the coverage summary (with stderr appended under a
/// "Warnings" heading when non-empty). Takes no arguments.
///
/// # Errors
///
/// Returns `Err` when the current directory cannot be determined or the
/// `cargo` process cannot be run. Installation failures and a non-zero exit
/// status are reported as `Ok` strings prefixed with `❌`.
pub async fn execute_code_coverage<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing code_coverage tool call");
    let _ = tool_call; // unused

    ctx.ui_writer
        .print_context_status("🔍 Generating code coverage report...");

    // Ensure coverage tools are installed; only announce a fresh install.
    match g3_execution::ensure_coverage_tools_installed() {
        Ok(true) => {}
        Ok(false) => ctx
            .ui_writer
            .print_context_status("✅ Coverage tools installed successfully"),
        Err(e) => return Ok(format!("❌ Failed to install coverage tools: {}", e)),
    }

    // Run cargo llvm-cov --workspace
    let cwd = std::env::current_dir()?;
    let output = std::process::Command::new("cargo")
        .args(["llvm-cov", "--workspace"])
        .current_dir(cwd)
        .output()?;

    let stderr = String::from_utf8_lossy(&output.stderr);
    if !output.status.success() {
        return Ok(format!("❌ Failed to generate coverage report:\n{}", stderr));
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let mut report = String::from("✅ Code coverage report generated successfully\n\n");
    report.push_str("## Coverage Summary\n");
    report.push_str(&stdout);
    if !stderr.is_empty() {
        report.push_str("\n## Warnings\n");
        report.push_str(&stderr);
    }
    Ok(report)
}
/// Execute the `code_search` tool.
///
/// Deserializes the tool arguments into a `CodeSearchRequest`, runs the
/// search, and returns the response pretty-printed as JSON. Invalid
/// arguments, a failed search and a serialization failure are each reported
/// as an `Ok` string prefixed with `❌`.
pub async fn execute_code_search<W: UiWriter>(
    tool_call: &ToolCall,
    _ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing code_search tool call");

    // Parse the request
    let parsed =
        serde_json::from_value::<crate::code_search::CodeSearchRequest>(tool_call.args.clone());
    let request = match parsed {
        Ok(req) => req,
        Err(e) => return Ok(format!("❌ Invalid code_search arguments: {}", e)),
    };

    // Execute the code search
    let response = match crate::code_search::execute_code_search(request).await {
        Ok(resp) => resp,
        Err(e) => return Ok(format!("❌ Code search failed: {}", e)),
    };

    // Serialize the response to JSON
    Ok(match serde_json::to_string_pretty(&response) {
        Ok(json_output) => format!("✅ Code search completed\n{}", json_output),
        Err(e) => format!("❌ Failed to serialize response: {}", e),
    })
}

View File

@@ -0,0 +1,22 @@
//! Tool execution module for G3 agent.
//!
//! This module contains all tool implementations that the agent can execute.
//! Tools are organized by category:
//! - `shell` - Shell command execution and background processes
//! - `file_ops` - File reading, writing, and editing
//! - `todo` - TODO list management
//! - `webdriver` - Browser automation via WebDriver
//! - `macax` - macOS Accessibility API tools
//! - `vision` - Vision-based text finding and clicking
//! - `misc` - Other tools (screenshots, code search, etc.)
pub mod executor;
pub mod file_ops;
pub mod macax;
pub mod misc;
pub mod shell;
pub mod todo;
pub mod vision;
pub mod webdriver;
pub use executor::ToolExecutor;

View File

@@ -0,0 +1,115 @@
//! Shell command execution tools.
use anyhow::Result;
use tracing::debug;
use crate::ui_writer::UiWriter;
use crate::utils::shell_escape_command;
use crate::ToolCall;
use super::executor::ToolContext;
/// Execute the `shell` tool.
///
/// Escapes the required `command` argument with `shell_escape_command` and
/// runs it through `CodeExecutor::execute_bash_streaming_in_dir` in the
/// context's working directory, forwarding each output line to the UI writer
/// as it arrives.
///
/// On success returns the trimmed stdout (or a generic success message when
/// stdout is empty); on failure returns the trimmed stderr prefixed with `❌`.
pub async fn execute_shell<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    /// Adapter that relays streamed output lines to the UI writer.
    struct LineForwarder<'a, W: UiWriter> {
        ui_writer: &'a W,
    }

    impl<'a, W: UiWriter> g3_execution::OutputReceiver for LineForwarder<'a, W> {
        fn on_output_line(&self, line: &str) {
            self.ui_writer.update_tool_output_line(line);
        }
    }

    debug!("Processing shell tool call");

    let Some(command) = tool_call.args.get("command").and_then(|v| v.as_str()) else {
        debug!("No command parameter found in args: {:?}", tool_call.args);
        return Ok("❌ Missing command argument".to_string());
    };
    debug!("Command string: {}", command);

    let escaped_command = shell_escape_command(command);
    let executor = g3_execution::CodeExecutor::new();
    let receiver = LineForwarder {
        ui_writer: ctx.ui_writer,
    };

    debug!(
        "ABOUT TO CALL execute_bash_streaming_in_dir: escaped_command='{}', working_dir={:?}",
        escaped_command, ctx.working_dir
    );

    let outcome = executor
        .execute_bash_streaming_in_dir(&escaped_command, &receiver, ctx.working_dir)
        .await;
    let result = match outcome {
        Ok(res) => res,
        Err(e) => return Ok(format!("❌ Execution error: {}", e)),
    };

    if !result.success {
        return Ok(format!("❌ Command failed: {}", result.stderr.trim()));
    }
    if result.stdout.is_empty() {
        Ok("✅ Command executed successfully".to_string())
    } else {
        Ok(result.stdout.trim().to_string())
    }
}
/// Execute the `background_process` tool.
///
/// Starts `command` as a background process registered under `name` via the
/// background process manager. The working directory is, in order of
/// preference: the `working_dir` argument (with `~` expanded), the context's
/// working directory, or the process's current directory.
///
/// On success returns the PID, log file and working directory together with
/// shell commands for inspecting and stopping the process.
pub async fn execute_background_process<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing background_process tool call");

    let args = &tool_call.args;
    let Some(name) = args.get("name").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing 'name' argument".to_string());
    };
    let Some(command) = args.get("command").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing 'command' argument".to_string());
    };

    // Use provided working_dir, or fall back to context working_dir, or current dir
    let requested_dir = args.get("working_dir").and_then(|v| v.as_str());
    let work_dir = match (requested_dir, ctx.working_dir) {
        (Some(dir), _) => std::path::PathBuf::from(shellexpand::tilde(dir).as_ref()),
        (None, Some(dir)) => std::path::PathBuf::from(dir),
        (None, None) => std::env::current_dir().unwrap_or_default(),
    };

    let info = match ctx.background_process_manager.start(name, command, &work_dir) {
        Ok(started) => started,
        Err(e) => return Ok(format!("❌ Failed to start background process: {}", e)),
    };

    let log_file = info.log_file.display();
    Ok(format!(
        "✅ Background process '{}' started\n\n\
         **PID:** {}\n\
         **Log file:** {}\n\
         **Working dir:** {}\n\n\
         To interact with this process, use the shell tool:\n\
         - View logs: `tail -100 {}`\n\
         - Follow logs: `tail -f {}` (blocks until Ctrl+C)\n\
         - Check status: `ps -p {}`\n\
         - Stop process: `kill {}`",
        info.name,
        info.pid,
        log_file,
        info.working_dir.display(),
        log_file,
        log_file,
        info.pid,
        info.pid
    ))
}

View File

@@ -0,0 +1,195 @@
//! TODO list management tools.
use anyhow::Result;
use std::io::Write;
use tracing::debug;
use crate::ui_writer::UiWriter;
use crate::ToolCall;
use super::executor::ToolContext;
/// Execute the `todo_read` tool.
///
/// Loads the TODO file at `ctx.get_todo_path()`, mirrors its content into
/// the in-memory TODO state, echoes each line to the UI and returns the
/// list. A missing file clears the in-memory state and is reported as an
/// empty list. When staleness checking is enabled and a requirements SHA is
/// available, a message produced by `check_todo_staleness` is returned in
/// place of the list. Takes no arguments.
pub async fn execute_todo_read<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &mut ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing todo_read tool call");
    let _ = tool_call; // unused but kept for consistency

    let todo_path = ctx.get_todo_path();
    if !todo_path.exists() {
        // Also update in-memory content to stay in sync
        let mut in_memory = ctx.todo_content.write().await;
        *in_memory = String::new();
        return Ok("📝 TODO list is empty (no todo.g3.md file found)".to_string());
    }

    let content = match std::fs::read_to_string(&todo_path) {
        Ok(text) => text,
        Err(e) => return Ok(format!("❌ Failed to read TODO.md: {}", e)),
    };

    // Update in-memory content to stay in sync
    let mut in_memory = ctx.todo_content.write().await;
    *in_memory = content.clone();

    // Check for staleness if enabled and we have a requirements SHA
    if let (true, Some(req_sha)) = (
        ctx.config.agent.check_todo_staleness,
        ctx.requirements_sha,
    ) {
        if let Some(staleness_result) = check_todo_staleness(&content, req_sha, ctx.ui_writer) {
            return Ok(staleness_result);
        }
    }

    if content.trim().is_empty() {
        return Ok("📝 TODO list is empty".to_string());
    }

    content
        .lines()
        .for_each(|line| ctx.ui_writer.print_tool_output_line(line));
    Ok(format!("📝 TODO list:\n{}", content))
}
/// Execute the `todo_write` tool.
///
/// Takes the `content` argument and persists it to the TODO file, keeping the
/// shared in-memory copy in sync.
///
/// * The size limit comes from `G3_TODO_MAX_CHARS` (default 50 000 characters);
///   a limit of `0` disables the check.
/// * When the list has at least one checked item and no unchecked `- [ ]`
///   item, an existing TODO file is removed instead of rewritten. This is
///   skipped when `G3_TODO_PATH` is set (planner mode), so the file survives
///   for the later rename to `completed_todo_*.md`.
///
/// # Returns
/// Always `Ok`; problems are reported as a `❌ ...` message in the string.
pub async fn execute_todo_write<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &mut ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing todo_write tool call");

    let Some(content_str) = tool_call.args.get("content").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing content argument".to_string());
    };

    // Enforce the (configurable) size limit, counted in characters.
    let char_count = content_str.chars().count();
    let max_chars = match std::env::var("G3_TODO_MAX_CHARS") {
        Ok(raw) => raw.parse::<usize>().unwrap_or(50_000),
        Err(_) => 50_000,
    };
    if max_chars > 0 && char_count > max_chars {
        return Ok(format!(
            "❌ TODO list too large: {} chars (max: {})",
            char_count, max_chars
        ));
    }

    // "Completed" means: no unchecked box left and at least one checked box.
    let nothing_pending = !content_str
        .lines()
        .any(|line| line.trim().starts_with("- [ ]"));
    let has_checked_item = content_str.contains("- [x]") || content_str.contains("- [X]");
    let in_planner_mode = std::env::var("G3_TODO_PATH").is_ok();
    let todo_path = ctx.get_todo_path();

    if !in_planner_mode && nothing_pending && has_checked_item && todo_path.exists() {
        if let Err(e) = std::fs::remove_file(&todo_path) {
            return Ok(format!("❌ Failed to remove todo.g3.md: {}", e));
        }

        let mut todo = ctx.todo_content.write().await;
        *todo = String::new();

        // Show the final completed TODOs one last time.
        let mut result =
            String::from("✅ All TODOs completed! Removed todo.g3.md\n\nFinal status:\n");
        for line in content_str.lines() {
            ctx.ui_writer.print_tool_output_line(line);
            result.push_str(line);
            result.push('\n');
        }
        return Ok(result);
    }

    if let Err(e) = std::fs::write(&todo_path, content_str) {
        return Ok(format!("❌ Failed to write todo.g3.md: {}", e));
    }

    // Keep the in-memory copy in sync with what is now on disk.
    let mut todo = ctx.todo_content.write().await;
    *todo = content_str.to_string();

    // Echo the list inside the tool frame.
    for line in content_str.lines() {
        ctx.ui_writer.print_tool_output_line(line);
    }

    Ok(format!(
        "✅ TODO list updated ({} chars) and saved to todo.g3.md:\n{}",
        char_count, content_str
    ))
}
/// Detect a TODO list that was generated from a different requirements file.
///
/// The first line of `content` may carry a header of the form
/// `{{Based on the requirements file with SHA256: <sha>}}`. When that SHA
/// differs from `req_sha`, a warning is shown, the terminal bell is rung and
/// the user is asked how to proceed.
///
/// # Returns
/// * `Some(message)` when the user chose to mark the list as stale; the
///   message is meant to be returned as the tool result.
/// * `None` when there is no header, the SHAs match, or the user chose to
///   ignore the warning.
///
/// Choosing "Quit Application" terminates the process via
/// `std::process::exit(0)` and never returns.
fn check_todo_staleness<W: UiWriter>(
    content: &str,
    req_sha: &str,
    ui_writer: &W,
) -> Option<String> {
    const HEADER_PREFIX: &str = "{{Based on the requirements file with SHA256:";

    // Only the very first line can carry the SHA header.
    let header = content.lines().next()?;
    if !header.starts_with(HEADER_PREFIX) {
        return None;
    }

    // Text between the first "SHA256:" marker and the closing "}}".
    let todo_sha = header
        .split("SHA256:")
        .nth(1)?
        .trim()
        .trim_end_matches("}}")
        .trim();
    if todo_sha == req_sha {
        return None;
    }

    ui_writer.print_context_status(&format!(
        "⚠️ TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA: {}",
        req_sha, todo_sha
    ));

    // Ring the terminal bell six times to get the user's attention.
    print!("{}", "\x07".repeat(6));
    let _ = std::io::stdout().flush();

    let options = [
        "Ignore and Continue",
        "Mark as Stale",
        "Quit Application",
    ];
    let choice = ui_writer.prompt_user_choice(
        "Requirements have changed! What would you like to do?",
        &options,
    );

    match choice {
        // Mark as Stale
        1 => Some("⚠️ TODO list is stale (requirements changed). Please regenerate the TODO list to match the new requirements.".to_string()),
        // Quit Application
        2 => {
            ui_writer.print_context_status("❌ Quitting application as requested.");
            std::process::exit(0)
        }
        // Ignore and Continue
        0 => {
            ui_writer.print_context_status("⚠️ Ignoring staleness warning.");
            None
        }
        _ => None,
    }
}

View File

@@ -0,0 +1,275 @@
//! Vision-based tools: vision_find_text, vision_click_text, vision_click_near_text, extract_text_with_boxes.
use anyhow::Result;
use tracing::debug;
use crate::ui_writer::UiWriter;
use crate::ToolCall;
use super::executor::ToolContext;
/// Execute the `vision_find_text` tool.
///
/// Looks up `text` inside the application `app_name` through the configured
/// computer controller and reports the bounding box and confidence of the
/// match.
///
/// # Errors
/// Returns `Err` when the `app_name` or `text` argument is missing. All other
/// failures (controller disabled, text not found, lookup error) are reported
/// as a `❌ ...` message inside `Ok`.
pub async fn execute_vision_find_text<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing vision_find_text tool call");

    let Some(controller) = ctx.computer_controller else {
        return Ok(
            "❌ Computer control not enabled. Set computer_control.enabled = true in config."
                .to_string(),
        );
    };

    let Some(app_name) = tool_call.args.get("app_name").and_then(|v| v.as_str()) else {
        return Err(anyhow::anyhow!("Missing app_name parameter"));
    };
    let Some(text) = tool_call.args.get("text").and_then(|v| v.as_str()) else {
        return Err(anyhow::anyhow!("Missing text parameter"));
    };

    let found = match controller.find_text_in_app(app_name, text).await {
        Ok(found) => found,
        Err(e) => return Ok(format!("❌ Error finding text: {}", e)),
    };

    let message = match found {
        Some(location) => format!(
            "✅ Found '{}' in {} at position ({}, {}) with size {}x{} (confidence: {:.0}%)",
            location.text,
            app_name,
            location.x,
            location.y,
            location.width,
            location.height,
            location.confidence * 100.0
        ),
        None => format!("❌ Could not find '{}' in {}", text, app_name),
    };
    Ok(message)
}
/// Execute the `vision_click_text` tool.
///
/// Finds `text` inside the application `app_name` and clicks on it.
///
/// Coordinates returned by `find_text_in_app` are treated as NSScreen space
/// (Y = 0 at the bottom, increasing upward): `x` is the left edge and `y` the
/// top edge of the bounding box, with `width`/`height` already in screen
/// units. The click lands on the box's *right edge* horizontally (a
/// deliberate workaround: the OCR bounding box appears to be offset) and on
/// its vertical midpoint.
///
/// # Errors
/// Returns `Err` when the `app_name` or `text` argument is missing. Every
/// other failure is reported as a `❌ ...` message inside `Ok`.
pub async fn execute_vision_click_text<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing vision_click_text tool call");

    let Some(controller) = ctx.computer_controller else {
        return Ok(
            "❌ Computer control not enabled. Set computer_control.enabled = true in config."
                .to_string(),
        );
    };

    let Some(app_name) = tool_call.args.get("app_name").and_then(|v| v.as_str()) else {
        return Err(anyhow::anyhow!("Missing app_name parameter"));
    };
    let Some(text) = tool_call.args.get("text").and_then(|v| v.as_str()) else {
        return Err(anyhow::anyhow!("Missing text parameter"));
    };

    let location = match controller.find_text_in_app(app_name, text).await {
        Ok(Some(location)) => location,
        Ok(None) => return Ok(format!("❌ Could not find '{}' in {}", text, app_name)),
        Err(e) => return Ok(format!("❌ Error finding text: {}", e)),
    };

    // A degenerate box gives no meaningful click target.
    if location.width == 0 || location.height == 0 {
        return Ok(format!(
            "❌ Invalid bounding box dimensions: width={}, height={}",
            location.width, location.height
        ));
    }

    debug!(
        "[vision_click_text] Location from find_text_in_app: x={}, y={}, width={}, height={}, text='{}'",
        location.x, location.y, location.width, location.height, location.text
    );

    // X: left edge + full width = right edge (see the doc comment).
    // Y: top edge minus half the height, because Y grows upward.
    let click_x = location.x + location.width;
    let half_height = location.height / 2;
    let click_y = location.y - half_height;

    debug!(
        "[vision_click_text] Click position calculation: x={} + {} = {} (right edge), y={} - {} = {}",
        location.x, location.width, click_x, location.y, half_height, click_y
    );

    if let Err(e) = controller.click_at(click_x, click_y, Some(app_name)) {
        return Ok(format!("❌ Failed to click: {}", e));
    }
    Ok(format!(
        "✅ Clicked on '{}' in {} at ({}, {})",
        text, app_name, click_x, click_y
    ))
}
/// Execute the `vision_click_near_text` tool.
///
/// Finds `text` inside the application `app_name` and clicks at an offset
/// from its bounding box.
///
/// # Arguments (read from `tool_call.args`)
/// * `app_name` (required) - application to search in.
/// * `text` (required) - text to locate.
/// * `direction` (optional, default `"right"`) - one of `"right"`, `"left"`,
///   `"above"`, `"below"`. Any other value is positioned like `"right"`, but
///   is echoed unchanged in the result message.
/// * `distance` (optional, default `50`) - offset from the box; must fit in
///   an `i32`.
///
/// Coordinates are treated as NSScreen space (Y grows upward): `location.x`
/// is the left edge and `location.y` the top edge of the box.
///
/// # Errors
/// Returns `Err` when `app_name` or `text` is missing. Every other failure,
/// including an out-of-range `distance`, is reported as a `❌ ...` message
/// inside `Ok`.
pub async fn execute_vision_click_near_text<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing vision_click_near_text tool call");

    let controller = match ctx.computer_controller {
        Some(c) => c,
        None => {
            return Ok(
                "❌ Computer control not enabled. Set computer_control.enabled = true in config."
                    .to_string(),
            )
        }
    };

    let app_name = tool_call
        .args
        .get("app_name")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?;
    let text = tool_call
        .args
        .get("text")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
    let direction = tool_call
        .args
        .get("direction")
        .and_then(|v| v.as_str())
        .unwrap_or("right");

    // Reject values that do not fit in an i32 instead of silently wrapping
    // them with an `as` cast (which could turn a large offset negative).
    let distance = match tool_call.args.get("distance").and_then(|v| v.as_i64()) {
        Some(raw) => match i32::try_from(raw) {
            Ok(d) => d,
            Err(_) => {
                return Ok(format!(
                    "❌ Invalid distance {}: value is out of range",
                    raw
                ))
            }
        },
        None => 50,
    };

    match controller.find_text_in_app(app_name, text).await {
        Ok(Some(location)) => {
            // Saturating arithmetic keeps extreme offsets from overflowing.
            let mid_x = location.x.saturating_add(location.width / 2);
            let mid_y = location.y.saturating_sub(location.height / 2);

            let (click_x, click_y) = match direction {
                "below" => (
                    mid_x,
                    location
                        .y
                        .saturating_sub(location.height)
                        .saturating_sub(distance),
                ),
                "left" => (location.x.saturating_sub(distance), mid_y),
                "above" => (mid_x, location.y.saturating_add(distance)),
                // "right" and any unrecognised direction.
                _ => (
                    location
                        .x
                        .saturating_add(location.width)
                        .saturating_add(distance),
                    mid_y,
                ),
            };

            debug!(
                "[vision_click_near_text] Clicking {} of text at ({}, {})",
                direction, click_x, click_y
            );

            match controller.click_at(click_x, click_y, Some(app_name)) {
                Ok(_) => Ok(format!(
                    "✅ Clicked {} of '{}' in {} at ({}, {})",
                    direction, text, app_name, click_x, click_y
                )),
                Err(e) => Ok(format!("❌ Failed to click: {}", e)),
            }
        }
        Ok(None) => Ok(format!("❌ Could not find '{}' in {}", text, app_name)),
        Err(e) => Ok(format!("❌ Error finding text: {}", e)),
    }
}
/// Execute the `extract_text_with_boxes` tool.
///
/// Runs text extraction with bounding boxes on an image and returns the
/// result as pretty-printed JSON.
///
/// # Arguments (read from `tool_call.args`)
/// * `path` (required) - image file to analyse.
/// * `app_name` (optional) - when given, a fresh screenshot of that
///   application is taken into a temporary file and analysed instead of
///   `path`. The temporary file is removed afterwards whether or not the
///   extraction succeeds.
///
/// # Errors
/// Returns `Err` when `path` is missing. Every other failure is reported as a
/// `❌ ...` message inside `Ok`.
pub async fn execute_extract_text_with_boxes<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing extract_text_with_boxes tool call");

    if !ctx.config.macax.enabled {
        return Ok(
            "❌ extract_text_with_boxes requires --macax flag to be enabled".to_string(),
        );
    }

    let controller = match ctx.computer_controller {
        Some(c) => c,
        None => {
            return Ok(
                "❌ Computer control not enabled. Set computer_control.enabled = true in config."
                    .to_string(),
            )
        }
    };

    let path = tool_call
        .args
        .get("path")
        .and_then(|v| v.as_str())
        .ok_or_else(|| anyhow::anyhow!("Missing path parameter"))?;

    // Optional: take a screenshot of the app first. Tracking the temp file
    // explicitly (instead of comparing path strings) makes the cleanup below
    // unambiguous.
    let temp_screenshot = match tool_call.args.get("app_name").and_then(|v| v.as_str()) {
        Some(app_name) => {
            let temp_path = format!("/tmp/g3_extract_boxes_{}.png", uuid::Uuid::new_v4());
            if let Err(e) = controller
                .take_screenshot(&temp_path, None, Some(app_name))
                .await
            {
                // Best effort: a failed capture may still have left a file.
                let _ = std::fs::remove_file(&temp_path);
                return Ok(format!("❌ Failed to take screenshot: {}", e));
            }
            Some(temp_path)
        }
        None => None,
    };
    let image_path = temp_screenshot.as_deref().unwrap_or(path);

    // Extract text with locations.
    let extraction = controller.extract_text_with_locations(image_path).await;

    // Clean up the temp file on success AND on failure, so a failing
    // extraction no longer leaks screenshots into /tmp.
    if let Some(temp_path) = &temp_screenshot {
        let _ = std::fs::remove_file(temp_path);
    }

    let locations = match extraction {
        Ok(locations) => locations,
        Err(e) => return Ok(format!("❌ Failed to extract text: {}", e)),
    };

    // Return as JSON.
    match serde_json::to_string_pretty(&locations) {
        Ok(json) => Ok(format!(
            "✅ Extracted {} text elements:\n{}",
            locations.len(),
            json
        )),
        Err(e) => Ok(format!("❌ Failed to serialize results: {}", e)),
    }
}

View File

@@ -0,0 +1,678 @@
//! WebDriver browser automation tools.
use anyhow::Result;
use g3_computer_control::WebDriverController;
use tracing::{debug, warn};
use crate::ui_writer::UiWriter;
use crate::webdriver_session::WebDriverSession;
use crate::ToolCall;
use super::executor::ToolContext;
/// Execute the `webdriver_start` tool.
///
/// Starts a WebDriver session for the browser selected in
/// `config.webdriver.browser`, unless WebDriver is disabled or a session is
/// already stored in the context.
///
/// # Returns
/// Always `Ok` with a status message (`✅`/`❌`) describing the outcome.
pub async fn execute_webdriver_start<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    use g3_config::WebDriverBrowser;

    debug!("Processing webdriver_start tool call");
    let _ = tool_call; // unused

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard is a temporary here, so the lock is released before any
    // driver is launched.
    let already_active = ctx.webdriver_session.read().await.is_some();
    if already_active {
        return Ok("✅ WebDriver session already active".to_string());
    }

    // Pick the launcher matching the configured browser.
    match &ctx.config.webdriver.browser {
        WebDriverBrowser::ChromeHeadless => start_chrome_driver(ctx).await,
        WebDriverBrowser::Safari => start_safari_driver(ctx).await,
    }
}
/// Launch `safaridriver` on the configured port and connect to it.
///
/// On success the new session and the spawned driver process are stored in
/// the context. If the connection fails, the spawned process is killed and a
/// troubleshooting message is returned.
///
/// # Returns
/// Always `Ok` with a `✅`/`❌` status message.
async fn start_safari_driver<W: UiWriter>(ctx: &ToolContext<'_, W>) -> Result<String> {
    let port = ctx.config.webdriver.safari_port;

    let mut command = tokio::process::Command::new("safaridriver");
    command
        .arg("--port")
        .arg(port.to_string())
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null());

    let mut webdriver_process = match command.spawn() {
        Ok(process) => process,
        Err(e) => {
            return Ok(format!(
                "❌ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.",
                e
            ));
        }
    };

    // Give safaridriver a moment to start listening.
    tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;

    let driver = match g3_computer_control::SafariDriver::with_port(port).await {
        Ok(driver) => driver,
        Err(e) => {
            // Do not leave an orphaned driver process behind.
            let _ = webdriver_process.kill().await;
            return Ok(format!(
                "❌ Failed to connect to SafariDriver: {}\n\n\
                 This might be because:\n  \
                 - Safari Remote Automation is not enabled (run: safaridriver --enable)\n  \
                 - Port {} is already in use\n  \
                 - Safari failed to start\n  \
                 - Network connectivity issue\n\n\
                 To enable Remote Automation:\n  \
                 1. Run: safaridriver --enable (requires password, one-time setup)\n  \
                 2. Or manually: Safari → Develop → Allow Remote Automation",
                e, port
            ));
        }
    };

    let session = std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Safari(driver)));
    *ctx.webdriver_session.write().await = Some(session);
    *ctx.webdriver_process.write().await = Some(webdriver_process);

    Ok(
        "✅ WebDriver session started successfully! Safari should open automatically."
            .to_string(),
    )
}
/// Launch `chromedriver` on the configured port and connect in headless mode.
///
/// The connection is attempted up to ten times, 200 ms apart (about two
/// seconds in total), to give chromedriver time to come up. A custom Chrome
/// binary from `config.webdriver.chrome_binary` is used when set. On success
/// the session and the driver process are stored in the context; when every
/// attempt fails the process is killed.
///
/// # Returns
/// Always `Ok` with a `✅`/`❌` status message.
async fn start_chrome_driver<W: UiWriter>(ctx: &ToolContext<'_, W>) -> Result<String> {
    const MAX_RETRIES: u32 = 10;

    let port = ctx.config.webdriver.chrome_port;

    let mut command = tokio::process::Command::new("chromedriver");
    command
        .arg(format!("--port={}", port))
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null());

    let mut webdriver_process = match command.spawn() {
        Ok(process) => process,
        Err(e) => {
            return Ok(format!(
                "❌ Failed to start chromedriver: {}\n\n\
                 Make sure chromedriver is installed and in your PATH.\n\n\
                 Install with:\n  \
                 - macOS: brew install chromedriver\n  \
                 - Linux: apt install chromium-chromedriver\n  \
                 - Or download from: https://chromedriver.chromium.org/downloads",
                e
            ));
        }
    };

    let mut last_error = None;
    for _ in 0..MAX_RETRIES {
        // Wait before every attempt, including the first one.
        tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;

        let connection = if let Some(binary) = &ctx.config.webdriver.chrome_binary {
            g3_computer_control::ChromeDriver::with_port_headless_and_binary(port, Some(binary))
                .await
        } else {
            g3_computer_control::ChromeDriver::with_port_headless(port).await
        };

        match connection {
            Ok(driver) => {
                let session = std::sync::Arc::new(tokio::sync::Mutex::new(
                    WebDriverSession::Chrome(driver),
                ));
                *ctx.webdriver_session.write().await = Some(session);
                *ctx.webdriver_process.write().await = Some(webdriver_process);
                return Ok(
                    "✅ WebDriver session started successfully! Chrome is running in headless mode (no visible window)."
                        .to_string(),
                );
            }
            // Remember the most recent failure and try again.
            Err(e) => last_error = Some(e),
        }
    }

    // Every attempt failed: stop the driver we spawned and report why.
    let _ = webdriver_process.kill().await;
    let error_msg = match last_error {
        Some(e) => e.to_string(),
        None => "Unknown error".to_string(),
    };

    Ok(format!(
        "❌ Failed to connect to ChromeDriver after {} attempts: {}\n\n\
         This might be because:\n  \
         - Chrome is not installed\n  \
         - ChromeDriver version doesn't match Chrome version\n  \
         - Port {} is already in use\n\n\
         Make sure Chrome and ChromeDriver are installed and compatible.",
        MAX_RETRIES, error_msg, port
    ))
}
/// Execute the `webdriver_navigate` tool.
///
/// Navigates the active WebDriver session to the `url` argument.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session, a missing `url` or a
/// navigation failure are reported as a `❌ ...` message.
pub async fn execute_webdriver_navigate<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_navigate tool call");

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // Clone the session handle inside a block so the read lock is released
    // before the driver is used.
    let session = {
        let session_slot = ctx.webdriver_session.read().await;
        match session_slot.as_ref() {
            Some(active) => active.clone(),
            None => {
                return Ok(
                    "❌ No active WebDriver session. Call webdriver_start first.".to_string(),
                )
            }
        }
    };

    let Some(url) = tool_call.args.get("url").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing url argument".to_string());
    };

    let mut driver = session.lock().await;
    let outcome = driver.navigate(url).await;
    Ok(match outcome {
        Ok(_) => format!("✅ Navigated to {}", url),
        Err(e) => format!("❌ Failed to navigate: {}", e),
    })
}
/// Execute the `webdriver_get_url` tool.
///
/// Reports the URL the active WebDriver session is currently on.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session or a driver failure
/// are reported as a `❌ ...` message.
pub async fn execute_webdriver_get_url<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_get_url tool call");
    let _ = tool_call; // unused

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let driver = session.lock().await;
    let outcome = driver.current_url().await;
    Ok(match outcome {
        Ok(url) => format!("Current URL: {}", url),
        Err(e) => format!("❌ Failed to get URL: {}", e),
    })
}
/// Execute the `webdriver_get_title` tool.
///
/// Reports the title of the page loaded in the active WebDriver session.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session or a driver failure
/// are reported as a `❌ ...` message.
pub async fn execute_webdriver_get_title<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_get_title tool call");
    let _ = tool_call; // unused

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let driver = session.lock().await;
    let outcome = driver.title().await;
    Ok(match outcome {
        Ok(title) => format!("Page title: {}", title),
        Err(e) => format!("❌ Failed to get title: {}", e),
    })
}
/// Execute the `webdriver_find_element` tool.
///
/// Finds the element matching the `selector` argument and returns its text.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session, a missing `selector`
/// or a lookup failure are reported as a `❌ ...` message.
pub async fn execute_webdriver_find_element<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_find_element tool call");

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let Some(selector) = tool_call.args.get("selector").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing selector argument".to_string());
    };

    let mut driver = session.lock().await;
    let elem = match driver.find_element(selector).await {
        Ok(elem) => elem,
        Err(e) => return Ok(format!("❌ Failed to find element '{}': {}", selector, e)),
    };

    let outcome = elem.text().await;
    Ok(match outcome {
        Ok(text) => format!("Element text: {}", text),
        Err(e) => format!("❌ Failed to get element text: {}", e),
    })
}
/// Execute the `webdriver_find_elements` tool.
///
/// Finds every element matching the `selector` argument and lists the text
/// of each one, prefixed with its index. Elements whose text cannot be read
/// are listed with an `<error getting text>` placeholder.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session, a missing `selector`
/// or a lookup failure are reported as a `❌ ...` message.
pub async fn execute_webdriver_find_elements<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_find_elements tool call");

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let Some(selector) = tool_call.args.get("selector").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing selector argument".to_string());
    };

    let mut driver = session.lock().await;
    let elements = match driver.find_elements(selector).await {
        Ok(elements) => elements,
        Err(e) => return Ok(format!("❌ Failed to find elements '{}': {}", selector, e)),
    };

    let mut results = Vec::new();
    for (i, elem) in elements.iter().enumerate() {
        let entry = match elem.text().await {
            Ok(text) => format!("[{}]: {}", i, text),
            Err(_) => format!("[{}]: <error getting text>", i),
        };
        results.push(entry);
    }

    Ok(format!(
        "Found {} elements:\n{}",
        results.len(),
        results.join("\n")
    ))
}
/// Execute the `webdriver_click` tool.
///
/// Finds the element matching the `selector` argument and clicks it.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session, a missing `selector`,
/// a lookup failure or a click failure are reported as a `❌ ...` message.
pub async fn execute_webdriver_click<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_click tool call");

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let Some(selector) = tool_call.args.get("selector").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing selector argument".to_string());
    };

    let mut driver = session.lock().await;
    let mut elem = match driver.find_element(selector).await {
        Ok(elem) => elem,
        Err(e) => return Ok(format!("❌ Failed to find element '{}': {}", selector, e)),
    };

    let outcome = elem.click().await;
    Ok(match outcome {
        Ok(_) => format!("✅ Clicked element '{}'", selector),
        Err(e) => format!("❌ Failed to click element: {}", e),
    })
}
/// Execute the `webdriver_send_keys` tool.
///
/// Types the `text` argument into the element matching `selector`. Unless
/// `clear_first` is explicitly `false` (it defaults to `true`), the element
/// is cleared before typing.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session, missing arguments or
/// any driver failure are reported as a `❌ ...` message.
pub async fn execute_webdriver_send_keys<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_send_keys tool call");

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let Some(selector) = tool_call.args.get("selector").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing selector argument".to_string());
    };
    let Some(text) = tool_call.args.get("text").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing text argument".to_string());
    };
    let clear_first = match tool_call.args.get("clear_first").and_then(|v| v.as_bool()) {
        Some(flag) => flag,
        None => true,
    };

    let mut driver = session.lock().await;
    let mut elem = match driver.find_element(selector).await {
        Ok(elem) => elem,
        Err(e) => return Ok(format!("❌ Failed to find element '{}': {}", selector, e)),
    };

    if clear_first {
        if let Err(e) = elem.clear().await {
            return Ok(format!("❌ Failed to clear element: {}", e));
        }
    }

    let outcome = elem.send_keys(text).await;
    Ok(match outcome {
        Ok(_) => format!("✅ Sent keys to element '{}'", selector),
        Err(e) => format!("❌ Failed to send keys: {}", e),
    })
}
/// Execute the `webdriver_execute_script` tool.
///
/// Runs the `script` argument in the active WebDriver session (with no
/// script arguments) and returns the `Debug` rendering of its result.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session, a missing `script`
/// or a script failure are reported as a `❌ ...` message.
pub async fn execute_webdriver_execute_script<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_execute_script tool call");

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let Some(script) = tool_call.args.get("script").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing script argument".to_string());
    };

    let mut driver = session.lock().await;
    let outcome = driver.execute_script(script, vec![]).await;
    Ok(match outcome {
        Ok(result) => format!("Script result: {:?}", result),
        Err(e) => format!("❌ Failed to execute script: {}", e),
    })
}
/// Execute the `webdriver_get_page_source` tool.
///
/// Fetches the source of the current page and either saves it to a file or
/// returns it (possibly truncated) as the tool result.
///
/// # Arguments (read from `tool_call.args`)
/// * `max_length` (optional, default `10000`) - maximum number of bytes of
///   source to return inline; `0` disables truncation. Ignored when
///   `save_to_file` is given.
/// * `save_to_file` (optional) - path to write the full source to; a leading
///   `~` is expanded and missing parent directories are created.
///
/// Note that the sizes shown in the messages are byte lengths
/// (`String::len`), although the messages label them "chars".
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session, a driver failure or
/// a file-system failure are reported as a `❌ ...` message.
pub async fn execute_webdriver_get_page_source<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_get_page_source tool call");

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // Extract optional parameters. A value too large for `usize` is clamped
    // rather than wrapped by an `as` cast.
    let max_length = tool_call
        .args
        .get("max_length")
        .and_then(|v| v.as_u64())
        .map(|n| usize::try_from(n).unwrap_or(usize::MAX))
        .unwrap_or(10000);
    let save_to_file = tool_call.args.get("save_to_file").and_then(|v| v.as_str());

    let session_guard = ctx.webdriver_session.read().await;
    let session = match session_guard.as_ref() {
        Some(s) => s.clone(),
        None => {
            return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
        }
    };

    let driver = session.lock().await;
    let source = match driver.page_source().await {
        Ok(source) => source,
        Err(e) => return Ok(format!("❌ Failed to get page source: {}", e)),
    };

    // If save_to_file is specified, write the full source to that file.
    if let Some(file_path) = save_to_file {
        let expanded_path = shellexpand::tilde(file_path);
        let path_str = expanded_path.as_ref();

        // Create parent directories if needed.
        if let Some(parent) = std::path::Path::new(path_str).parent() {
            if let Err(e) = std::fs::create_dir_all(parent) {
                return Ok(format!("❌ Failed to create directories: {}", e));
            }
        }

        return match std::fs::write(path_str, &source) {
            Ok(_) => Ok(format!(
                "✅ Page source ({} chars) saved to: {}",
                source.len(),
                path_str
            )),
            Err(e) => Ok(format!("❌ Failed to write file: {}", e)),
        };
    }

    if max_length > 0 && source.len() > max_length {
        // Slicing a `str` at a byte offset inside a multi-byte character
        // panics, so back up to the nearest character boundary first.
        // Offset 0 is always a boundary, so the loop terminates.
        let mut cut = max_length;
        while !source.is_char_boundary(cut) {
            cut -= 1;
        }
        return Ok(format!(
            "Page source ({} chars, truncated to {}):\n{}...",
            source.len(),
            max_length,
            &source[..cut]
        ));
    }

    // Return the full source.
    Ok(format!("Page source ({} chars):\n{}", source.len(), source))
}
/// Execute the `webdriver_screenshot` tool.
///
/// Asks the active WebDriver session to save a screenshot to the `path`
/// argument.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session, a missing `path` or
/// a screenshot failure are reported as a `❌ ...` message.
pub async fn execute_webdriver_screenshot<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_screenshot tool call");

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let Some(path) = tool_call.args.get("path").and_then(|v| v.as_str()) else {
        return Ok("❌ Missing path argument".to_string());
    };

    let mut driver = session.lock().await;
    let outcome = driver.screenshot(path).await;
    Ok(match outcome {
        Ok(_) => format!("✅ Screenshot saved to {}", path),
        Err(e) => format!("❌ Failed to take screenshot: {}", e),
    })
}
/// Execute the `webdriver_back` tool.
///
/// Moves the active WebDriver session one step back in its history.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session or a driver failure
/// are reported as a `❌ ...` message.
pub async fn execute_webdriver_back<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_back tool call");
    let _ = tool_call; // unused

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let mut driver = session.lock().await;
    let outcome = driver.back().await;
    Ok(match outcome {
        Ok(_) => "✅ Navigated back".to_string(),
        Err(e) => format!("❌ Failed to navigate back: {}", e),
    })
}
/// Execute the `webdriver_forward` tool.
///
/// Moves the active WebDriver session one step forward in its history.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session or a driver failure
/// are reported as a `❌ ...` message.
pub async fn execute_webdriver_forward<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_forward tool call");
    let _ = tool_call; // unused

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let mut driver = session.lock().await;
    let outcome = driver.forward().await;
    Ok(match outcome {
        Ok(_) => "✅ Navigated forward".to_string(),
        Err(e) => format!("❌ Failed to navigate forward: {}", e),
    })
}
/// Execute the `webdriver_refresh` tool.
///
/// Reloads the page shown in the active WebDriver session.
///
/// # Returns
/// Always `Ok`; a disabled WebDriver, a missing session or a driver failure
/// are reported as a `❌ ...` message.
pub async fn execute_webdriver_refresh<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing webdriver_refresh tool call");
    let _ = tool_call; // unused

    if !ctx.config.webdriver.enabled {
        return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
    }

    // The read guard stays alive for the whole call.
    let session_slot = ctx.webdriver_session.read().await;
    let Some(session) = session_slot.as_ref().cloned() else {
        return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string());
    };

    let mut driver = session.lock().await;
    let outcome = driver.refresh().await;
    Ok(match outcome {
        Ok(_) => "✅ Page refreshed".to_string(),
        Err(e) => format!("❌ Failed to refresh page: {}", e),
    })
}
/// Execute the `webdriver_quit` tool.
pub async fn execute_webdriver_quit<W: UiWriter>(
tool_call: &ToolCall,
ctx: &ToolContext<'_, W>,
) -> Result<String> {
debug!("Processing webdriver_quit tool call");
let _ = tool_call; // unused
if !ctx.config.webdriver.enabled {
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
}
// Take the session
let session = match ctx.webdriver_session.write().await.take() {
Some(s) => s.clone(),
None => return Ok("❌ No active WebDriver session.".to_string()),
};
// Quit the WebDriver session
match std::sync::Arc::try_unwrap(session) {
Ok(mutex) => {
let driver = mutex.into_inner();
match driver.quit().await {
Ok(_) => {
debug!("WebDriver session closed successfully");
// Kill the safaridriver process
if let Some(mut process) = ctx.webdriver_process.write().await.take() {
if let Err(e) = process.kill().await {
warn!("Failed to kill safaridriver process: {}", e);
} else {
debug!("Safaridriver process terminated");
}
}
Ok("✅ WebDriver session closed and safaridriver stopped".to_string())
}
Err(e) => Ok(format!("❌ Failed to quit WebDriver: {}", e)),
}
}
Err(_) => Ok("❌ Cannot quit: WebDriver session is still in use".to_string()),
}
}