agent mode resumption
This commit is contained in:
@@ -10,6 +10,7 @@ pub mod session_continuation;
|
||||
pub mod streaming_parser;
|
||||
pub mod task_result;
|
||||
pub mod tool_definitions;
|
||||
pub mod tools;
|
||||
pub mod ui_writer;
|
||||
pub mod utils;
|
||||
pub mod webdriver_session;
|
||||
@@ -17,7 +18,7 @@ pub mod webdriver_session;
|
||||
pub use task_result::TaskResult;
|
||||
pub use retry::{RetryConfig, RetryResult, execute_with_retry, retry_operation};
|
||||
pub use feedback_extraction::{ExtractedFeedback, FeedbackSource, FeedbackExtractionConfig, extract_coach_feedback};
|
||||
pub use session_continuation::{SessionContinuation, load_continuation, save_continuation, clear_continuation, has_valid_continuation, get_session_dir, load_context_from_session_log};
|
||||
pub use session_continuation::{SessionContinuation, load_continuation, save_continuation, clear_continuation, has_valid_continuation, get_session_dir, load_context_from_session_log, find_incomplete_agent_session};
|
||||
|
||||
// Re-export context window types
|
||||
pub use context_window::{ContextWindow, ThinScope};
|
||||
@@ -117,9 +118,17 @@ pub struct Agent<W: UiWriter> {
|
||||
background_process_manager: std::sync::Arc<background_process::BackgroundProcessManager>,
|
||||
/// Pending images to attach to the next user message
|
||||
pending_images: Vec<g3_providers::ImageContent>,
|
||||
/// Whether this agent is running in agent mode (--agent flag)
|
||||
is_agent_mode: bool,
|
||||
/// Name of the agent if running in agent mode (e.g., "fowler", "pike")
|
||||
agent_name: Option<String>,
|
||||
}
|
||||
|
||||
impl<W: UiWriter> Agent<W> {
|
||||
/// Minimum tokens for summary requests to avoid API errors when context is nearly full.
|
||||
/// This ensures max_tokens is never 0 even when context usage is 90%+.
|
||||
const SUMMARY_MIN_TOKENS: u32 = 1000;
|
||||
|
||||
pub async fn new(config: Config, ui_writer: W) -> Result<Self> {
|
||||
Self::new_with_mode(config, ui_writer, false, false).await
|
||||
}
|
||||
@@ -418,6 +427,8 @@ impl<W: UiWriter> Agent<W> {
|
||||
paths::get_logs_dir().join("background_processes")
|
||||
)),
|
||||
pending_images: Vec::new(),
|
||||
is_agent_mode: false,
|
||||
agent_name: None,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -606,6 +617,9 @@ impl<W: UiWriter> Agent<W> {
|
||||
/// Calculate max_tokens for a summary request, ensuring it satisfies the thinking constraint.
|
||||
/// Applies fallback sequence: thinnify -> skinnify -> hard-coded minimum
|
||||
/// Returns (max_tokens, whether_fallback_was_used)
|
||||
///
|
||||
/// IMPORTANT: Always returns at least SUMMARY_MIN_TOKENS to avoid API errors
|
||||
/// when context is nearly full (90%+).
|
||||
fn calculate_summary_max_tokens(
|
||||
&mut self,
|
||||
provider_name: &str,
|
||||
@@ -621,7 +635,10 @@ impl<W: UiWriter> Agent<W> {
|
||||
let available = model_limit
|
||||
.saturating_sub(current_usage)
|
||||
.saturating_sub(buffer);
|
||||
// Use the smaller of available tokens or configured max_tokens,
|
||||
// Ensure we have at least a minimum floor for summary requests
|
||||
// This prevents max_tokens=0 errors when context is 90%+ full
|
||||
let available = available.max(Self::SUMMARY_MIN_TOKENS);
|
||||
// Use the smaller of available tokens (with floor) or configured max_tokens,
|
||||
// but ensure we don't go below thinking budget floor for Anthropic
|
||||
let proposed_max_tokens = available.min(configured_max_tokens);
|
||||
let proposed_max_tokens = if provider_name == "anthropic" {
|
||||
@@ -1554,6 +1571,9 @@ impl<W: UiWriter> Agent<W> {
|
||||
_ => summary_max_tokens.min(5000),
|
||||
};
|
||||
|
||||
// Ensure minimum floor as defense-in-depth (primary protection is in calculate_summary_max_tokens)
|
||||
summary_max_tokens = summary_max_tokens.max(Self::SUMMARY_MIN_TOKENS);
|
||||
|
||||
debug!(
|
||||
"Requesting summary with max_tokens: {} (current usage: {} tokens)",
|
||||
summary_max_tokens, self.context_window.used_tokens
|
||||
@@ -1912,6 +1932,8 @@ impl<W: UiWriter> Agent<W> {
|
||||
.unwrap_or_else(|_| ".".to_string());
|
||||
|
||||
let continuation = SessionContinuation::new(
|
||||
self.is_agent_mode,
|
||||
self.agent_name.clone(),
|
||||
session_id,
|
||||
final_output_summary,
|
||||
session_log_path.to_string_lossy().to_string(),
|
||||
@@ -1927,6 +1949,14 @@ impl<W: UiWriter> Agent<W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set agent mode information for session tracking
|
||||
/// Called when running with --agent flag to enable agent-specific session resume
|
||||
pub fn set_agent_mode(&mut self, agent_name: &str) {
|
||||
self.is_agent_mode = true;
|
||||
self.agent_name = Some(agent_name.to_string());
|
||||
debug!("Agent mode enabled for agent: {}", agent_name);
|
||||
}
|
||||
|
||||
/// Clear session state and continuation artifacts (for /clear command)
|
||||
pub fn clear_session(&mut self) {
|
||||
use crate::session_continuation::clear_continuation;
|
||||
@@ -2158,6 +2188,9 @@ impl<W: UiWriter> Agent<W> {
|
||||
_ => summary_max_tokens.min(5000),
|
||||
};
|
||||
|
||||
// Ensure minimum floor as defense-in-depth (primary protection is in calculate_summary_max_tokens)
|
||||
summary_max_tokens = summary_max_tokens.max(Self::SUMMARY_MIN_TOKENS);
|
||||
|
||||
debug!(
|
||||
"Requesting summary with max_tokens: {} (current usage: {} tokens)",
|
||||
summary_max_tokens, self.context_window.used_tokens
|
||||
|
||||
@@ -24,6 +24,10 @@ const CONTINUATION_FILENAME: &str = "latest.json";
|
||||
pub struct SessionContinuation {
|
||||
/// Version of the continuation format
|
||||
pub version: String,
|
||||
/// Whether this session was running in agent mode
|
||||
pub is_agent_mode: bool,
|
||||
/// Name of the agent (e.g., "fowler", "pike") if in agent mode
|
||||
pub agent_name: Option<String>,
|
||||
/// Timestamp when the continuation was saved
|
||||
pub created_at: String,
|
||||
/// Original session ID
|
||||
@@ -43,6 +47,8 @@ pub struct SessionContinuation {
|
||||
impl SessionContinuation {
|
||||
/// Create a new session continuation artifact
|
||||
pub fn new(
|
||||
is_agent_mode: bool,
|
||||
agent_name: Option<String>,
|
||||
session_id: String,
|
||||
final_output_summary: Option<String>,
|
||||
session_log_path: String,
|
||||
@@ -52,6 +58,8 @@ impl SessionContinuation {
|
||||
) -> Self {
|
||||
Self {
|
||||
version: CONTINUATION_VERSION.to_string(),
|
||||
is_agent_mode,
|
||||
agent_name,
|
||||
created_at: chrono::Utc::now().to_rfc3339(),
|
||||
session_id,
|
||||
final_output_summary,
|
||||
@@ -66,6 +74,14 @@ impl SessionContinuation {
|
||||
pub fn can_restore_full_context(&self) -> bool {
|
||||
self.context_percentage < 80.0
|
||||
}
|
||||
|
||||
/// Check if this session has incomplete TODO items
|
||||
pub fn has_incomplete_todos(&self) -> bool {
|
||||
match &self.todo_snapshot {
|
||||
Some(todo) => todo.contains("- [ ]"),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the path to the .g3 directory
|
||||
@@ -272,6 +288,76 @@ pub fn load_context_from_session_log(session_log_path: &Path) -> Result<Option<s
|
||||
Ok(Some(session_data))
|
||||
}
|
||||
|
||||
/// Find an incomplete agent session for the given agent name.
|
||||
/// Returns the most recent session that:
|
||||
/// 1. Was running in agent mode with the matching agent name
|
||||
/// 2. Has incomplete TODO items (contains "- [ ]")
|
||||
/// 3. Is in the same working directory
|
||||
pub fn find_incomplete_agent_session(agent_name: &str) -> Result<Option<SessionContinuation>> {
|
||||
let sessions_dir = get_sessions_dir();
|
||||
|
||||
if !sessions_dir.exists() {
|
||||
debug!("Sessions directory does not exist: {:?}", sessions_dir);
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let current_dir = std::env::current_dir()
|
||||
.map(|p| p.to_string_lossy().to_string())
|
||||
.unwrap_or_default();
|
||||
|
||||
let mut candidates: Vec<SessionContinuation> = Vec::new();
|
||||
|
||||
// Scan all session directories
|
||||
for entry in std::fs::read_dir(&sessions_dir)? {
|
||||
let entry = entry?;
|
||||
let path = entry.path();
|
||||
|
||||
if !path.is_dir() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for latest.json in this session directory
|
||||
let latest_path = path.join(CONTINUATION_FILENAME);
|
||||
if !latest_path.exists() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Try to load the continuation
|
||||
let json = match std::fs::read_to_string(&latest_path) {
|
||||
Ok(j) => j,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let continuation: SessionContinuation = match serde_json::from_str(&json) {
|
||||
Ok(c) => c,
|
||||
Err(_) => continue, // Skip sessions with old format
|
||||
};
|
||||
|
||||
// Check if this is an agent mode session with matching name
|
||||
if !continuation.is_agent_mode {
|
||||
continue;
|
||||
}
|
||||
|
||||
if continuation.agent_name.as_deref() != Some(agent_name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if in same working directory
|
||||
if continuation.working_directory != current_dir {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if has incomplete TODOs
|
||||
if continuation.has_incomplete_todos() {
|
||||
candidates.push(continuation);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort by created_at descending and return the most recent
|
||||
candidates.sort_by(|a, b| b.created_at.cmp(&a.created_at));
|
||||
Ok(candidates.into_iter().next())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -279,6 +365,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_session_continuation_creation() {
|
||||
let continuation = SessionContinuation::new(
|
||||
false,
|
||||
None,
|
||||
"test_session_123".to_string(),
|
||||
Some("Task completed successfully".to_string()),
|
||||
"/path/to/session.json".to_string(),
|
||||
@@ -295,6 +383,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_can_restore_full_context() {
|
||||
let mut continuation = SessionContinuation::new(
|
||||
false,
|
||||
None,
|
||||
"test".to_string(),
|
||||
None,
|
||||
"path".to_string(),
|
||||
@@ -311,4 +401,26 @@ mod tests {
|
||||
continuation.context_percentage = 95.0;
|
||||
assert!(!continuation.can_restore_full_context()); // 95% >= 80%
|
||||
}
|
||||
|
||||
#[test]
fn test_has_incomplete_todos() {
    let mut session = SessionContinuation::new(
        true,
        Some("fowler".to_string()),
        "test".to_string(),
        None,
        "path".to_string(),
        50.0,
        Some("- [x] Done\n- [ ] Not done".to_string()),
        ".".to_string(),
    );

    // A single unchecked item among checked ones is enough.
    assert!(session.has_incomplete_todos());

    // Fully checked lists and missing snapshots both count as complete.
    let complete_cases = [Some("- [x] All done".to_string()), None];
    for snapshot in complete_cases {
        session.todo_snapshot = snapshot;
        assert!(!session.has_incomplete_todos());
    }
}
|
||||
}
|
||||
|
||||
54
crates/g3-core/src/tools/executor.rs
Normal file
54
crates/g3-core/src/tools/executor.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
//! Tool executor trait and context for tool execution.
|
||||
|
||||
use anyhow::Result;
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::background_process::BackgroundProcessManager;
|
||||
use crate::paths::{ensure_session_dir, get_session_todo_path, get_todo_path};
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::webdriver_session::WebDriverSession;
|
||||
use crate::ToolCall;
|
||||
use g3_config::Config;
|
||||
|
||||
/// Context passed to tool executors containing shared state.
|
||||
pub struct ToolContext<'a, W: UiWriter> {
|
||||
pub config: &'a Config,
|
||||
pub ui_writer: &'a W,
|
||||
pub session_id: Option<&'a str>,
|
||||
pub working_dir: Option<&'a str>,
|
||||
pub computer_controller: Option<&'a Box<dyn g3_computer_control::ComputerController>>,
|
||||
pub webdriver_session: &'a Arc<RwLock<Option<Arc<tokio::sync::Mutex<WebDriverSession>>>>>,
|
||||
pub webdriver_process: &'a Arc<RwLock<Option<tokio::process::Child>>>,
|
||||
pub macax_controller: &'a Arc<RwLock<Option<g3_computer_control::MacAxController>>>,
|
||||
pub background_process_manager: &'a Arc<BackgroundProcessManager>,
|
||||
pub todo_content: &'a Arc<RwLock<String>>,
|
||||
pub pending_images: &'a mut Vec<g3_providers::ImageContent>,
|
||||
pub is_autonomous: bool,
|
||||
pub requirements_sha: Option<&'a str>,
|
||||
}
|
||||
|
||||
impl<'a, W: UiWriter> ToolContext<'a, W> {
|
||||
/// Get the path to the TODO file (session-scoped or workspace).
|
||||
pub fn get_todo_path(&self) -> std::path::PathBuf {
|
||||
if let Some(session_id) = self.session_id {
|
||||
let _ = ensure_session_dir(session_id);
|
||||
get_session_todo_path(session_id)
|
||||
} else {
|
||||
get_todo_path()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for tool executors.
|
||||
/// Each tool category implements this trait.
|
||||
pub trait ToolExecutor<W: UiWriter> {
|
||||
/// Execute a tool call and return the result.
|
||||
/// Returns None if this executor doesn't handle the given tool.
|
||||
fn execute<'a>(
|
||||
tool_call: &'a ToolCall,
|
||||
ctx: &'a mut ToolContext<'_, W>,
|
||||
) -> impl std::future::Future<Output = Option<Result<String>>> + Send + 'a
|
||||
where
|
||||
W: 'a;
|
||||
}
|
||||
510
crates/g3-core/src/tools/file_ops.rs
Normal file
510
crates/g3-core/src/tools/file_ops.rs
Normal file
@@ -0,0 +1,510 @@
|
||||
//! File operation tools: read_file, write_file, str_replace, read_image.
|
||||
|
||||
use anyhow::Result;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::utils::apply_unified_diff_to_string;
|
||||
use crate::ToolCall;
|
||||
|
||||
use super::executor::ToolContext;
|
||||
|
||||
/// Execute the `read_file` tool.
|
||||
pub async fn execute_read_file<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing read_file tool call");
|
||||
|
||||
let file_path = match tool_call.args.get("file_path").and_then(|v| v.as_str()) {
|
||||
Some(p) => p,
|
||||
None => return Ok("❌ Missing file_path argument".to_string()),
|
||||
};
|
||||
|
||||
// Expand tilde (~) to home directory
|
||||
let expanded_path = shellexpand::tilde(file_path);
|
||||
let path_str = expanded_path.as_ref();
|
||||
|
||||
// Check if this is an image file
|
||||
let is_image = path_str.to_lowercase().ends_with(".png")
|
||||
|| path_str.to_lowercase().ends_with(".jpg")
|
||||
|| path_str.to_lowercase().ends_with(".jpeg")
|
||||
|| path_str.to_lowercase().ends_with(".gif")
|
||||
|| path_str.to_lowercase().ends_with(".bmp")
|
||||
|| path_str.to_lowercase().ends_with(".tiff")
|
||||
|| path_str.to_lowercase().ends_with(".tif")
|
||||
|| path_str.to_lowercase().ends_with(".webp");
|
||||
|
||||
// If it's an image file, use OCR via extract_text
|
||||
if is_image {
|
||||
if let Some(controller) = ctx.computer_controller {
|
||||
match controller.extract_text_from_image(path_str).await {
|
||||
Ok(text) => {
|
||||
return Ok(format!("📄 Image file (OCR extracted):\n{}", text));
|
||||
}
|
||||
Err(e) => {
|
||||
return Ok(format!(
|
||||
"❌ Failed to extract text from image '{}': {}",
|
||||
path_str, e
|
||||
));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Ok("❌ Computer control not enabled. Cannot perform OCR on image files. Set computer_control.enabled = true in config.".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Extract optional start and end positions
|
||||
let start_char = tool_call
|
||||
.args
|
||||
.get("start")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|n| n as usize);
|
||||
let end_char = tool_call
|
||||
.args
|
||||
.get("end")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|n| n as usize);
|
||||
|
||||
debug!(
|
||||
"Reading file: {}, start={:?}, end={:?}",
|
||||
path_str, start_char, end_char
|
||||
);
|
||||
|
||||
match std::fs::read_to_string(path_str) {
|
||||
Ok(content) => {
|
||||
// Validate and apply range if specified
|
||||
let start = start_char.unwrap_or(0);
|
||||
let end = end_char.unwrap_or(content.len());
|
||||
|
||||
// Validation
|
||||
if start > content.len() {
|
||||
return Ok(format!(
|
||||
"❌ Start position {} exceeds file length {}",
|
||||
start,
|
||||
content.len()
|
||||
));
|
||||
}
|
||||
if end > content.len() {
|
||||
return Ok(format!(
|
||||
"❌ End position {} exceeds file length {}",
|
||||
end,
|
||||
content.len()
|
||||
));
|
||||
}
|
||||
if start > end {
|
||||
return Ok(format!(
|
||||
"❌ Start position {} is greater than end position {}",
|
||||
start, end
|
||||
));
|
||||
}
|
||||
|
||||
// Extract the requested portion, ensuring we're at char boundaries
|
||||
let start_boundary = if start == 0 {
|
||||
0
|
||||
} else {
|
||||
content
|
||||
.char_indices()
|
||||
.find(|(i, _)| *i >= start)
|
||||
.map(|(i, _)| i)
|
||||
.unwrap_or(start)
|
||||
};
|
||||
let end_boundary = content
|
||||
.char_indices()
|
||||
.find(|(i, _)| *i >= end)
|
||||
.map(|(i, _)| i)
|
||||
.unwrap_or(content.len());
|
||||
|
||||
let partial_content = &content[start_boundary..end_boundary];
|
||||
let line_count = partial_content.lines().count();
|
||||
let total_lines = content.lines().count();
|
||||
|
||||
// Format output with range info if partial
|
||||
if start_char.is_some() || end_char.is_some() {
|
||||
Ok(format!(
|
||||
"📄 File content (chars {}-{}, {} lines of {} total):\n{}",
|
||||
start_boundary, end_boundary, line_count, total_lines, partial_content
|
||||
))
|
||||
} else {
|
||||
Ok(format!("📄 File content ({} lines):\n{}", line_count, content))
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to read file '{}': {}", path_str, e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `read_image` tool.
|
||||
pub async fn execute_read_image<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &mut ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing read_image tool call");
|
||||
|
||||
// Get paths from file_paths array
|
||||
let mut paths: Vec<String> = Vec::new();
|
||||
|
||||
if let Some(file_paths) = tool_call.args.get("file_paths") {
|
||||
if let Some(arr) = file_paths.as_array() {
|
||||
for p in arr {
|
||||
if let Some(s) = p.as_str() {
|
||||
paths.push(s.to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if paths.is_empty() {
|
||||
return Ok("❌ Missing or empty file_paths argument".to_string());
|
||||
}
|
||||
|
||||
let mut results: Vec<String> = Vec::new();
|
||||
let mut success_count = 0;
|
||||
|
||||
// Print └─ and newline before images to break out of tool output box
|
||||
println!("└─\n");
|
||||
|
||||
for path_str in &paths {
|
||||
// Expand tilde (~) to home directory
|
||||
let expanded_path = shellexpand::tilde(path_str);
|
||||
let path = std::path::Path::new(expanded_path.as_ref());
|
||||
|
||||
// Check file exists
|
||||
if !path.exists() {
|
||||
results.push(format!("❌ Image file not found: {}", path_str));
|
||||
continue;
|
||||
}
|
||||
|
||||
// Read the file first, then detect format from magic bytes
|
||||
match std::fs::read(path) {
|
||||
Ok(bytes) => {
|
||||
// Detect media type from magic bytes (file signature)
|
||||
let media_type = match g3_providers::ImageContent::media_type_from_bytes(&bytes) {
|
||||
Some(mt) => mt,
|
||||
None => {
|
||||
// Fall back to extension-based detection
|
||||
let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
|
||||
match g3_providers::ImageContent::media_type_from_extension(ext) {
|
||||
Some(mt) => mt,
|
||||
None => {
|
||||
results.push(format!(
|
||||
"❌ {}: Unsupported or unrecognized image format",
|
||||
path_str
|
||||
));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let file_size = bytes.len();
|
||||
|
||||
// Try to get image dimensions
|
||||
let dimensions = get_image_dimensions(&bytes, media_type);
|
||||
|
||||
// Build info string
|
||||
let dim_str = dimensions
|
||||
.map(|(w, h)| format!("{}x{}", w, h))
|
||||
.unwrap_or_else(|| "unknown".to_string());
|
||||
|
||||
let size_str = if file_size >= 1024 * 1024 {
|
||||
format!("{:.1} MB", file_size as f64 / (1024.0 * 1024.0))
|
||||
} else if file_size >= 1024 {
|
||||
format!("{:.1} KB", file_size as f64 / 1024.0)
|
||||
} else {
|
||||
format!("{} bytes", file_size)
|
||||
};
|
||||
|
||||
// Output imgcat inline image to terminal (height constrained)
|
||||
print_imgcat(&bytes, path_str, &dim_str, media_type, &size_str, 5);
|
||||
|
||||
// Store the image to be attached to the next user message
|
||||
use base64::Engine;
|
||||
let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes);
|
||||
let image = g3_providers::ImageContent::new(media_type, encoded);
|
||||
ctx.pending_images.push(image);
|
||||
|
||||
success_count += 1;
|
||||
}
|
||||
Err(e) => {
|
||||
results.push(format!("❌ Failed to read '{}': {}", path_str, e));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Print ┌─ to resume tool output box
|
||||
print!("┌─\n");
|
||||
|
||||
let summary = if success_count == paths.len() {
|
||||
format!("{} image(s) read.", success_count)
|
||||
} else {
|
||||
format!("{}/{} image(s) read.", success_count, paths.len())
|
||||
};
|
||||
|
||||
// Only include error results if there are any
|
||||
if results.is_empty() {
|
||||
Ok(summary)
|
||||
} else {
|
||||
Ok(format!("{}\n{}", results.join("\n"), summary))
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `write_file` tool.
|
||||
pub async fn execute_write_file<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
_ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing write_file tool call");
|
||||
debug!("Raw tool_call.args: {:?}", tool_call.args);
|
||||
|
||||
// Try multiple argument formats that different providers might use
|
||||
let (path_str, content_str) = extract_path_and_content(&tool_call.args);
|
||||
|
||||
debug!(
|
||||
"Final extracted values: path_str={:?}, content_str_len={:?}",
|
||||
path_str,
|
||||
content_str.map(|c| c.len())
|
||||
);
|
||||
|
||||
if let (Some(path), Some(content)) = (path_str, content_str) {
|
||||
// Expand tilde (~) to home directory
|
||||
let expanded_path = shellexpand::tilde(path);
|
||||
let path = expanded_path.as_ref();
|
||||
|
||||
debug!("Writing to file: {}", path);
|
||||
|
||||
// Create parent directories if they don't exist
|
||||
if let Some(parent) = std::path::Path::new(path).parent() {
|
||||
if let Err(e) = std::fs::create_dir_all(parent) {
|
||||
return Ok(format!(
|
||||
"❌ Failed to create parent directories for '{}': {}",
|
||||
path, e
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
match std::fs::write(path, content) {
|
||||
Ok(()) => {
|
||||
let line_count = content.lines().count();
|
||||
let char_count = content.len();
|
||||
Ok(format!(
|
||||
"✅ Successfully wrote {} lines ({} characters)",
|
||||
line_count, char_count
|
||||
))
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to write to file '{}': {}", path, e)),
|
||||
}
|
||||
} else {
|
||||
// Provide more detailed error information
|
||||
let available_keys = if let Some(obj) = tool_call.args.as_object() {
|
||||
obj.keys().collect::<Vec<_>>()
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
|
||||
Ok(format!(
|
||||
"❌ Missing file_path or content argument. Available keys: {:?}. Expected formats: {{\"file_path\": \"...\", \"content\": \"...\"}}, {{\"path\": \"...\", \"content\": \"...\"}}, {{\"filename\": \"...\", \"text\": \"...\"}}, or {{\"file\": \"...\", \"data\": \"...\"}}",
|
||||
available_keys
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `str_replace` tool.
|
||||
pub async fn execute_str_replace<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
_ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing str_replace tool call");
|
||||
|
||||
let args_obj = match tool_call.args.as_object() {
|
||||
Some(obj) => obj,
|
||||
None => return Ok("❌ Invalid arguments: expected object".to_string()),
|
||||
};
|
||||
|
||||
let file_path = match args_obj.get("file_path").and_then(|v| v.as_str()) {
|
||||
Some(path) => {
|
||||
let expanded_path = shellexpand::tilde(path);
|
||||
expanded_path.into_owned()
|
||||
}
|
||||
None => return Ok("❌ Missing or invalid file_path argument".to_string()),
|
||||
};
|
||||
|
||||
let diff = match args_obj.get("diff").and_then(|v| v.as_str()) {
|
||||
Some(d) => d,
|
||||
None => return Ok("❌ Missing or invalid diff argument".to_string()),
|
||||
};
|
||||
|
||||
// Optional start and end character positions (0-indexed, end is EXCLUSIVE)
|
||||
let start_char = args_obj
|
||||
.get("start")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|n| n as usize);
|
||||
let end_char = args_obj
|
||||
.get("end")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|n| n as usize);
|
||||
|
||||
debug!(
|
||||
"str_replace: path={}, start={:?}, end={:?}",
|
||||
file_path, start_char, end_char
|
||||
);
|
||||
|
||||
// Read the existing file
|
||||
let file_content = match std::fs::read_to_string(&file_path) {
|
||||
Ok(content) => content,
|
||||
Err(e) => return Ok(format!("❌ Failed to read file '{}': {}", file_path, e)),
|
||||
};
|
||||
|
||||
// Apply unified diff to content
|
||||
let result = match apply_unified_diff_to_string(&file_content, diff, start_char, end_char) {
|
||||
Ok(r) => r,
|
||||
Err(e) => return Ok(format!("❌ {}", e)),
|
||||
};
|
||||
|
||||
// Write the result back to the file
|
||||
match std::fs::write(&file_path, &result) {
|
||||
Ok(()) => Ok("✅ applied unified diff".to_string()),
|
||||
Err(e) => Ok(format!("❌ Failed to write to file '{}': {}", file_path, e)),
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
/// Extract path and content from various argument formats.
|
||||
fn extract_path_and_content(args: &serde_json::Value) -> (Option<&str>, Option<&str>) {
|
||||
if let Some(args_obj) = args.as_object() {
|
||||
// Format 1: Standard format with file_path and content
|
||||
if let (Some(path_val), Some(content_val)) =
|
||||
(args_obj.get("file_path"), args_obj.get("content"))
|
||||
{
|
||||
if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) {
|
||||
return (Some(path), Some(content));
|
||||
}
|
||||
}
|
||||
// Format 2: Anthropic-style with path and content
|
||||
if let (Some(path_val), Some(content_val)) =
|
||||
(args_obj.get("path"), args_obj.get("content"))
|
||||
{
|
||||
if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) {
|
||||
return (Some(path), Some(content));
|
||||
}
|
||||
}
|
||||
// Format 3: Alternative naming with filename and text
|
||||
if let (Some(path_val), Some(content_val)) =
|
||||
(args_obj.get("filename"), args_obj.get("text"))
|
||||
{
|
||||
if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) {
|
||||
return (Some(path), Some(content));
|
||||
}
|
||||
}
|
||||
// Format 4: Alternative naming with file and data
|
||||
if let (Some(path_val), Some(content_val)) = (args_obj.get("file"), args_obj.get("data")) {
|
||||
if let (Some(path), Some(content)) = (path_val.as_str(), content_val.as_str()) {
|
||||
return (Some(path), Some(content));
|
||||
}
|
||||
}
|
||||
} else if let Some(args_array) = args.as_array() {
|
||||
// Format 5: Args might be an array [path, content]
|
||||
if args_array.len() >= 2 {
|
||||
if let (Some(path), Some(content)) = (args_array[0].as_str(), args_array[1].as_str()) {
|
||||
return (Some(path), Some(content));
|
||||
}
|
||||
}
|
||||
}
|
||||
(None, None)
|
||||
}
|
||||
|
||||
/// Get image dimensions from raw bytes.
///
/// `media_type` selects the parser: `image/png`, `image/jpeg`, `image/gif`
/// or `image/webp`. Returns `Some((width, height))` in pixels, or `None` when
/// the media type is not supported or the data is too short or does not
/// contain the expected header.
///
/// Only the fixed header locations are inspected; the data is not otherwise
/// validated as a well-formed image.
pub fn get_image_dimensions(bytes: &[u8], media_type: &str) -> Option<(u32, u32)> {
    match media_type {
        "image/png" => png_dimensions(bytes),
        "image/jpeg" => jpeg_dimensions(bytes),
        "image/gif" => gif_dimensions(bytes),
        "image/webp" => webp_dimensions(bytes),
        _ => None,
    }
}

/// PNG: width at bytes 16-19, height at bytes 20-23 (big-endian).
fn png_dimensions(bytes: &[u8]) -> Option<(u32, u32)> {
    let ihdr = bytes.get(16..24)?;
    let width = u32::from_be_bytes([ihdr[0], ihdr[1], ihdr[2], ihdr[3]]);
    let height = u32::from_be_bytes([ihdr[4], ihdr[5], ihdr[6], ihdr[7]]);
    Some((width, height))
}

/// GIF: width at bytes 6-7, height at bytes 8-9 (little-endian).
fn gif_dimensions(bytes: &[u8]) -> Option<(u32, u32)> {
    let screen = bytes.get(6..10)?;
    let width = u16::from_le_bytes([screen[0], screen[1]]) as u32;
    let height = u16::from_le_bytes([screen[2], screen[3]]) as u32;
    Some((width, height))
}

/// JPEG: walk the marker segments until a start-of-frame (SOF) marker.
fn jpeg_dimensions(bytes: &[u8]) -> Option<(u32, u32)> {
    let mut i = 2; // Skip FF D8
    // A frame header needs bytes i..=i+8 (marker, length, precision, h, w).
    while i + 8 < bytes.len() {
        if bytes[i] != 0xFF {
            i += 1;
            continue;
        }
        let marker = bytes[i + 1];
        match marker {
            // Fill byte before a marker: step one byte so the next 0xFF is
            // treated as the marker prefix.
            0xFF => i += 1,
            // Every SOFn marker carries the frame size. 0xC4 (DHT),
            // 0xC8 (JPG) and 0xCC (DAC) share the range but are not frames.
            0xC0..=0xCF if !matches!(marker, 0xC4 | 0xC8 | 0xCC) => {
                let height = u16::from_be_bytes([bytes[i + 5], bytes[i + 6]]) as u32;
                let width = u16::from_be_bytes([bytes[i + 7], bytes[i + 8]]) as u32;
                return Some((width, height));
            }
            // Standalone markers have no length field.
            0x01 | 0xD0..=0xD9 => i += 2,
            // Any other segment: skip marker plus its declared length.
            _ => {
                let len = u16::from_be_bytes([bytes[i + 2], bytes[i + 3]]) as usize;
                i += 2 + len;
            }
        }
    }
    None
}

/// WebP: the chunk tag at bytes 12-15 selects the header layout.
fn webp_dimensions(bytes: &[u8]) -> Option<(u32, u32)> {
    match bytes.get(12..16)? {
        // Lossy: 14-bit width and height at bytes 26-27 and 28-29.
        b"VP8 " => {
            let frame = bytes.get(26..30)?;
            let width = (u16::from_le_bytes([frame[0], frame[1]]) & 0x3FFF) as u32;
            let height = (u16::from_le_bytes([frame[2], frame[3]]) & 0x3FFF) as u32;
            Some((width, height))
        }
        // Lossless: signature byte 0x2F, then 14 bits of (width - 1)
        // followed by 14 bits of (height - 1), little-endian.
        b"VP8L" => {
            let header = bytes.get(20..25)?;
            if header[0] != 0x2F {
                return None;
            }
            let bits = u32::from_le_bytes([header[1], header[2], header[3], header[4]]);
            Some(((bits & 0x3FFF) + 1, ((bits >> 14) & 0x3FFF) + 1))
        }
        // Extended: 24-bit (canvas width - 1) and (canvas height - 1).
        b"VP8X" => {
            let canvas = bytes.get(24..30)?;
            let width = u32::from_le_bytes([canvas[0], canvas[1], canvas[2], 0]) + 1;
            let height = u32::from_le_bytes([canvas[3], canvas[4], canvas[5], 0]) + 1;
            Some((width, height))
        }
        _ => None,
    }
}
|
||||
|
||||
/// Print image using iTerm2 imgcat protocol with info line.
|
||||
pub fn print_imgcat(
|
||||
bytes: &[u8],
|
||||
name: &str,
|
||||
dimensions: &str,
|
||||
media_type: &str,
|
||||
size: &str,
|
||||
max_height: u32,
|
||||
) {
|
||||
use base64::Engine;
|
||||
let encoded = base64::engine::general_purpose::STANDARD.encode(bytes);
|
||||
// Extract just the filename from the path
|
||||
let filename = std::path::Path::new(name)
|
||||
.file_name()
|
||||
.and_then(|f| f.to_str())
|
||||
.unwrap_or(name);
|
||||
// iTerm2 inline image protocol (single space prefix)
|
||||
print!(
|
||||
" \x1b]1337;File=inline=1;height={};name={}:{}\x07\n",
|
||||
max_height, name, encoded
|
||||
);
|
||||
// Print dimmed info line with filename only (no │ prefix)
|
||||
println!(
|
||||
" \x1b[2m{} | {} | {} | {}\x1b[0m",
|
||||
filename, dimensions, media_type, size
|
||||
);
|
||||
// Blank line before next image (no │ prefix)
|
||||
println!();
|
||||
}
|
||||
178
crates/g3-core/src/tools/macax.rs
Normal file
178
crates/g3-core/src/tools/macax.rs
Normal file
@@ -0,0 +1,178 @@
|
||||
//! macOS Accessibility API tools.
|
||||
|
||||
use anyhow::Result;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::ToolCall;
|
||||
|
||||
use super::executor::ToolContext;
|
||||
|
||||
/// Execute the `macax_list_apps` tool.
|
||||
pub async fn execute_macax_list_apps<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing macax_list_apps tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.macax.enabled {
|
||||
return Ok(
|
||||
"❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let controller_guard = ctx.macax_controller.read().await;
|
||||
let controller = match controller_guard.as_ref() {
|
||||
Some(c) => c,
|
||||
None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()),
|
||||
};
|
||||
|
||||
match controller.list_applications() {
|
||||
Ok(apps) => {
|
||||
let app_list: Vec<String> = apps.iter().map(|a| a.name.clone()).collect();
|
||||
Ok(format!("Running applications:\n{}", app_list.join("\n")))
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to list applications: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `macax_get_frontmost_app` tool.
|
||||
pub async fn execute_macax_get_frontmost_app<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing macax_get_frontmost_app tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.macax.enabled {
|
||||
return Ok(
|
||||
"❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let controller_guard = ctx.macax_controller.read().await;
|
||||
let controller = match controller_guard.as_ref() {
|
||||
Some(c) => c,
|
||||
None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()),
|
||||
};
|
||||
|
||||
match controller.get_frontmost_app() {
|
||||
Ok(app) => Ok(format!("Frontmost application: {}", app.name)),
|
||||
Err(e) => Ok(format!("❌ Failed to get frontmost app: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `macax_activate_app` tool.
|
||||
pub async fn execute_macax_activate_app<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing macax_activate_app tool call");
|
||||
|
||||
if !ctx.config.macax.enabled {
|
||||
return Ok(
|
||||
"❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) {
|
||||
Some(n) => n,
|
||||
None => return Ok("❌ Missing app_name argument".to_string()),
|
||||
};
|
||||
|
||||
let controller_guard = ctx.macax_controller.read().await;
|
||||
let controller = match controller_guard.as_ref() {
|
||||
Some(c) => c,
|
||||
None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()),
|
||||
};
|
||||
|
||||
match controller.activate_app(app_name) {
|
||||
Ok(_) => Ok(format!("✅ Activated application: {}", app_name)),
|
||||
Err(e) => Ok(format!("❌ Failed to activate app: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `macax_press_key` tool.
|
||||
pub async fn execute_macax_press_key<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing macax_press_key tool call");
|
||||
|
||||
if !ctx.config.macax.enabled {
|
||||
return Ok(
|
||||
"❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) {
|
||||
Some(n) => n,
|
||||
None => return Ok("❌ Missing app_name argument".to_string()),
|
||||
};
|
||||
|
||||
let key = match tool_call.args.get("key").and_then(|v| v.as_str()) {
|
||||
Some(k) => k,
|
||||
None => return Ok("❌ Missing key argument".to_string()),
|
||||
};
|
||||
|
||||
let modifiers_vec: Vec<&str> = tool_call
|
||||
.args
|
||||
.get("modifiers")
|
||||
.and_then(|v| v.as_array())
|
||||
.map(|arr| arr.iter().filter_map(|v| v.as_str()).collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
let controller_guard = ctx.macax_controller.read().await;
|
||||
let controller = match controller_guard.as_ref() {
|
||||
Some(c) => c,
|
||||
None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()),
|
||||
};
|
||||
|
||||
match controller.press_key(app_name, key, modifiers_vec.clone()) {
|
||||
Ok(_) => {
|
||||
let modifier_str = if modifiers_vec.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" with modifiers: {}", modifiers_vec.join("+"))
|
||||
};
|
||||
Ok(format!("✅ Pressed key: {}{}", key, modifier_str))
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to press key: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `macax_type_text` tool.
|
||||
pub async fn execute_macax_type_text<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing macax_type_text tool call");
|
||||
|
||||
if !ctx.config.macax.enabled {
|
||||
return Ok(
|
||||
"❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) {
|
||||
Some(n) => n,
|
||||
None => return Ok("❌ Missing app_name argument".to_string()),
|
||||
};
|
||||
|
||||
let text = match tool_call.args.get("text").and_then(|v| v.as_str()) {
|
||||
Some(t) => t,
|
||||
None => return Ok("❌ Missing text argument".to_string()),
|
||||
};
|
||||
|
||||
let controller_guard = ctx.macax_controller.read().await;
|
||||
let controller = match controller_guard.as_ref() {
|
||||
Some(c) => c,
|
||||
None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()),
|
||||
};
|
||||
|
||||
match controller.type_text(app_name, text) {
|
||||
Ok(_) => Ok(format!("✅ Typed text into {}", app_name)),
|
||||
Err(e) => Ok(format!("❌ Failed to type text: {}", e)),
|
||||
}
|
||||
}
|
||||
225
crates/g3-core/src/tools/misc.rs
Normal file
225
crates/g3-core/src/tools/misc.rs
Normal file
@@ -0,0 +1,225 @@
|
||||
//! Miscellaneous tools: final_output, take_screenshot, extract_text, code_coverage, code_search.
|
||||
|
||||
use anyhow::Result;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::ToolCall;
|
||||
|
||||
use super::executor::ToolContext;
|
||||
|
||||
/// Execute the `final_output` tool.
|
||||
pub async fn execute_final_output<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing final_output tool call");
|
||||
|
||||
let summary_str = tool_call.args.get("summary").and_then(|v| v.as_str());
|
||||
|
||||
// In autonomous mode, check for incomplete TODO items before allowing completion
|
||||
if ctx.is_autonomous {
|
||||
let todo_content = ctx.todo_content.read().await;
|
||||
let has_incomplete_todos = todo_content
|
||||
.lines()
|
||||
.any(|line| line.trim().starts_with("- [ ]"));
|
||||
drop(todo_content);
|
||||
|
||||
if has_incomplete_todos {
|
||||
return Ok(
|
||||
"There are still incomplete TODO items. Please continue until \
|
||||
*ALL* TODO items in *ALL* phases are marked complete, and \
|
||||
*ONLY* then call `final_output`."
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Return the summary or a default message
|
||||
// Note: Session continuation saving is handled by the caller (Agent)
|
||||
if let Some(summary) = summary_str {
|
||||
Ok(summary.to_string())
|
||||
} else {
|
||||
Ok("✅ Turn completed".to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `take_screenshot` tool.
|
||||
pub async fn execute_take_screenshot<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing take_screenshot tool call");
|
||||
|
||||
let controller = match ctx.computer_controller {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Ok(
|
||||
"❌ Computer control not enabled. Set computer_control.enabled = true in config."
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let path = tool_call
|
||||
.args
|
||||
.get("path")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing path argument"))?;
|
||||
|
||||
// Extract window_id (app name) - REQUIRED
|
||||
let window_id = tool_call
|
||||
.args
|
||||
.get("window_id")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"Missing window_id argument. You must specify which window to capture \
|
||||
(e.g., 'Safari', 'Terminal', 'Google Chrome')."
|
||||
)
|
||||
})?;
|
||||
|
||||
// Extract region if provided
|
||||
let region = tool_call
|
||||
.args
|
||||
.get("region")
|
||||
.and_then(|v| v.as_object())
|
||||
.map(|region_obj| g3_computer_control::types::Rect {
|
||||
x: region_obj.get("x").and_then(|v| v.as_i64()).unwrap_or(0) as i32,
|
||||
y: region_obj.get("y").and_then(|v| v.as_i64()).unwrap_or(0) as i32,
|
||||
width: region_obj
|
||||
.get("width")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(0) as i32,
|
||||
height: region_obj
|
||||
.get("height")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(0) as i32,
|
||||
});
|
||||
|
||||
match controller.take_screenshot(path, region, Some(window_id)).await {
|
||||
Ok(_) => {
|
||||
// Get the actual path where the screenshot was saved
|
||||
let actual_path = if path.starts_with('/') {
|
||||
path.to_string()
|
||||
} else {
|
||||
let temp_dir = std::env::var("TMPDIR")
|
||||
.or_else(|_| std::env::var("HOME").map(|h| format!("{}/tmp", h)))
|
||||
.unwrap_or_else(|_| "/tmp".to_string());
|
||||
format!("{}/{}", temp_dir.trim_end_matches('/'), path)
|
||||
};
|
||||
|
||||
Ok(format!(
|
||||
"✅ Screenshot of {} saved to: {}",
|
||||
window_id, actual_path
|
||||
))
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to take screenshot: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `extract_text` tool.
|
||||
pub async fn execute_extract_text<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing extract_text tool call");
|
||||
|
||||
let controller = match ctx.computer_controller {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Ok(
|
||||
"❌ Computer control not enabled. Set computer_control.enabled = true in config."
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let path = tool_call
|
||||
.args
|
||||
.get("path")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing path argument"))?;
|
||||
|
||||
match controller.extract_text_from_image(path).await {
|
||||
Ok(text) => Ok(format!("✅ Extracted text:\n{}", text)),
|
||||
Err(e) => Ok(format!("❌ Failed to extract text: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `code_coverage` tool.
|
||||
pub async fn execute_code_coverage<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing code_coverage tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
ctx.ui_writer
|
||||
.print_context_status("🔍 Generating code coverage report...");
|
||||
|
||||
// Ensure coverage tools are installed
|
||||
match g3_execution::ensure_coverage_tools_installed() {
|
||||
Ok(already_installed) => {
|
||||
if !already_installed {
|
||||
ctx.ui_writer
|
||||
.print_context_status("✅ Coverage tools installed successfully");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return Ok(format!("❌ Failed to install coverage tools: {}", e));
|
||||
}
|
||||
}
|
||||
|
||||
// Run cargo llvm-cov --workspace
|
||||
let output = std::process::Command::new("cargo")
|
||||
.args(["llvm-cov", "--workspace"])
|
||||
.current_dir(std::env::current_dir()?)
|
||||
.output()?;
|
||||
|
||||
if output.status.success() {
|
||||
let stdout = String::from_utf8_lossy(&output.stdout);
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
|
||||
let mut result = String::from("✅ Code coverage report generated successfully\n\n");
|
||||
result.push_str("## Coverage Summary\n");
|
||||
result.push_str(&stdout);
|
||||
if !stderr.is_empty() {
|
||||
result.push_str("\n## Warnings\n");
|
||||
result.push_str(&stderr);
|
||||
}
|
||||
Ok(result)
|
||||
} else {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
Ok(format!("❌ Failed to generate coverage report:\n{}", stderr))
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `code_search` tool.
|
||||
pub async fn execute_code_search<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
_ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing code_search tool call");
|
||||
|
||||
// Parse the request
|
||||
let request: crate::code_search::CodeSearchRequest =
|
||||
match serde_json::from_value(tool_call.args.clone()) {
|
||||
Ok(req) => req,
|
||||
Err(e) => {
|
||||
return Ok(format!("❌ Invalid code_search arguments: {}", e));
|
||||
}
|
||||
};
|
||||
|
||||
// Execute the code search
|
||||
match crate::code_search::execute_code_search(request).await {
|
||||
Ok(response) => {
|
||||
// Serialize the response to JSON
|
||||
match serde_json::to_string_pretty(&response) {
|
||||
Ok(json_output) => Ok(format!("✅ Code search completed\n{}", json_output)),
|
||||
Err(e) => Ok(format!("❌ Failed to serialize response: {}", e)),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Code search failed: {}", e)),
|
||||
}
|
||||
}
|
||||
22
crates/g3-core/src/tools/mod.rs
Normal file
22
crates/g3-core/src/tools/mod.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
//! Tool execution module for G3 agent.
|
||||
//!
|
||||
//! This module contains all tool implementations that the agent can execute.
|
||||
//! Tools are organized by category:
|
||||
//! - `shell` - Shell command execution and background processes
|
||||
//! - `file_ops` - File reading, writing, and editing
|
||||
//! - `todo` - TODO list management
|
||||
//! - `webdriver` - Browser automation via WebDriver
|
||||
//! - `macax` - macOS Accessibility API tools
|
||||
//! - `vision` - Vision-based text finding and clicking
|
||||
//! - `misc` - Other tools (screenshots, code search, etc.)
|
||||
|
||||
pub mod executor;
|
||||
pub mod file_ops;
|
||||
pub mod macax;
|
||||
pub mod misc;
|
||||
pub mod shell;
|
||||
pub mod todo;
|
||||
pub mod vision;
|
||||
pub mod webdriver;
|
||||
|
||||
pub use executor::ToolExecutor;
|
||||
115
crates/g3-core/src/tools/shell.rs
Normal file
115
crates/g3-core/src/tools/shell.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
//! Shell command execution tools.
|
||||
|
||||
use anyhow::Result;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::utils::shell_escape_command;
|
||||
use crate::ToolCall;
|
||||
|
||||
use super::executor::ToolContext;
|
||||
|
||||
/// Execute the `shell` tool.
|
||||
pub async fn execute_shell<W: UiWriter>(tool_call: &ToolCall, ctx: &ToolContext<'_, W>) -> Result<String> {
|
||||
debug!("Processing shell tool call");
|
||||
|
||||
let command = match tool_call.args.get("command").and_then(|v| v.as_str()) {
|
||||
Some(cmd) => cmd,
|
||||
None => {
|
||||
debug!("No command parameter found in args: {:?}", tool_call.args);
|
||||
return Ok("❌ Missing command argument".to_string());
|
||||
}
|
||||
};
|
||||
|
||||
debug!("Command string: {}", command);
|
||||
let escaped_command = shell_escape_command(command);
|
||||
|
||||
let executor = g3_execution::CodeExecutor::new();
|
||||
|
||||
struct ToolOutputReceiver<'a, W: UiWriter> {
|
||||
ui_writer: &'a W,
|
||||
}
|
||||
|
||||
impl<'a, W: UiWriter> g3_execution::OutputReceiver for ToolOutputReceiver<'a, W> {
|
||||
fn on_output_line(&self, line: &str) {
|
||||
self.ui_writer.update_tool_output_line(line);
|
||||
}
|
||||
}
|
||||
|
||||
let receiver = ToolOutputReceiver {
|
||||
ui_writer: ctx.ui_writer,
|
||||
};
|
||||
|
||||
debug!(
|
||||
"ABOUT TO CALL execute_bash_streaming_in_dir: escaped_command='{}', working_dir={:?}",
|
||||
escaped_command, ctx.working_dir
|
||||
);
|
||||
|
||||
match executor
|
||||
.execute_bash_streaming_in_dir(&escaped_command, &receiver, ctx.working_dir)
|
||||
.await
|
||||
{
|
||||
Ok(result) => {
|
||||
if result.success {
|
||||
Ok(if result.stdout.is_empty() {
|
||||
"✅ Command executed successfully".to_string()
|
||||
} else {
|
||||
result.stdout.trim().to_string()
|
||||
})
|
||||
} else {
|
||||
Ok(format!("❌ Command failed: {}", result.stderr.trim()))
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Execution error: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `background_process` tool.
|
||||
pub async fn execute_background_process<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing background_process tool call");
|
||||
|
||||
let name = match tool_call.args.get("name").and_then(|v| v.as_str()) {
|
||||
Some(n) => n,
|
||||
None => return Ok("❌ Missing 'name' argument".to_string()),
|
||||
};
|
||||
|
||||
let command = match tool_call.args.get("command").and_then(|v| v.as_str()) {
|
||||
Some(c) => c,
|
||||
None => return Ok("❌ Missing 'command' argument".to_string()),
|
||||
};
|
||||
|
||||
// Use provided working_dir, or fall back to context working_dir, or current dir
|
||||
let work_dir = tool_call
|
||||
.args
|
||||
.get("working_dir")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| std::path::PathBuf::from(shellexpand::tilde(s).as_ref()))
|
||||
.or_else(|| ctx.working_dir.map(std::path::PathBuf::from))
|
||||
.unwrap_or_else(|| std::env::current_dir().unwrap_or_default());
|
||||
|
||||
match ctx.background_process_manager.start(name, command, &work_dir) {
|
||||
Ok(info) => Ok(format!(
|
||||
"✅ Background process '{}' started\n\n\
|
||||
**PID:** {}\n\
|
||||
**Log file:** {}\n\
|
||||
**Working dir:** {}\n\n\
|
||||
To interact with this process, use the shell tool:\n\
|
||||
- View logs: `tail -100 {}`\n\
|
||||
- Follow logs: `tail -f {}` (blocks until Ctrl+C)\n\
|
||||
- Check status: `ps -p {}`\n\
|
||||
- Stop process: `kill {}`",
|
||||
info.name,
|
||||
info.pid,
|
||||
info.log_file.display(),
|
||||
info.working_dir.display(),
|
||||
info.log_file.display(),
|
||||
info.log_file.display(),
|
||||
info.pid,
|
||||
info.pid
|
||||
)),
|
||||
Err(e) => Ok(format!("❌ Failed to start background process: {}", e)),
|
||||
}
|
||||
}
|
||||
195
crates/g3-core/src/tools/todo.rs
Normal file
195
crates/g3-core/src/tools/todo.rs
Normal file
@@ -0,0 +1,195 @@
|
||||
//! TODO list management tools.
|
||||
|
||||
use anyhow::Result;
|
||||
use std::io::Write;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::ToolCall;
|
||||
|
||||
use super::executor::ToolContext;
|
||||
|
||||
/// Execute the `todo_read` tool.
|
||||
pub async fn execute_todo_read<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &mut ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing todo_read tool call");
|
||||
let _ = tool_call; // unused but kept for consistency
|
||||
|
||||
let todo_path = ctx.get_todo_path();
|
||||
|
||||
if !todo_path.exists() {
|
||||
// Also update in-memory content to stay in sync
|
||||
let mut todo = ctx.todo_content.write().await;
|
||||
*todo = String::new();
|
||||
return Ok("📝 TODO list is empty (no todo.g3.md file found)".to_string());
|
||||
}
|
||||
|
||||
match std::fs::read_to_string(&todo_path) {
|
||||
Ok(content) => {
|
||||
// Update in-memory content to stay in sync
|
||||
let mut todo = ctx.todo_content.write().await;
|
||||
*todo = content.clone();
|
||||
|
||||
// Check for staleness if enabled and we have a requirements SHA
|
||||
if ctx.config.agent.check_todo_staleness {
|
||||
if let Some(req_sha) = ctx.requirements_sha {
|
||||
if let Some(staleness_result) = check_todo_staleness(&content, req_sha, ctx.ui_writer) {
|
||||
return Ok(staleness_result);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if content.trim().is_empty() {
|
||||
Ok("📝 TODO list is empty".to_string())
|
||||
} else {
|
||||
for line in content.lines() {
|
||||
ctx.ui_writer.print_tool_output_line(line);
|
||||
}
|
||||
Ok(format!("📝 TODO list:\n{}", content))
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to read TODO.md: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `todo_write` tool.
|
||||
pub async fn execute_todo_write<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &mut ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing todo_write tool call");
|
||||
|
||||
let content_str = match tool_call.args.get("content").and_then(|v| v.as_str()) {
|
||||
Some(c) => c,
|
||||
None => return Ok("❌ Missing content argument".to_string()),
|
||||
};
|
||||
|
||||
let char_count = content_str.chars().count();
|
||||
let max_chars = std::env::var("G3_TODO_MAX_CHARS")
|
||||
.ok()
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(50_000);
|
||||
|
||||
if max_chars > 0 && char_count > max_chars {
|
||||
return Ok(format!(
|
||||
"❌ TODO list too large: {} chars (max: {})",
|
||||
char_count, max_chars
|
||||
));
|
||||
}
|
||||
|
||||
// Check if all todos are completed (all checkboxes are checked)
|
||||
let has_incomplete = content_str
|
||||
.lines()
|
||||
.any(|line| line.trim().starts_with("- [ ]"));
|
||||
|
||||
// If all todos are complete, delete the file instead of writing
|
||||
// EXCEPT in planner mode (G3_TODO_PATH is set) - preserve for rename to completed_todo_*.md
|
||||
let in_planner_mode = std::env::var("G3_TODO_PATH").is_ok();
|
||||
let todo_path = ctx.get_todo_path();
|
||||
|
||||
if !in_planner_mode
|
||||
&& !has_incomplete
|
||||
&& (content_str.contains("- [x]") || content_str.contains("- [X]"))
|
||||
{
|
||||
if todo_path.exists() {
|
||||
match std::fs::remove_file(&todo_path) {
|
||||
Ok(_) => {
|
||||
let mut todo = ctx.todo_content.write().await;
|
||||
*todo = String::new();
|
||||
// Show the final completed TODOs before deletion
|
||||
let mut result =
|
||||
String::from("✅ All TODOs completed! Removed todo.g3.md\n\nFinal status:\n");
|
||||
for line in content_str.lines() {
|
||||
ctx.ui_writer.print_tool_output_line(line);
|
||||
result.push_str(line);
|
||||
result.push('\n');
|
||||
}
|
||||
return Ok(result);
|
||||
}
|
||||
Err(e) => return Ok(format!("❌ Failed to remove todo.g3.md: {}", e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
match std::fs::write(&todo_path, content_str) {
|
||||
Ok(_) => {
|
||||
// Also update in-memory content to stay in sync
|
||||
let mut todo = ctx.todo_content.write().await;
|
||||
*todo = content_str.to_string();
|
||||
// Print the TODO content to the console (inside the tool frame)
|
||||
for line in content_str.lines() {
|
||||
ctx.ui_writer.print_tool_output_line(line);
|
||||
}
|
||||
Ok(format!(
|
||||
"✅ TODO list updated ({} chars) and saved to todo.g3.md:\n{}",
|
||||
char_count, content_str
|
||||
))
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to write todo.g3.md: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the TODO list is stale (generated from a different requirements file).
|
||||
/// Returns Some(message) if staleness was detected and handled, None otherwise.
|
||||
fn check_todo_staleness<W: UiWriter>(
|
||||
content: &str,
|
||||
req_sha: &str,
|
||||
ui_writer: &W,
|
||||
) -> Option<String> {
|
||||
// Parse the first line for the SHA header
|
||||
let first_line = content.lines().next()?;
|
||||
|
||||
if !first_line.starts_with("{{Based on the requirements file with SHA256:") {
|
||||
return None;
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = first_line.split("SHA256:").collect();
|
||||
if parts.len() <= 1 {
|
||||
return None;
|
||||
}
|
||||
|
||||
let todo_sha = parts[1].trim().trim_end_matches("}}").trim();
|
||||
if todo_sha == req_sha {
|
||||
return None;
|
||||
}
|
||||
|
||||
let warning = format!(
|
||||
"⚠️ TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA: {}",
|
||||
req_sha, todo_sha
|
||||
);
|
||||
ui_writer.print_context_status(&warning);
|
||||
|
||||
// Beep 6 times
|
||||
print!("\x07\x07\x07\x07\x07\x07");
|
||||
let _ = std::io::stdout().flush();
|
||||
|
||||
let options = [
|
||||
"Ignore and Continue",
|
||||
"Mark as Stale",
|
||||
"Quit Application",
|
||||
];
|
||||
let choice = ui_writer.prompt_user_choice(
|
||||
"Requirements have changed! What would you like to do?",
|
||||
&options,
|
||||
);
|
||||
|
||||
match choice {
|
||||
0 => {
|
||||
// Ignore and Continue
|
||||
ui_writer.print_context_status("⚠️ Ignoring staleness warning.");
|
||||
None
|
||||
}
|
||||
1 => {
|
||||
// Mark as Stale
|
||||
Some("⚠️ TODO list is stale (requirements changed). Please regenerate the TODO list to match the new requirements.".to_string())
|
||||
}
|
||||
2 => {
|
||||
// Quit Application
|
||||
ui_writer.print_context_status("❌ Quitting application as requested.");
|
||||
std::process::exit(0);
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
275
crates/g3-core/src/tools/vision.rs
Normal file
275
crates/g3-core/src/tools/vision.rs
Normal file
@@ -0,0 +1,275 @@
|
||||
//! Vision-based tools: vision_find_text, vision_click_text, vision_click_near_text, extract_text_with_boxes.
|
||||
|
||||
use anyhow::Result;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::ToolCall;
|
||||
|
||||
use super::executor::ToolContext;
|
||||
|
||||
/// Execute the `vision_find_text` tool.
|
||||
pub async fn execute_vision_find_text<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing vision_find_text tool call");
|
||||
|
||||
let controller = match ctx.computer_controller {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Ok(
|
||||
"❌ Computer control not enabled. Set computer_control.enabled = true in config."
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let app_name = tool_call
|
||||
.args
|
||||
.get("app_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?;
|
||||
|
||||
let text = tool_call
|
||||
.args
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
|
||||
|
||||
match controller.find_text_in_app(app_name, text).await {
|
||||
Ok(Some(location)) => Ok(format!(
|
||||
"✅ Found '{}' in {} at position ({}, {}) with size {}x{} (confidence: {:.0}%)",
|
||||
location.text,
|
||||
app_name,
|
||||
location.x,
|
||||
location.y,
|
||||
location.width,
|
||||
location.height,
|
||||
location.confidence * 100.0
|
||||
)),
|
||||
Ok(None) => Ok(format!("❌ Could not find '{}' in {}", text, app_name)),
|
||||
Err(e) => Ok(format!("❌ Error finding text: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `vision_click_text` tool.
|
||||
pub async fn execute_vision_click_text<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing vision_click_text tool call");
|
||||
|
||||
let controller = match ctx.computer_controller {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Ok(
|
||||
"❌ Computer control not enabled. Set computer_control.enabled = true in config."
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let app_name = tool_call
|
||||
.args
|
||||
.get("app_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?;
|
||||
|
||||
let text = tool_call
|
||||
.args
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
|
||||
|
||||
match controller.find_text_in_app(app_name, text).await {
|
||||
Ok(Some(location)) => {
|
||||
// Click on center of text
|
||||
// IMPORTANT: location coordinates are in NSScreen space (Y=0 at BOTTOM, increases UPWARD)
|
||||
// location.x is the LEFT edge of the bounding box
|
||||
// location.y is the TOP edge of the bounding box (highest Y value in NSScreen space)
|
||||
// location.width and location.height are already scaled to screen space
|
||||
// To get center: we need to add half the SCALED width and subtract half the SCALED height
|
||||
|
||||
if location.width == 0 || location.height == 0 {
|
||||
return Ok(format!(
|
||||
"❌ Invalid bounding box dimensions: width={}, height={}",
|
||||
location.width, location.height
|
||||
));
|
||||
}
|
||||
|
||||
debug!(
|
||||
"[vision_click_text] Location from find_text_in_app: x={}, y={}, width={}, height={}, text='{}'",
|
||||
location.x, location.y, location.width, location.height, location.text
|
||||
);
|
||||
|
||||
// Calculate center using the SCALED dimensions
|
||||
// X: Use right edge instead of center (Vision OCR bounding box seems offset)
|
||||
// This gives us: left edge + full width = right edge
|
||||
// Y: top edge - half of scaled height (subtract because Y increases upward)
|
||||
let click_x = location.x + location.width; // Right edge
|
||||
let half_height = location.height / 2;
|
||||
let click_y = location.y - half_height;
|
||||
|
||||
debug!(
|
||||
"[vision_click_text] Click position calculation: x={} + {} = {} (right edge), y={} - {} = {}",
|
||||
location.x, location.width, click_x, location.y, half_height, click_y
|
||||
);
|
||||
|
||||
match controller.click_at(click_x, click_y, Some(app_name)) {
|
||||
Ok(_) => Ok(format!(
|
||||
"✅ Clicked on '{}' in {} at ({}, {})",
|
||||
text, app_name, click_x, click_y
|
||||
)),
|
||||
Err(e) => Ok(format!("❌ Failed to click: {}", e)),
|
||||
}
|
||||
}
|
||||
Ok(None) => Ok(format!("❌ Could not find '{}' in {}", text, app_name)),
|
||||
Err(e) => Ok(format!("❌ Error finding text: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `vision_click_near_text` tool.
|
||||
pub async fn execute_vision_click_near_text<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing vision_click_near_text tool call");
|
||||
|
||||
let controller = match ctx.computer_controller {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Ok(
|
||||
"❌ Computer control not enabled. Set computer_control.enabled = true in config."
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let app_name = tool_call
|
||||
.args
|
||||
.get("app_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing app_name parameter"))?;
|
||||
|
||||
let text = tool_call
|
||||
.args
|
||||
.get("text")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing text parameter"))?;
|
||||
|
||||
let direction = tool_call
|
||||
.args
|
||||
.get("direction")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("right");
|
||||
|
||||
let distance = tool_call
|
||||
.args
|
||||
.get("distance")
|
||||
.and_then(|v| v.as_i64())
|
||||
.unwrap_or(50) as i32;
|
||||
|
||||
match controller.find_text_in_app(app_name, text).await {
|
||||
Ok(Some(location)) => {
|
||||
// Calculate click position based on direction
|
||||
// location.x is LEFT edge, location.y is TOP edge (in NSScreen space)
|
||||
let (click_x, click_y) = match direction {
|
||||
"right" => (
|
||||
location.x + location.width + distance,
|
||||
location.y - (location.height / 2),
|
||||
),
|
||||
"below" => (
|
||||
location.x + (location.width / 2),
|
||||
location.y - location.height - distance,
|
||||
),
|
||||
"left" => (location.x - distance, location.y - (location.height / 2)),
|
||||
"above" => (location.x + (location.width / 2), location.y + distance),
|
||||
_ => (
|
||||
location.x + location.width + distance,
|
||||
location.y - (location.height / 2),
|
||||
),
|
||||
};
|
||||
debug!(
|
||||
"[vision_click_near_text] Clicking {} of text at ({}, {})",
|
||||
direction, click_x, click_y
|
||||
);
|
||||
|
||||
match controller.click_at(click_x, click_y, Some(app_name)) {
|
||||
Ok(_) => Ok(format!(
|
||||
"✅ Clicked {} of '{}' in {} at ({}, {})",
|
||||
direction, text, app_name, click_x, click_y
|
||||
)),
|
||||
Err(e) => Ok(format!("❌ Failed to click: {}", e)),
|
||||
}
|
||||
}
|
||||
Ok(None) => Ok(format!("❌ Could not find '{}' in {}", text, app_name)),
|
||||
Err(e) => Ok(format!("❌ Error finding text: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `extract_text_with_boxes` tool.
|
||||
pub async fn execute_extract_text_with_boxes<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing extract_text_with_boxes tool call");
|
||||
|
||||
if !ctx.config.macax.enabled {
|
||||
return Ok(
|
||||
"❌ extract_text_with_boxes requires --macax flag to be enabled".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
let controller = match ctx.computer_controller {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
return Ok(
|
||||
"❌ Computer control not enabled. Set computer_control.enabled = true in config."
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
let path = tool_call
|
||||
.args
|
||||
.get("path")
|
||||
.and_then(|v| v.as_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing path parameter"))?;
|
||||
|
||||
// Optional: take screenshot of app first
|
||||
let final_path = if let Some(app_name) = tool_call.args.get("app_name").and_then(|v| v.as_str())
|
||||
{
|
||||
let temp_path = format!("/tmp/g3_extract_boxes_{}.png", uuid::Uuid::new_v4());
|
||||
match controller
|
||||
.take_screenshot(&temp_path, None, Some(app_name))
|
||||
.await
|
||||
{
|
||||
Ok(_) => temp_path,
|
||||
Err(e) => return Ok(format!("❌ Failed to take screenshot: {}", e)),
|
||||
}
|
||||
} else {
|
||||
path.to_string()
|
||||
};
|
||||
|
||||
// Extract text with locations
|
||||
match controller.extract_text_with_locations(&final_path).await {
|
||||
Ok(locations) => {
|
||||
// Clean up temp file if we created one
|
||||
if final_path != path {
|
||||
let _ = std::fs::remove_file(&final_path);
|
||||
}
|
||||
|
||||
// Return as JSON
|
||||
match serde_json::to_string_pretty(&locations) {
|
||||
Ok(json) => Ok(format!(
|
||||
"✅ Extracted {} text elements:\n{}",
|
||||
locations.len(),
|
||||
json
|
||||
)),
|
||||
Err(e) => Ok(format!("❌ Failed to serialize results: {}", e)),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to extract text: {}", e)),
|
||||
}
|
||||
}
|
||||
678
crates/g3-core/src/tools/webdriver.rs
Normal file
678
crates/g3-core/src/tools/webdriver.rs
Normal file
@@ -0,0 +1,678 @@
|
||||
//! WebDriver browser automation tools.
|
||||
|
||||
use anyhow::Result;
|
||||
use g3_computer_control::WebDriverController;
|
||||
use tracing::{debug, warn};
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::webdriver_session::WebDriverSession;
|
||||
use crate::ToolCall;
|
||||
|
||||
use super::executor::ToolContext;
|
||||
|
||||
/// Execute the `webdriver_start` tool.
|
||||
pub async fn execute_webdriver_start<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_start tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
// Check if session already exists
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
if session_guard.is_some() {
|
||||
drop(session_guard);
|
||||
return Ok("✅ WebDriver session already active".to_string());
|
||||
}
|
||||
drop(session_guard);
|
||||
|
||||
// Determine which browser to use based on config
|
||||
use g3_config::WebDriverBrowser;
|
||||
match &ctx.config.webdriver.browser {
|
||||
WebDriverBrowser::Safari => start_safari_driver(ctx).await,
|
||||
WebDriverBrowser::ChromeHeadless => start_chrome_driver(ctx).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn start_safari_driver<W: UiWriter>(ctx: &ToolContext<'_, W>) -> Result<String> {
|
||||
let port = ctx.config.webdriver.safari_port;
|
||||
|
||||
let driver_result = tokio::process::Command::new("safaridriver")
|
||||
.arg("--port")
|
||||
.arg(port.to_string())
|
||||
.stdout(std::process::Stdio::null())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.spawn();
|
||||
|
||||
let mut webdriver_process = match driver_result {
|
||||
Ok(process) => process,
|
||||
Err(e) => {
|
||||
return Ok(format!(
|
||||
"❌ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.",
|
||||
e
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// Wait for safaridriver to start up
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
|
||||
|
||||
// Connect to SafariDriver
|
||||
match g3_computer_control::SafariDriver::with_port(port).await {
|
||||
Ok(driver) => {
|
||||
let session =
|
||||
std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Safari(driver)));
|
||||
*ctx.webdriver_session.write().await = Some(session);
|
||||
*ctx.webdriver_process.write().await = Some(webdriver_process);
|
||||
|
||||
Ok(
|
||||
"✅ WebDriver session started successfully! Safari should open automatically."
|
||||
.to_string(),
|
||||
)
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = webdriver_process.kill().await;
|
||||
Ok(format!(
|
||||
"❌ Failed to connect to SafariDriver: {}\n\n\
|
||||
This might be because:\n \
|
||||
- Safari Remote Automation is not enabled (run: safaridriver --enable)\n \
|
||||
- Port {} is already in use\n \
|
||||
- Safari failed to start\n \
|
||||
- Network connectivity issue\n\n\
|
||||
To enable Remote Automation:\n \
|
||||
1. Run: safaridriver --enable (requires password, one-time setup)\n \
|
||||
2. Or manually: Safari → Develop → Allow Remote Automation",
|
||||
e, port
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn start_chrome_driver<W: UiWriter>(ctx: &ToolContext<'_, W>) -> Result<String> {
|
||||
let port = ctx.config.webdriver.chrome_port;
|
||||
|
||||
// Start chromedriver process
|
||||
let driver_result = tokio::process::Command::new("chromedriver")
|
||||
.arg(format!("--port={}", port))
|
||||
.stdout(std::process::Stdio::null())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.spawn();
|
||||
|
||||
let mut webdriver_process = match driver_result {
|
||||
Ok(process) => process,
|
||||
Err(e) => {
|
||||
return Ok(format!(
|
||||
"❌ Failed to start chromedriver: {}\n\n\
|
||||
Make sure chromedriver is installed and in your PATH.\n\n\
|
||||
Install with:\n \
|
||||
- macOS: brew install chromedriver\n \
|
||||
- Linux: apt install chromium-chromedriver\n \
|
||||
- Or download from: https://chromedriver.chromium.org/downloads",
|
||||
e
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// Wait for chromedriver to be ready with retry loop
|
||||
let max_retries = 10;
|
||||
let mut last_error = None;
|
||||
|
||||
for attempt in 0..max_retries {
|
||||
// Wait before each attempt (200ms between retries, total max ~2s)
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(200)).await;
|
||||
|
||||
// Try to connect to ChromeDriver in headless mode (with optional custom binary)
|
||||
let driver_result = match &ctx.config.webdriver.chrome_binary {
|
||||
Some(binary) => {
|
||||
g3_computer_control::ChromeDriver::with_port_headless_and_binary(port, Some(binary))
|
||||
.await
|
||||
}
|
||||
None => g3_computer_control::ChromeDriver::with_port_headless(port).await,
|
||||
};
|
||||
|
||||
match driver_result {
|
||||
Ok(driver) => {
|
||||
let session =
|
||||
std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Chrome(driver)));
|
||||
*ctx.webdriver_session.write().await = Some(session);
|
||||
*ctx.webdriver_process.write().await = Some(webdriver_process);
|
||||
|
||||
return Ok(
|
||||
"✅ WebDriver session started successfully! Chrome is running in headless mode (no visible window)."
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
Err(e) => {
|
||||
last_error = Some(e);
|
||||
if attempt < max_retries - 1 {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// All retries failed
|
||||
let _ = webdriver_process.kill().await;
|
||||
let error_msg = last_error
|
||||
.map(|e| e.to_string())
|
||||
.unwrap_or_else(|| "Unknown error".to_string());
|
||||
Ok(format!(
|
||||
"❌ Failed to connect to ChromeDriver after {} attempts: {}\n\n\
|
||||
This might be because:\n \
|
||||
- Chrome is not installed\n \
|
||||
- ChromeDriver version doesn't match Chrome version\n \
|
||||
- Port {} is already in use\n\n\
|
||||
Make sure Chrome and ChromeDriver are installed and compatible.",
|
||||
max_retries, error_msg, port
|
||||
))
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_navigate` tool.
|
||||
pub async fn execute_webdriver_navigate<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_navigate tool call");
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
drop(session_guard);
|
||||
|
||||
let url = match tool_call.args.get("url").and_then(|v| v.as_str()) {
|
||||
Some(u) => u,
|
||||
None => return Ok("❌ Missing url argument".to_string()),
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.navigate(url).await {
|
||||
Ok(_) => Ok(format!("✅ Navigated to {}", url)),
|
||||
Err(e) => Ok(format!("❌ Failed to navigate: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_get_url` tool.
|
||||
pub async fn execute_webdriver_get_url<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_get_url tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let driver = session.lock().await;
|
||||
match driver.current_url().await {
|
||||
Ok(url) => Ok(format!("Current URL: {}", url)),
|
||||
Err(e) => Ok(format!("❌ Failed to get URL: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_get_title` tool.
|
||||
pub async fn execute_webdriver_get_title<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_get_title tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let driver = session.lock().await;
|
||||
match driver.title().await {
|
||||
Ok(title) => Ok(format!("Page title: {}", title)),
|
||||
Err(e) => Ok(format!("❌ Failed to get title: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_find_element` tool.
|
||||
pub async fn execute_webdriver_find_element<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_find_element tool call");
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => return Ok("❌ Missing selector argument".to_string()),
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.find_element(selector).await {
|
||||
Ok(elem) => match elem.text().await {
|
||||
Ok(text) => Ok(format!("Element text: {}", text)),
|
||||
Err(e) => Ok(format!("❌ Failed to get element text: {}", e)),
|
||||
},
|
||||
Err(e) => Ok(format!("❌ Failed to find element '{}': {}", selector, e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_find_elements` tool.
|
||||
pub async fn execute_webdriver_find_elements<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_find_elements tool call");
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => return Ok("❌ Missing selector argument".to_string()),
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.find_elements(selector).await {
|
||||
Ok(elements) => {
|
||||
let mut results = Vec::new();
|
||||
for (i, elem) in elements.iter().enumerate() {
|
||||
match elem.text().await {
|
||||
Ok(text) => results.push(format!("[{}]: {}", i, text)),
|
||||
Err(_) => results.push(format!("[{}]: <error getting text>", i)),
|
||||
}
|
||||
}
|
||||
Ok(format!(
|
||||
"Found {} elements:\n{}",
|
||||
results.len(),
|
||||
results.join("\n")
|
||||
))
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to find elements '{}': {}", selector, e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_click` tool.
|
||||
pub async fn execute_webdriver_click<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_click tool call");
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => return Ok("❌ Missing selector argument".to_string()),
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.find_element(selector).await {
|
||||
Ok(mut elem) => match elem.click().await {
|
||||
Ok(_) => Ok(format!("✅ Clicked element '{}'", selector)),
|
||||
Err(e) => Ok(format!("❌ Failed to click element: {}", e)),
|
||||
},
|
||||
Err(e) => Ok(format!("❌ Failed to find element '{}': {}", selector, e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_send_keys` tool.
|
||||
pub async fn execute_webdriver_send_keys<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_send_keys tool call");
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let selector = match tool_call.args.get("selector").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => return Ok("❌ Missing selector argument".to_string()),
|
||||
};
|
||||
|
||||
let text = match tool_call.args.get("text").and_then(|v| v.as_str()) {
|
||||
Some(t) => t,
|
||||
None => return Ok("❌ Missing text argument".to_string()),
|
||||
};
|
||||
|
||||
let clear_first = tool_call
|
||||
.args
|
||||
.get("clear_first")
|
||||
.and_then(|v| v.as_bool())
|
||||
.unwrap_or(true);
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.find_element(selector).await {
|
||||
Ok(mut elem) => {
|
||||
if clear_first {
|
||||
if let Err(e) = elem.clear().await {
|
||||
return Ok(format!("❌ Failed to clear element: {}", e));
|
||||
}
|
||||
}
|
||||
match elem.send_keys(text).await {
|
||||
Ok(_) => Ok(format!("✅ Sent keys to element '{}'", selector)),
|
||||
Err(e) => Ok(format!("❌ Failed to send keys: {}", e)),
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to find element '{}': {}", selector, e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_execute_script` tool.
|
||||
pub async fn execute_webdriver_execute_script<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_execute_script tool call");
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let script = match tool_call.args.get("script").and_then(|v| v.as_str()) {
|
||||
Some(s) => s,
|
||||
None => return Ok("❌ Missing script argument".to_string()),
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.execute_script(script, vec![]).await {
|
||||
Ok(result) => Ok(format!("Script result: {:?}", result)),
|
||||
Err(e) => Ok(format!("❌ Failed to execute script: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_get_page_source` tool.
|
||||
pub async fn execute_webdriver_get_page_source<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_get_page_source tool call");
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
// Extract optional parameters
|
||||
let max_length = tool_call
|
||||
.args
|
||||
.get("max_length")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|n| n as usize)
|
||||
.unwrap_or(10000);
|
||||
|
||||
let save_to_file = tool_call.args.get("save_to_file").and_then(|v| v.as_str());
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let driver = session.lock().await;
|
||||
match driver.page_source().await {
|
||||
Ok(source) => {
|
||||
// If save_to_file is specified, write to file
|
||||
if let Some(file_path) = save_to_file {
|
||||
let expanded_path = shellexpand::tilde(file_path);
|
||||
let path_str = expanded_path.as_ref();
|
||||
|
||||
// Create parent directories if needed
|
||||
if let Some(parent) = std::path::Path::new(path_str).parent() {
|
||||
if let Err(e) = std::fs::create_dir_all(parent) {
|
||||
return Ok(format!("❌ Failed to create directories: {}", e));
|
||||
}
|
||||
}
|
||||
|
||||
match std::fs::write(path_str, &source) {
|
||||
Ok(_) => Ok(format!(
|
||||
"✅ Page source ({} chars) saved to: {}",
|
||||
source.len(),
|
||||
path_str
|
||||
)),
|
||||
Err(e) => Ok(format!("❌ Failed to write file: {}", e)),
|
||||
}
|
||||
} else if max_length > 0 && source.len() > max_length {
|
||||
// Truncate if max_length is set and source exceeds it
|
||||
Ok(format!(
|
||||
"Page source ({} chars, truncated to {}):\n{}...",
|
||||
source.len(),
|
||||
max_length,
|
||||
&source[..max_length]
|
||||
))
|
||||
} else {
|
||||
// Return full source
|
||||
Ok(format!("Page source ({} chars):\n{}", source.len(), source))
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to get page source: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_screenshot` tool.
|
||||
pub async fn execute_webdriver_screenshot<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_screenshot tool call");
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let path = match tool_call.args.get("path").and_then(|v| v.as_str()) {
|
||||
Some(p) => p,
|
||||
None => return Ok("❌ Missing path argument".to_string()),
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.screenshot(path).await {
|
||||
Ok(_) => Ok(format!("✅ Screenshot saved to {}", path)),
|
||||
Err(e) => Ok(format!("❌ Failed to take screenshot: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_back` tool.
|
||||
pub async fn execute_webdriver_back<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_back tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.back().await {
|
||||
Ok(_) => Ok("✅ Navigated back".to_string()),
|
||||
Err(e) => Ok(format!("❌ Failed to navigate back: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_forward` tool.
|
||||
pub async fn execute_webdriver_forward<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_forward tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.forward().await {
|
||||
Ok(_) => Ok("✅ Navigated forward".to_string()),
|
||||
Err(e) => Ok(format!("❌ Failed to navigate forward: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_refresh` tool.
|
||||
pub async fn execute_webdriver_refresh<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_refresh tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
let session_guard = ctx.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
None => {
|
||||
return Ok("❌ No active WebDriver session. Call webdriver_start first.".to_string())
|
||||
}
|
||||
};
|
||||
|
||||
let mut driver = session.lock().await;
|
||||
match driver.refresh().await {
|
||||
Ok(_) => Ok("✅ Page refreshed".to_string()),
|
||||
Err(e) => Ok(format!("❌ Failed to refresh page: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute the `webdriver_quit` tool.
|
||||
pub async fn execute_webdriver_quit<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing webdriver_quit tool call");
|
||||
let _ = tool_call; // unused
|
||||
|
||||
if !ctx.config.webdriver.enabled {
|
||||
return Ok("❌ WebDriver is not enabled. Use --webdriver flag to enable.".to_string());
|
||||
}
|
||||
|
||||
// Take the session
|
||||
let session = match ctx.webdriver_session.write().await.take() {
|
||||
Some(s) => s.clone(),
|
||||
None => return Ok("❌ No active WebDriver session.".to_string()),
|
||||
};
|
||||
|
||||
// Quit the WebDriver session
|
||||
match std::sync::Arc::try_unwrap(session) {
|
||||
Ok(mutex) => {
|
||||
let driver = mutex.into_inner();
|
||||
match driver.quit().await {
|
||||
Ok(_) => {
|
||||
debug!("WebDriver session closed successfully");
|
||||
|
||||
// Kill the safaridriver process
|
||||
if let Some(mut process) = ctx.webdriver_process.write().await.take() {
|
||||
if let Err(e) = process.kill().await {
|
||||
warn!("Failed to kill safaridriver process: {}", e);
|
||||
} else {
|
||||
debug!("Safaridriver process terminated");
|
||||
}
|
||||
}
|
||||
|
||||
Ok("✅ WebDriver session closed and safaridriver stopped".to_string())
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Failed to quit WebDriver: {}", e)),
|
||||
}
|
||||
}
|
||||
Err(_) => Ok("❌ Cannot quit: WebDriver session is still in use".to_string()),
|
||||
}
|
||||
}
|
||||
@@ -189,3 +189,39 @@ fn test_error_message_content() {
|
||||
assert!(warning.contains("10000"));
|
||||
assert!(warning.contains("Context reduction needed"));
|
||||
}
|
||||
|
||||
/// Test that the SUMMARY_MIN_TOKENS floor prevents max_tokens=0 errors.
/// Walks through the token-budget arithmetic for a 98%-full context window
/// and checks the floor survives every step.
#[test]
fn test_summary_min_tokens_floor_prevents_zero() {
    // Local copy of the SUMMARY_MIN_TOKENS value (1000)
    let floor = 1000u32;

    // Extremely full context: 196000 of 200000 tokens used (98%)
    let mut context = ContextWindow::new(200000);
    context.used_tokens = 196000;

    let total = context.total_tokens;
    let used = context.used_tokens;
    let buffer = (total / 40).clamp(1000, 10000); // 5000

    // Remaining budget before the floor: 4000 left minus a 5000 buffer
    // saturates at 0.
    let unfloored = total.saturating_sub(used).saturating_sub(buffer);
    assert_eq!(unfloored, 0, "Without floor, available should be 0");

    // Applying the floor lifts the budget to SUMMARY_MIN_TOKENS
    let floored = unfloored.max(floor);
    assert_eq!(floored, 1000, "With floor, available should be 1000");

    // Provider caps use .min(), which cannot push the value below the floor
    let capped = floored.min(10_000);
    assert_eq!(capped, 1000, "After cap, should still be 1000");

    // The final defense-in-depth .max() keeps it at or above the floor
    let final_value = capped.max(floor);
    assert!(final_value >= 1, "Final value must be >= 1 for API");
    assert_eq!(final_value, 1000, "Final value should be exactly 1000");
}
|
||||
|
||||
@@ -31,7 +31,7 @@ fn teardown_test_env(original_dir: std::path::PathBuf) {
|
||||
#[test]
|
||||
fn test_session_continuation_creation() {
|
||||
// This test doesn't need file system access
|
||||
let continuation = SessionContinuation::new(
|
||||
let continuation = SessionContinuation::new(false, None,
|
||||
"test_session_123".to_string(),
|
||||
Some("Task completed successfully".to_string()),
|
||||
"/path/to/session.json".to_string(),
|
||||
@@ -63,7 +63,7 @@ fn test_can_restore_full_context_threshold() {
|
||||
];
|
||||
|
||||
for (percentage, expected) in test_cases {
|
||||
let continuation = SessionContinuation::new(
|
||||
let continuation = SessionContinuation::new(false, None,
|
||||
"test".to_string(),
|
||||
None,
|
||||
"path".to_string(),
|
||||
@@ -85,7 +85,7 @@ fn test_save_and_load_continuation() {
|
||||
let _lock = TEST_MUTEX.lock().unwrap();
|
||||
let (temp_dir, original_dir) = setup_test_env();
|
||||
|
||||
let original = SessionContinuation::new(
|
||||
let original = SessionContinuation::new(false, None,
|
||||
"save_load_test".to_string(),
|
||||
Some("Test summary content".to_string()),
|
||||
"/logs/g3_session_save_load_test.json".to_string(),
|
||||
@@ -117,10 +117,111 @@ fn test_save_and_load_continuation() {
|
||||
teardown_test_env(original_dir);
|
||||
}
|
||||
|
||||
#[test]
fn test_find_incomplete_agent_session() {
    use g3_core::session_continuation::find_incomplete_agent_session;

    let _lock = TEST_MUTEX.lock().unwrap();
    // The temp dir binding is held only to keep the value alive for the test.
    let (_temp_dir, original_dir) = setup_test_env();

    // Working directory as seen after setup_test_env()
    let cwd = std::env::current_dir()
        .map(|dir| dir.to_string_lossy().to_string())
        .unwrap_or_default();

    // Agent-mode session for "fowler" with one unchecked TODO item
    let todo = "- [x] Done\n- [ ] Not done yet".to_string();
    let fowler_session = SessionContinuation::new(
        true,
        Some("fowler".to_string()),
        "fowler_session_1".to_string(),
        Some("Working on task".to_string()),
        "/path/to/session.json".to_string(),
        50.0,
        Some(todo),
        cwd,
    );
    save_continuation(&fowler_session).expect("Failed to save agent session");

    // The matching agent name finds the session
    let fowler_lookup = find_incomplete_agent_session("fowler").expect("Failed to search");
    assert!(
        fowler_lookup.is_some(),
        "Should find incomplete fowler session"
    );
    let found = fowler_lookup.unwrap();
    assert_eq!(found.session_id, "fowler_session_1");
    assert_eq!(found.agent_name, Some("fowler".to_string()));

    // A different agent name finds nothing
    let pike_lookup = find_incomplete_agent_session("pike").expect("Failed to search");
    assert!(pike_lookup.is_none(), "Should not find session for pike");

    teardown_test_env(original_dir);
}
|
||||
|
||||
#[test]
fn test_find_incomplete_agent_session_ignores_complete_todos() {
    use g3_core::session_continuation::find_incomplete_agent_session;

    let _lock = TEST_MUTEX.lock().unwrap();
    // The temp dir binding is held only to keep the value alive for the test.
    let (_temp_dir, original_dir) = setup_test_env();

    let cwd = std::env::current_dir()
        .map(|dir| dir.to_string_lossy().to_string())
        .unwrap_or_default();

    // Agent-mode session whose TODO items are all checked off
    let todo = "- [x] Task 1\n- [x] Task 2".to_string();
    let finished_session = SessionContinuation::new(
        true,
        Some("fowler".to_string()),
        "fowler_complete".to_string(),
        Some("All done".to_string()),
        "/path/to/session.json".to_string(),
        50.0,
        Some(todo),
        cwd,
    );
    save_continuation(&finished_session).expect("Failed to save");

    // Nothing is left to resume, so the lookup comes back empty
    let lookup = find_incomplete_agent_session("fowler").expect("Failed to search");
    assert!(
        lookup.is_none(),
        "Should not find session with complete TODOs"
    );

    teardown_test_env(original_dir);
}
|
||||
|
||||
#[test]
fn test_find_incomplete_agent_session_ignores_non_agent_mode() {
    use g3_core::session_continuation::find_incomplete_agent_session;

    let _lock = TEST_MUTEX.lock().unwrap();
    // The temp dir binding is held only to keep the value alive for the test.
    let (_temp_dir, original_dir) = setup_test_env();

    let cwd = std::env::current_dir()
        .map(|dir| dir.to_string_lossy().to_string())
        .unwrap_or_default();

    // Regular (non-agent) session that still has an unchecked TODO item
    let todo = "- [ ] Incomplete task".to_string();
    let regular_session = SessionContinuation::new(
        false,
        None,
        "regular_session".to_string(),
        None,
        "/path/to/session.json".to_string(),
        50.0,
        Some(todo),
        cwd,
    );
    save_continuation(&regular_session).expect("Failed to save");

    // Sessions saved outside agent mode are not candidates for resumption
    let lookup = find_incomplete_agent_session("fowler").expect("Failed to search");
    assert!(lookup.is_none(), "Should not find non-agent-mode session");

    teardown_test_env(original_dir);
}
|
||||
|
||||
#[test]
|
||||
fn test_load_continuation_when_none_exists() {
|
||||
let _lock = TEST_MUTEX.lock().unwrap();
|
||||
let (_temp_dir, original_dir) = setup_test_env();
|
||||
let (temp_dir, original_dir) = setup_test_env();
|
||||
|
||||
// No continuation should exist in a fresh temp directory
|
||||
let result = load_continuation().expect("load_continuation should not error");
|
||||
@@ -132,10 +233,10 @@ fn test_load_continuation_when_none_exists() {
|
||||
#[test]
|
||||
fn test_clear_continuation() {
|
||||
let _lock = TEST_MUTEX.lock().unwrap();
|
||||
let (_temp_dir, original_dir) = setup_test_env();
|
||||
let (temp_dir, original_dir) = setup_test_env();
|
||||
|
||||
// Create and save a continuation
|
||||
let continuation = SessionContinuation::new(
|
||||
let continuation = SessionContinuation::new(false, None,
|
||||
"clear_test".to_string(),
|
||||
Some("Will be cleared".to_string()),
|
||||
"/path/to/session.json".to_string(),
|
||||
@@ -187,10 +288,10 @@ fn test_ensure_session_dir_creates_g3_directory() {
|
||||
#[test]
|
||||
fn test_has_valid_continuation_with_missing_session_log() {
|
||||
let _lock = TEST_MUTEX.lock().unwrap();
|
||||
let (_temp_dir, original_dir) = setup_test_env();
|
||||
let (temp_dir, original_dir) = setup_test_env();
|
||||
|
||||
// Create a continuation pointing to a non-existent session log
|
||||
let continuation = SessionContinuation::new(
|
||||
let continuation = SessionContinuation::new(false, None,
|
||||
"invalid_test".to_string(),
|
||||
Some("Summary".to_string()),
|
||||
"/nonexistent/path/session.json".to_string(),
|
||||
@@ -218,7 +319,7 @@ fn test_has_valid_continuation_with_existing_session_log() {
|
||||
fs::write(&session_log_path, "{}").expect("Failed to write session log");
|
||||
|
||||
// Create a continuation pointing to the existing session log
|
||||
let continuation = SessionContinuation::new(
|
||||
let continuation = SessionContinuation::new(false, None,
|
||||
"valid_test".to_string(),
|
||||
Some("Summary".to_string()),
|
||||
session_log_path.to_string_lossy().to_string(),
|
||||
@@ -237,9 +338,9 @@ fn test_has_valid_continuation_with_existing_session_log() {
|
||||
#[test]
|
||||
fn test_continuation_serialization_format() {
|
||||
let _lock = TEST_MUTEX.lock().unwrap();
|
||||
let (_temp_dir, original_dir) = setup_test_env();
|
||||
let (temp_dir, original_dir) = setup_test_env();
|
||||
|
||||
let continuation = SessionContinuation::new(
|
||||
let continuation = SessionContinuation::new(false, None,
|
||||
"format_test".to_string(),
|
||||
Some("Test summary".to_string()),
|
||||
"/path/to/session.json".to_string(),
|
||||
@@ -273,7 +374,7 @@ fn test_multiple_saves_update_symlink() {
|
||||
let (temp_dir, original_dir) = setup_test_env();
|
||||
|
||||
// Save first continuation
|
||||
let first = SessionContinuation::new(
|
||||
let first = SessionContinuation::new(false, None,
|
||||
"first_session".to_string(),
|
||||
Some("First summary".to_string()),
|
||||
"/path/first.json".to_string(),
|
||||
@@ -289,7 +390,7 @@ fn test_multiple_saves_update_symlink() {
|
||||
assert!(first_target.to_string_lossy().contains("first_session"));
|
||||
|
||||
// Save second continuation (should update symlink)
|
||||
let second = SessionContinuation::new(
|
||||
let second = SessionContinuation::new(false, None,
|
||||
"second_session".to_string(),
|
||||
Some("Second summary".to_string()),
|
||||
"/path/second.json".to_string(),
|
||||
@@ -334,7 +435,7 @@ fn test_symlink_migration_from_old_directory() {
|
||||
.expect("Failed to write old latest.json");
|
||||
|
||||
// Save a new continuation - this should migrate the old directory to a symlink
|
||||
let continuation = SessionContinuation::new(
|
||||
let continuation = SessionContinuation::new(false, None,
|
||||
"new_session".to_string(),
|
||||
Some("New summary".to_string()),
|
||||
"/path/to/session.json".to_string(),
|
||||
|
||||
Reference in New Issue
Block a user