diff --git a/Cargo.lock b/Cargo.lock index b3cf969..38b9eb8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,28 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "accessibility" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ac9f33ffc1ef16eddb2451c03c983e56a5182ac760c3f2733da55ba8f48eac4" +dependencies = [ + "accessibility-sys", + "cocoa 0.26.1", + "core-foundation 0.10.1", + "objc", + "thiserror 1.0.69", +] + +[[package]] +name = "accessibility-sys" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46a6a8e90a1d8b96a48249e7c8f5b4058447bea8847280db7bfccb6dcab6b8e1" +dependencies = [ + "core-foundation-sys", +] + [[package]] name = "adler2" version = "2.0.1" @@ -437,9 +459,25 @@ checksum = "f6140449f97a6e97f9511815c5632d84c8aacf8ac271ad77c559218161a1373c" dependencies = [ "bitflags 1.3.2", "block", - "cocoa-foundation", + "cocoa-foundation 0.1.2", "core-foundation 0.9.4", - "core-graphics", + "core-graphics 0.23.2", + "foreign-types 0.5.0", + "libc", + "objc", +] + +[[package]] +name = "cocoa" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad36507aeb7e16159dfe68db81ccc27571c3ccd4b76fb2fb72fc59e7a4b1b64c" +dependencies = [ + "bitflags 2.10.0", + "block", + "cocoa-foundation 0.2.1", + "core-foundation 0.10.1", + "core-graphics 0.24.0", "foreign-types 0.5.0", "libc", "objc", @@ -454,11 +492,24 @@ dependencies = [ "bitflags 1.3.2", "block", "core-foundation 0.9.4", - "core-graphics-types", + "core-graphics-types 0.1.3", "libc", "objc", ] +[[package]] +name = "cocoa-foundation" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81411967c50ee9a1fc11365f8c585f863a22a9697c89239c452292c40ba79b0d" +dependencies = [ + "bitflags 2.10.0", + "block", + "core-foundation 0.10.1", + "core-graphics-types 0.2.0", + "objc", +] + [[package]] name = "color_quant" 
version = "1.1.0" @@ -635,7 +686,20 @@ checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081" dependencies = [ "bitflags 1.3.2", "core-foundation 0.9.4", - "core-graphics-types", + "core-graphics-types 0.1.3", + "foreign-types 0.5.0", + "libc", +] + +[[package]] +name = "core-graphics" +version = "0.24.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa95a34622365fa5bbf40b20b75dba8dfa8c94c734aea8ac9a5ca38af14316f1" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.10.1", + "core-graphics-types 0.2.0", "foreign-types 0.5.0", "libc", ] @@ -651,6 +715,17 @@ dependencies = [ "libc", ] +[[package]] +name = "core-graphics-types" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d44a101f213f6c4cdc1853d4b78aef6db6bdfa3468798cc1d9912f4735013eb" +dependencies = [ + "bitflags 2.10.0", + "core-foundation 0.10.1", + "libc", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -1287,11 +1362,12 @@ dependencies = [ name = "g3-computer-control" version = "0.1.0" dependencies = [ + "accessibility", "anyhow", "async-trait", - "cocoa", - "core-foundation 0.9.4", - "core-graphics", + "cocoa 0.25.0", + "core-foundation 0.10.1", + "core-graphics 0.23.2", "fantoccini", "image", "objc", diff --git a/README.md b/README.md index e3aefc1..8bb807c 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,7 @@ These commands give you fine-grained control over context management, allowing y - **TODO Management**: Read and write TODO lists with markdown checkbox format - **Computer Control** (Experimental): Automate desktop applications - Mouse and keyboard control + - macOS Accessibility API for native app automation (via `--macax` flag) - UI element inspection - Screenshot capture and window management - OCR text extraction from images and screen regions @@ -166,6 +167,19 @@ safaridriver --enable # Requires password **Usage**: Run G3 with the `--webdriver` flag to enable browser 
automation tools. +## macOS Accessibility API Tools + +G3 includes support for controlling macOS applications via the Accessibility API, allowing you to automate native macOS apps. + +**Available Tools**: `macax_list_apps`, `macax_get_frontmost_app`, `macax_activate_app`, `macax_get_ui_tree`, `macax_find_elements`, `macax_click`, `macax_set_value`, `macax_get_value`, `macax_press_key` + +**Setup**: Enable with the `--macax` flag or in config with `macax.enabled = true`. Grant accessibility permissions: +- **macOS**: System Preferences → Security & Privacy → Privacy → Accessibility → Add your terminal app + +**For detailed documentation**, see [macOS Accessibility Tools Guide](docs/macax-tools.md). + +**Note**: This is particularly useful for testing and automating apps you're building with G3, as you can add accessibility identifiers to your UI elements. + ## Computer Control (Experimental) G3 can interact with your computer's GUI for automation tasks: diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index 1382bd5..32000b3 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -239,6 +239,10 @@ pub struct Cli { /// Disable log file creation (no logs/ directory or session logs) #[arg(long)] pub quiet: bool, + + /// Enable macOS Accessibility API tools for native app automation + #[arg(long)] + pub macax: bool, } pub async fn run() -> Result<()> { @@ -433,12 +437,20 @@ Output ONLY the markdown content, no explanations or meta-commentary."#, } // Load configuration with CLI overrides - let config = Config::load_with_overrides( + let mut config = Config::load_with_overrides( cli.config.as_deref(), cli.provider.clone(), cli.model.clone(), )?; + // Apply macax flag override + if cli.macax { + config.macax.enabled = true; + if !cli.retro { + info!("macOS Accessibility API tools enabled"); + } + } + // Validate provider if specified if let Some(ref provider) = cli.provider { let valid_providers = ["anthropic", "databricks", 
"embedded", "openai"]; diff --git a/crates/g3-computer-control/Cargo.toml b/crates/g3-computer-control/Cargo.toml index 9aa522c..4300dc1 100644 --- a/crates/g3-computer-control/Cargo.toml +++ b/crates/g3-computer-control/Cargo.toml @@ -26,9 +26,10 @@ tesseract = "0.14" # macOS dependencies [target.'cfg(target_os = "macos")'.dependencies] core-graphics = "0.23" -core-foundation = "0.9" +core-foundation = "0.10" cocoa = "0.25" objc = "0.2" +accessibility = "0.2" image = "0.24" # Linux dependencies diff --git a/crates/g3-computer-control/examples/macax_demo.rs b/crates/g3-computer-control/examples/macax_demo.rs new file mode 100644 index 0000000..ff1398d --- /dev/null +++ b/crates/g3-computer-control/examples/macax_demo.rs @@ -0,0 +1,74 @@ +//! Example demonstrating macOS Accessibility API tools +//! +//! This example shows how to use the macax tools to control macOS applications. +//! +//! Run with: cargo run --example macax_demo + +use anyhow::Result; +use g3_computer_control::MacAxController; + +#[tokio::main] +async fn main() -> Result<()> { + println!("🍎 macOS Accessibility API Demo\n"); + println!("This demo shows how to control macOS applications using the Accessibility API.\n"); + + // Create controller + let controller = MacAxController::new()?; + println!("βœ… MacAxController initialized\n"); + + // List running applications + println!("πŸ“± Listing running applications:"); + match controller.list_applications() { + Ok(apps) => { + for app in apps.iter().take(10) { + println!(" - {}", app.name); + } + if apps.len() > 10 { + println!(" ... 
and {} more", apps.len() - 10); + } + } + Err(e) => println!(" ❌ Error: {}", e), + } + println!(); + + // Get frontmost app + println!("🎯 Getting frontmost application:"); + match controller.get_frontmost_app() { + Ok(app) => println!(" Current: {}", app.name), + Err(e) => println!(" ❌ Error: {}", e), + } + println!(); + + // Example: Activate Finder and get its UI tree + println!("πŸ“‚ Activating Finder and inspecting UI:"); + match controller.activate_app("Finder") { + Ok(_) => { + println!(" βœ… Finder activated"); + + // Wait a moment for activation + tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; + + // Get UI tree + match controller.get_ui_tree("Finder", 2) { + Ok(tree) => { + println!("\n UI Tree:"); + for line in tree.lines().take(10) { + println!(" {}", line); + } + } + Err(e) => println!(" ❌ Error getting UI tree: {}", e), + } + } + Err(e) => println!(" ❌ Error: {}", e), + } + println!(); + + println!("✨ Demo complete!\n"); + println!("πŸ’‘ Tips:"); + println!(" - Use --macax flag with g3 to enable these tools"); + println!(" - Grant accessibility permissions in System Preferences"); + println!(" - Add accessibility identifiers to your apps for easier automation"); + println!(" - See docs/macax-tools.md for full documentation\n"); + + Ok(()) +} diff --git a/crates/g3-computer-control/examples/test_type_text.rs b/crates/g3-computer-control/examples/test_type_text.rs new file mode 100644 index 0000000..2d1aea0 --- /dev/null +++ b/crates/g3-computer-control/examples/test_type_text.rs @@ -0,0 +1,48 @@ +//! 
Test the new type_text functionality + +use anyhow::Result; +use g3_computer_control::MacAxController; + +#[tokio::main] +async fn main() -> Result<()> { + println!("πŸ§ͺ Testing macax type_text functionality\n"); + + let controller = MacAxController::new()?; + println!("βœ… Controller initialized\n"); + + // Test 1: Type simple text + println!("Test 1: Typing simple text into TextEdit"); + println!(" Please open TextEdit and create a new document..."); + std::thread::sleep(std::time::Duration::from_secs(3)); + + match controller.type_text("TextEdit", "Hello, World!") { + Ok(_) => println!(" βœ… Successfully typed simple text\n"), + Err(e) => println!(" ❌ Failed: {}\n", e), + } + + std::thread::sleep(std::time::Duration::from_secs(1)); + + // Test 2: Type unicode and emojis + println!("Test 2: Typing unicode and emojis"); + match controller.type_text("TextEdit", "\n🌟 Unicode test: cafΓ©, naΓ―ve, ζ—₯本θͺž πŸŽ‰") { + Ok(_) => println!(" βœ… Successfully typed unicode text\n"), + Err(e) => println!(" ❌ Failed: {}\n", e), + } + + std::thread::sleep(std::time::Duration::from_secs(1)); + + // Test 3: Type special characters + println!("Test 3: Typing special characters"); + match controller.type_text("TextEdit", "\nSpecial: @#$%^&*()_+-=[]{}|;':,.<>?/") { + Ok(_) => println!(" βœ… Successfully typed special characters\n"), + Err(e) => println!(" ❌ Failed: {}\n", e), + } + + println!("\n✨ Tests complete!"); + println!("\nπŸ’‘ Now try with Things3:"); + println!(" 1. Open Things3"); + println!(" 2. Press Cmd+N to create a new task"); + println!(" 3. 
Run: g3 --macax 'type \"🌟 My awesome task\" into Things'"); + + Ok(()) +} diff --git a/crates/g3-computer-control/src/lib.rs b/crates/g3-computer-control/src/lib.rs index 5c72d65..2eb686c 100644 --- a/crates/g3-computer-control/src/lib.rs +++ b/crates/g3-computer-control/src/lib.rs @@ -1,10 +1,14 @@ pub mod types; pub mod platform; pub mod webdriver; +pub mod macax; // Re-export webdriver types for convenience pub use webdriver::{WebDriverController, WebElement, safari::SafariDriver}; +// Re-export macax types for convenience +pub use macax::{MacAxController, AXElement, AXApplication}; + use anyhow::Result; use async_trait::async_trait; use types::*; diff --git a/crates/g3-computer-control/src/macax/controller.rs b/crates/g3-computer-control/src/macax/controller.rs new file mode 100644 index 0000000..a887714 --- /dev/null +++ b/crates/g3-computer-control/src/macax/controller.rs @@ -0,0 +1,826 @@ +use super::{AXApplication, AXElement}; +use anyhow::{Context, Result}; +use std::collections::HashMap; + +#[cfg(target_os = "macos")] +use accessibility::{AXUIElement, AXUIElementAttributes, ElementFinder, TreeVisitor, TreeWalker, TreeWalkerFlow}; + +#[cfg(target_os = "macos")] +use core_foundation::base::TCFType; + +#[cfg(target_os = "macos")] +use core_foundation::string::CFString; + +#[cfg(target_os = "macos")] +use core_foundation::boolean::CFBoolean; + +/// macOS Accessibility API controller using native APIs +pub struct MacAxController { + // Cache for application elements + app_cache: std::sync::Mutex>, +} + +impl MacAxController { + pub fn new() -> Result { + #[cfg(target_os = "macos")] + { + // Check if we have accessibility permissions by trying to get system-wide element + let _system = AXUIElement::system_wide(); + + Ok(Self { + app_cache: std::sync::Mutex::new(HashMap::new()), + }) + } + + #[cfg(not(target_os = "macos"))] + { + anyhow::bail!("macOS Accessibility API is only available on macOS") + } + } + + /// List all running applications + #[cfg(target_os = 
"macos")] + pub fn list_applications(&self) -> Result> { + let apps = Self::get_running_applications()?; + Ok(apps) + } + + #[cfg(not(target_os = "macos"))] + pub fn list_applications(&self) -> Result> { + anyhow::bail!("Not supported on this platform") + } + + #[cfg(target_os = "macos")] + fn get_running_applications() -> Result> { + use cocoa::appkit::NSApplicationActivationPolicy; + use cocoa::base::{id, nil}; + use objc::{class, msg_send, sel, sel_impl}; + + unsafe { + let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace]; + let running_apps: id = msg_send![workspace, runningApplications]; + let count: usize = msg_send![running_apps, count]; + + let mut apps = Vec::new(); + + for i in 0..count { + let app: id = msg_send![running_apps, objectAtIndex: i]; + + // Get app name + let localized_name: id = msg_send![app, localizedName]; + if localized_name == nil { + continue; + } + let name_ptr: *const i8 = msg_send![localized_name, UTF8String]; + let name = if !name_ptr.is_null() { + std::ffi::CStr::from_ptr(name_ptr) + .to_string_lossy() + .to_string() + } else { + continue; + }; + + // Get bundle ID + let bundle_id_obj: id = msg_send![app, bundleIdentifier]; + let bundle_id = if bundle_id_obj != nil { + let bundle_id_ptr: *const i8 = msg_send![bundle_id_obj, UTF8String]; + if !bundle_id_ptr.is_null() { + Some( + std::ffi::CStr::from_ptr(bundle_id_ptr) + .to_string_lossy() + .to_string(), + ) + } else { + None + } + } else { + None + }; + + // Get PID + let pid: i32 = msg_send![app, processIdentifier]; + + // Skip background-only apps + let activation_policy: i64 = msg_send![app, activationPolicy]; + if activation_policy == NSApplicationActivationPolicy::NSApplicationActivationPolicyRegular as i64 { + apps.push(AXApplication { + name, + bundle_id, + pid, + }); + } + } + + Ok(apps) + } + } + + /// Get the frontmost (active) application + #[cfg(target_os = "macos")] + pub fn get_frontmost_app(&self) -> Result { + use cocoa::base::{id, nil}; + use 
objc::{class, msg_send, sel, sel_impl}; + + unsafe { + let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace]; + let frontmost_app: id = msg_send![workspace, frontmostApplication]; + + if frontmost_app == nil { + anyhow::bail!("No frontmost application"); + } + + // Get app name + let localized_name: id = msg_send![frontmost_app, localizedName]; + let name_ptr: *const i8 = msg_send![localized_name, UTF8String]; + let name = std::ffi::CStr::from_ptr(name_ptr) + .to_string_lossy() + .to_string(); + + // Get bundle ID + let bundle_id_obj: id = msg_send![frontmost_app, bundleIdentifier]; + let bundle_id = if bundle_id_obj != nil { + let bundle_id_ptr: *const i8 = msg_send![bundle_id_obj, UTF8String]; + if !bundle_id_ptr.is_null() { + Some( + std::ffi::CStr::from_ptr(bundle_id_ptr) + .to_string_lossy() + .to_string(), + ) + } else { + None + } + } else { + None + }; + + // Get PID + let pid: i32 = msg_send![frontmost_app, processIdentifier]; + + Ok(AXApplication { + name, + bundle_id, + pid, + }) + } + } + + #[cfg(not(target_os = "macos"))] + pub fn get_frontmost_app(&self) -> Result { + anyhow::bail!("Not supported on this platform") + } + + /// Get AXUIElement for an application by name or PID + #[cfg(target_os = "macos")] + fn get_app_element(&self, app_name: &str) -> Result { + // Check cache first + { + let cache = self.app_cache.lock().unwrap(); + if let Some(element) = cache.get(app_name) { + return Ok(element.clone()); + } + } + + // Find the app by name + let apps = Self::get_running_applications()?; + let app = apps + .iter() + .find(|a| a.name == app_name) + .ok_or_else(|| anyhow::anyhow!("Application '{}' not found", app_name))?; + + // Create AXUIElement for the app + let element = AXUIElement::application(app.pid); + + // Cache it + { + let mut cache = self.app_cache.lock().unwrap(); + cache.insert(app_name.to_string(), element.clone()); + } + + Ok(element) + } + + /// Activate (bring to front) an application + #[cfg(target_os = "macos")] + 
pub fn activate_app(&self, app_name: &str) -> Result<()> { + use cocoa::base::{id, nil}; + use objc::{class, msg_send, sel, sel_impl}; + + // Find the app + let apps = Self::get_running_applications()?; + let app = apps + .iter() + .find(|a| a.name == app_name) + .ok_or_else(|| anyhow::anyhow!("Application '{}' not found", app_name))?; + + unsafe { + let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace]; + let running_apps: id = msg_send![workspace, runningApplications]; + let count: usize = msg_send![running_apps, count]; + + for i in 0..count { + let running_app: id = msg_send![running_apps, objectAtIndex: i]; + let pid: i32 = msg_send![running_app, processIdentifier]; + + if pid == app.pid { + let _: bool = msg_send![running_app, activateWithOptions: 0]; + return Ok(()); + } + } + } + + anyhow::bail!("Failed to activate application") + } + + #[cfg(not(target_os = "macos"))] + pub fn activate_app(&self, _app_name: &str) -> Result<()> { + anyhow::bail!("Not supported on this platform") + } + + /// Get the UI hierarchy of an application + #[cfg(target_os = "macos")] + pub fn get_ui_tree(&self, app_name: &str, max_depth: usize) -> Result { + let app_element = self.get_app_element(app_name)?; + let mut output = format!("Application: {}\n", app_name); + + Self::build_ui_tree(&app_element, &mut output, 0, max_depth)?; + + Ok(output) + } + + #[cfg(not(target_os = "macos"))] + pub fn get_ui_tree(&self, _app_name: &str, _max_depth: usize) -> Result { + anyhow::bail!("Not supported on this platform") + } + + #[cfg(target_os = "macos")] + fn build_ui_tree( + element: &AXUIElement, + output: &mut String, + depth: usize, + max_depth: usize, + ) -> Result<()> { + if depth >= max_depth { + return Ok(()); + } + + let indent = " ".repeat(depth); + + // Get role + let role = element.role().ok().map(|s| s.to_string()) + .unwrap_or_else(|| "Unknown".to_string()); + + // Get title + let title = element.title().ok() + .map(|s| s.to_string()); + + // Get identifier + let 
identifier = element.identifier().ok() + .map(|s| s.to_string()); + + // Format output + output.push_str(&format!("{}Role: {}", indent, role)); + if let Some(t) = title { + output.push_str(&format!(", Title: {}", t)); + } + if let Some(id) = identifier { + output.push_str(&format!(", ID: {}", id)); + } + output.push('\n'); + + // Get children + if let Ok(children) = element.children() { + for i in 0..children.len() { + if let Some(child) = children.get(i) { + let _ = Self::build_ui_tree(&child, output, depth + 1, max_depth); + } + } + } + + Ok(()) + } + + /// Find UI elements in an application + #[cfg(target_os = "macos")] + pub fn find_elements( + &self, + app_name: &str, + role: Option<&str>, + title: Option<&str>, + identifier: Option<&str>, + ) -> Result> { + let app_element = self.get_app_element(app_name)?; + let mut found_elements = Vec::new(); + + let visitor = ElementCollector { + role_filter: role.map(|s| s.to_string()), + title_filter: title.map(|s| s.to_string()), + identifier_filter: identifier.map(|s| s.to_string()), + results: std::cell::RefCell::new(&mut found_elements), + depth: std::cell::Cell::new(0), + }; + + let walker = TreeWalker::new(); + walker.walk(&app_element, &visitor); + + Ok(found_elements) + } + + #[cfg(not(target_os = "macos"))] + pub fn find_elements( + &self, + _app_name: &str, + _role: Option<&str>, + _title: Option<&str>, + _identifier: Option<&str>, + ) -> Result> { + anyhow::bail!("Not supported on this platform") + } + + /// Find a single element (helper for click, set_value, etc.) 
+ #[cfg(target_os = "macos")] + fn find_element( + &self, + app_name: &str, + role: &str, + title: Option<&str>, + identifier: Option<&str>, + ) -> Result { + let app_element = self.get_app_element(app_name)?; + + let role_str = role.to_string(); + let title_str = title.map(|s| s.to_string()); + let identifier_str = identifier.map(|s| s.to_string()); + + let finder = ElementFinder::new( + &app_element, + move |element| { + // Check role + let elem_role = element.role() + .ok() + .map(|s| s.to_string()); + + if let Some(r) = elem_role { + if !r.contains(&role_str) { + return false; + } + } else { + return false; + } + + // Check title if specified + if let Some(ref title_filter) = title_str { + let elem_title = element.title() + .ok() + .map(|s| s.to_string()); + + if let Some(t) = elem_title { + if !t.contains(title_filter) { + return false; + } + } else { + return false; + } + } + + // Check identifier if specified + if let Some(ref id_filter) = identifier_str { + let elem_id = element.identifier() + .ok() + .map(|s| s.to_string()); + + if let Some(id) = elem_id { + if !id.contains(id_filter) { + return false; + } + } else { + return false; + } + } + + true + }, + Some(std::time::Duration::from_secs(2)), + ); + + finder.find().context("Element not found") + } + + /// Click on a UI element + #[cfg(target_os = "macos")] + pub fn click_element( + &self, + app_name: &str, + role: &str, + title: Option<&str>, + identifier: Option<&str>, + ) -> Result<()> { + let element = self.find_element(app_name, role, title, identifier)?; + + // Perform the press action + let action_name = CFString::new("AXPress"); + element + .perform_action(&action_name) + .map_err(|e| anyhow::anyhow!("Failed to perform press action: {:?}", e))?; + + Ok(()) + } + + #[cfg(not(target_os = "macos"))] + pub fn click_element( + &self, + _app_name: &str, + _role: &str, + _title: Option<&str>, + _identifier: Option<&str>, + ) -> Result<()> { + anyhow::bail!("Not supported on this platform") + } + + /// 
Set the value of a UI element + #[cfg(target_os = "macos")] + pub fn set_value( + &self, + app_name: &str, + role: &str, + value: &str, + title: Option<&str>, + identifier: Option<&str>, + ) -> Result<()> { + let element = self.find_element(app_name, role, title, identifier)?; + + // Set the value - convert CFString to CFType + let cf_value = CFString::new(value); + + element.set_value(cf_value.as_CFType()) + .map_err(|e| anyhow::anyhow!("Failed to set value: {:?}", e))?; + + Ok(()) + } + + #[cfg(not(target_os = "macos"))] + pub fn set_value( + &self, + _app_name: &str, + _role: &str, + _value: &str, + _title: Option<&str>, + _identifier: Option<&str>, + ) -> Result<()> { + anyhow::bail!("Not supported on this platform") + } + + /// Get the value of a UI element + #[cfg(target_os = "macos")] + pub fn get_value( + &self, + app_name: &str, + role: &str, + title: Option<&str>, + identifier: Option<&str>, + ) -> Result { + let element = self.find_element(app_name, role, title, identifier)?; + + // Get the value + let value_type = element.value() + .map_err(|e| anyhow::anyhow!("Failed to get value: {:?}", e))?; + + // Try to downcast to CFString + if let Some(cf_string) = value_type.downcast::() { + Ok(cf_string.to_string()) + } else { + // For non-string values, try to get a description + Ok(format!("")) + } + } + + #[cfg(not(target_os = "macos"))] + pub fn get_value( + &self, + _app_name: &str, + _role: &str, + _title: Option<&str>, + _identifier: Option<&str>, + ) -> Result { + anyhow::bail!("Not supported on this platform") + } + + /// Type text into the currently focused element (uses system text input) + #[cfg(target_os = "macos")] + pub fn type_text(&self, app_name: &str, text: &str) -> Result<()> { + use cocoa::appkit::NSPasteboard; + use cocoa::base::{id, nil}; + use cocoa::foundation::NSString; + use objc::{class, msg_send, sel, sel_impl}; + + // First, make sure the app is active + self.activate_app(app_name)?; + + // Wait for app to fully activate + 
std::thread::sleep(std::time::Duration::from_millis(500)); + + // Send a Tab key to try to focus on a text field + // This helps ensure something is focused before we paste + let _ = self.press_key(app_name, "tab", vec![]); + std::thread::sleep(std::time::Duration::from_millis(800)); + + // Save old clipboard, set new content, paste, then restore + let old_content: id; + unsafe { + // Get the general pasteboard + let pasteboard: id = msg_send![class!(NSPasteboard), generalPasteboard]; + + // Save current clipboard content + let ns_string_type = NSString::alloc(nil).init_str("public.utf8-plain-text"); + old_content = msg_send![pasteboard, stringForType: ns_string_type]; + + // Clear and set new content + let _: () = msg_send![pasteboard, clearContents]; + + let ns_string = NSString::alloc(nil).init_str(text); + let ns_type = NSString::alloc(nil).init_str("public.utf8-plain-text"); + let _: bool = msg_send![pasteboard, setString:ns_string forType:ns_type]; + } + + // Wait a moment for clipboard to update + std::thread::sleep(std::time::Duration::from_millis(200)); + + // Paste using Cmd+V (outside unsafe block) + self.press_key(app_name, "v", vec!["command"])?; + + // Wait for paste to complete + std::thread::sleep(std::time::Duration::from_millis(300)); + + // Restore old clipboard content if it existed + unsafe { + if old_content != nil { + let pasteboard: id = msg_send![class!(NSPasteboard), generalPasteboard]; + let _: () = msg_send![pasteboard, clearContents]; + let ns_type = NSString::alloc(nil).init_str("public.utf8-plain-text"); + let _: bool = msg_send![pasteboard, setString:old_content forType:ns_type]; + } + } + + Ok(()) + } + + #[cfg(not(target_os = "macos"))] + pub fn type_text(&self, _app_name: &str, _text: &str) -> Result<()> { + anyhow::bail!("Not supported on this platform") + } + + /// Focus on a text field or text area element + #[cfg(target_os = "macos")] + pub fn focus_element( + &self, + app_name: &str, + role: &str, + title: Option<&str>, + 
identifier: Option<&str>, + ) -> Result<()> { + let element = self.find_element(app_name, role, title, identifier)?; + + // Set focused attribute to true + use core_foundation::boolean::CFBoolean; + let cf_true = CFBoolean::true_value(); + + element.set_attribute(&accessibility::AXAttribute::focused(), cf_true) + .map_err(|e| anyhow::anyhow!("Failed to focus element: {:?}", e))?; + + Ok(()) + } + + /// Press a keyboard shortcut + #[cfg(target_os = "macos")] + pub fn press_key( + &self, + app_name: &str, + key: &str, + modifiers: Vec<&str>, + ) -> Result<()> { + use core_graphics::event::{ + CGEvent, CGEventFlags, CGEventTapLocation, + }; + use core_graphics::event_source::{CGEventSource, CGEventSourceStateID}; + + // First, make sure the app is active + self.activate_app(app_name)?; + + // Wait a bit for activation + std::thread::sleep(std::time::Duration::from_millis(100)); + + // Map key string to key code + let key_code = Self::key_to_keycode(key) + .ok_or_else(|| anyhow::anyhow!("Unknown key: {}", key))?; + + // Map modifiers to flags + let mut flags = CGEventFlags::CGEventFlagNull; + for modifier in modifiers { + match modifier.to_lowercase().as_str() { + "command" | "cmd" => flags |= CGEventFlags::CGEventFlagCommand, + "option" | "alt" => flags |= CGEventFlags::CGEventFlagAlternate, + "control" | "ctrl" => flags |= CGEventFlags::CGEventFlagControl, + "shift" => flags |= CGEventFlags::CGEventFlagShift, + _ => {} + } + } + + // Create event source + let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState) + .ok().context("Failed to create event source")?; + + // Create key down event + let key_down = CGEvent::new_keyboard_event(source.clone(), key_code, true) + .ok().context("Failed to create key down event")?; + key_down.set_flags(flags); + + // Create key up event + let key_up = CGEvent::new_keyboard_event(source, key_code, false) + .ok().context("Failed to create key up event")?; + key_up.set_flags(flags); + + // Post events + 
key_down.post(CGEventTapLocation::HID); + std::thread::sleep(std::time::Duration::from_millis(50)); + key_up.post(CGEventTapLocation::HID); + + Ok(()) + } + + #[cfg(not(target_os = "macos"))] + pub fn press_key( + &self, + _app_name: &str, + _key: &str, + _modifiers: Vec<&str>, + ) -> Result<()> { + anyhow::bail!("Not supported on this platform") + } + + #[cfg(target_os = "macos")] + fn key_to_keycode(key: &str) -> Option { + // Map common keys to keycodes + // See: https://eastmanreference.com/complete-list-of-applescript-key-codes + match key.to_lowercase().as_str() { + "a" => Some(0x00), + "s" => Some(0x01), + "d" => Some(0x02), + "f" => Some(0x03), + "h" => Some(0x04), + "g" => Some(0x05), + "z" => Some(0x06), + "x" => Some(0x07), + "c" => Some(0x08), + "v" => Some(0x09), + "b" => Some(0x0B), + "q" => Some(0x0C), + "w" => Some(0x0D), + "e" => Some(0x0E), + "r" => Some(0x0F), + "y" => Some(0x10), + "t" => Some(0x11), + "1" => Some(0x12), + "2" => Some(0x13), + "3" => Some(0x14), + "4" => Some(0x15), + "6" => Some(0x16), + "5" => Some(0x17), + "=" => Some(0x18), + "9" => Some(0x19), + "7" => Some(0x1A), + "-" => Some(0x1B), + "8" => Some(0x1C), + "0" => Some(0x1D), + "]" => Some(0x1E), + "o" => Some(0x1F), + "u" => Some(0x20), + "[" => Some(0x21), + "i" => Some(0x22), + "p" => Some(0x23), + "return" | "enter" => Some(0x24), + "l" => Some(0x25), + "j" => Some(0x26), + "'" => Some(0x27), + "k" => Some(0x28), + ";" => Some(0x29), + "\\" => Some(0x2A), + "," => Some(0x2B), + "/" => Some(0x2C), + "n" => Some(0x2D), + "m" => Some(0x2E), + "." 
=> Some(0x2F), + "tab" => Some(0x30), + "space" => Some(0x31), + "`" => Some(0x32), + "delete" | "backspace" => Some(0x33), + "escape" | "esc" => Some(0x35), + "f1" => Some(0x7A), + "f2" => Some(0x78), + "f3" => Some(0x63), + "f4" => Some(0x76), + "f5" => Some(0x60), + "f6" => Some(0x61), + "f7" => Some(0x62), + "f8" => Some(0x64), + "f9" => Some(0x65), + "f10" => Some(0x6D), + "f11" => Some(0x67), + "f12" => Some(0x6F), + "left" => Some(0x7B), + "right" => Some(0x7C), + "down" => Some(0x7D), + "up" => Some(0x7E), + _ => None, + } + } +} + +#[cfg(target_os = "macos")] +struct ElementCollector<'a> { + role_filter: Option, + title_filter: Option, + identifier_filter: Option, + results: std::cell::RefCell<&'a mut Vec>, + depth: std::cell::Cell, +} + +#[cfg(target_os = "macos")] +impl<'a> TreeVisitor for ElementCollector<'a> { + fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow { + self.depth.set(self.depth.get() + 1); + + if self.depth.get() > 20 { + return TreeWalkerFlow::SkipSubtree; + } + + // Get element properties + let role = element.role() + .ok() + .map(|s| s.to_string()) + .unwrap_or_else(|| "Unknown".to_string()); + + let title = element.title() + .ok() + .map(|s| s.to_string()); + + let identifier = element.identifier() + .ok() + .map(|s| s.to_string()); + + // Check if this element matches the filters + let role_matches = self.role_filter.as_ref().map_or(true, |r| role.contains(r)); + let title_matches = self.title_filter.as_ref().map_or(true, |t| { + title.as_ref().map_or(false, |title_str| title_str.contains(t)) + }); + let identifier_matches = self.identifier_filter.as_ref().map_or(true, |id| { + identifier.as_ref().map_or(false, |id_str| id_str.contains(id)) + }); + + if role_matches && title_matches && identifier_matches { + // Get additional properties + let value = element.value() + .ok() + .and_then(|v| { + v.downcast::().map(|s| s.to_string()) + }); + + let label = element.description() + .ok() + .map(|s| s.to_string()); + + let 
enabled = element.enabled() + .ok() + .map(|b| b.into()) + .unwrap_or(false); + + let focused = element.focused() + .ok() + .map(|b| b.into()) + .unwrap_or(false); + + // Count children + let children_count = element.children() + .ok() + .map(|arr| arr.len() as usize) + .unwrap_or(0); + + self.results.borrow_mut().push(AXElement { + role, + title, + value, + label, + identifier, + enabled, + focused, + position: None, + size: None, + children_count, + }); + } + + TreeWalkerFlow::Continue + } + + fn exit_element(&self, _element: &AXUIElement) { + self.depth.set(self.depth.get() - 1); + } +} diff --git a/crates/g3-computer-control/src/macax/mod.rs b/crates/g3-computer-control/src/macax/mod.rs new file mode 100644 index 0000000..b62e87d --- /dev/null +++ b/crates/g3-computer-control/src/macax/mod.rs @@ -0,0 +1,65 @@ +pub mod controller; + +pub use controller::MacAxController; + +use serde::{Deserialize, Serialize}; + +#[cfg(test)] +mod tests; + +/// Represents an accessibility element in the UI hierarchy +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct AXElement { + pub role: String, + pub title: Option, + pub value: Option, + pub label: Option, + pub identifier: Option, + pub enabled: bool, + pub focused: bool, + pub position: Option<(f64, f64)>, + pub size: Option<(f64, f64)>, + pub children_count: usize, +} + +/// Represents a macOS application +#[derive(Debug, Clone)] +pub struct AXApplication { + pub name: String, + pub bundle_id: Option, + pub pid: i32, +} + +impl AXElement { + /// Convert to a human-readable string representation + pub fn to_string(&self) -> String { + let mut parts = vec![format!("Role: {}", self.role)]; + + if let Some(ref title) = self.title { + parts.push(format!("Title: {}", title)); + } + if let Some(ref value) = self.value { + parts.push(format!("Value: {}", value)); + } + if let Some(ref label) = self.label { + parts.push(format!("Label: {}", label)); + } + if let Some(ref id) = self.identifier { + parts.push(format!("ID: 
{}", id)); + } + + parts.push(format!("Enabled: {}", self.enabled)); + parts.push(format!("Focused: {}", self.focused)); + + if let Some((x, y)) = self.position { + parts.push(format!("Position: ({:.0}, {:.0})", x, y)); + } + if let Some((w, h)) = self.size { + parts.push(format!("Size: ({:.0}, {:.0})", w, h)); + } + + parts.push(format!("Children: {}", self.children_count)); + + parts.join(", ") + } +} diff --git a/crates/g3-computer-control/src/macax/tests.rs b/crates/g3-computer-control/src/macax/tests.rs new file mode 100644 index 0000000..01f44e3 --- /dev/null +++ b/crates/g3-computer-control/src/macax/tests.rs @@ -0,0 +1,37 @@ +#[cfg(test)] +mod tests { + use crate::{AXElement, MacAxController}; + + #[test] + fn test_ax_element_to_string() { + let element = AXElement { + role: "button".to_string(), + title: Some("Click Me".to_string()), + value: None, + label: Some("Submit Button".to_string()), + identifier: Some("submitBtn".to_string()), + enabled: true, + focused: false, + position: Some((100.0, 200.0)), + size: Some((80.0, 30.0)), + children_count: 0, + }; + + let string_repr = element.to_string(); + assert!(string_repr.contains("Role: button")); + assert!(string_repr.contains("Title: Click Me")); + assert!(string_repr.contains("Label: Submit Button")); + assert!(string_repr.contains("ID: submitBtn")); + assert!(string_repr.contains("Enabled: true")); + assert!(string_repr.contains("Position: (100, 200)")); + assert!(string_repr.contains("Size: (80, 30)")); + } + + #[test] + fn test_controller_creation() { + // Just test that we can create a controller + // Actual functionality requires macOS and permissions + let result = MacAxController::new(); + assert!(result.is_ok()); + } +} diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs index 4b6dc9d..272367d 100644 --- a/crates/g3-config/src/lib.rs +++ b/crates/g3-config/src/lib.rs @@ -8,6 +8,7 @@ pub struct Config { pub agent: AgentConfig, pub computer_control: ComputerControlConfig, pub 
webdriver: WebDriverConfig, + pub macax: MacAxConfig, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -79,6 +80,19 @@ pub struct WebDriverConfig { pub safari_port: u16, } +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MacAxConfig { + pub enabled: bool, +} + +impl Default for MacAxConfig { + fn default() -> Self { + Self { + enabled: false, + } + } +} + impl Default for WebDriverConfig { fn default() -> Self { Self { @@ -124,6 +138,7 @@ impl Default for Config { }, computer_control: ComputerControlConfig::default(), webdriver: WebDriverConfig::default(), + macax: MacAxConfig::default(), } } } @@ -238,6 +253,7 @@ impl Config { }, computer_control: ComputerControlConfig::default(), webdriver: WebDriverConfig::default(), + macax: MacAxConfig::default(), } } diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 2106d3d..21e7bd0 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -551,6 +551,7 @@ pub struct Agent { todo_content: std::sync::Arc>, webdriver_session: std::sync::Arc>>>>, safaridriver_process: std::sync::Arc>>, + macax_controller: std::sync::Arc>>, } impl Agent { @@ -761,6 +762,9 @@ impl Agent { None }; + // Capture macax_enabled before moving config + let macax_enabled = config.macax.enabled; + Ok(Self { providers, context_window, @@ -777,6 +781,12 @@ impl Agent { computer_controller, webdriver_session: std::sync::Arc::new(tokio::sync::RwLock::new(None)), safaridriver_process: std::sync::Arc::new(tokio::sync::RwLock::new(None)), + macax_controller: { + std::sync::Arc::new(tokio::sync::RwLock::new( + if macax_enabled { Some(g3_computer_control::MacAxController::new()?) 
} + else { None } + )) + }, }) } @@ -1088,7 +1098,7 @@ Template: // Check if provider supports native tool calling and add tools if so let provider = self.providers.get(None)?; let tools = if provider.has_native_tool_calling() { - Some(Self::create_tool_definitions(self.config.webdriver.enabled)) + Some(Self::create_tool_definitions(self.config.webdriver.enabled, self.config.macax.enabled)) } else { None }; @@ -1549,7 +1559,7 @@ Template: } /// Create tool definitions for native tool calling providers - fn create_tool_definitions(enable_webdriver: bool) -> Vec { + fn create_tool_definitions(enable_webdriver: bool, enable_macax: bool) -> Vec { let mut tools = vec![ Tool { name: "shell".to_string(), @@ -1904,6 +1914,231 @@ Template: ]); } + // Add macOS Accessibility tools if enabled + if enable_macax { + tools.extend(vec![ + Tool { + name: "macax_list_apps".to_string(), + description: "List all running applications that can be controlled via macOS Accessibility API".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "macax_get_frontmost_app".to_string(), + description: "Get the name of the currently active (frontmost) application".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }, + Tool { + name: "macax_activate_app".to_string(), + description: "Bring an application to the front (activate it)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application to activate (e.g., 'Safari', 'TextEdit')" + } + }, + "required": ["app_name"] + }), + }, + Tool { + name: "macax_get_ui_tree".to_string(), + description: "Get the UI element hierarchy of an application as a tree structure".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "max_depth": { + 
"type": "integer", + "description": "Maximum depth to traverse (default: 3)" + } + }, + "required": ["app_name"] + }), + }, + Tool { + name: "macax_find_elements".to_string(), + description: "Find UI elements in an application by role, title, or identifier. Use this to locate buttons, text fields, etc.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "role": { + "type": "string", + "description": "UI element role (e.g., 'button', 'text field', 'window')" + }, + "title": { + "type": "string", + "description": "Element title or label to match" + }, + "identifier": { + "type": "string", + "description": "Element identifier (accessibility identifier)" + } + }, + "required": ["app_name"] + }), + }, + Tool { + name: "macax_click".to_string(), + description: "Click a UI element in an application".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "role": { + "type": "string", + "description": "UI element role (e.g., 'button')" + }, + "title": { + "type": "string", + "description": "Element title or label" + }, + "identifier": { + "type": "string", + "description": "Element identifier" + } + }, + "required": ["app_name", "role"] + }), + }, + Tool { + name: "macax_set_value".to_string(), + description: "Set the value of a UI element (e.g., type into a text field)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "role": { + "type": "string", + "description": "UI element role (e.g., 'text field')" + }, + "value": { + "type": "string", + "description": "Value to set" + }, + "title": { + "type": "string", + "description": "Element title or label" + }, + "identifier": { + "type": "string", + "description": "Element identifier" + } + }, + "required": 
["app_name", "role", "value"] + }), + }, + Tool { + name: "macax_get_value".to_string(), + description: "Get the value of a UI element (e.g., read text from a text field)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "role": { + "type": "string", + "description": "UI element role (e.g., 'text field')" + }, + "title": { + "type": "string", + "description": "Element title or label" + }, + "identifier": { + "type": "string", + "description": "Element identifier" + } + }, + "required": ["app_name", "role"] + }), + }, + Tool { + name: "macax_press_key".to_string(), + description: "Press a keyboard key or shortcut in an application (e.g., Cmd+S to save)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "key": { + "type": "string", + "description": "Key to press (e.g., 's', 'return', 'tab')" + }, + "modifiers": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Modifier keys (e.g., ['command', 'shift'])" + } + }, + "required": ["app_name", "key"] + }), + }, + ]); + + // Add type_text tool for typing arbitrary text + tools.push(Tool { + name: "macax_type_text".to_string(), + description: "Type arbitrary text into the currently focused element in an application (supports unicode, emojis, etc.)".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": { + "type": "string", + "description": "Name of the application" + }, + "text": { + "type": "string", + "description": "Text to type (can include unicode, emojis, special characters)" + } + }, + "required": ["app_name", "text"] + }), + }); + + // Add focus_element tool + tools.push(Tool { + name: "macax_focus_element".to_string(), + description: "Focus on a UI element (text field, text area, etc.) 
before typing".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "app_name": {"type": "string", "description": "Name of the application"}, + "role": {"type": "string", "description": "UI element role (e.g., 'text field', 'text area')"}, + "title": {"type": "string", "description": "Element title or label (optional)"}, + "identifier": {"type": "string", "description": "Element accessibility identifier (optional)"} + }, + "required": ["app_name", "role"] + }), + }); + } + tools } @@ -2469,7 +2704,7 @@ Template: // Ensure tools are included for native providers in subsequent iterations if provider.has_native_tool_calling() { - request.tools = Some(Self::create_tool_definitions(self.config.webdriver.enabled)); + request.tools = Some(Self::create_tool_definitions(self.config.webdriver.enabled, self.config.macax.enabled)); } // Only add to full_response if we haven't already added it @@ -3829,6 +4064,331 @@ Template: Err(_) => Ok("❌ Cannot quit: WebDriver session is still in use".to_string()), } } + "macax_list_apps" => { + debug!("Processing macax_list_apps tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + } + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.list_applications() { + Ok(apps) => { + let app_list: Vec = apps.iter().map(|a| a.name.clone()).collect(); + Ok(format!("Running applications:\n{}", app_list.join("\n"))) + } + Err(e) => Ok(format!("❌ Failed to list applications: {}", e)), + } + } + "macax_get_frontmost_app" => { + debug!("Processing macax_get_frontmost_app tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. 
Use --macax flag to enable.".to_string()); + } + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.get_frontmost_app() { + Ok(app) => Ok(format!("Frontmost application: {}", app.name)), + Err(e) => Ok(format!("❌ Failed to get frontmost app: {}", e)), + } + } + "macax_activate_app" => { + debug!("Processing macax_activate_app tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.activate_app(app_name) { + Ok(_) => Ok(format!("βœ… Activated application: {}", app_name)), + Err(e) => Ok(format!("❌ Failed to activate app: {}", e)), + } + } + "macax_get_ui_tree" => { + debug!("Processing macax_get_ui_tree tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. 
Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let max_depth = tool_call.args.get("max_depth") + .and_then(|v| v.as_u64()) + .map(|n| n as usize) + .unwrap_or(3); + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.get_ui_tree(app_name, max_depth) { + Ok(tree) => Ok(tree), + Err(e) => Ok(format!("❌ Failed to get UI tree: {}", e)), + } + } + "macax_find_elements" => { + debug!("Processing macax_find_elements tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let role = tool_call.args.get("role").and_then(|v| v.as_str()); + let title = tool_call.args.get("title").and_then(|v| v.as_str()); + let identifier = tool_call.args.get("identifier").and_then(|v| v.as_str()); + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.find_elements(app_name, role, title, identifier) { + Ok(elements) => { + if elements.is_empty() { + Ok("No elements found matching criteria".to_string()) + } else { + let element_strs: Vec = elements.iter() + .map(|e| e.to_string()) + .collect(); + Ok(format!("Found {} element(s):\n{}", elements.len(), element_strs.join("\n"))) + } + } + Err(e) => Ok(format!("❌ Failed to find elements: {}", e)), + } + } + "macax_click" => { + debug!("Processing 
macax_click tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let role = match tool_call.args.get("role").and_then(|v| v.as_str()) { + Some(r) => r, + None => return Ok("❌ Missing role argument".to_string()), + }; + + let title = tool_call.args.get("title").and_then(|v| v.as_str()); + let identifier = tool_call.args.get("identifier").and_then(|v| v.as_str()); + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.click_element(app_name, role, title, identifier) { + Ok(_) => Ok(format!("βœ… Clicked {} element", role)), + Err(e) => Ok(format!("❌ Failed to click element: {}", e)), + } + } + "macax_set_value" => { + debug!("Processing macax_set_value tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. 
Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let role = match tool_call.args.get("role").and_then(|v| v.as_str()) { + Some(r) => r, + None => return Ok("❌ Missing role argument".to_string()), + }; + + let value = match tool_call.args.get("value").and_then(|v| v.as_str()) { + Some(v) => v, + None => return Ok("❌ Missing value argument".to_string()), + }; + + let title = tool_call.args.get("title").and_then(|v| v.as_str()); + let identifier = tool_call.args.get("identifier").and_then(|v| v.as_str()); + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.set_value(app_name, role, value, title, identifier) { + Ok(_) => Ok(format!("βœ… Set value of {} element to: {}", role, value)), + Err(e) => Ok(format!("❌ Failed to set value: {}", e)), + } + } + "macax_get_value" => { + debug!("Processing macax_get_value tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. 
Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let role = match tool_call.args.get("role").and_then(|v| v.as_str()) { + Some(r) => r, + None => return Ok("❌ Missing role argument".to_string()), + }; + + let title = tool_call.args.get("title").and_then(|v| v.as_str()); + let identifier = tool_call.args.get("identifier").and_then(|v| v.as_str()); + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.get_value(app_name, role, title, identifier) { + Ok(value) => Ok(format!("Value: {}", value)), + Err(e) => Ok(format!("❌ Failed to get value: {}", e)), + } + } + "macax_press_key" => { + debug!("Processing macax_press_key tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. 
Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let key = match tool_call.args.get("key").and_then(|v| v.as_str()) { + Some(k) => k, + None => return Ok("❌ Missing key argument".to_string()), + }; + + let modifiers_vec: Vec<&str> = tool_call.args.get("modifiers") + .and_then(|v| v.as_array()) + .map(|arr| arr.iter() + .filter_map(|v| v.as_str()) + .collect()) + .unwrap_or_default(); + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.press_key(app_name, key, modifiers_vec.clone()) { + Ok(_) => { + let modifier_str = if modifiers_vec.is_empty() { + String::new() + } else { + format!(" with modifiers: {}", modifiers_vec.join("+")) + }; + Ok(format!("βœ… Pressed key: {}{}", key, modifier_str)) + } + Err(e) => Ok(format!("❌ Failed to press key: {}", e)), + } + } + "macax_type_text" => { + debug!("Processing macax_type_text tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. 
Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let text = match tool_call.args.get("text").and_then(|v| v.as_str()) { + Some(t) => t, + None => return Ok("❌ Missing text argument".to_string()), + }; + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.type_text(app_name, text) { + Ok(_) => Ok(format!("βœ… Typed text into {}", app_name)), + Err(e) => Ok(format!("❌ Failed to type text: {}", e)), + } + } + "macax_focus_element" => { + debug!("Processing macax_focus_element tool call"); + + if !self.config.macax.enabled { + return Ok("❌ macOS Accessibility is not enabled. Use --macax flag to enable.".to_string()); + } + + let app_name = match tool_call.args.get("app_name").and_then(|v| v.as_str()) { + Some(n) => n, + None => return Ok("❌ Missing app_name argument".to_string()), + }; + + let role = match tool_call.args.get("role").and_then(|v| v.as_str()) { + Some(r) => r, + None => return Ok("❌ Missing role argument".to_string()), + }; + + let title = tool_call.args.get("title").and_then(|v| v.as_str()); + let identifier = tool_call.args.get("identifier").and_then(|v| v.as_str()); + + let controller_guard = self.macax_controller.read().await; + let controller = match controller_guard.as_ref() { + Some(c) => c, + None => return Ok("❌ macOS Accessibility controller not initialized.".to_string()), + }; + + match controller.focus_element(app_name, role, title, identifier) { + Ok(_) => Ok(format!("βœ… Focused {} element in {}", role, app_name)), + Err(e) => Ok(format!("❌ Failed to focus element: {}", e)), + } + } _ => { warn!("Unknown tool: {}", tool_call.tool); Ok(format!("❓ Unknown tool: {}", tool_call.tool))