diff --git a/README.md b/README.md index b1b4d99..d2c84d8 100644 --- a/README.md +++ b/README.md @@ -245,7 +245,7 @@ See `config.example.toml` for a complete configuration example. ## WebDriver Browser Automation -G3 includes WebDriver support for browser automation tasks using Safari. +G3 includes WebDriver support for browser automation tasks. Chrome headless is the default (no visible browser window), with Safari available as an alternative. **One-Time Setup** (macOS only): @@ -263,9 +263,20 @@ safaridriver --enable # Requires password # Then: Develop → Allow Remote Automation ``` -**For detailed setup instructions and troubleshooting**, see [WebDriver Setup Guide](docs/webdriver-setup.md). +**Usage**: -**Usage**: Run G3 with the `--webdriver` flag to enable browser automation tools. +```bash +# Use Chrome in headless mode (default, no visible window, runs in background) +g3 --webdriver + +# Use Safari (opens a visible browser window) +g3 --webdriver --safari +``` + +**Chrome Headless Setup**: Install ChromeDriver: +- macOS: `brew install chromedriver` +- Linux: `apt install chromium-chromedriver` +- Or download from: https://chromedriver.chromium.org/downloads ## macOS Accessibility API Tools diff --git a/config.example.toml b/config.example.toml index 6adf5fd..05234cb 100644 --- a/config.example.toml +++ b/config.example.toml @@ -105,6 +105,11 @@ max_actions_per_second = 5 [webdriver] enabled = false safari_port = 4444 +chrome_port = 9515 +# Browser to use: "safari" or "chrome-headless" (default) +# Safari opens a visible browser window +# Chrome headless runs in the background without a visible window +browser = "chrome-headless" [macax] enabled = false diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index e55190d..0613dfd 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -343,6 +343,14 @@ pub struct Cli { #[arg(long)] pub webdriver: bool, + /// Use Chrome in headless mode for WebDriver (this is the default) + #[arg(long)] + pub chrome_headless: bool, + + /// Use Safari for WebDriver (instead of headless Chrome) + #[arg(long)] + pub safari: bool, + /// Enable flock mode - parallel multi-agent development #[arg(long, requires = "flock_workspace", requires = "segments")] pub project: Option, @@ -506,6 +514,18 @@ pub async fn run() -> Result<()> { config.webdriver.enabled = true; } + // Apply chrome-headless flag override + if cli.chrome_headless { + config.webdriver.enabled = true; + config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless; + } + + // Apply safari flag override + if cli.safari { + config.webdriver.enabled = true; + config.webdriver.browser = g3_config::WebDriverBrowser::Safari; + } + // Apply no-auto-compact flag override if cli.manual_compact { config.agent.auto_compact = false; @@ -774,6 +794,18 @@ async fn run_accumulative_mode( config.webdriver.enabled = true; } + // Apply chrome-headless flag override + if cli.chrome_headless { + config.webdriver.enabled = true; + config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless; + } + + // Apply safari flag override + if cli.safari { + config.webdriver.enabled = true; + config.webdriver.browser = g3_config::WebDriverBrowser::Safari; + } + // Apply no-auto-compact flag override if cli.manual_compact { config.agent.auto_compact = false; @@ -869,6 +901,18 @@ async fn run_accumulative_mode( config.webdriver.enabled = true; } + // Apply chrome-headless flag override + if cli.chrome_headless { + config.webdriver.enabled = true; + config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless; + } + + // Apply safari flag override + if cli.safari { + config.webdriver.enabled = true; + config.webdriver.browser = g3_config::WebDriverBrowser::Safari; + } + // Apply no-auto-compact flag override if cli.manual_compact { config.agent.auto_compact = false; diff --git a/crates/g3-computer-control/src/lib.rs b/crates/g3-computer-control/src/lib.rs index 8133373..7faae63 100644 --- a/crates/g3-computer-control/src/lib.rs +++ b/crates/g3-computer-control/src/lib.rs @@ -8,7 +8,9 @@ pub mod types; pub mod webdriver; // Re-export webdriver types for convenience -pub use webdriver::{safari::SafariDriver, WebDriverController, WebElement}; +pub use webdriver::{ + chrome::ChromeDriver, safari::SafariDriver, WebDriverController, WebElement, +}; // Re-export macax types for convenience pub use macax::{AXApplication, AXElement, MacAxController}; diff --git a/crates/g3-computer-control/src/webdriver/chrome.rs b/crates/g3-computer-control/src/webdriver/chrome.rs new file mode 100644 index 0000000..8ec26f7 --- /dev/null +++ b/crates/g3-computer-control/src/webdriver/chrome.rs @@ -0,0 +1,243 @@ +use super::{WebDriverController, WebElement}; +use anyhow::{Context, Result}; +use async_trait::async_trait; +use fantoccini::{Client, ClientBuilder}; +use serde_json::Value; +use std::time::Duration; + +/// ChromeDriver WebDriver controller with headless support +pub struct ChromeDriver { + client: Client, +} + +impl ChromeDriver { + /// Create a new ChromeDriver instance in headless mode + /// + /// This will connect to ChromeDriver running on the default port (9515). + /// ChromeDriver must be installed and available in PATH. + pub async fn new_headless() -> Result { + Self::with_port_headless(9515).await + } + + /// Create a new ChromeDriver instance with a custom port in headless mode + pub async fn with_port_headless(port: u16) -> Result { + let url = format!("http://localhost:{}", port); + + let mut caps = serde_json::Map::new(); + caps.insert( + "browserName".to_string(), + Value::String("chrome".to_string()), + ); + + // Set up Chrome options for headless mode + let mut chrome_options = serde_json::Map::new(); + chrome_options.insert( + "args".to_string(), + Value::Array(vec![ + Value::String("--headless=new".to_string()), + Value::String("--disable-gpu".to_string()), + Value::String("--no-sandbox".to_string()), + Value::String("--disable-dev-shm-usage".to_string()), + Value::String("--window-size=1920,1080".to_string()), + ]), + ); + caps.insert( + "goog:chromeOptions".to_string(), + Value::Object(chrome_options), + ); + + let client = ClientBuilder::native() + .capabilities(caps) + .connect(&url) + .await + .context("Failed to connect to ChromeDriver. Make sure ChromeDriver is running and Chrome is installed.")?; + + Ok(Self { client }) + } + + /// Go back in browser history + pub async fn back(&mut self) -> Result<()> { + self.client.back().await?; + Ok(()) + } + + /// Go forward in browser history + pub async fn forward(&mut self) -> Result<()> { + self.client.forward().await?; + Ok(()) + } + + /// Refresh the current page + pub async fn refresh(&mut self) -> Result<()> { + self.client.refresh().await?; + Ok(()) + } + + /// Get all window handles + pub async fn window_handles(&mut self) -> Result> { + let handles = self.client.windows().await?; + Ok(handles.into_iter().map(|h| h.into()).collect()) + } + + /// Switch to a window by handle + pub async fn switch_to_window(&mut self, handle: &str) -> Result<()> { + let window_handle: fantoccini::wd::WindowHandle = handle.to_string().try_into()?; + self.client.switch_to_window(window_handle).await?; + Ok(()) + } + + /// Get the current window handle + pub async fn current_window_handle(&mut self) -> Result { + Ok(self.client.window().await?.into()) + } + + /// Close the current window + pub async fn close_window(&mut self) -> Result<()> { + self.client.close_window().await?; + Ok(()) + } + + /// Create a new window/tab + pub async fn new_window(&mut self, is_tab: bool) -> Result { + let response = self.client.new_window(is_tab).await?; + Ok(response.handle.into()) + } + + /// Get cookies + pub async fn get_cookies(&mut self) -> Result>> { + Ok(self.client.get_all_cookies().await?) + } + + /// Add a cookie + pub async fn add_cookie(&mut self, cookie: fantoccini::cookies::Cookie<'static>) -> Result<()> { + self.client.add_cookie(cookie).await?; + Ok(()) + } + + /// Delete all cookies + pub async fn delete_all_cookies(&mut self) -> Result<()> { + self.client.delete_all_cookies().await?; + Ok(()) + } + + /// Wait for an element to appear (with timeout) + pub async fn wait_for_element( + &mut self, + selector: &str, + timeout: Duration, + ) -> Result { + let start = std::time::Instant::now(); + let poll_interval = Duration::from_millis(100); + + loop { + if let Ok(elem) = self.find_element(selector).await { + return Ok(elem); + } + + if start.elapsed() >= timeout { + anyhow::bail!("Timeout waiting for element: {}", selector); + } + + tokio::time::sleep(poll_interval).await; + } + } + + /// Wait for an element to be visible (with timeout) + pub async fn wait_for_visible( + &mut self, + selector: &str, + timeout: Duration, + ) -> Result { + let start = std::time::Instant::now(); + let poll_interval = Duration::from_millis(100); + + loop { + if let Ok(elem) = self.find_element(selector).await { + if elem.is_displayed().await.unwrap_or(false) { + return Ok(elem); + } + } + + if start.elapsed() >= timeout { + anyhow::bail!("Timeout waiting for element to be visible: {}", selector); + } + + tokio::time::sleep(poll_interval).await; + } + } +} + +#[async_trait] +impl WebDriverController for ChromeDriver { + async fn navigate(&mut self, url: &str) -> Result<()> { + self.client.goto(url).await?; + Ok(()) + } + + async fn current_url(&self) -> Result { + Ok(self.client.current_url().await?.to_string()) + } + + async fn title(&self) -> Result { + Ok(self.client.title().await?) + } + + async fn find_element(&mut self, selector: &str) -> Result { + let elem = self + .client + .find(fantoccini::Locator::Css(selector)) + .await + .context(format!( + "Failed to find element with selector: {}", + selector + ))?; + Ok(WebElement { inner: elem }) + } + + async fn find_elements(&mut self, selector: &str) -> Result> { + let elems = self + .client + .find_all(fantoccini::Locator::Css(selector)) + .await?; + Ok(elems + .into_iter() + .map(|inner| WebElement { inner }) + .collect()) + } + + async fn execute_script(&mut self, script: &str, args: Vec) -> Result { + Ok(self.client.execute(script, args).await?) + } + + async fn page_source(&self) -> Result { + Ok(self.client.source().await?) + } + + async fn screenshot(&mut self, path: &str) -> Result<()> { + let screenshot_data = self.client.screenshot().await?; + + // Expand tilde in path + let expanded_path = shellexpand::tilde(path); + let path_str = expanded_path.as_ref(); + + // Create parent directories if needed + if let Some(parent) = std::path::Path::new(path_str).parent() { + std::fs::create_dir_all(parent) + .context("Failed to create parent directories for screenshot")?; + } + + std::fs::write(path_str, screenshot_data).context("Failed to write screenshot to file")?; + + Ok(()) + } + + async fn close(&mut self) -> Result<()> { + self.client.close_window().await?; + Ok(()) + } + + async fn quit(mut self) -> Result<()> { + self.client.close().await?; + Ok(()) + } +} diff --git a/crates/g3-computer-control/src/webdriver/mod.rs b/crates/g3-computer-control/src/webdriver/mod.rs index ac25f00..13c9afe 100644 --- a/crates/g3-computer-control/src/webdriver/mod.rs +++ b/crates/g3-computer-control/src/webdriver/mod.rs @@ -1,4 +1,5 @@ pub mod safari; +pub mod chrome; use anyhow::Result; use async_trait::async_trait; diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs index b380486..5845d1e 100644 --- a/crates/g3-config/src/lib.rs +++ b/crates/g3-config/src/lib.rs @@ -115,10 +115,24 @@ pub struct ComputerControlConfig { pub max_actions_per_second: u32, } +/// Browser type for WebDriver +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] +#[serde(rename_all = "lowercase")] +pub enum WebDriverBrowser { + Safari, + #[serde(rename = "chrome-headless")] + #[default] + ChromeHeadless, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct WebDriverConfig { pub enabled: bool, pub safari_port: u16, + #[serde(default)] + pub chrome_port: u16, + #[serde(default)] + pub browser: WebDriverBrowser, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -137,6 +151,8 @@ impl Default for WebDriverConfig { Self { enabled: true, safari_port: 4444, + chrome_port: 9515, + browser: WebDriverBrowser::ChromeHeadless, } } } diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 6f0b51e..38ed30f 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -85,6 +85,109 @@ pub struct ToolCall { pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments } +/// Unified WebDriver session that can hold either Safari or Chrome driver +pub enum WebDriverSession { + Safari(g3_computer_control::SafariDriver), + Chrome(g3_computer_control::ChromeDriver), +} + +#[async_trait::async_trait] +impl g3_computer_control::WebDriverController for WebDriverSession { + async fn navigate(&mut self, url: &str) -> anyhow::Result<()> { + match self { + WebDriverSession::Safari(driver) => driver.navigate(url).await, + WebDriverSession::Chrome(driver) => driver.navigate(url).await, + } + } + + async fn current_url(&self) -> anyhow::Result { + match self { + WebDriverSession::Safari(driver) => driver.current_url().await, + WebDriverSession::Chrome(driver) => driver.current_url().await, + } + } + + async fn title(&self) -> anyhow::Result { + match self { + WebDriverSession::Safari(driver) => driver.title().await, + WebDriverSession::Chrome(driver) => driver.title().await, + } + } + + async fn find_element(&mut self, selector: &str) -> anyhow::Result { + match self { + WebDriverSession::Safari(driver) => driver.find_element(selector).await, + WebDriverSession::Chrome(driver) => driver.find_element(selector).await, + } + } + + async fn find_elements(&mut self, selector: &str) -> anyhow::Result> { + match self { + WebDriverSession::Safari(driver) => driver.find_elements(selector).await, + WebDriverSession::Chrome(driver) => driver.find_elements(selector).await, + } + } + + async fn execute_script(&mut self, script: &str, args: Vec) -> anyhow::Result { + match self { + WebDriverSession::Safari(driver) => driver.execute_script(script, args).await, + WebDriverSession::Chrome(driver) => driver.execute_script(script, args).await, + } + } + + async fn page_source(&self) -> anyhow::Result { + match self { + WebDriverSession::Safari(driver) => driver.page_source().await, + WebDriverSession::Chrome(driver) => driver.page_source().await, + } + } + + async fn screenshot(&mut self, path: &str) -> anyhow::Result<()> { + match self { + WebDriverSession::Safari(driver) => driver.screenshot(path).await, + WebDriverSession::Chrome(driver) => driver.screenshot(path).await, + } + } + + async fn close(&mut self) -> anyhow::Result<()> { + match self { + WebDriverSession::Safari(driver) => driver.close().await, + WebDriverSession::Chrome(driver) => driver.close().await, + } + } + + async fn quit(self) -> anyhow::Result<()> { + match self { + WebDriverSession::Safari(driver) => driver.quit().await, + WebDriverSession::Chrome(driver) => driver.quit().await, + } + } +} + +// Additional methods for WebDriverSession that aren't part of the WebDriverController trait +impl WebDriverSession { + pub async fn back(&mut self) -> anyhow::Result<()> { + match self { + WebDriverSession::Safari(driver) => driver.back().await, + WebDriverSession::Chrome(driver) => driver.back().await, + } + } + + pub async fn forward(&mut self) -> anyhow::Result<()> { + match self { + WebDriverSession::Safari(driver) => driver.forward().await, + WebDriverSession::Chrome(driver) => driver.forward().await, + } + } + + pub async fn refresh(&mut self) -> anyhow::Result<()> { + match self { + WebDriverSession::Safari(driver) => driver.refresh().await, + WebDriverSession::Chrome(driver) => driver.refresh().await, + } + } +} + /// Options for fast-start discovery execution #[derive(Debug, Clone)] pub struct DiscoveryOptions<'a> { @@ -1062,10 +1165,10 @@ pub struct Agent { todo_content: std::sync::Arc>, webdriver_session: std::sync::Arc< tokio::sync::RwLock< - Option>>, + Option>>, >, >, - safaridriver_process: std::sync::Arc>>, + webdriver_process: std::sync::Arc>>, macax_controller: std::sync::Arc>>, tool_call_count: usize, @@ -1356,7 +1459,7 @@ impl Agent { quiet, computer_controller, webdriver_session: std::sync::Arc::new(tokio::sync::RwLock::new(None)), - safaridriver_process: std::sync::Arc::new(tokio::sync::RwLock::new(None)), + webdriver_process: std::sync::Arc::new(tokio::sync::RwLock::new(None)), macax_controller: { std::sync::Arc::new(tokio::sync::RwLock::new(if macax_enabled { Some(g3_computer_control::MacAxController::new()?) @@ -3218,10 +3321,19 @@ impl Agent { }, Tool { name: "webdriver_get_page_source".to_string(), - description: "Get the HTML source of the current page".to_string(), + description: "Get the rendered HTML source of the current page. Returns the current DOM state after JavaScript execution.".to_string(), input_schema: json!({ "type": "object", - "properties": {}, + "properties": { + "max_length": { + "type": "integer", + "description": "Maximum length of HTML to return (default: 10000, use 0 for no truncation)" + }, + "save_to_file": { + "type": "string", + "description": "Optional file path to save the HTML instead of returning it inline" + } + }, "required": [] }), }, @@ -5426,46 +5538,82 @@ impl Agent { } drop(session_guard); - // Note: Safari Remote Automation must be enabled before using WebDriver. - // Run this once: safaridriver --enable - // Or enable manually: Safari → Develop → Allow Remote Automation + // Determine which browser to use based on config + use g3_config::WebDriverBrowser; + match &self.config.webdriver.browser { + WebDriverBrowser::Safari => { + // Note: Safari Remote Automation must be enabled before using WebDriver. + // Run this once: safaridriver --enable + // Or enable manually: Safari → Develop → Allow Remote Automation - // Start safaridriver process - let port = self.config.webdriver.safari_port; + let port = self.config.webdriver.safari_port; - let safaridriver_result = tokio::process::Command::new("safaridriver") - .arg("--port") - .arg(port.to_string()) - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .spawn(); + let driver_result = tokio::process::Command::new("safaridriver") + .arg("--port") + .arg(port.to_string()) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn(); - let mut safaridriver_process = match safaridriver_result { - Ok(process) => process, - Err(e) => { - return Ok(format!("❌ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.", e)); + let mut webdriver_process = match driver_result { + Ok(process) => process, + Err(e) => { + return Ok(format!("❌ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.", e)); + } + }; + + // Wait for safaridriver to start up + tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await; + + // Connect to SafariDriver + match g3_computer_control::SafariDriver::with_port(port).await { + Ok(driver) => { + let session = std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Safari(driver))); + *self.webdriver_session.write().await = Some(session); + *self.webdriver_process.write().await = Some(webdriver_process); + + Ok("✅ WebDriver session started successfully! Safari should open automatically.".to_string()) + } + Err(e) => { + let _ = webdriver_process.kill().await; + Ok(format!("❌ Failed to connect to SafariDriver: {}\n\nThis might be because:\n - Safari Remote Automation is not enabled (run: safaridriver --enable)\n - Port {} is already in use\n - Safari failed to start\n - Network connectivity issue\n\nTo enable Remote Automation:\n 1. Run: safaridriver --enable (requires password, one-time setup)\n 2. Or manually: Safari → Develop → Allow Remote Automation", e, port)) + } + } } - }; + WebDriverBrowser::ChromeHeadless => { + let port = self.config.webdriver.chrome_port; - // Wait for safaridriver to start up - tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await; + // Start chromedriver process + let driver_result = tokio::process::Command::new("chromedriver") + .arg(format!("--port={}", port)) + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .spawn(); - // Connect to SafariDriver - match g3_computer_control::SafariDriver::with_port(port).await { - Ok(driver) => { - let session = std::sync::Arc::new(tokio::sync::Mutex::new(driver)); - *self.webdriver_session.write().await = Some(session); + let mut webdriver_process = match driver_result { + Ok(process) => process, + Err(e) => { + return Ok(format!("❌ Failed to start chromedriver: {}\n\nMake sure chromedriver is installed and in your PATH.\n\nInstall with:\n - macOS: brew install chromedriver\n - Linux: apt install chromium-chromedriver\n - Or download from: https://chromedriver.chromium.org/downloads", e)); + } + }; - // Store the process handle - *self.safaridriver_process.write().await = Some(safaridriver_process); + // Wait for chromedriver to start up + tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await; - Ok("✅ WebDriver session started successfully! Safari should open automatically.".to_string()) - } - Err(e) => { - // Kill the safaridriver process if connection failed - let _ = safaridriver_process.kill().await; + // Connect to ChromeDriver in headless mode + match g3_computer_control::ChromeDriver::with_port_headless(port).await { + Ok(driver) => { + let session = std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Chrome(driver))); + *self.webdriver_session.write().await = Some(session); + *self.webdriver_process.write().await = Some(webdriver_process); - Ok(format!("❌ Failed to connect to SafariDriver: {}\n\nThis might be because:\n - Safari Remote Automation is not enabled (run: safaridriver --enable)\n - Port {} is already in use\n - Safari failed to start\n - Network connectivity issue\n\nTo enable Remote Automation:\n 1. Run: safaridriver --enable (requires password, one-time setup)\n 2. Or manually: Safari → Develop → Allow Remote Automation", e, port)) + Ok("✅ WebDriver session started successfully! Chrome is running in headless mode (no visible window).".to_string()) + } + Err(e) => { + let _ = webdriver_process.kill().await; + Ok(format!("❌ Failed to connect to ChromeDriver: {}\n\nThis might be because:\n - Chrome is not installed\n - ChromeDriver version doesn't match Chrome version\n - Port {} is already in use\n\nMake sure Chrome and ChromeDriver are installed and compatible.", e, port)) + } + } } } } @@ -5756,6 +5904,19 @@ impl Agent { ); } + // Extract optional parameters + let max_length = tool_call + .args + .get("max_length") + .and_then(|v| v.as_u64()) + .map(|n| n as usize) + .unwrap_or(10000); + + let save_to_file = tool_call + .args + .get("save_to_file") + .and_then(|v| v.as_str()); + let session_guard = self.webdriver_session.read().await; let session = match session_guard.as_ref() { Some(s) => s.clone(), @@ -5770,14 +5931,36 @@ impl Agent { let driver = session.lock().await; match driver.page_source().await { Ok(source) => { - // Truncate if too long - if source.len() > 10000 { + // If save_to_file is specified, write to file + if let Some(file_path) = save_to_file { + let expanded_path = shellexpand::tilde(file_path); + let path_str = expanded_path.as_ref(); + + // Create parent directories if needed + if let Some(parent) = std::path::Path::new(path_str).parent() { + if let Err(e) = std::fs::create_dir_all(parent) { + return Ok(format!("❌ Failed to create directories: {}", e)); + } + } + + match std::fs::write(path_str, &source) { + Ok(_) => Ok(format!( + "✅ Page source ({} chars) saved to: {}", + source.len(), + path_str + )), + Err(e) => Ok(format!("❌ Failed to write file: {}", e)), + } + } else if max_length > 0 && source.len() > max_length { + // Truncate if max_length is set and source exceeds it Ok(format!( - "Page source ({} chars, truncated to 10000):\n{}...", + "Page source ({} chars, truncated to {}):\n{}...", source.len(), - &source[..10000] + max_length, + &source[..max_length] )) } else { + // Return full source Ok(format!("Page source ({} chars):\n{}", source.len(), source)) } } @@ -5918,7 +6101,7 @@ impl Agent { // Kill the safaridriver process if let Some(mut process) = - self.safaridriver_process.write().await.take() + self.webdriver_process.write().await.take() { if let Err(e) = process.kill().await { warn!("Failed to kill safaridriver process: {}", e); @@ -6812,7 +6995,7 @@ impl Drop for Agent { // Try to kill safaridriver process if it's still running // We need to use try_lock since we can't await in Drop - if let Ok(mut process_guard) = self.safaridriver_process.try_write() { + if let Ok(mut process_guard) = self.webdriver_process.try_write() { if let Some(process) = process_guard.take() { // Use blocking kill since we can't await in Drop // This is a best-effort cleanup diff --git a/crates/g3-core/src/prompts.rs b/crates/g3-core/src/prompts.rs index 79c323b..de4ba13 100644 --- a/crates/g3-core/src/prompts.rs +++ b/crates/g3-core/src/prompts.rs @@ -135,6 +135,24 @@ If you can complete it with 1-2 tool calls, skip TODO. IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg. If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir. +# Web Research with WebDriver + +When you need to look up documentation, search for resources, find data online, or simply search the web to complete your task, you have access to WebDriver browser automation tools. + +**How to use WebDriver for research:** +1. Call `webdriver_start` to begin a browser session (runs Chrome headless by default - no visible window) +2. Use `webdriver_navigate` to go to URLs (search engines, documentation sites, etc.) +3. **IMPORTANT**: Always use `webdriver_get_page_source` with `save_to_file` parameter to save the page HTML to disk +4. Read the saved HTML file with `read_file` to extract the information you need +5. Call `webdriver_quit` when done + +**Best practices:** +- Do NOT use `webdriver_screenshot` or try to decode page content visually - always save HTML to disk and read it +- Save pages to the `tmp/` subdirectory (e.g., `tmp/search_results.html`) +- Parse the HTML text content to find what you need +- For search engines, look for result links and titles in the HTML +- Close the WebDriver session when you're done to free resources + # Code Search Guidelines IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg.