Make Chrome headless the default WebDriver browser

- Add --safari flag to CLI for explicitly choosing Safari
- Update --chrome-headless flag description to indicate it's the default
- Update README to reflect Chrome headless as default
- Remove broken link to non-existent docs/webdriver-setup.md
- Add Safari flag handling in all webdriver config locations

The config already had ChromeHeadless as the default; this commit
updates the CLI and documentation to match.
This commit is contained in:
Dhanji R. Prasanna
2025-12-15 16:51:42 +11:00
parent d32bd9be03
commit 3d1b86d24b
9 changed files with 569 additions and 46 deletions

View File

@@ -245,7 +245,7 @@ See `config.example.toml` for a complete configuration example.
## WebDriver Browser Automation ## WebDriver Browser Automation
G3 includes WebDriver support for browser automation tasks using Safari. G3 includes WebDriver support for browser automation tasks. Chrome headless is the default (no visible browser window), with Safari available as an alternative.
**One-Time Setup** (macOS only): **One-Time Setup** (macOS only):
@@ -263,9 +263,20 @@ safaridriver --enable # Requires password
# Then: Develop → Allow Remote Automation # Then: Develop → Allow Remote Automation
``` ```
**For detailed setup instructions and troubleshooting**, see [WebDriver Setup Guide](docs/webdriver-setup.md). **Usage**:
**Usage**: Run G3 with the `--webdriver` flag to enable browser automation tools. ```bash
# Use Chrome in headless mode (default, no visible window, runs in background)
g3 --webdriver
# Use Safari (opens a visible browser window)
g3 --webdriver --safari
```
**Chrome Headless Setup**: Install ChromeDriver:
- macOS: `brew install chromedriver`
- Linux: `apt install chromium-chromedriver`
- Or download from: https://chromedriver.chromium.org/downloads
## macOS Accessibility API Tools ## macOS Accessibility API Tools

View File

@@ -105,6 +105,11 @@ max_actions_per_second = 5
[webdriver] [webdriver]
enabled = false enabled = false
safari_port = 4444 safari_port = 4444
chrome_port = 9515
# Browser to use: "safari" or "chrome-headless" (default)
# Safari opens a visible browser window
# Chrome headless runs in the background without a visible window
browser = "chrome-headless"
[macax] [macax]
enabled = false enabled = false

View File

@@ -343,6 +343,14 @@ pub struct Cli {
#[arg(long)] #[arg(long)]
pub webdriver: bool, pub webdriver: bool,
/// Use Chrome in headless mode for WebDriver (this is the default)
#[arg(long)]
pub chrome_headless: bool,
/// Use Safari for WebDriver (instead of headless Chrome)
#[arg(long)]
pub safari: bool,
/// Enable flock mode - parallel multi-agent development /// Enable flock mode - parallel multi-agent development
#[arg(long, requires = "flock_workspace", requires = "segments")] #[arg(long, requires = "flock_workspace", requires = "segments")]
pub project: Option<PathBuf>, pub project: Option<PathBuf>,
@@ -506,6 +514,18 @@ pub async fn run() -> Result<()> {
config.webdriver.enabled = true; config.webdriver.enabled = true;
} }
// Apply chrome-headless flag override
if cli.chrome_headless {
config.webdriver.enabled = true;
config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless;
}
// Apply safari flag override
if cli.safari {
config.webdriver.enabled = true;
config.webdriver.browser = g3_config::WebDriverBrowser::Safari;
}
// Apply no-auto-compact flag override // Apply no-auto-compact flag override
if cli.manual_compact { if cli.manual_compact {
config.agent.auto_compact = false; config.agent.auto_compact = false;
@@ -774,6 +794,18 @@ async fn run_accumulative_mode(
config.webdriver.enabled = true; config.webdriver.enabled = true;
} }
// Apply chrome-headless flag override
if cli.chrome_headless {
config.webdriver.enabled = true;
config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless;
}
// Apply safari flag override
if cli.safari {
config.webdriver.enabled = true;
config.webdriver.browser = g3_config::WebDriverBrowser::Safari;
}
// Apply no-auto-compact flag override // Apply no-auto-compact flag override
if cli.manual_compact { if cli.manual_compact {
config.agent.auto_compact = false; config.agent.auto_compact = false;
@@ -869,6 +901,18 @@ async fn run_accumulative_mode(
config.webdriver.enabled = true; config.webdriver.enabled = true;
} }
// Apply chrome-headless flag override
if cli.chrome_headless {
config.webdriver.enabled = true;
config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless;
}
// Apply safari flag override
if cli.safari {
config.webdriver.enabled = true;
config.webdriver.browser = g3_config::WebDriverBrowser::Safari;
}
// Apply no-auto-compact flag override // Apply no-auto-compact flag override
if cli.manual_compact { if cli.manual_compact {
config.agent.auto_compact = false; config.agent.auto_compact = false;

View File

@@ -8,7 +8,9 @@ pub mod types;
pub mod webdriver; pub mod webdriver;
// Re-export webdriver types for convenience // Re-export webdriver types for convenience
pub use webdriver::{safari::SafariDriver, WebDriverController, WebElement}; pub use webdriver::{
chrome::ChromeDriver, safari::SafariDriver, WebDriverController, WebElement,
};
// Re-export macax types for convenience // Re-export macax types for convenience
pub use macax::{AXApplication, AXElement, MacAxController}; pub use macax::{AXApplication, AXElement, MacAxController};

View File

@@ -0,0 +1,243 @@
use super::{WebDriverController, WebElement};
use anyhow::{Context, Result};
use async_trait::async_trait;
use fantoccini::{Client, ClientBuilder};
use serde_json::Value;
use std::time::Duration;
/// ChromeDriver WebDriver controller with headless support.
///
/// Holds the `fantoccini` client through which every method of this type
/// issues its WebDriver commands.
pub struct ChromeDriver {
/// Client for the session opened by `with_port_headless`.
client: Client,
}
impl ChromeDriver {
/// Create a new ChromeDriver instance in headless mode
///
/// This will connect to ChromeDriver running on the default port (9515).
/// ChromeDriver must be installed and available in PATH.
pub async fn new_headless() -> Result<Self> {
Self::with_port_headless(9515).await
}
/// Create a new ChromeDriver instance with a custom port in headless mode
pub async fn with_port_headless(port: u16) -> Result<Self> {
let url = format!("http://localhost:{}", port);
let mut caps = serde_json::Map::new();
caps.insert(
"browserName".to_string(),
Value::String("chrome".to_string()),
);
// Set up Chrome options for headless mode
let mut chrome_options = serde_json::Map::new();
chrome_options.insert(
"args".to_string(),
Value::Array(vec![
Value::String("--headless=new".to_string()),
Value::String("--disable-gpu".to_string()),
Value::String("--no-sandbox".to_string()),
Value::String("--disable-dev-shm-usage".to_string()),
Value::String("--window-size=1920,1080".to_string()),
]),
);
caps.insert(
"goog:chromeOptions".to_string(),
Value::Object(chrome_options),
);
let client = ClientBuilder::native()
.capabilities(caps)
.connect(&url)
.await
.context("Failed to connect to ChromeDriver. Make sure ChromeDriver is running and Chrome is installed.")?;
Ok(Self { client })
}
/// Go back in browser history
pub async fn back(&mut self) -> Result<()> {
self.client.back().await?;
Ok(())
}
/// Go forward in browser history
pub async fn forward(&mut self) -> Result<()> {
self.client.forward().await?;
Ok(())
}
/// Refresh the current page
pub async fn refresh(&mut self) -> Result<()> {
self.client.refresh().await?;
Ok(())
}
/// Get all window handles
pub async fn window_handles(&mut self) -> Result<Vec<String>> {
let handles = self.client.windows().await?;
Ok(handles.into_iter().map(|h| h.into()).collect())
}
/// Switch to a window by handle
pub async fn switch_to_window(&mut self, handle: &str) -> Result<()> {
let window_handle: fantoccini::wd::WindowHandle = handle.to_string().try_into()?;
self.client.switch_to_window(window_handle).await?;
Ok(())
}
/// Get the current window handle
pub async fn current_window_handle(&mut self) -> Result<String> {
Ok(self.client.window().await?.into())
}
/// Close the current window
pub async fn close_window(&mut self) -> Result<()> {
self.client.close_window().await?;
Ok(())
}
/// Create a new window/tab
pub async fn new_window(&mut self, is_tab: bool) -> Result<String> {
let response = self.client.new_window(is_tab).await?;
Ok(response.handle.into())
}
/// Get cookies
pub async fn get_cookies(&mut self) -> Result<Vec<fantoccini::cookies::Cookie<'static>>> {
Ok(self.client.get_all_cookies().await?)
}
/// Add a cookie
pub async fn add_cookie(&mut self, cookie: fantoccini::cookies::Cookie<'static>) -> Result<()> {
self.client.add_cookie(cookie).await?;
Ok(())
}
/// Delete all cookies
pub async fn delete_all_cookies(&mut self) -> Result<()> {
self.client.delete_all_cookies().await?;
Ok(())
}
/// Wait for an element to appear (with timeout)
pub async fn wait_for_element(
&mut self,
selector: &str,
timeout: Duration,
) -> Result<WebElement> {
let start = std::time::Instant::now();
let poll_interval = Duration::from_millis(100);
loop {
if let Ok(elem) = self.find_element(selector).await {
return Ok(elem);
}
if start.elapsed() >= timeout {
anyhow::bail!("Timeout waiting for element: {}", selector);
}
tokio::time::sleep(poll_interval).await;
}
}
/// Wait for an element to be visible (with timeout)
pub async fn wait_for_visible(
&mut self,
selector: &str,
timeout: Duration,
) -> Result<WebElement> {
let start = std::time::Instant::now();
let poll_interval = Duration::from_millis(100);
loop {
if let Ok(elem) = self.find_element(selector).await {
if elem.is_displayed().await.unwrap_or(false) {
return Ok(elem);
}
}
if start.elapsed() >= timeout {
anyhow::bail!("Timeout waiting for element to be visible: {}", selector);
}
tokio::time::sleep(poll_interval).await;
}
}
}
#[async_trait]
impl WebDriverController for ChromeDriver {
    /// Load `url` in the current window.
    async fn navigate(&mut self, url: &str) -> Result<()> {
        self.client.goto(url).await?;
        Ok(())
    }

    /// Return the URL of the current page as a string.
    async fn current_url(&self) -> Result<String> {
        Ok(self.client.current_url().await?.to_string())
    }

    /// Return the title of the current page.
    async fn title(&self) -> Result<String> {
        Ok(self.client.title().await?)
    }

    /// Find the first element matching the CSS `selector`.
    ///
    /// # Errors
    ///
    /// Returns the lookup error with the selector added as context.
    async fn find_element(&mut self, selector: &str) -> Result<WebElement> {
        let elem = self
            .client
            .find(fantoccini::Locator::Css(selector))
            .await
            // Build the message lazily: the success path no longer allocates
            // a string that is immediately thrown away.
            .with_context(|| format!("Failed to find element with selector: {}", selector))?;
        Ok(WebElement { inner: elem })
    }

    /// Find every element matching the CSS `selector`.
    async fn find_elements(&mut self, selector: &str) -> Result<Vec<WebElement>> {
        let elems = self
            .client
            .find_all(fantoccini::Locator::Css(selector))
            .await?;
        Ok(elems
            .into_iter()
            .map(|inner| WebElement { inner })
            .collect())
    }

    /// Run `script` in the page with `args` and return its JSON result.
    async fn execute_script(&mut self, script: &str, args: Vec<Value>) -> Result<Value> {
        Ok(self.client.execute(script, args).await?)
    }

    /// Return the source of the current page as reported by the client.
    async fn page_source(&self) -> Result<String> {
        Ok(self.client.source().await?)
    }

    /// Capture a screenshot and write it to `path`.
    ///
    /// A leading `~` in `path` is expanded, and missing parent directories
    /// are created before the file is written.
    ///
    /// # Errors
    ///
    /// Fails if the screenshot cannot be taken, the parent directories cannot
    /// be created, or the file cannot be written.
    async fn screenshot(&mut self, path: &str) -> Result<()> {
        let screenshot_data = self.client.screenshot().await?;

        // Expand tilde in path
        let expanded_path = shellexpand::tilde(path);
        let path_str = expanded_path.as_ref();

        // Create parent directories if needed
        if let Some(parent) = std::path::Path::new(path_str).parent() {
            std::fs::create_dir_all(parent)
                .context("Failed to create parent directories for screenshot")?;
        }

        std::fs::write(path_str, screenshot_data).context("Failed to write screenshot to file")?;
        Ok(())
    }

    /// Close the current window.
    async fn close(&mut self) -> Result<()> {
        self.client.close_window().await?;
        Ok(())
    }

    /// Consume the driver and call `close` on the underlying client.
    async fn quit(mut self) -> Result<()> {
        self.client.close().await?;
        Ok(())
    }
}

View File

@@ -1,4 +1,5 @@
pub mod safari; pub mod safari;
pub mod chrome;
use anyhow::Result; use anyhow::Result;
use async_trait::async_trait; use async_trait::async_trait;

View File

@@ -115,10 +115,24 @@ pub struct ComputerControlConfig {
pub max_actions_per_second: u32, pub max_actions_per_second: u32,
} }
/// Browser used for WebDriver sessions.
///
/// Serialized in kebab-case: `"safari"` and `"chrome-headless"`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
#[serde(rename_all = "kebab-case")]
pub enum WebDriverBrowser {
    /// Safari.
    Safari,
    /// Chrome in headless mode; this is the default variant.
    #[default]
    ChromeHeadless,
}
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebDriverConfig { pub struct WebDriverConfig {
pub enabled: bool, pub enabled: bool,
pub safari_port: u16, pub safari_port: u16,
#[serde(default)]
pub chrome_port: u16,
#[serde(default)]
pub browser: WebDriverBrowser,
} }
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -137,6 +151,8 @@ impl Default for WebDriverConfig {
Self { Self {
enabled: true, enabled: true,
safari_port: 4444, safari_port: 4444,
chrome_port: 9515,
browser: WebDriverBrowser::ChromeHeadless,
} }
} }
} }

View File

@@ -85,6 +85,109 @@ pub struct ToolCall {
pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments
} }
/// Unified WebDriver session that can hold either Safari or Chrome driver
///
/// Every method on this type matches on the variant and forwards the call to
/// the wrapped driver.
pub enum WebDriverSession {
/// Session backed by a `SafariDriver`.
Safari(g3_computer_control::SafariDriver),
/// Session backed by a `ChromeDriver`.
Chrome(g3_computer_control::ChromeDriver),
}
#[async_trait::async_trait]
impl g3_computer_control::WebDriverController for WebDriverSession {
async fn navigate(&mut self, url: &str) -> anyhow::Result<()> {
match self {
WebDriverSession::Safari(driver) => driver.navigate(url).await,
WebDriverSession::Chrome(driver) => driver.navigate(url).await,
}
}
async fn current_url(&self) -> anyhow::Result<String> {
match self {
WebDriverSession::Safari(driver) => driver.current_url().await,
WebDriverSession::Chrome(driver) => driver.current_url().await,
}
}
async fn title(&self) -> anyhow::Result<String> {
match self {
WebDriverSession::Safari(driver) => driver.title().await,
WebDriverSession::Chrome(driver) => driver.title().await,
}
}
async fn find_element(&mut self, selector: &str) -> anyhow::Result<g3_computer_control::WebElement> {
match self {
WebDriverSession::Safari(driver) => driver.find_element(selector).await,
WebDriverSession::Chrome(driver) => driver.find_element(selector).await,
}
}
async fn find_elements(&mut self, selector: &str) -> anyhow::Result<Vec<g3_computer_control::WebElement>> {
match self {
WebDriverSession::Safari(driver) => driver.find_elements(selector).await,
WebDriverSession::Chrome(driver) => driver.find_elements(selector).await,
}
}
async fn execute_script(&mut self, script: &str, args: Vec<serde_json::Value>) -> anyhow::Result<serde_json::Value> {
match self {
WebDriverSession::Safari(driver) => driver.execute_script(script, args).await,
WebDriverSession::Chrome(driver) => driver.execute_script(script, args).await,
}
}
async fn page_source(&self) -> anyhow::Result<String> {
match self {
WebDriverSession::Safari(driver) => driver.page_source().await,
WebDriverSession::Chrome(driver) => driver.page_source().await,
}
}
async fn screenshot(&mut self, path: &str) -> anyhow::Result<()> {
match self {
WebDriverSession::Safari(driver) => driver.screenshot(path).await,
WebDriverSession::Chrome(driver) => driver.screenshot(path).await,
}
}
async fn close(&mut self) -> anyhow::Result<()> {
match self {
WebDriverSession::Safari(driver) => driver.close().await,
WebDriverSession::Chrome(driver) => driver.close().await,
}
}
async fn quit(self) -> anyhow::Result<()> {
match self {
WebDriverSession::Safari(driver) => driver.quit().await,
WebDriverSession::Chrome(driver) => driver.quit().await,
}
}
}
// History and reload helpers that live on the concrete drivers rather than on
// the WebDriverController trait.
impl WebDriverSession {
    /// Forward `back` to the wrapped driver.
    pub async fn back(&mut self) -> anyhow::Result<()> {
        match self {
            Self::Safari(d) => d.back().await,
            Self::Chrome(d) => d.back().await,
        }
    }

    /// Forward `forward` to the wrapped driver.
    pub async fn forward(&mut self) -> anyhow::Result<()> {
        match self {
            Self::Safari(d) => d.forward().await,
            Self::Chrome(d) => d.forward().await,
        }
    }

    /// Forward `refresh` to the wrapped driver.
    pub async fn refresh(&mut self) -> anyhow::Result<()> {
        match self {
            Self::Safari(d) => d.refresh().await,
            Self::Chrome(d) => d.refresh().await,
        }
    }
}
/// Options for fast-start discovery execution /// Options for fast-start discovery execution
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct DiscoveryOptions<'a> { pub struct DiscoveryOptions<'a> {
@@ -1062,10 +1165,10 @@ pub struct Agent<W: UiWriter> {
todo_content: std::sync::Arc<tokio::sync::RwLock<String>>, todo_content: std::sync::Arc<tokio::sync::RwLock<String>>,
webdriver_session: std::sync::Arc< webdriver_session: std::sync::Arc<
tokio::sync::RwLock< tokio::sync::RwLock<
Option<std::sync::Arc<tokio::sync::Mutex<g3_computer_control::SafariDriver>>>, Option<std::sync::Arc<tokio::sync::Mutex<WebDriverSession>>>,
>, >,
>, >,
safaridriver_process: std::sync::Arc<tokio::sync::RwLock<Option<tokio::process::Child>>>, webdriver_process: std::sync::Arc<tokio::sync::RwLock<Option<tokio::process::Child>>>,
macax_controller: macax_controller:
std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>, std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
tool_call_count: usize, tool_call_count: usize,
@@ -1356,7 +1459,7 @@ impl<W: UiWriter> Agent<W> {
quiet, quiet,
computer_controller, computer_controller,
webdriver_session: std::sync::Arc::new(tokio::sync::RwLock::new(None)), webdriver_session: std::sync::Arc::new(tokio::sync::RwLock::new(None)),
safaridriver_process: std::sync::Arc::new(tokio::sync::RwLock::new(None)), webdriver_process: std::sync::Arc::new(tokio::sync::RwLock::new(None)),
macax_controller: { macax_controller: {
std::sync::Arc::new(tokio::sync::RwLock::new(if macax_enabled { std::sync::Arc::new(tokio::sync::RwLock::new(if macax_enabled {
Some(g3_computer_control::MacAxController::new()?) Some(g3_computer_control::MacAxController::new()?)
@@ -3218,10 +3321,19 @@ impl<W: UiWriter> Agent<W> {
}, },
Tool { Tool {
name: "webdriver_get_page_source".to_string(), name: "webdriver_get_page_source".to_string(),
description: "Get the HTML source of the current page".to_string(), description: "Get the rendered HTML source of the current page. Returns the current DOM state after JavaScript execution.".to_string(),
input_schema: json!({ input_schema: json!({
"type": "object", "type": "object",
"properties": {}, "properties": {
"max_length": {
"type": "integer",
"description": "Maximum length of HTML to return (default: 10000, use 0 for no truncation)"
},
"save_to_file": {
"type": "string",
"description": "Optional file path to save the HTML instead of returning it inline"
}
},
"required": [] "required": []
}), }),
}, },
@@ -5426,21 +5538,24 @@ impl<W: UiWriter> Agent<W> {
} }
drop(session_guard); drop(session_guard);
// Determine which browser to use based on config
use g3_config::WebDriverBrowser;
match &self.config.webdriver.browser {
WebDriverBrowser::Safari => {
// Note: Safari Remote Automation must be enabled before using WebDriver. // Note: Safari Remote Automation must be enabled before using WebDriver.
// Run this once: safaridriver --enable // Run this once: safaridriver --enable
// Or enable manually: Safari → Develop → Allow Remote Automation // Or enable manually: Safari → Develop → Allow Remote Automation
// Start safaridriver process
let port = self.config.webdriver.safari_port; let port = self.config.webdriver.safari_port;
let safaridriver_result = tokio::process::Command::new("safaridriver") let driver_result = tokio::process::Command::new("safaridriver")
.arg("--port") .arg("--port")
.arg(port.to_string()) .arg(port.to_string())
.stdout(std::process::Stdio::null()) .stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null()) .stderr(std::process::Stdio::null())
.spawn(); .spawn();
let mut safaridriver_process = match safaridriver_result { let mut webdriver_process = match driver_result {
Ok(process) => process, Ok(process) => process,
Err(e) => { Err(e) => {
return Ok(format!("❌ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.", e)); return Ok(format!("❌ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.", e));
@@ -5453,22 +5568,55 @@ impl<W: UiWriter> Agent<W> {
// Connect to SafariDriver // Connect to SafariDriver
match g3_computer_control::SafariDriver::with_port(port).await { match g3_computer_control::SafariDriver::with_port(port).await {
Ok(driver) => { Ok(driver) => {
let session = std::sync::Arc::new(tokio::sync::Mutex::new(driver)); let session = std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Safari(driver)));
*self.webdriver_session.write().await = Some(session); *self.webdriver_session.write().await = Some(session);
*self.webdriver_process.write().await = Some(webdriver_process);
// Store the process handle
*self.safaridriver_process.write().await = Some(safaridriver_process);
Ok("✅ WebDriver session started successfully! Safari should open automatically.".to_string()) Ok("✅ WebDriver session started successfully! Safari should open automatically.".to_string())
} }
Err(e) => { Err(e) => {
// Kill the safaridriver process if connection failed let _ = webdriver_process.kill().await;
let _ = safaridriver_process.kill().await;
Ok(format!("❌ Failed to connect to SafariDriver: {}\n\nThis might be because:\n - Safari Remote Automation is not enabled (run: safaridriver --enable)\n - Port {} is already in use\n - Safari failed to start\n - Network connectivity issue\n\nTo enable Remote Automation:\n 1. Run: safaridriver --enable (requires password, one-time setup)\n 2. Or manually: Safari → Develop → Allow Remote Automation", e, port)) Ok(format!("❌ Failed to connect to SafariDriver: {}\n\nThis might be because:\n - Safari Remote Automation is not enabled (run: safaridriver --enable)\n - Port {} is already in use\n - Safari failed to start\n - Network connectivity issue\n\nTo enable Remote Automation:\n 1. Run: safaridriver --enable (requires password, one-time setup)\n 2. Or manually: Safari → Develop → Allow Remote Automation", e, port))
} }
} }
} }
WebDriverBrowser::ChromeHeadless => {
let port = self.config.webdriver.chrome_port;
// Start chromedriver process
let driver_result = tokio::process::Command::new("chromedriver")
.arg(format!("--port={}", port))
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.spawn();
let mut webdriver_process = match driver_result {
Ok(process) => process,
Err(e) => {
return Ok(format!("❌ Failed to start chromedriver: {}\n\nMake sure chromedriver is installed and in your PATH.\n\nInstall with:\n - macOS: brew install chromedriver\n - Linux: apt install chromium-chromedriver\n - Or download from: https://chromedriver.chromium.org/downloads", e));
}
};
// Wait for chromedriver to start up
tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
// Connect to ChromeDriver in headless mode
match g3_computer_control::ChromeDriver::with_port_headless(port).await {
Ok(driver) => {
let session = std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Chrome(driver)));
*self.webdriver_session.write().await = Some(session);
*self.webdriver_process.write().await = Some(webdriver_process);
Ok("✅ WebDriver session started successfully! Chrome is running in headless mode (no visible window).".to_string())
}
Err(e) => {
let _ = webdriver_process.kill().await;
Ok(format!("❌ Failed to connect to ChromeDriver: {}\n\nThis might be because:\n - Chrome is not installed\n - ChromeDriver version doesn't match Chrome version\n - Port {} is already in use\n\nMake sure Chrome and ChromeDriver are installed and compatible.", e, port))
}
}
}
}
}
"webdriver_navigate" => { "webdriver_navigate" => {
debug!("Processing webdriver_navigate tool call"); debug!("Processing webdriver_navigate tool call");
@@ -5756,6 +5904,19 @@ impl<W: UiWriter> Agent<W> {
); );
} }
// Extract optional parameters
let max_length = tool_call
.args
.get("max_length")
.and_then(|v| v.as_u64())
.map(|n| n as usize)
.unwrap_or(10000);
let save_to_file = tool_call
.args
.get("save_to_file")
.and_then(|v| v.as_str());
let session_guard = self.webdriver_session.read().await; let session_guard = self.webdriver_session.read().await;
let session = match session_guard.as_ref() { let session = match session_guard.as_ref() {
Some(s) => s.clone(), Some(s) => s.clone(),
@@ -5770,14 +5931,36 @@ impl<W: UiWriter> Agent<W> {
let driver = session.lock().await; let driver = session.lock().await;
match driver.page_source().await { match driver.page_source().await {
Ok(source) => { Ok(source) => {
// Truncate if too long // If save_to_file is specified, write to file
if source.len() > 10000 { if let Some(file_path) = save_to_file {
Ok(format!( let expanded_path = shellexpand::tilde(file_path);
"Page source ({} chars, truncated to 10000):\n{}...", let path_str = expanded_path.as_ref();
// Create parent directories if needed
if let Some(parent) = std::path::Path::new(path_str).parent() {
if let Err(e) = std::fs::create_dir_all(parent) {
return Ok(format!("❌ Failed to create directories: {}", e));
}
}
match std::fs::write(path_str, &source) {
Ok(_) => Ok(format!(
"✅ Page source ({} chars) saved to: {}",
source.len(), source.len(),
&source[..10000] path_str
)),
Err(e) => Ok(format!("❌ Failed to write file: {}", e)),
}
} else if max_length > 0 && source.len() > max_length {
// Truncate if max_length is set and source exceeds it
Ok(format!(
"Page source ({} chars, truncated to {}):\n{}...",
source.len(),
max_length,
&source[..max_length]
)) ))
} else { } else {
// Return full source
Ok(format!("Page source ({} chars):\n{}", source.len(), source)) Ok(format!("Page source ({} chars):\n{}", source.len(), source))
} }
} }
@@ -5918,7 +6101,7 @@ impl<W: UiWriter> Agent<W> {
// Kill the safaridriver process // Kill the safaridriver process
if let Some(mut process) = if let Some(mut process) =
self.safaridriver_process.write().await.take() self.webdriver_process.write().await.take()
{ {
if let Err(e) = process.kill().await { if let Err(e) = process.kill().await {
warn!("Failed to kill safaridriver process: {}", e); warn!("Failed to kill safaridriver process: {}", e);
@@ -6812,7 +6995,7 @@ impl<W: UiWriter> Drop for Agent<W> {
// Try to kill safaridriver process if it's still running // Try to kill safaridriver process if it's still running
// We need to use try_lock since we can't await in Drop // We need to use try_lock since we can't await in Drop
if let Ok(mut process_guard) = self.safaridriver_process.try_write() { if let Ok(mut process_guard) = self.webdriver_process.try_write() {
if let Some(process) = process_guard.take() { if let Some(process) = process_guard.take() {
// Use blocking kill since we can't await in Drop // Use blocking kill since we can't await in Drop
// This is a best-effort cleanup // This is a best-effort cleanup

View File

@@ -135,6 +135,24 @@ If you can complete it with 1-2 tool calls, skip TODO.
IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg. IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg.
If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir. If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
# Web Research with WebDriver
When you need to look up documentation, search for resources, find data online, or simply search the web to complete your task, you have access to WebDriver browser automation tools.
**How to use WebDriver for research:**
1. Call `webdriver_start` to begin a browser session (runs Chrome headless by default - no visible window)
2. Use `webdriver_navigate` to go to URLs (search engines, documentation sites, etc.)
3. **IMPORTANT**: Always use `webdriver_get_page_source` with `save_to_file` parameter to save the page HTML to disk
4. Read the saved HTML file with `read_file` to extract the information you need
5. Call `webdriver_quit` when done
**Best practices:**
- Do NOT use `webdriver_screenshot` or try to decode page content visually - always save HTML to disk and read it
- Save pages to the `tmp/` subdirectory (e.g., `tmp/search_results.html`)
- Parse the HTML text content to find what you need
- For search engines, look for result links and titles in the HTML
- Close the WebDriver session when you're done to free resources
# Code Search Guidelines # Code Search Guidelines
IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg. IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg.