Make Chrome headless the default WebDriver browser
- Add --safari flag to CLI for explicitly choosing Safari - Update --chrome-headless flag description to indicate it's the default - Update README to reflect Chrome headless as default - Remove broken link to non-existent docs/webdriver-setup.md - Add Safari flag handling in all webdriver config locations The config already had ChromeHeadless as the default, this commit updates the CLI and documentation to match.
This commit is contained in:
17
README.md
17
README.md
@@ -245,7 +245,7 @@ See `config.example.toml` for a complete configuration example.
|
||||
|
||||
## WebDriver Browser Automation
|
||||
|
||||
G3 includes WebDriver support for browser automation tasks using Safari.
|
||||
G3 includes WebDriver support for browser automation tasks. Chrome headless is the default (no visible browser window), with Safari available as an alternative.
|
||||
|
||||
**One-Time Safari Setup** (macOS only, needed only when using `--safari`):
|
||||
|
||||
@@ -263,9 +263,20 @@ safaridriver --enable # Requires password
|
||||
# Then: Develop → Allow Remote Automation
|
||||
```
|
||||
|
||||
**For detailed setup instructions and troubleshooting**, see [WebDriver Setup Guide](docs/webdriver-setup.md).
|
||||
**Usage**:
|
||||
|
||||
**Usage**: Run G3 with the `--webdriver` flag to enable browser automation tools.
|
||||
```bash
|
||||
# Use Chrome in headless mode (default, no visible window, runs in background)
|
||||
g3 --webdriver
|
||||
|
||||
# Use Safari (opens a visible browser window)
|
||||
g3 --webdriver --safari
|
||||
```
|
||||
|
||||
**Chrome Headless Setup**: Install ChromeDriver:
|
||||
- macOS: `brew install chromedriver`
|
||||
- Linux: `apt install chromium-chromedriver`
|
||||
- Or download from: https://chromedriver.chromium.org/downloads
|
||||
|
||||
## macOS Accessibility API Tools
|
||||
|
||||
|
||||
@@ -105,6 +105,11 @@ max_actions_per_second = 5
|
||||
[webdriver]
|
||||
enabled = false
|
||||
safari_port = 4444
|
||||
chrome_port = 9515
|
||||
# Browser to use: "safari" or "chrome-headless" (default)
|
||||
# Safari opens a visible browser window
|
||||
# Chrome headless runs in the background without a visible window
|
||||
browser = "chrome-headless"
|
||||
|
||||
[macax]
|
||||
enabled = false
|
||||
|
||||
@@ -343,6 +343,14 @@ pub struct Cli {
|
||||
#[arg(long)]
|
||||
pub webdriver: bool,
|
||||
|
||||
/// Use Chrome in headless mode for WebDriver (this is the default)
|
||||
#[arg(long)]
|
||||
pub chrome_headless: bool,
|
||||
|
||||
/// Use Safari for WebDriver (instead of headless Chrome)
|
||||
#[arg(long)]
|
||||
pub safari: bool,
|
||||
|
||||
/// Enable flock mode - parallel multi-agent development
|
||||
#[arg(long, requires = "flock_workspace", requires = "segments")]
|
||||
pub project: Option<PathBuf>,
|
||||
@@ -506,6 +514,18 @@ pub async fn run() -> Result<()> {
|
||||
config.webdriver.enabled = true;
|
||||
}
|
||||
|
||||
// Apply chrome-headless flag override
|
||||
if cli.chrome_headless {
|
||||
config.webdriver.enabled = true;
|
||||
config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless;
|
||||
}
|
||||
|
||||
// Apply safari flag override
|
||||
if cli.safari {
|
||||
config.webdriver.enabled = true;
|
||||
config.webdriver.browser = g3_config::WebDriverBrowser::Safari;
|
||||
}
|
||||
|
||||
// Apply no-auto-compact flag override
|
||||
if cli.manual_compact {
|
||||
config.agent.auto_compact = false;
|
||||
@@ -774,6 +794,18 @@ async fn run_accumulative_mode(
|
||||
config.webdriver.enabled = true;
|
||||
}
|
||||
|
||||
// Apply chrome-headless flag override
|
||||
if cli.chrome_headless {
|
||||
config.webdriver.enabled = true;
|
||||
config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless;
|
||||
}
|
||||
|
||||
// Apply safari flag override
|
||||
if cli.safari {
|
||||
config.webdriver.enabled = true;
|
||||
config.webdriver.browser = g3_config::WebDriverBrowser::Safari;
|
||||
}
|
||||
|
||||
// Apply no-auto-compact flag override
|
||||
if cli.manual_compact {
|
||||
config.agent.auto_compact = false;
|
||||
@@ -869,6 +901,18 @@ async fn run_accumulative_mode(
|
||||
config.webdriver.enabled = true;
|
||||
}
|
||||
|
||||
// Apply chrome-headless flag override
|
||||
if cli.chrome_headless {
|
||||
config.webdriver.enabled = true;
|
||||
config.webdriver.browser = g3_config::WebDriverBrowser::ChromeHeadless;
|
||||
}
|
||||
|
||||
// Apply safari flag override
|
||||
if cli.safari {
|
||||
config.webdriver.enabled = true;
|
||||
config.webdriver.browser = g3_config::WebDriverBrowser::Safari;
|
||||
}
|
||||
|
||||
// Apply no-auto-compact flag override
|
||||
if cli.manual_compact {
|
||||
config.agent.auto_compact = false;
|
||||
|
||||
@@ -8,7 +8,9 @@ pub mod types;
|
||||
pub mod webdriver;
|
||||
|
||||
// Re-export webdriver types for convenience
|
||||
pub use webdriver::{safari::SafariDriver, WebDriverController, WebElement};
|
||||
pub use webdriver::{
|
||||
chrome::ChromeDriver, safari::SafariDriver, WebDriverController, WebElement,
|
||||
};
|
||||
|
||||
// Re-export macax types for convenience
|
||||
pub use macax::{AXApplication, AXElement, MacAxController};
|
||||
|
||||
243
crates/g3-computer-control/src/webdriver/chrome.rs
Normal file
243
crates/g3-computer-control/src/webdriver/chrome.rs
Normal file
@@ -0,0 +1,243 @@
|
||||
use super::{WebDriverController, WebElement};
|
||||
use anyhow::{Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use fantoccini::{Client, ClientBuilder};
|
||||
use serde_json::Value;
|
||||
use std::time::Duration;
|
||||
|
||||
/// ChromeDriver WebDriver controller with headless support
|
||||
pub struct ChromeDriver {
|
||||
client: Client,
|
||||
}
|
||||
|
||||
impl ChromeDriver {
|
||||
/// Create a new ChromeDriver instance in headless mode
|
||||
///
|
||||
/// This will connect to ChromeDriver running on the default port (9515).
|
||||
/// ChromeDriver must be installed and available in PATH.
|
||||
pub async fn new_headless() -> Result<Self> {
|
||||
Self::with_port_headless(9515).await
|
||||
}
|
||||
|
||||
/// Create a new ChromeDriver instance with a custom port in headless mode
|
||||
pub async fn with_port_headless(port: u16) -> Result<Self> {
|
||||
let url = format!("http://localhost:{}", port);
|
||||
|
||||
let mut caps = serde_json::Map::new();
|
||||
caps.insert(
|
||||
"browserName".to_string(),
|
||||
Value::String("chrome".to_string()),
|
||||
);
|
||||
|
||||
// Set up Chrome options for headless mode
|
||||
let mut chrome_options = serde_json::Map::new();
|
||||
chrome_options.insert(
|
||||
"args".to_string(),
|
||||
Value::Array(vec![
|
||||
Value::String("--headless=new".to_string()),
|
||||
Value::String("--disable-gpu".to_string()),
|
||||
Value::String("--no-sandbox".to_string()),
|
||||
Value::String("--disable-dev-shm-usage".to_string()),
|
||||
Value::String("--window-size=1920,1080".to_string()),
|
||||
]),
|
||||
);
|
||||
caps.insert(
|
||||
"goog:chromeOptions".to_string(),
|
||||
Value::Object(chrome_options),
|
||||
);
|
||||
|
||||
let client = ClientBuilder::native()
|
||||
.capabilities(caps)
|
||||
.connect(&url)
|
||||
.await
|
||||
.context("Failed to connect to ChromeDriver. Make sure ChromeDriver is running and Chrome is installed.")?;
|
||||
|
||||
Ok(Self { client })
|
||||
}
|
||||
|
||||
/// Go back in browser history
|
||||
pub async fn back(&mut self) -> Result<()> {
|
||||
self.client.back().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Go forward in browser history
|
||||
pub async fn forward(&mut self) -> Result<()> {
|
||||
self.client.forward().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Refresh the current page
|
||||
pub async fn refresh(&mut self) -> Result<()> {
|
||||
self.client.refresh().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get all window handles
|
||||
pub async fn window_handles(&mut self) -> Result<Vec<String>> {
|
||||
let handles = self.client.windows().await?;
|
||||
Ok(handles.into_iter().map(|h| h.into()).collect())
|
||||
}
|
||||
|
||||
/// Switch to a window by handle
|
||||
pub async fn switch_to_window(&mut self, handle: &str) -> Result<()> {
|
||||
let window_handle: fantoccini::wd::WindowHandle = handle.to_string().try_into()?;
|
||||
self.client.switch_to_window(window_handle).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Get the current window handle
|
||||
pub async fn current_window_handle(&mut self) -> Result<String> {
|
||||
Ok(self.client.window().await?.into())
|
||||
}
|
||||
|
||||
/// Close the current window
|
||||
pub async fn close_window(&mut self) -> Result<()> {
|
||||
self.client.close_window().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Create a new window/tab
|
||||
pub async fn new_window(&mut self, is_tab: bool) -> Result<String> {
|
||||
let response = self.client.new_window(is_tab).await?;
|
||||
Ok(response.handle.into())
|
||||
}
|
||||
|
||||
/// Get cookies
|
||||
pub async fn get_cookies(&mut self) -> Result<Vec<fantoccini::cookies::Cookie<'static>>> {
|
||||
Ok(self.client.get_all_cookies().await?)
|
||||
}
|
||||
|
||||
/// Add a cookie
|
||||
pub async fn add_cookie(&mut self, cookie: fantoccini::cookies::Cookie<'static>) -> Result<()> {
|
||||
self.client.add_cookie(cookie).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Delete all cookies
|
||||
pub async fn delete_all_cookies(&mut self) -> Result<()> {
|
||||
self.client.delete_all_cookies().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Wait for an element to appear (with timeout)
|
||||
pub async fn wait_for_element(
|
||||
&mut self,
|
||||
selector: &str,
|
||||
timeout: Duration,
|
||||
) -> Result<WebElement> {
|
||||
let start = std::time::Instant::now();
|
||||
let poll_interval = Duration::from_millis(100);
|
||||
|
||||
loop {
|
||||
if let Ok(elem) = self.find_element(selector).await {
|
||||
return Ok(elem);
|
||||
}
|
||||
|
||||
if start.elapsed() >= timeout {
|
||||
anyhow::bail!("Timeout waiting for element: {}", selector);
|
||||
}
|
||||
|
||||
tokio::time::sleep(poll_interval).await;
|
||||
}
|
||||
}
|
||||
|
||||
/// Wait for an element to be visible (with timeout)
|
||||
pub async fn wait_for_visible(
|
||||
&mut self,
|
||||
selector: &str,
|
||||
timeout: Duration,
|
||||
) -> Result<WebElement> {
|
||||
let start = std::time::Instant::now();
|
||||
let poll_interval = Duration::from_millis(100);
|
||||
|
||||
loop {
|
||||
if let Ok(elem) = self.find_element(selector).await {
|
||||
if elem.is_displayed().await.unwrap_or(false) {
|
||||
return Ok(elem);
|
||||
}
|
||||
}
|
||||
|
||||
if start.elapsed() >= timeout {
|
||||
anyhow::bail!("Timeout waiting for element to be visible: {}", selector);
|
||||
}
|
||||
|
||||
tokio::time::sleep(poll_interval).await;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl WebDriverController for ChromeDriver {
|
||||
async fn navigate(&mut self, url: &str) -> Result<()> {
|
||||
self.client.goto(url).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn current_url(&self) -> Result<String> {
|
||||
Ok(self.client.current_url().await?.to_string())
|
||||
}
|
||||
|
||||
async fn title(&self) -> Result<String> {
|
||||
Ok(self.client.title().await?)
|
||||
}
|
||||
|
||||
async fn find_element(&mut self, selector: &str) -> Result<WebElement> {
|
||||
let elem = self
|
||||
.client
|
||||
.find(fantoccini::Locator::Css(selector))
|
||||
.await
|
||||
.context(format!(
|
||||
"Failed to find element with selector: {}",
|
||||
selector
|
||||
))?;
|
||||
Ok(WebElement { inner: elem })
|
||||
}
|
||||
|
||||
async fn find_elements(&mut self, selector: &str) -> Result<Vec<WebElement>> {
|
||||
let elems = self
|
||||
.client
|
||||
.find_all(fantoccini::Locator::Css(selector))
|
||||
.await?;
|
||||
Ok(elems
|
||||
.into_iter()
|
||||
.map(|inner| WebElement { inner })
|
||||
.collect())
|
||||
}
|
||||
|
||||
async fn execute_script(&mut self, script: &str, args: Vec<Value>) -> Result<Value> {
|
||||
Ok(self.client.execute(script, args).await?)
|
||||
}
|
||||
|
||||
async fn page_source(&self) -> Result<String> {
|
||||
Ok(self.client.source().await?)
|
||||
}
|
||||
|
||||
async fn screenshot(&mut self, path: &str) -> Result<()> {
|
||||
let screenshot_data = self.client.screenshot().await?;
|
||||
|
||||
// Expand tilde in path
|
||||
let expanded_path = shellexpand::tilde(path);
|
||||
let path_str = expanded_path.as_ref();
|
||||
|
||||
// Create parent directories if needed
|
||||
if let Some(parent) = std::path::Path::new(path_str).parent() {
|
||||
std::fs::create_dir_all(parent)
|
||||
.context("Failed to create parent directories for screenshot")?;
|
||||
}
|
||||
|
||||
std::fs::write(path_str, screenshot_data).context("Failed to write screenshot to file")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn close(&mut self) -> Result<()> {
|
||||
self.client.close_window().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn quit(mut self) -> Result<()> {
|
||||
self.client.close().await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
pub mod safari;
|
||||
pub mod chrome;
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
@@ -115,10 +115,24 @@ pub struct ComputerControlConfig {
|
||||
pub max_actions_per_second: u32,
|
||||
}
|
||||
|
||||
/// Browser type for WebDriver
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum WebDriverBrowser {
|
||||
Safari,
|
||||
#[serde(rename = "chrome-headless")]
|
||||
#[default]
|
||||
ChromeHeadless,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct WebDriverConfig {
|
||||
pub enabled: bool,
|
||||
pub safari_port: u16,
|
||||
#[serde(default)]
|
||||
pub chrome_port: u16,
|
||||
#[serde(default)]
|
||||
pub browser: WebDriverBrowser,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -137,6 +151,8 @@ impl Default for WebDriverConfig {
|
||||
Self {
|
||||
enabled: true,
|
||||
safari_port: 4444,
|
||||
chrome_port: 9515,
|
||||
browser: WebDriverBrowser::ChromeHeadless,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -85,6 +85,109 @@ pub struct ToolCall {
|
||||
pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments
|
||||
}
|
||||
|
||||
/// Unified WebDriver session that can hold either Safari or Chrome driver
|
||||
pub enum WebDriverSession {
|
||||
Safari(g3_computer_control::SafariDriver),
|
||||
Chrome(g3_computer_control::ChromeDriver),
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl g3_computer_control::WebDriverController for WebDriverSession {
|
||||
async fn navigate(&mut self, url: &str) -> anyhow::Result<()> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.navigate(url).await,
|
||||
WebDriverSession::Chrome(driver) => driver.navigate(url).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn current_url(&self) -> anyhow::Result<String> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.current_url().await,
|
||||
WebDriverSession::Chrome(driver) => driver.current_url().await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn title(&self) -> anyhow::Result<String> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.title().await,
|
||||
WebDriverSession::Chrome(driver) => driver.title().await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn find_element(&mut self, selector: &str) -> anyhow::Result<g3_computer_control::WebElement> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.find_element(selector).await,
|
||||
WebDriverSession::Chrome(driver) => driver.find_element(selector).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn find_elements(&mut self, selector: &str) -> anyhow::Result<Vec<g3_computer_control::WebElement>> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.find_elements(selector).await,
|
||||
WebDriverSession::Chrome(driver) => driver.find_elements(selector).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn execute_script(&mut self, script: &str, args: Vec<serde_json::Value>) -> anyhow::Result<serde_json::Value> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.execute_script(script, args).await,
|
||||
WebDriverSession::Chrome(driver) => driver.execute_script(script, args).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn page_source(&self) -> anyhow::Result<String> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.page_source().await,
|
||||
WebDriverSession::Chrome(driver) => driver.page_source().await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn screenshot(&mut self, path: &str) -> anyhow::Result<()> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.screenshot(path).await,
|
||||
WebDriverSession::Chrome(driver) => driver.screenshot(path).await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn close(&mut self) -> anyhow::Result<()> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.close().await,
|
||||
WebDriverSession::Chrome(driver) => driver.close().await,
|
||||
}
|
||||
}
|
||||
|
||||
async fn quit(self) -> anyhow::Result<()> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.quit().await,
|
||||
WebDriverSession::Chrome(driver) => driver.quit().await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Additional methods for WebDriverSession that aren't part of the WebDriverController trait
|
||||
impl WebDriverSession {
|
||||
pub async fn back(&mut self) -> anyhow::Result<()> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.back().await,
|
||||
WebDriverSession::Chrome(driver) => driver.back().await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn forward(&mut self) -> anyhow::Result<()> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.forward().await,
|
||||
WebDriverSession::Chrome(driver) => driver.forward().await,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn refresh(&mut self) -> anyhow::Result<()> {
|
||||
match self {
|
||||
WebDriverSession::Safari(driver) => driver.refresh().await,
|
||||
WebDriverSession::Chrome(driver) => driver.refresh().await,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Options for fast-start discovery execution
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct DiscoveryOptions<'a> {
|
||||
@@ -1062,10 +1165,10 @@ pub struct Agent<W: UiWriter> {
|
||||
todo_content: std::sync::Arc<tokio::sync::RwLock<String>>,
|
||||
webdriver_session: std::sync::Arc<
|
||||
tokio::sync::RwLock<
|
||||
Option<std::sync::Arc<tokio::sync::Mutex<g3_computer_control::SafariDriver>>>,
|
||||
Option<std::sync::Arc<tokio::sync::Mutex<WebDriverSession>>>,
|
||||
>,
|
||||
>,
|
||||
safaridriver_process: std::sync::Arc<tokio::sync::RwLock<Option<tokio::process::Child>>>,
|
||||
webdriver_process: std::sync::Arc<tokio::sync::RwLock<Option<tokio::process::Child>>>,
|
||||
macax_controller:
|
||||
std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
|
||||
tool_call_count: usize,
|
||||
@@ -1356,7 +1459,7 @@ impl<W: UiWriter> Agent<W> {
|
||||
quiet,
|
||||
computer_controller,
|
||||
webdriver_session: std::sync::Arc::new(tokio::sync::RwLock::new(None)),
|
||||
safaridriver_process: std::sync::Arc::new(tokio::sync::RwLock::new(None)),
|
||||
webdriver_process: std::sync::Arc::new(tokio::sync::RwLock::new(None)),
|
||||
macax_controller: {
|
||||
std::sync::Arc::new(tokio::sync::RwLock::new(if macax_enabled {
|
||||
Some(g3_computer_control::MacAxController::new()?)
|
||||
@@ -3218,10 +3321,19 @@ impl<W: UiWriter> Agent<W> {
|
||||
},
|
||||
Tool {
|
||||
name: "webdriver_get_page_source".to_string(),
|
||||
description: "Get the HTML source of the current page".to_string(),
|
||||
description: "Get the rendered HTML source of the current page. Returns the current DOM state after JavaScript execution.".to_string(),
|
||||
input_schema: json!({
|
||||
"type": "object",
|
||||
"properties": {},
|
||||
"properties": {
|
||||
"max_length": {
|
||||
"type": "integer",
|
||||
"description": "Maximum length of HTML to return (default: 10000, use 0 for no truncation)"
|
||||
},
|
||||
"save_to_file": {
|
||||
"type": "string",
|
||||
"description": "Optional file path to save the HTML instead of returning it inline"
|
||||
}
|
||||
},
|
||||
"required": []
|
||||
}),
|
||||
},
|
||||
@@ -5426,21 +5538,24 @@ impl<W: UiWriter> Agent<W> {
|
||||
}
|
||||
drop(session_guard);
|
||||
|
||||
// Determine which browser to use based on config
|
||||
use g3_config::WebDriverBrowser;
|
||||
match &self.config.webdriver.browser {
|
||||
WebDriverBrowser::Safari => {
|
||||
// Note: Safari Remote Automation must be enabled before using WebDriver.
|
||||
// Run this once: safaridriver --enable
|
||||
// Or enable manually: Safari → Develop → Allow Remote Automation
|
||||
|
||||
// Start safaridriver process
|
||||
let port = self.config.webdriver.safari_port;
|
||||
|
||||
let safaridriver_result = tokio::process::Command::new("safaridriver")
|
||||
let driver_result = tokio::process::Command::new("safaridriver")
|
||||
.arg("--port")
|
||||
.arg(port.to_string())
|
||||
.stdout(std::process::Stdio::null())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.spawn();
|
||||
|
||||
let mut safaridriver_process = match safaridriver_result {
|
||||
let mut webdriver_process = match driver_result {
|
||||
Ok(process) => process,
|
||||
Err(e) => {
|
||||
return Ok(format!("❌ Failed to start safaridriver: {}\n\nMake sure safaridriver is installed.", e));
|
||||
@@ -5453,22 +5568,55 @@ impl<W: UiWriter> Agent<W> {
|
||||
// Connect to SafariDriver
|
||||
match g3_computer_control::SafariDriver::with_port(port).await {
|
||||
Ok(driver) => {
|
||||
let session = std::sync::Arc::new(tokio::sync::Mutex::new(driver));
|
||||
let session = std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Safari(driver)));
|
||||
*self.webdriver_session.write().await = Some(session);
|
||||
|
||||
// Store the process handle
|
||||
*self.safaridriver_process.write().await = Some(safaridriver_process);
|
||||
*self.webdriver_process.write().await = Some(webdriver_process);
|
||||
|
||||
Ok("✅ WebDriver session started successfully! Safari should open automatically.".to_string())
|
||||
}
|
||||
Err(e) => {
|
||||
// Kill the safaridriver process if connection failed
|
||||
let _ = safaridriver_process.kill().await;
|
||||
|
||||
let _ = webdriver_process.kill().await;
|
||||
Ok(format!("❌ Failed to connect to SafariDriver: {}\n\nThis might be because:\n - Safari Remote Automation is not enabled (run: safaridriver --enable)\n - Port {} is already in use\n - Safari failed to start\n - Network connectivity issue\n\nTo enable Remote Automation:\n 1. Run: safaridriver --enable (requires password, one-time setup)\n 2. Or manually: Safari → Develop → Allow Remote Automation", e, port))
|
||||
}
|
||||
}
|
||||
}
|
||||
WebDriverBrowser::ChromeHeadless => {
|
||||
let port = self.config.webdriver.chrome_port;
|
||||
|
||||
// Start chromedriver process
|
||||
let driver_result = tokio::process::Command::new("chromedriver")
|
||||
.arg(format!("--port={}", port))
|
||||
.stdout(std::process::Stdio::null())
|
||||
.stderr(std::process::Stdio::null())
|
||||
.spawn();
|
||||
|
||||
let mut webdriver_process = match driver_result {
|
||||
Ok(process) => process,
|
||||
Err(e) => {
|
||||
return Ok(format!("❌ Failed to start chromedriver: {}\n\nMake sure chromedriver is installed and in your PATH.\n\nInstall with:\n - macOS: brew install chromedriver\n - Linux: apt install chromium-chromedriver\n - Or download from: https://chromedriver.chromium.org/downloads", e));
|
||||
}
|
||||
};
|
||||
|
||||
// Wait for chromedriver to start up
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(1000)).await;
|
||||
|
||||
// Connect to ChromeDriver in headless mode
|
||||
match g3_computer_control::ChromeDriver::with_port_headless(port).await {
|
||||
Ok(driver) => {
|
||||
let session = std::sync::Arc::new(tokio::sync::Mutex::new(WebDriverSession::Chrome(driver)));
|
||||
*self.webdriver_session.write().await = Some(session);
|
||||
*self.webdriver_process.write().await = Some(webdriver_process);
|
||||
|
||||
Ok("✅ WebDriver session started successfully! Chrome is running in headless mode (no visible window).".to_string())
|
||||
}
|
||||
Err(e) => {
|
||||
let _ = webdriver_process.kill().await;
|
||||
Ok(format!("❌ Failed to connect to ChromeDriver: {}\n\nThis might be because:\n - Chrome is not installed\n - ChromeDriver version doesn't match Chrome version\n - Port {} is already in use\n\nMake sure Chrome and ChromeDriver are installed and compatible.", e, port))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"webdriver_navigate" => {
|
||||
debug!("Processing webdriver_navigate tool call");
|
||||
|
||||
@@ -5756,6 +5904,19 @@ impl<W: UiWriter> Agent<W> {
|
||||
);
|
||||
}
|
||||
|
||||
// Extract optional parameters
|
||||
let max_length = tool_call
|
||||
.args
|
||||
.get("max_length")
|
||||
.and_then(|v| v.as_u64())
|
||||
.map(|n| n as usize)
|
||||
.unwrap_or(10000);
|
||||
|
||||
let save_to_file = tool_call
|
||||
.args
|
||||
.get("save_to_file")
|
||||
.and_then(|v| v.as_str());
|
||||
|
||||
let session_guard = self.webdriver_session.read().await;
|
||||
let session = match session_guard.as_ref() {
|
||||
Some(s) => s.clone(),
|
||||
@@ -5770,14 +5931,36 @@ impl<W: UiWriter> Agent<W> {
|
||||
let driver = session.lock().await;
|
||||
match driver.page_source().await {
|
||||
Ok(source) => {
|
||||
// Truncate if too long
|
||||
if source.len() > 10000 {
|
||||
Ok(format!(
|
||||
"Page source ({} chars, truncated to 10000):\n{}...",
|
||||
// If save_to_file is specified, write to file
|
||||
if let Some(file_path) = save_to_file {
|
||||
let expanded_path = shellexpand::tilde(file_path);
|
||||
let path_str = expanded_path.as_ref();
|
||||
|
||||
// Create parent directories if needed
|
||||
if let Some(parent) = std::path::Path::new(path_str).parent() {
|
||||
if let Err(e) = std::fs::create_dir_all(parent) {
|
||||
return Ok(format!("❌ Failed to create directories: {}", e));
|
||||
}
|
||||
}
|
||||
|
||||
match std::fs::write(path_str, &source) {
|
||||
Ok(_) => Ok(format!(
|
||||
"✅ Page source ({} chars) saved to: {}",
|
||||
source.len(),
|
||||
&source[..10000]
|
||||
path_str
|
||||
)),
|
||||
Err(e) => Ok(format!("❌ Failed to write file: {}", e)),
|
||||
}
|
||||
} else if max_length > 0 && source.len() > max_length {
|
||||
// Truncate if max_length is set and source exceeds it
|
||||
Ok(format!(
|
||||
"Page source ({} chars, truncated to {}):\n{}...",
|
||||
source.len(),
|
||||
max_length,
|
||||
&source[..max_length]
|
||||
))
|
||||
} else {
|
||||
// Return full source
|
||||
Ok(format!("Page source ({} chars):\n{}", source.len(), source))
|
||||
}
|
||||
}
|
||||
@@ -5918,7 +6101,7 @@ impl<W: UiWriter> Agent<W> {
|
||||
|
||||
// Kill the WebDriver server process (safaridriver or chromedriver)
|
||||
if let Some(mut process) =
|
||||
self.safaridriver_process.write().await.take()
|
||||
self.webdriver_process.write().await.take()
|
||||
{
|
||||
if let Err(e) = process.kill().await {
|
||||
warn!("Failed to kill safaridriver process: {}", e);
|
||||
@@ -6812,7 +6995,7 @@ impl<W: UiWriter> Drop for Agent<W> {
|
||||
|
||||
// Try to kill the WebDriver server process (safaridriver or chromedriver) if it's still running
|
||||
// We need to use try_lock since we can't await in Drop
|
||||
if let Ok(mut process_guard) = self.safaridriver_process.try_write() {
|
||||
if let Ok(mut process_guard) = self.webdriver_process.try_write() {
|
||||
if let Some(process) = process_guard.take() {
|
||||
// Use blocking kill since we can't await in Drop
|
||||
// This is a best-effort cleanup
|
||||
|
||||
@@ -135,6 +135,24 @@ If you can complete it with 1-2 tool calls, skip TODO.
|
||||
IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg.
|
||||
If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
|
||||
|
||||
# Web Research with WebDriver
|
||||
|
||||
When you need to look up documentation, search for resources, find data online, or simply search the web to complete your task, you have access to WebDriver browser automation tools.
|
||||
|
||||
**How to use WebDriver for research:**
|
||||
1. Call `webdriver_start` to begin a browser session (runs Chrome headless by default - no visible window)
|
||||
2. Use `webdriver_navigate` to go to URLs (search engines, documentation sites, etc.)
|
||||
3. **IMPORTANT**: Always use `webdriver_get_page_source` with `save_to_file` parameter to save the page HTML to disk
|
||||
4. Read the saved HTML file with `read_file` to extract the information you need
|
||||
5. Call `webdriver_quit` when done
|
||||
|
||||
**Best practices:**
|
||||
- Do NOT use `webdriver_screenshot` or try to decode page content visually - always save HTML to disk and read it
|
||||
- Save pages to the `tmp/` subdirectory (e.g., `tmp/search_results.html`)
|
||||
- Parse the HTML text content to find what you need
|
||||
- For search engines, look for result links and titles in the HTML
|
||||
- Close the WebDriver session when you're done to free resources
|
||||
|
||||
# Code Search Guidelines
|
||||
|
||||
IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg.
|
||||
|
||||
Reference in New Issue
Block a user