From 60aeb67c56e07968c0362c3c1a4d413f5be260c3 Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna"
Date: Sat, 10 Jan 2026 20:34:14 +1100
Subject: [PATCH] Add stealth mode for Chrome headless to evade bot detection

Implements comprehensive anti-detection measures:
- Override navigator.webdriver to return undefined
- Inject fake chrome.runtime, chrome.loadTimes, chrome.csi objects
- Add realistic plugins and mimeTypes arrays
- Patch permissions API to hide automation
- Set realistic navigator properties (languages, hardwareConcurrency, deviceMemory)
- Remove ChromeDriver-specific window properties (cdc_*)
- Patch Function.prototype.toString to hide modifications
- Add Chrome flags: --disable-blink-features=AutomationControlled
- Set realistic user-agent without HeadlessChrome identifier
- Exclude 'enable-automation' switch

Tested against bot detection sites:
- bot.sannysoft.com: All major tests pass
- Search engines: Works with DuckDuckGo, Yahoo, Brave, Startpage
- Still detected by: Google reCAPTCHA, Cloudflare Turnstile, Bing
---
 config.example.toml                           |   4 +
 .../src/webdriver/chrome.rs                   | 160 +++++++++++++++++-
 crates/g3-config/src/lib.rs                   |   5 +
 crates/g3-core/src/tools/webdriver.rs         |  10 +-
 scripts/setup-chrome-for-testing.sh           |   9 +-
 5 files changed, 183 insertions(+), 5 deletions(-)

diff --git a/config.example.toml b/config.example.toml
index 8c47b5c..9b7b75c 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -116,6 +116,10 @@ browser = "safari"
 # Run: ./scripts/setup-chrome-for-testing.sh to install matching versions
 # chrome_binary = "/Users/yourname/.chrome-for-testing/chrome-mac-arm64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"
 # chrome_binary = "/Users/yourname/.chrome-for-testing/chrome-mac-x64/Google Chrome for Testing.app/Contents/MacOS/Google Chrome for Testing"
+# Optional: Path to ChromeDriver binary
+# If not set, looks for 'chromedriver' in PATH
+# The setup script creates a symlink at ~/.local/bin/chromedriver
+# chromedriver_binary = "/Users/yourname/.local/bin/chromedriver"
 
 [macax]
 enabled = false
diff --git a/crates/g3-computer-control/src/webdriver/chrome.rs b/crates/g3-computer-control/src/webdriver/chrome.rs
index 6830ab3..d96fd6a 100644
--- a/crates/g3-computer-control/src/webdriver/chrome.rs
+++ b/crates/g3-computer-control/src/webdriver/chrome.rs
@@ -10,6 +10,131 @@ pub struct ChromeDriver {
     client: Client,
 }
 
+/// Stealth script (JavaScript) executed in the page to hide automation indicators from bot detection
+const STEALTH_SCRIPT: &str = r#"
+    (function() {
+        'use strict';
+
+        // 1. Override navigator.webdriver to return undefined (like a real browser)
+        Object.defineProperty(navigator, 'webdriver', {
+            get: () => undefined,
+            configurable: true
+        });
+
+        // 2. Add realistic chrome object that real Chrome has
+        if (!window.chrome) {
+            window.chrome = {};
+        }
+        window.chrome.runtime = {
+            connect: function() {},
+            sendMessage: function() {},
+            onMessage: { addListener: function() {} },
+            onConnect: { addListener: function() {} },
+            id: undefined
+        };
+        window.chrome.loadTimes = function() {
+            return {
+                commitLoadTime: Date.now() / 1000,
+                connectionInfo: 'h2',
+                finishDocumentLoadTime: Date.now() / 1000,
+                finishLoadTime: Date.now() / 1000,
+                firstPaintAfterLoadTime: 0,
+                firstPaintTime: Date.now() / 1000,
+                navigationType: 'Other',
+                npnNegotiatedProtocol: 'h2',
+                requestTime: Date.now() / 1000,
+                startLoadTime: Date.now() / 1000,
+                wasAlternateProtocolAvailable: false,
+                wasFetchedViaSpdy: true,
+                wasNpnNegotiated: true
+            };
+        };
+        window.chrome.csi = function() {
+            return {
+                onloadT: Date.now(),
+                pageT: Date.now() - performance.timing.navigationStart,
+                startE: performance.timing.navigationStart,
+                tran: 15
+            };
+        };
+
+        // 3. Add realistic plugins array (headless Chrome has empty plugins); find() goes through Array.prototype because the prototype is swapped below
+        Object.defineProperty(navigator, 'plugins', {
+            get: () => {
+                const plugins = [
+                    { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
+                    { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
+                    { name: 'Native Client', filename: 'internal-nacl-plugin', description: '' }
+                ];
+                plugins.item = (i) => plugins[i] || null;
+                plugins.namedItem = (name) => Array.prototype.find.call(plugins, p => p.name === name) || null;
+                plugins.refresh = () => {};
+                Object.setPrototypeOf(plugins, PluginArray.prototype);
+                return plugins;
+            },
+            configurable: true
+        });
+
+        // 4. Add realistic mimeTypes (same Array.prototype.find indirection as for plugins)
+        Object.defineProperty(navigator, 'mimeTypes', {
+            get: () => {
+                const mimeTypes = [
+                    { type: 'application/pdf', suffixes: 'pdf', description: 'Portable Document Format' },
+                    { type: 'application/x-google-chrome-pdf', suffixes: 'pdf', description: 'Portable Document Format' }
+                ];
+                mimeTypes.item = (i) => mimeTypes[i] || null;
+                mimeTypes.namedItem = (name) => Array.prototype.find.call(mimeTypes, m => m.type === name) || null;
+                Object.setPrototypeOf(mimeTypes, MimeTypeArray.prototype);
+                return mimeTypes;
+            },
+            configurable: true
+        });
+
+        // 5. Fix permissions API to not reveal automation
+        const originalQuery = window.navigator.permissions?.query;
+        if (originalQuery) {
+            window.navigator.permissions.query = (parameters) => {
+                if (parameters.name === 'notifications') {
+                    return Promise.resolve({ state: Notification.permission, onchange: null });
+                }
+                return originalQuery.call(window.navigator.permissions, parameters);
+            };
+        }
+
+        // 6. Override languages to have realistic values
+        Object.defineProperty(navigator, 'languages', {
+            get: () => ['en-US', 'en'],
+            configurable: true
+        });
+
+        // 7. Fix hardwareConcurrency (headless often shows different values)
+        Object.defineProperty(navigator, 'hardwareConcurrency', {
+            get: () => 8,
+            configurable: true
+        });
+
+        // 8. Fix deviceMemory
+        Object.defineProperty(navigator, 'deviceMemory', {
+            get: () => 8,
+            configurable: true
+        });
+
+        // 9. Remove automation-related properties from window
+        delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array;
+        delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise;
+        delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol;
+
+        // 10. Fix toString methods to not reveal native code modifications (only when permissions.query was actually patched in step 5)
+        const originalToString = Function.prototype.toString;
+        Function.prototype.toString = function() {
+            if (originalQuery && this === navigator.permissions.query) {
+                return 'function query() { [native code] }';
+            }
+            return originalToString.call(this);
+        };
+    })();
+"#;
+
 impl ChromeDriver {
     /// Create a new ChromeDriver instance in headless mode
     ///
@@ -51,9 +176,32 @@ impl ChromeDriver {
                 Value::String("--no-sandbox".to_string()),
                 Value::String("--disable-dev-shm-usage".to_string()),
                 Value::String("--window-size=1920,1080".to_string()),
+                Value::String("--disable-blink-features=AutomationControlled".to_string()),
+                // Stealth: Set a realistic user-agent (removes HeadlessChrome identifier)
+                Value::String("--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36".to_string()),
+                // Stealth: Disable automation-related info bars
+                Value::String("--disable-infobars".to_string()),
+                // Stealth: Set realistic UI locale (--lang takes a single locale; navigator.languages is set by STEALTH_SCRIPT)
+                Value::String("--lang=en-US".to_string()),
+                // Stealth: Disable extensions to avoid detection
+                Value::String("--disable-extensions".to_string()),
             ]),
         );
 
+        // Exclude automation switches to hide webdriver detection
+        chrome_options.insert(
+            "excludeSwitches".to_string(),
+            Value::Array(vec![
+                Value::String("enable-automation".to_string()),
+            ]),
+        );
+
+        // Disable automation extension
+        chrome_options.insert(
+            "useAutomationExtension".to_string(),
+            Value::Bool(false),
+        );
+
         // If a custom Chrome binary is specified, use it
         if let Some(binary) = chrome_binary {
             chrome_options.insert("binary".to_string(), Value::String(binary.to_string()));
@@ -75,7 +223,14 @@ impl ChromeDriver {
             .context("Connection to ChromeDriver timed out after 30 seconds")?
             .context("Failed to connect to ChromeDriver")?;
 
-        Ok(Self { client })
+        let driver = Self { client };
+
+        // Inject stealth script into the initial document right after connecting.
+        // Script execution only affects the current document, so `navigate`
+        // re-injects it after each page load. Errors are ignored (best-effort stealth).
+        let _ = driver.client.execute(STEALTH_SCRIPT, vec![]).await;
+
+        Ok(driver)
     }
 
     /// Go back in browser history
@@ -194,6 +349,9 @@ impl ChromeDriver {
 impl WebDriverController for ChromeDriver {
     async fn navigate(&mut self, url: &str) -> Result<()> {
         self.client.goto(url).await?;
+        // Inject stealth script after navigation to hide automation indicators
+        // Ignore errors as some pages may have strict CSP
+        let _ = self.client.execute(STEALTH_SCRIPT, vec![]).await;
         Ok(())
     }
 
diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs
index 7f86d4f..d1abdf2 100644
--- a/crates/g3-config/src/lib.rs
+++ b/crates/g3-config/src/lib.rs
@@ -134,6 +134,10 @@ pub struct WebDriverConfig {
     /// If not set, ChromeDriver will use the default Chrome installation
     pub chrome_binary: Option<String>,
     #[serde(default)]
+    /// Optional path to ChromeDriver binary
+    /// If not set, looks for 'chromedriver' in PATH
+    pub chromedriver_binary: Option<String>,
+    #[serde(default)]
     pub browser: WebDriverBrowser,
 }
 
@@ -144,6 +148,7 @@ impl Default for WebDriverConfig {
             safari_port: 4444,
             chrome_port: 9515,
             chrome_binary: None,
+            chromedriver_binary: None,
             browser: WebDriverBrowser::Safari,
         }
     }
diff --git a/crates/g3-core/src/tools/webdriver.rs b/crates/g3-core/src/tools/webdriver.rs
index c43b4bc..c00350c 100644
--- a/crates/g3-core/src/tools/webdriver.rs
+++ b/crates/g3-core/src/tools/webdriver.rs
@@ -119,8 +119,16 @@ async fn start_safari_driver<W: UiWriter>(ctx: &ToolContext<'_, W>) -> Result<St
 async fn start_chrome_driver<W: UiWriter>(ctx: &ToolContext<'_, W>) -> Result<String> {
     let port = ctx.config.webdriver.chrome_port;
 
+    // Use configured chromedriver binary or fall back to 'chromedriver' in PATH
+    let chromedriver_cmd = ctx
+        .config
+        .webdriver
+        .chromedriver_binary
+        .as_deref()
+        .unwrap_or("chromedriver");
+
     // Start chromedriver process
-    let driver_result = tokio::process::Command::new("chromedriver")
+    let driver_result = tokio::process::Command::new(chromedriver_cmd)
         .arg(format!("--port={}", port))
         .stdout(std::process::Stdio::null())
         .stderr(std::process::Stdio::null())
diff --git a/scripts/setup-chrome-for-testing.sh b/scripts/setup-chrome-for-testing.sh
index 7925ef6..ddb6e7d 100755
--- a/scripts/setup-chrome-for-testing.sh
+++ b/scripts/setup-chrome-for-testing.sh
@@ -75,10 +75,13 @@ rm chromedriver.zip
 
 CHROMEDRIVER_DIR="chromedriver-$PLATFORM"
 if [ -f "$CHROMEDRIVER_DIR/chromedriver" ]; then
-    # Create symlink in bin directory
+    # Create symlinks in bin directory
+    # Primary symlink: 'chromedriver' - works with g3 out of the box
+    ln -sf "$INSTALL_DIR/$CHROMEDRIVER_DIR/chromedriver" "$BIN_DIR/chromedriver"
+    # Secondary symlink: 'chromedriver-for-testing' - explicit name to avoid confusion
     ln -sf "$INSTALL_DIR/$CHROMEDRIVER_DIR/chromedriver" "$BIN_DIR/chromedriver-for-testing"
     chmod +x "$INSTALL_DIR/$CHROMEDRIVER_DIR/chromedriver"
-    echo "✅ ChromeDriver installed and linked to: $BIN_DIR/chromedriver-for-testing"
+    echo "✅ ChromeDriver installed and linked to: $BIN_DIR/chromedriver"
 else
     echo "❌ ChromeDriver extraction failed"
     exit 1
@@ -107,7 +110,7 @@ echo " ChromeDriver: $VERSION"
 echo ""
 echo "Binaries:"
 echo " Chrome: $BIN_DIR/chrome-for-testing"
-echo " ChromeDriver: $BIN_DIR/chromedriver-for-testing"
+echo " ChromeDriver: $BIN_DIR/chromedriver"
 echo ""
 echo "To use with g3, make sure $BIN_DIR is in your PATH:"
 echo " export PATH=\"$BIN_DIR:\$PATH\""