From e1e732150a1a29e7cd1a4850e512dee38cfbf1bf Mon Sep 17 00:00:00 2001 From: Dhanji Prasanna Date: Fri, 24 Oct 2025 10:11:43 +1100 Subject: [PATCH] coach rigor +++ --- crates/g3-cli/src/lib.rs | 14 +- .../examples/list_windows.rs | 8 +- .../tests/integration_test.rs | 45 ------ crates/g3-core/src/lib.rs | 149 +++++++++++++++++- crates/g3-core/tests/test_context_thinning.rs | 121 +++++++++++++- 5 files changed, 278 insertions(+), 59 deletions(-) diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index db3b3f3..081c5da 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -243,6 +243,10 @@ pub struct Cli { /// Enable macOS Accessibility API tools for native app automation #[arg(long)] pub macax: bool, + + /// Enable WebDriver browser automation tools + #[arg(long)] + pub webdriver: bool, } pub async fn run() -> Result<()> { @@ -451,6 +455,11 @@ Output ONLY the markdown content, no explanations or meta-commentary."#, } } + // Apply webdriver flag override + if cli.webdriver { + config.webdriver.enabled = true; + } + // Validate provider if specified if let Some(ref provider) = cli.provider { let valid_providers = ["anthropic", "databricks", "embedded", "openai"]; @@ -1630,6 +1639,7 @@ Review the current state of the project and provide a concise critique focusing 2. Whether the project compiles successfully 3. What requirements are missing or incorrect 4. Specific improvements needed to satisfy requirements +5. Use UI tools such as webdriver to test functionality thoroughly CRITICAL INSTRUCTIONS: 1. You MUST use the final_output tool to provide your feedback @@ -1637,13 +1647,13 @@ CRITICAL INSTRUCTIONS: 3. Focus ONLY on what needs to be fixed or improved 4. Do NOT include your analysis process, file contents, or compilation output in the summary -If the implementation generally meets all requirements and compiles without errors: +If the implementation thoroughly meets all requirements, compiles and is fully tested (especially UI flows) *WITHOUT* gaps or errors: - Call final_output with summary: 'IMPLEMENTATION_APPROVED' If improvements are needed: - Call final_output with a brief summary listing ONLY the specific issues to fix -Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and generally fits the requirements. Don't be picky.", +Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and thoroughly fits the requirements (implementation > 95% complete). Be rigorous, especially by testing that all UI features work.", requirements ); diff --git a/crates/g3-computer-control/examples/list_windows.rs b/crates/g3-computer-control/examples/list_windows.rs index 5b571d9..e638a19 100644 --- a/crates/g3-computer-control/examples/list_windows.rs +++ b/crates/g3-computer-control/examples/list_windows.rs @@ -1,7 +1,7 @@ use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo}; use core_foundation::dictionary::CFDictionary; use core_foundation::string::CFString; -use core_foundation::base::TCFType; +use core_foundation::base::{TCFType, ToVoid}; fn main() { println!("Listing all on-screen windows..."); @@ -22,7 +22,7 @@ fn main() { // Get window ID let window_id_key = CFString::from_static_string("kCGWindowNumber"); - let window_id: i64 = if let Some(value) = dict.find(window_id_key.as_concrete_TypeRef()) { + let window_id: i64 = if let Some(value) = dict.find(window_id_key.to_void()) { let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _); num.to_i64().unwrap_or(0) } else { @@ -31,7 +31,7 @@ fn main() { // Get owner name let owner_key = CFString::from_static_string("kCGWindowOwnerName"); - let owner: String = if let Some(value) = dict.find(owner_key.as_concrete_TypeRef()) { + let owner: String = if let Some(value) = dict.find(owner_key.to_void()) { let s: CFString = TCFType::wrap_under_get_rule(*value as *const _); s.to_string() } else { @@ -40,7 +40,7 @@ fn main() { // Get window name/title let name_key = CFString::from_static_string("kCGWindowName"); - let title: String = if let Some(value) = dict.find(name_key.as_concrete_TypeRef()) { + let title: String = if let Some(value) = dict.find(name_key.to_void()) { let s: CFString = TCFType::wrap_under_get_rule(*value as *const _); s.to_string() } else { diff --git a/crates/g3-computer-control/tests/integration_test.rs b/crates/g3-computer-control/tests/integration_test.rs index 75c884f..87227e5 100644 --- a/crates/g3-computer-control/tests/integration_test.rs +++ b/crates/g3-computer-control/tests/integration_test.rs @@ -1,23 +1,5 @@ use g3_computer_control::*; -#[tokio::test] -async fn test_mouse_movement() { - let controller = create_controller().expect("Failed to create controller"); - - // Move mouse to center of screen (assuming 1920x1080) - let result = controller.move_mouse(960, 540).await; - assert!(result.is_ok(), "Failed to move mouse: {:?}", result.err()); -} - -#[tokio::test] -async fn test_typing() { - let controller = create_controller().expect("Failed to create controller"); - - // Type some text - let result = controller.type_text("Hello, World!").await; - assert!(result.is_ok(), "Failed to type text: {:?}", result.err()); -} - #[tokio::test] async fn test_screenshot() { let controller = create_controller().expect("Failed to create controller"); @@ -33,30 +15,3 @@ async fn test_screenshot() { // Clean up let _ = std::fs::remove_file(path); } - -#[tokio::test] -async fn test_click() { - let controller = create_controller().expect("Failed to create controller"); - - // Click at a safe location - let result = controller.click(types::MouseButton::Left).await; - assert!(result.is_ok(), "Failed to click: {:?}", result.err()); -} - -#[tokio::test] -async fn test_double_click() { - let controller = create_controller().expect("Failed to create controller"); - - // Double click - let result = controller.double_click(types::MouseButton::Left).await; - assert!(result.is_ok(), "Failed to double click: {:?}", result.err()); -} - -#[tokio::test] -async fn test_press_key() { - let controller = create_controller().expect("Failed to create controller"); - - // Press escape key - let result = controller.press_key("escape").await; - assert!(result.is_ok(), "Failed to press key: {:?}", result.err()); -} diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 2786182..61bb974 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -466,6 +466,7 @@ Format this as a detailed but concise summary that can be used to resume the con let first_third_end = (total_messages / 3).max(1); let mut leaned_count = 0; + let mut tool_call_leaned_count = 0; let mut chars_saved = 0; // Create ~/tmp directory if it doesn't exist @@ -478,7 +479,7 @@ Format this as a detailed but concise summary that can be used to resume the con // Scan the first third of messages for i in 0..first_third_end { if let Some(message) = self.conversation_history.get_mut(i) { - // Only process User messages that look like tool results + // Process User messages that look like tool results if matches!(message.role, MessageRole::User) && message.content.starts_with("Tool result:") { let content_len = message.content.len(); @@ -508,6 +509,109 @@ Format this as a detailed but concise summary that can be used to resume the con debug!("Thinned tool result {} ({} chars) to {}", i, original_len, file_path); } } + + // Process Assistant messages that contain tool calls with large arguments + if matches!(message.role, MessageRole::Assistant) { + // Try to parse the message content as JSON to find tool calls + let content = &message.content; + + // Look for JSON tool call patterns + if let Some(tool_call_start) = content.find(r#"{"tool":"#) + .or_else(|| content.find(r#"{ "tool":"#)) + .or_else(|| content.find(r#"{"tool" :"#)) + .or_else(|| content.find(r#"{ "tool" :"#)) + { + // Try to extract and parse the JSON tool call + let json_portion = &content[tool_call_start..]; + + // Find the end of the JSON object + if let Some(json_end) = Self::find_json_end(json_portion) { + let json_str = &json_portion[..=json_end]; + + // Try to parse as ToolCall + if let Ok(mut tool_call) = serde_json::from_str::(json_str) { + let mut modified = false; + + // Handle write_file tool calls + if tool_call.tool == "write_file" { + if let Some(args_obj) = tool_call.args.as_object_mut() { + // Extract content to avoid borrow issues + let content_info = args_obj.get("content") + .and_then(|v| v.as_str()) + .map(|s| (s.to_string(), s.len())); + + if let Some((content_str, content_len)) = content_info { + // Only thin if content is greater than 1000 chars + if content_len > 1000 { + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let filename = format!("leaned_write_file_content_{}_{}.txt", timestamp, i); + let file_path = format!("{}/{}", tmp_dir, filename); + + if std::fs::write(&file_path, &content_str).is_ok() { + args_obj.insert( + "content".to_string(), + serde_json::Value::String(format!("", file_path)) + ); + modified = true; + chars_saved += content_len; + tool_call_leaned_count += 1; + debug!("Thinned write_file content {} ({} chars) to {}", i, content_len, file_path); + } + } + } + } + } + + // Handle str_replace tool calls + if tool_call.tool == "str_replace" { + if let Some(args_obj) = tool_call.args.as_object_mut() { + // Extract diff to avoid borrow issues + let diff_info = args_obj.get("diff") + .and_then(|v| v.as_str()) + .map(|s| (s.to_string(), s.len())); + + if let Some((diff_str, diff_len)) = diff_info { + // Only thin if diff is greater than 1000 chars + if diff_len > 1000 { + let timestamp = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_secs(); + let filename = format!("leaned_str_replace_diff_{}_{}.txt", timestamp, i); + let file_path = format!("{}/{}", tmp_dir, filename); + + if std::fs::write(&file_path, &diff_str).is_ok() { + args_obj.insert( + "diff".to_string(), + serde_json::Value::String(format!("", file_path)) + ); + modified = true; + chars_saved += diff_len; + tool_call_leaned_count += 1; + debug!("Thinned str_replace diff {} ({} chars) to {}", i, diff_len, file_path); + } + } + } + } + } + + // If we modified the tool call, reconstruct the message + if modified { + let prefix = &content[..tool_call_start]; + let suffix = &content[tool_call_start + json_str.len()..]; + + // Serialize the modified tool call + if let Ok(new_json) = serde_json::to_string(&tool_call) { + message.content = format!("{}{}{}", prefix, new_json, suffix); + } + } + } + } + } + } } } @@ -515,10 +619,18 @@ Format this as a detailed but concise summary that can be used to resume the con self.recalculate_tokens(); if leaned_count > 0 { - (format!("🥒 Context thinned at {}%: {} tool results, ~{} chars saved", - current_threshold, leaned_count, chars_saved), chars_saved) + if tool_call_leaned_count > 0 { + (format!("🥒 Context thinned at {}%: {} tool results + {} tool calls, ~{} chars saved", + current_threshold, leaned_count, tool_call_leaned_count, chars_saved), chars_saved) + } else { + (format!("🥒 Context thinned at {}%: {} tool results, ~{} chars saved", + current_threshold, leaned_count, chars_saved), chars_saved) + } + } else if tool_call_leaned_count > 0 { + (format!("🥒 Context thinned at {}%: {} tool calls, ~{} chars saved", + current_threshold, tool_call_leaned_count, chars_saved), chars_saved) } else { - (format!("ℹ Context thinning triggered at {}% but no large tool results found in first third", + (format!("ℹ Context thinning triggered at {}% but no large tool results or tool calls found in first third", current_threshold), 0) } } @@ -533,6 +645,35 @@ Format this as a detailed but concise summary that can be used to resume the con debug!("Recalculated tokens after thinning: {} tokens", total); } + + /// Helper function to find the end of a JSON object + fn find_json_end(json_str: &str) -> Option { + let mut brace_count = 0; + let mut in_string = false; + let mut escape_next = false; + + for (i, ch) in json_str.char_indices() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' => escape_next = true, + '"' if !escape_next => in_string = !in_string, + '{' if !in_string => brace_count += 1, + '}' if !in_string => { + brace_count -= 1; + if brace_count == 0 { + return Some(i); + } + } + _ => {} + } + } + + None + } } pub struct Agent { diff --git a/crates/g3-core/tests/test_context_thinning.rs b/crates/g3-core/tests/test_context_thinning.rs index 760524f..db6761f 100644 --- a/crates/g3-core/tests/test_context_thinning.rs +++ b/crates/g3-core/tests/test_context_thinning.rs @@ -72,7 +72,7 @@ fn test_thin_context_basic() { // Trigger thinning at 50% context.used_tokens = 5000; - let summary = context.thin_context(); + let (summary, _chars_saved) = context.thin_context(); println!("Thinning summary: {}", summary); @@ -93,6 +93,119 @@ fn test_thin_context_basic() { } } +#[test] +fn test_thin_write_file_tool_calls() { + let mut context = ContextWindow::new(10000); + + // Add some messages including a write_file tool call with large content + context.add_message(Message { + role: MessageRole::User, + content: "Please create a large file".to_string(), + }); + + // Add an assistant message with a write_file tool call containing large content + let large_content = "x".repeat(1500); + let tool_call_json = format!( + r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#, + large_content + ); + context.add_message(Message { + role: MessageRole::Assistant, + content: format!("I'll create that file.\n\n{}", tool_call_json), + }); + + context.add_message(Message { + role: MessageRole::User, + content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(), + }); + + // Add more messages to ensure we have enough for "first third" logic + for i in 0..6 { + context.add_message(Message { + role: MessageRole::Assistant, + content: format!("Response {}", i), + }); + } + + // Trigger thinning at 50% + context.used_tokens = 5000; + let (summary, _chars_saved) = context.thin_context(); + + println!("Thinning summary: {}", summary); + + // Should have thinned the write_file tool call + assert!(summary.contains("tool call") || summary.contains("chars saved")); + + // Check that the large content was replaced with a file reference + let first_third_end = context.conversation_history.len() / 3; + for i in 0..first_third_end { + if let Some(msg) = context.conversation_history.get(i) { + if matches!(msg.role, MessageRole::Assistant) && msg.content.contains("write_file") { + // The content should now reference an external file + assert!(msg.content.contains("