coach rigor +++

2025-10-24 10:11:43 +11:00
parent 0be4829ca9
commit e1e732150a
5 changed files with 278 additions and 59 deletions
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -243,6 +243,10 @@ pub struct Cli {
    /// Enable macOS Accessibility API tools for native app automation
    #[arg(long)]
    pub macax: bool,
+
+    /// Enable WebDriver browser automation tools
+    #[arg(long)]
+    pub webdriver: bool,
 }

 pub async fn run() -> Result<()> {
@@ -451,6 +455,11 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
        }
    }

+    // Apply webdriver flag override
+    if cli.webdriver {
+        config.webdriver.enabled = true;
+    }
+
    // Validate provider if specified
    if let Some(ref provider) = cli.provider {
        let valid_providers = ["anthropic", "databricks", "embedded", "openai"];
@@ -1630,6 +1639,7 @@ Review the current state of the project and provide a concise critique focusing
 2. Whether the project compiles successfully
 3. What requirements are missing or incorrect
 4. Specific improvements needed to satisfy requirements
+5. Use UI tools such as webdriver to test functionality thoroughly

 CRITICAL INSTRUCTIONS:
 1. You MUST use the final_output tool to provide your feedback
@@ -1637,13 +1647,13 @@ CRITICAL INSTRUCTIONS:
 3. Focus ONLY on what needs to be fixed or improved
 4. Do NOT include your analysis process, file contents, or compilation output in the summary

-If the implementation generally meets all requirements and compiles without errors:
+If the implementation thoroughly meets all requirements, compiles and is fully tested (especially UI flows) *WITHOUT* gaps or errors:
 - Call final_output with summary: 'IMPLEMENTATION_APPROVED'

 If improvements are needed:
 - Call final_output with a brief summary listing ONLY the specific issues to fix

-Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and generally fits the requirements. Don't be picky.",
+Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and thoroughly fits the requirements (implementation > 95% complete). Be rigorous, especially by testing that all UI features work.",
            requirements
        );

--- a/crates/g3-computer-control/examples/list_windows.rs
+++ b/crates/g3-computer-control/examples/list_windows.rs
@@ -1,7 +1,7 @@
 use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo};
 use core_foundation::dictionary::CFDictionary;
 use core_foundation::string::CFString;
-use core_foundation::base::TCFType;
+use core_foundation::base::{TCFType, ToVoid};

 fn main() {
    println!("Listing all on-screen windows...");
@@ -22,7 +22,7 @@ fn main() {
            
            // Get window ID
            let window_id_key = CFString::from_static_string("kCGWindowNumber");
-            let window_id: i64 = if let Some(value) = dict.find(window_id_key.as_concrete_TypeRef()) {
+            let window_id: i64 = if let Some(value) = dict.find(window_id_key.to_void()) {
                let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
                num.to_i64().unwrap_or(0)
            } else {
@@ -31,7 +31,7 @@ fn main() {
            
            // Get owner name
            let owner_key = CFString::from_static_string("kCGWindowOwnerName");
-            let owner: String = if let Some(value) = dict.find(owner_key.as_concrete_TypeRef()) {
+            let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
                let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
                s.to_string()
            } else {
@@ -40,7 +40,7 @@ fn main() {
            
            // Get window name/title
            let name_key = CFString::from_static_string("kCGWindowName");
-            let title: String = if let Some(value) = dict.find(name_key.as_concrete_TypeRef()) {
+            let title: String = if let Some(value) = dict.find(name_key.to_void()) {
                let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
                s.to_string()
            } else {
--- a/crates/g3-computer-control/tests/integration_test.rs
+++ b/crates/g3-computer-control/tests/integration_test.rs
@@ -1,23 +1,5 @@
 use g3_computer_control::*;

-#[tokio::test]
-async fn test_mouse_movement() {
-    let controller = create_controller().expect("Failed to create controller");
-    
-    // Move mouse to center of screen (assuming 1920x1080)
-    let result = controller.move_mouse(960, 540).await;
-    assert!(result.is_ok(), "Failed to move mouse: {:?}", result.err());
-}
-
-#[tokio::test]
-async fn test_typing() {
-    let controller = create_controller().expect("Failed to create controller");
-    
-    // Type some text
-    let result = controller.type_text("Hello, World!").await;
-    assert!(result.is_ok(), "Failed to type text: {:?}", result.err());
-}
-
 #[tokio::test]
 async fn test_screenshot() {
    let controller = create_controller().expect("Failed to create controller");
@@ -33,30 +15,3 @@ async fn test_screenshot() {
    // Clean up
    let _ = std::fs::remove_file(path);
 }
-
-#[tokio::test]
-async fn test_click() {
-    let controller = create_controller().expect("Failed to create controller");
-    
-    // Click at a safe location
-    let result = controller.click(types::MouseButton::Left).await;
-    assert!(result.is_ok(), "Failed to click: {:?}", result.err());
-}
-
-#[tokio::test]
-async fn test_double_click() {
-    let controller = create_controller().expect("Failed to create controller");
-    
-    // Double click
-    let result = controller.double_click(types::MouseButton::Left).await;
-    assert!(result.is_ok(), "Failed to double click: {:?}", result.err());
-}
-
-#[tokio::test]
-async fn test_press_key() {
-    let controller = create_controller().expect("Failed to create controller");
-    
-    // Press escape key
-    let result = controller.press_key("escape").await;
-    assert!(result.is_ok(), "Failed to press key: {:?}", result.err());
-}
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -466,6 +466,7 @@ Format this as a detailed but concise summary that can be used to resume the con
        let first_third_end = (total_messages / 3).max(1);
        
        let mut leaned_count = 0;
+        let mut tool_call_leaned_count = 0;
        let mut chars_saved = 0;
        
        // Create ~/tmp directory if it doesn't exist
@@ -478,7 +479,7 @@ Format this as a detailed but concise summary that can be used to resume the con
        // Scan the first third of messages
        for i in 0..first_third_end {
            if let Some(message) = self.conversation_history.get_mut(i) {
-                // Only process User messages that look like tool results
+                // Process User messages that look like tool results
                if matches!(message.role, MessageRole::User) && message.content.starts_with("Tool result:") {
                    let content_len = message.content.len();
                    
@@ -508,6 +509,109 @@ Format this as a detailed but concise summary that can be used to resume the con
                        debug!("Thinned tool result {} ({} chars) to {}", i, original_len, file_path);
                    }
                }
+                
+                // Process Assistant messages that contain tool calls with large arguments
+                if matches!(message.role, MessageRole::Assistant) {
+                    // Try to parse the message content as JSON to find tool calls
+                    let content = &message.content;
+                    
+                    // Look for JSON tool call patterns
+                    if let Some(tool_call_start) = content.find(r#"{"tool":"#)
+                        .or_else(|| content.find(r#"{ "tool":"#))
+                        .or_else(|| content.find(r#"{"tool" :"#))
+                        .or_else(|| content.find(r#"{ "tool" :"#))
+                    {
+                        // Try to extract and parse the JSON tool call
+                        let json_portion = &content[tool_call_start..];
+                        
+                        // Find the end of the JSON object
+                        if let Some(json_end) = Self::find_json_end(json_portion) {
+                            let json_str = &json_portion[..=json_end];
+                            
+                            // Try to parse as ToolCall
+                            if let Ok(mut tool_call) = serde_json::from_str::<ToolCall>(json_str) {
+                                let mut modified = false;
+                                
+                                // Handle write_file tool calls
+                                if tool_call.tool == "write_file" {
+                                    if let Some(args_obj) = tool_call.args.as_object_mut() {
+                                        // Extract content to avoid borrow issues
+                                        let content_info = args_obj.get("content")
+                                            .and_then(|v| v.as_str())
+                                            .map(|s| (s.to_string(), s.len()));
+                                        
+                                        if let Some((content_str, content_len)) = content_info {
+                                            // Only thin if content is greater than 1000 chars
+                                            if content_len > 1000 {
+                                                let timestamp = std::time::SystemTime::now()
+                                                    .duration_since(std::time::UNIX_EPOCH)
+                                                    .unwrap_or_default()
+                                                    .as_secs();
+                                                let filename = format!("leaned_write_file_content_{}_{}.txt", timestamp, i);
+                                                let file_path = format!("{}/{}", tmp_dir, filename);
+                                                
+                                                if std::fs::write(&file_path, &content_str).is_ok() {
+                                                    args_obj.insert(
+                                                        "content".to_string(),
+                                                        serde_json::Value::String(format!("<content saved to {}>", file_path))
+                                                    );
+                                                    modified = true;
+                                                    chars_saved += content_len;
+                                                    tool_call_leaned_count += 1;
+                                                    debug!("Thinned write_file content {} ({} chars) to {}", i, content_len, file_path);
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                
+                                // Handle str_replace tool calls
+                                if tool_call.tool == "str_replace" {
+                                    if let Some(args_obj) = tool_call.args.as_object_mut() {
+                                        // Extract diff to avoid borrow issues
+                                        let diff_info = args_obj.get("diff")
+                                            .and_then(|v| v.as_str())
+                                            .map(|s| (s.to_string(), s.len()));
+                                        
+                                        if let Some((diff_str, diff_len)) = diff_info {
+                                            // Only thin if diff is greater than 1000 chars
+                                            if diff_len > 1000 {
+                                                let timestamp = std::time::SystemTime::now()
+                                                    .duration_since(std::time::UNIX_EPOCH)
+                                                    .unwrap_or_default()
+                                                    .as_secs();
+                                                let filename = format!("leaned_str_replace_diff_{}_{}.txt", timestamp, i);
+                                                let file_path = format!("{}/{}", tmp_dir, filename);
+                                                
+                                                if std::fs::write(&file_path, &diff_str).is_ok() {
+                                                    args_obj.insert(
+                                                        "diff".to_string(),
+                                                        serde_json::Value::String(format!("<diff saved to {}>", file_path))
+                                                    );
+                                                    modified = true;
+                                                    chars_saved += diff_len;
+                                                    tool_call_leaned_count += 1;
+                                                    debug!("Thinned str_replace diff {} ({} chars) to {}", i, diff_len, file_path);
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                                
+                                // If we modified the tool call, reconstruct the message
+                                if modified {
+                                    let prefix = &content[..tool_call_start];
+                                    let suffix = &content[tool_call_start + json_str.len()..];
+                                    
+                                    // Serialize the modified tool call
+                                    if let Ok(new_json) = serde_json::to_string(&tool_call) {
+                                        message.content = format!("{}{}{}", prefix, new_json, suffix);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
            }
        }
        
@@ -515,10 +619,18 @@ Format this as a detailed but concise summary that can be used to resume the con
        self.recalculate_tokens();
        
        if leaned_count > 0 {
+            if tool_call_leaned_count > 0 {
+                (format!("🥒 Context thinned at {}%: {} tool results + {} tool calls, ~{} chars saved", 
+                        current_threshold, leaned_count, tool_call_leaned_count, chars_saved), chars_saved)
+            } else {
                (format!("🥒 Context thinned at {}%: {} tool results, ~{} chars saved", 
                        current_threshold, leaned_count, chars_saved), chars_saved)
+            }
+        } else if tool_call_leaned_count > 0 {
+            (format!("🥒 Context thinned at {}%: {} tool calls, ~{} chars saved", 
+                    current_threshold, tool_call_leaned_count, chars_saved), chars_saved)
        } else {
-            (format!("ℹ Context thinning triggered at {}% but no large tool results found in first third", 
+            (format!("ℹ Context thinning triggered at {}% but no large tool results or tool calls found in first third", 
                    current_threshold), 0)
        }
    }
@@ -533,6 +645,35 @@ Format this as a detailed but concise summary that can be used to resume the con
        
        debug!("Recalculated tokens after thinning: {} tokens", total);
    }
+    
+    /// Helper function to find the end of a JSON object
+    fn find_json_end(json_str: &str) -> Option<usize> {
+        let mut brace_count = 0;
+        let mut in_string = false;
+        let mut escape_next = false;
+        
+        for (i, ch) in json_str.char_indices() {
+            if escape_next {
+                escape_next = false;
+                continue;
+            }
+            
+            match ch {
+                '\\' => escape_next = true,
+                '"' if !escape_next => in_string = !in_string,
+                '{' if !in_string => brace_count += 1,
+                '}' if !in_string => {
+                    brace_count -= 1;
+                    if brace_count == 0 {
+                        return Some(i);
+                    }
+                }
+                _ => {}
+            }
+        }
+        
+        None
+    }
 }

 pub struct Agent<W: UiWriter> {
--- a/crates/g3-core/tests/test_context_thinning.rs
+++ b/crates/g3-core/tests/test_context_thinning.rs
@@ -72,7 +72,7 @@ fn test_thin_context_basic() {
    
    // Trigger thinning at 50%
    context.used_tokens = 5000;
-    let summary = context.thin_context();
+    let (summary, _chars_saved) = context.thin_context();
    
    println!("Thinning summary: {}", summary);
    
@@ -93,6 +93,119 @@ fn test_thin_context_basic() {
    }
 }

+#[test]
+fn test_thin_write_file_tool_calls() {
+    let mut context = ContextWindow::new(10000);
+    
+    // Add some messages including a write_file tool call with large content
+    context.add_message(Message {
+        role: MessageRole::User,
+        content: "Please create a large file".to_string(),
+    });
+    
+    // Add an assistant message with a write_file tool call containing large content
+    let large_content = "x".repeat(1500);
+    let tool_call_json = format!(
+        r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
+        large_content
+    );
+    context.add_message(Message {
+        role: MessageRole::Assistant,
+        content: format!("I'll create that file.\n\n{}", tool_call_json),
+    });
+    
+    context.add_message(Message {
+        role: MessageRole::User,
+        content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
+    });
+    
+    // Add more messages to ensure we have enough for "first third" logic
+    for i in 0..6 {
+        context.add_message(Message {
+            role: MessageRole::Assistant,
+            content: format!("Response {}", i),
+        });
+    }
+    
+    // Trigger thinning at 50%
+    context.used_tokens = 5000;
+    let (summary, _chars_saved) = context.thin_context();
+    
+    println!("Thinning summary: {}", summary);
+    
+    // Should have thinned the write_file tool call
+    assert!(summary.contains("tool call") || summary.contains("chars saved"));
+    
+    // Check that the large content was replaced with a file reference
+    let first_third_end = context.conversation_history.len() / 3;
+    for i in 0..first_third_end {
+        if let Some(msg) = context.conversation_history.get(i) {
+            if matches!(msg.role, MessageRole::Assistant) && msg.content.contains("write_file") {
+                // The content should now reference an external file
+                assert!(msg.content.contains("<content saved to"));
+                assert!(!msg.content.contains(&large_content));
+            }
+        }
+    }
+}
+
+#[test]
+fn test_thin_str_replace_tool_calls() {
+    let mut context = ContextWindow::new(10000);
+    
+    // Add some messages including a str_replace tool call with large diff
+    context.add_message(Message {
+        role: MessageRole::User,
+        content: "Please update the file".to_string(),
+    });
+    
+    // Add an assistant message with a str_replace tool call containing large diff
+    let large_diff = format!("--- old\n{}\n+++ new\n{}", "-old line\n".repeat(100), "+new line\n".repeat(100));
+    let tool_call_json = format!(
+        r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
+        large_diff.replace('\n', "\\n")
+    );
+    context.add_message(Message {
+        role: MessageRole::Assistant,
+        content: format!("I'll update that file.\n\n{}", tool_call_json),
+    });
+    
+    context.add_message(Message {
+        role: MessageRole::User,
+        content: "Tool result: ✅ applied unified diff".to_string(),
+    });
+    
+    // Add more messages to ensure we have enough for "first third" logic
+    for i in 0..6 {
+        context.add_message(Message {
+            role: MessageRole::Assistant,
+            content: format!("Response {}", i),
+        });
+    }
+    
+    // Trigger thinning at 50%
+    context.used_tokens = 5000;
+    let (summary, _chars_saved) = context.thin_context();
+    
+    println!("Thinning summary: {}", summary);
+    
+    // Should have thinned the str_replace tool call
+    assert!(summary.contains("tool call") || summary.contains("chars saved"));
+    
+    // Check that the large diff was replaced with a file reference
+    let first_third_end = context.conversation_history.len() / 3;
+    for i in 0..first_third_end {
+        if let Some(msg) = context.conversation_history.get(i) {
+            if matches!(msg.role, MessageRole::Assistant) && msg.content.contains("str_replace") {
+                // The diff should now reference an external file
+                assert!(msg.content.contains("<diff saved to"));
+                // Should not contain the large diff content
+                assert!(!msg.content.contains("old line"));
+            }
+        }
+    }
+}
+
 #[test]
 fn test_thin_context_no_large_results() {
    let mut context = ContextWindow::new(10000);
@@ -106,10 +219,10 @@ fn test_thin_context_no_large_results() {
    }
    
    context.used_tokens = 5000;
-    let summary = context.thin_context();
+    let (summary, _chars_saved) = context.thin_context();
    
    // Should report no large results found
-    assert!(summary.contains("no large tool results found"));
+    assert!(summary.contains("no large tool results or tool calls found"));
 }

 #[test]
@@ -135,7 +248,7 @@ fn test_thin_context_only_affects_first_third() {
    }
    
    context.used_tokens = 5000;
-    let summary = context.thin_context();
+    let (summary, _chars_saved) = context.thin_context();
    
    // First third is 4 messages (indices 0-3), so only indices 1 and 3 should be thinned
    // That's 2 tool results