coach rigor +++

2025-10-24 10:11:43 +11:00
parent 0be4829ca9
commit e1e732150a
5 changed files with 278 additions and 59 deletions
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -243,6 +243,10 @@ pub struct Cli {
    /// Enable macOS Accessibility API tools for native app automation
    #[arg(long)]
    pub macax: bool,
    /// Enable WebDriver browser automation tools
    #[arg(long)]
    pub webdriver: bool,
 }
 pub async fn run() -> Result<()> {
@@ -451,6 +455,11 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
        }
    }
    // Apply webdriver flag override
    if cli.webdriver {
        config.webdriver.enabled = true;
    }
    // Validate provider if specified
    if let Some(ref provider) = cli.provider {
        let valid_providers = ["anthropic", "databricks", "embedded", "openai"];
@@ -1630,6 +1639,7 @@ Review the current state of the project and provide a concise critique focusing
 2. Whether the project compiles successfully
 3. What requirements are missing or incorrect
 4. Specific improvements needed to satisfy requirements
 5. Use UI tools such as webdriver to test functionality thoroughly
 CRITICAL INSTRUCTIONS:
 1. You MUST use the final_output tool to provide your feedback
@@ -1637,13 +1647,13 @@ CRITICAL INSTRUCTIONS:
 3. Focus ONLY on what needs to be fixed or improved
 4. Do NOT include your analysis process, file contents, or compilation output in the summary
-If the implementation generally meets all requirements and compiles without errors:
+If the implementation thoroughly meets all requirements, compiles and is fully tested (especially UI flows) *WITHOUT* gaps or errors:
 - Call final_output with summary: 'IMPLEMENTATION_APPROVED'
 If improvements are needed:
 - Call final_output with a brief summary listing ONLY the specific issues to fix
-Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and generally fits the requirements. Don't be picky.",
+Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and thoroughly fits the requirements (implementation > 95% complete). Be rigorous, especially by testing that all UI features work.",
            requirements
        );
--- a/crates/g3-computer-control/examples/list_windows.rs
+++ b/crates/g3-computer-control/examples/list_windows.rs
@@ -1,7 +1,7 @@
 use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo};
 use core_foundation::dictionary::CFDictionary;
 use core_foundation::string::CFString;
-use core_foundation::base::TCFType;
+use core_foundation::base::{TCFType, ToVoid};
 fn main() {
    println!("Listing all on-screen windows...");
@@ -22,7 +22,7 @@ fn main() {
            // Get window ID
            let window_id_key = CFString::from_static_string("kCGWindowNumber");
-            let window_id: i64 = if let Some(value) = dict.find(window_id_key.as_concrete_TypeRef()) {
+            let window_id: i64 = if let Some(value) = dict.find(window_id_key.to_void()) {
                let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
                num.to_i64().unwrap_or(0)
            } else {
@@ -31,7 +31,7 @@ fn main() {
            // Get owner name
            let owner_key = CFString::from_static_string("kCGWindowOwnerName");
-            let owner: String = if let Some(value) = dict.find(owner_key.as_concrete_TypeRef()) {
+            let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
                let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
                s.to_string()
            } else {
@@ -40,7 +40,7 @@ fn main() {
            // Get window name/title
            let name_key = CFString::from_static_string("kCGWindowName");
-            let title: String = if let Some(value) = dict.find(name_key.as_concrete_TypeRef()) {
+            let title: String = if let Some(value) = dict.find(name_key.to_void()) {
                let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
                s.to_string()
            } else {
--- a/crates/g3-computer-control/tests/integration_test.rs
+++ b/crates/g3-computer-control/tests/integration_test.rs
@@ -1,23 +1,5 @@
 use g3_computer_control::*;
 #[tokio::test]
 async fn test_mouse_movement() {
    let ctl = create_controller().expect("Failed to create controller");

    // Target (960, 540): the midpoint of a display assumed to be 1920x1080.
    let outcome = ctl.move_mouse(960, 540).await;
    assert!(outcome.is_ok(), "Failed to move mouse: {:?}", outcome.err());
 }
 #[tokio::test]
 async fn test_typing() {
    let ctl = create_controller().expect("Failed to create controller");

    // Send a short fixed phrase through the controller's text-entry call.
    let outcome = ctl.type_text("Hello, World!").await;
    assert!(outcome.is_ok(), "Failed to type text: {:?}", outcome.err());
 }
 #[tokio::test]
 async fn test_screenshot() {
    let controller = create_controller().expect("Failed to create controller");
@@ -33,30 +15,3 @@ async fn test_screenshot() {
    // Clean up
    let _ = std::fs::remove_file(path);
 }
 #[tokio::test]
 async fn test_click() {
    let ctl = create_controller().expect("Failed to create controller");

    // Issue a single left-button click.
    let outcome = ctl.click(types::MouseButton::Left).await;
    assert!(outcome.is_ok(), "Failed to click: {:?}", outcome.err());
 }
 #[tokio::test]
 async fn test_double_click() {
    let ctl = create_controller().expect("Failed to create controller");

    // Issue a left-button double click.
    let outcome = ctl.double_click(types::MouseButton::Left).await;
    assert!(outcome.is_ok(), "Failed to double click: {:?}", outcome.err());
 }
 #[tokio::test]
 async fn test_press_key() {
    let ctl = create_controller().expect("Failed to create controller");

    // Send the key named "escape".
    let outcome = ctl.press_key("escape").await;
    assert!(outcome.is_ok(), "Failed to press key: {:?}", outcome.err());
 }
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -466,6 +466,7 @@ Format this as a detailed but concise summary that can be used to resume the con
        let first_third_end = (total_messages / 3).max(1);
        let mut leaned_count = 0;
        let mut tool_call_leaned_count = 0;
        let mut chars_saved = 0;
        // Create ~/tmp directory if it doesn't exist
@@ -478,7 +479,7 @@ Format this as a detailed but concise summary that can be used to resume the con
        // Scan the first third of messages
        for i in 0..first_third_end {
            if let Some(message) = self.conversation_history.get_mut(i) {
-                // Only process User messages that look like tool results
+                // Process User messages that look like tool results
                if matches!(message.role, MessageRole::User) && message.content.starts_with("Tool result:") {
                    let content_len = message.content.len();
@@ -508,6 +509,109 @@ Format this as a detailed but concise summary that can be used to resume the con
                        debug!("Thinned tool result {} ({} chars) to {}", i, original_len, file_path);
                    }
                }
                // Process Assistant messages that contain tool calls with large arguments
                if matches!(message.role, MessageRole::Assistant) {
                    // Try to parse the message content as JSON to find tool calls
                    let content = &message.content;
                    // Look for JSON tool call patterns
                    if let Some(tool_call_start) = content.find(r#"{"tool":"#)
                        .or_else(|| content.find(r#"{ "tool":"#))
                        .or_else(|| content.find(r#"{"tool" :"#))
                        .or_else(|| content.find(r#"{ "tool" :"#))
                    {
                        // Try to extract and parse the JSON tool call
                        let json_portion = &content[tool_call_start..];
                        // Find the end of the JSON object
                        if let Some(json_end) = Self::find_json_end(json_portion) {
                            let json_str = &json_portion[..=json_end];
                            // Try to parse as ToolCall
                            if let Ok(mut tool_call) = serde_json::from_str::<ToolCall>(json_str) {
                                let mut modified = false;
                                // Handle write_file tool calls
                                if tool_call.tool == "write_file" {
                                    if let Some(args_obj) = tool_call.args.as_object_mut() {
                                        // Extract content to avoid borrow issues
                                        let content_info = args_obj.get("content")
                                            .and_then(|v| v.as_str())
                                            .map(|s| (s.to_string(), s.len()));
                                        if let Some((content_str, content_len)) = content_info {
                                            // Only thin if content is greater than 1000 chars
                                            if content_len > 1000 {
                                                let timestamp = std::time::SystemTime::now()
                                                    .duration_since(std::time::UNIX_EPOCH)
                                                    .unwrap_or_default()
                                                    .as_secs();
                                                let filename = format!("leaned_write_file_content_{}_{}.txt", timestamp, i);
                                                let file_path = format!("{}/{}", tmp_dir, filename);
                                                if std::fs::write(&file_path, &content_str).is_ok() {
                                                    args_obj.insert(
                                                        "content".to_string(),
                                                        serde_json::Value::String(format!("<content saved to {}>", file_path))
                                                    );
                                                    modified = true;
                                                    chars_saved += content_len;
                                                    tool_call_leaned_count += 1;
                                                    debug!("Thinned write_file content {} ({} chars) to {}", i, content_len, file_path);
                                                }
                                            }
                                        }
                                    }
                                }
                                // Handle str_replace tool calls
                                if tool_call.tool == "str_replace" {
                                    if let Some(args_obj) = tool_call.args.as_object_mut() {
                                        // Extract diff to avoid borrow issues
                                        let diff_info = args_obj.get("diff")
                                            .and_then(|v| v.as_str())
                                            .map(|s| (s.to_string(), s.len()));
                                        if let Some((diff_str, diff_len)) = diff_info {
                                            // Only thin if diff is greater than 1000 chars
                                            if diff_len > 1000 {
                                                let timestamp = std::time::SystemTime::now()
                                                    .duration_since(std::time::UNIX_EPOCH)
                                                    .unwrap_or_default()
                                                    .as_secs();
                                                let filename = format!("leaned_str_replace_diff_{}_{}.txt", timestamp, i);
                                                let file_path = format!("{}/{}", tmp_dir, filename);
                                                if std::fs::write(&file_path, &diff_str).is_ok() {
                                                    args_obj.insert(
                                                        "diff".to_string(),
                                                        serde_json::Value::String(format!("<diff saved to {}>", file_path))
                                                    );
                                                    modified = true;
                                                    chars_saved += diff_len;
                                                    tool_call_leaned_count += 1;
                                                    debug!("Thinned str_replace diff {} ({} chars) to {}", i, diff_len, file_path);
                                                }
                                            }
                                        }
                                    }
                                }
                                // If we modified the tool call, reconstruct the message
                                if modified {
                                    let prefix = &content[..tool_call_start];
                                    let suffix = &content[tool_call_start + json_str.len()..];
                                    // Serialize the modified tool call
                                    if let Ok(new_json) = serde_json::to_string(&tool_call) {
                                        message.content = format!("{}{}{}", prefix, new_json, suffix);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
@@ -515,10 +619,18 @@ Format this as a detailed but concise summary that can be used to resume the con
        self.recalculate_tokens();
        if leaned_count > 0 {
            if tool_call_leaned_count > 0 {
                (format!("🥒 Context thinned at {}%: {} tool results + {} tool calls, ~{} chars saved", 
                        current_threshold, leaned_count, tool_call_leaned_count, chars_saved), chars_saved)
            } else {
                (format!("🥒 Context thinned at {}%: {} tool results, ~{} chars saved", 
                        current_threshold, leaned_count, chars_saved), chars_saved)
            }
        } else if tool_call_leaned_count > 0 {
            (format!("🥒 Context thinned at {}%: {} tool calls, ~{} chars saved", 
                    current_threshold, tool_call_leaned_count, chars_saved), chars_saved)
        } else {
-            (format!("ℹ Context thinning triggered at {}% but no large tool results found in first third", 
+            (format!("ℹ Context thinning triggered at {}% but no large tool results or tool calls found in first third", 
                    current_threshold), 0)
        }
    }
@@ -533,6 +645,35 @@ Format this as a detailed but concise summary that can be used to resume the con
        debug!("Recalculated tokens after thinning: {} tokens", total);
    }
    /// Locates the closing brace of the first balanced JSON object in `json_str`.
    ///
    /// The input is scanned once while tracking brace depth and whether the cursor
    /// is inside a double-quoted string, so braces that appear inside string values
    /// are not counted. Backslash escapes are honoured only inside strings, which is
    /// the only place JSON gives them meaning.
    ///
    /// Returns the byte offset of the `}` that brings the depth back to zero, or
    /// `None` when no object is closed or a `}` appears with no `{` open.
    fn find_json_end(json_str: &str) -> Option<usize> {
        let mut depth: usize = 0;
        let mut in_string = false;
        let mut escaped = false;

        for (idx, ch) in json_str.char_indices() {
            if in_string {
                if escaped {
                    // The character following a backslash is consumed verbatim.
                    escaped = false;
                } else if ch == '\\' {
                    escaped = true;
                } else if ch == '"' {
                    in_string = false;
                }
                continue;
            }

            match ch {
                '"' => in_string = true,
                '{' => depth += 1,
                '}' => {
                    // A closing brace with nothing open means the text is not a
                    // balanced object; bail out instead of letting the count go negative.
                    depth = depth.checked_sub(1)?;
                    if depth == 0 {
                        return Some(idx);
                    }
                }
                _ => {}
            }
        }

        None
    }
 }
 pub struct Agent<W: UiWriter> {
--- a/crates/g3-core/tests/test_context_thinning.rs
+++ b/crates/g3-core/tests/test_context_thinning.rs
@@ -72,7 +72,7 @@ fn test_thin_context_basic() {
    // Trigger thinning at 50%
    context.used_tokens = 5000;
-    let summary = context.thin_context();
+    let (summary, _chars_saved) = context.thin_context();
    println!("Thinning summary: {}", summary);
@@ -93,6 +93,119 @@ fn test_thin_context_basic() {
    }
 }
 #[test]
 fn test_thin_write_file_tool_calls() {
    // A 1500-character payload embedded as the `content` argument of a write_file call.
    let big_payload = "x".repeat(1500);
    let call_json = format!(
        r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
        big_payload
    );

    let mut window = ContextWindow::new(10000);

    // Conversation opener: user request, assistant tool call, tool result.
    let opening = vec![
        Message {
            role: MessageRole::User,
            content: "Please create a large file".to_string(),
        },
        Message {
            role: MessageRole::Assistant,
            content: format!("I'll create that file.\n\n{}", call_json),
        },
        Message {
            role: MessageRole::User,
            content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
        },
    ];
    for msg in opening {
        window.add_message(msg);
    }

    // Six filler replies (nine messages added in total) so the "first third"
    // checked below is non-empty.
    for n in 0..6 {
        window.add_message(Message {
            role: MessageRole::Assistant,
            content: format!("Response {}", n),
        });
    }

    // Trigger thinning at 50%
    window.used_tokens = 5000;
    let (report, _saved) = window.thin_context();
    println!("Thinning summary: {}", report);

    // The report must mention the thinned tool call or the saved characters.
    assert!(report.contains("tool call") || report.contains("chars saved"));

    // Every write_file call from the assistant within the first third must now
    // carry a file reference instead of the original payload.
    let scan_limit = window.conversation_history.len() / 3;
    let candidates = window
        .conversation_history
        .iter()
        .take(scan_limit)
        .filter(|m| matches!(m.role, MessageRole::Assistant) && m.content.contains("write_file"));
    for m in candidates {
        assert!(m.content.contains("<content saved to"));
        assert!(!m.content.contains(&big_payload));
    }
 }
 #[test]
 fn test_thin_str_replace_tool_calls() {
    // A unified-diff-shaped payload with 100 removed and 100 added lines.
    let removed = "-old line\n".repeat(100);
    let added = "+new line\n".repeat(100);
    let big_diff = format!("--- old\n{}\n+++ new\n{}", removed, added);

    // Newlines are escaped so the diff can sit inside a JSON string literal.
    let call_json = format!(
        r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
        big_diff.replace('\n', "\\n")
    );

    let mut window = ContextWindow::new(10000);

    // Conversation opener: user request, assistant tool call, tool result.
    let opening = vec![
        Message {
            role: MessageRole::User,
            content: "Please update the file".to_string(),
        },
        Message {
            role: MessageRole::Assistant,
            content: format!("I'll update that file.\n\n{}", call_json),
        },
        Message {
            role: MessageRole::User,
            content: "Tool result: ✅ applied unified diff".to_string(),
        },
    ];
    for msg in opening {
        window.add_message(msg);
    }

    // Six filler replies (nine messages added in total) so the "first third"
    // checked below is non-empty.
    for n in 0..6 {
        window.add_message(Message {
            role: MessageRole::Assistant,
            content: format!("Response {}", n),
        });
    }

    // Trigger thinning at 50%
    window.used_tokens = 5000;
    let (report, _saved) = window.thin_context();
    println!("Thinning summary: {}", report);

    // The report must mention the thinned tool call or the saved characters.
    assert!(report.contains("tool call") || report.contains("chars saved"));

    // Every str_replace call from the assistant within the first third must now
    // carry a file reference and none of the original diff lines.
    let scan_limit = window.conversation_history.len() / 3;
    let candidates = window
        .conversation_history
        .iter()
        .take(scan_limit)
        .filter(|m| matches!(m.role, MessageRole::Assistant) && m.content.contains("str_replace"));
    for m in candidates {
        assert!(m.content.contains("<diff saved to"));
        assert!(!m.content.contains("old line"));
    }
 }
 #[test]
 fn test_thin_context_no_large_results() {
    let mut context = ContextWindow::new(10000);
@@ -106,10 +219,10 @@ fn test_thin_context_no_large_results() {
    }
    context.used_tokens = 5000;
-    let summary = context.thin_context();
+    let (summary, _chars_saved) = context.thin_context();
    // Should report no large results found
-    assert!(summary.contains("no large tool results found"));
+    assert!(summary.contains("no large tool results or tool calls found"));
 }
 #[test]
@@ -135,7 +248,7 @@ fn test_thin_context_only_affects_first_third() {
    }
    context.used_tokens = 5000;
-    let summary = context.thin_context();
+    let (summary, _chars_saved) = context.thin_context();
    // First third is 4 messages (indices 0-3), so only indices 1 and 3 should be thinned
    // That's 2 tool results