disable thinking if there is no token budget

small fix to provider name check
fix for thinking budget and hardcoded max token on summary
2025-12-09 16:45:28 +11:00 · 2025-12-09 14:43:35 +11:00 · 2025-12-09 12:41:52 +11:00 · 2025-12-09 10:15:32 +11:00 · 2025-12-08 11:05:01 +11:00 · 2025-12-05 15:32:13 +11:00
111 changed files with 12061 additions and 3160 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -576,6 +576,26 @@ dependencies = [
 "tiny-keccak",
 ]

+[[package]]
+name = "const_format"
+version = "0.2.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7faa7469a93a566e9ccc1c73fe783b4a65c274c5ace346038dca9c39fe0030ad"
+dependencies = [
+ "const_format_proc_macros",
+]
+
+[[package]]
+name = "const_format_proc_macros"
+version = "0.2.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-xid",
+]
+
 [[package]]
 name = "convert_case"
 version = "0.4.0"
@@ -1331,6 +1351,8 @@ version = "0.1.0"
 dependencies = [
 "anyhow",
 "g3-cli",
+ "g3-providers",
+ "serde_json",
 "tokio",
 ]

@@ -1345,11 +1367,17 @@ dependencies = [
 "dirs 5.0.1",
 "g3-config",
 "g3-core",
+ "g3-ensembles",
+ "g3-planner",
+ "g3-providers",
+ "hex",
 "indicatif",
 "ratatui",
 "rustyline",
 "serde",
 "serde_json",
+ "sha2",
+ "tempfile",
 "termimad",
 "tokio",
 "tokio-util",
@@ -1389,6 +1417,7 @@ dependencies = [
 "config",
 "dirs 5.0.1",
 "serde",
+ "serde_json",
 "shellexpand",
 "tempfile",
 "thiserror 1.0.69",
@@ -1427,6 +1456,7 @@ dependencies = [
 "anyhow",
 "async-trait",
 "chrono",
+ "const_format",
 "futures-util",
 "g3-computer-control",
 "g3-config",
@@ -1462,6 +1492,23 @@ dependencies = [
 "walkdir",
 ]

+[[package]]
+name = "g3-ensembles"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "chrono",
+ "clap",
+ "g3-config",
+ "g3-core",
+ "serde",
+ "serde_json",
+ "tempfile",
+ "tokio",
+ "tracing",
+ "uuid",
+]
+
 [[package]]
 name = "g3-execution"
 version = "0.1.0"
@@ -1475,6 +1522,19 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "g3-planner"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "chrono",
+ "const_format",
+ "g3-providers",
+ "serde",
+ "serde_json",
+ "tokio",
+]
+
 [[package]]
 name = "g3-providers"
 version = "0.1.0"
@@ -1489,6 +1549,7 @@ dependencies = [
 "futures-util",
 "llama_cpp",
 "nanoid",
+ "rand",
 "reqwest",
 "serde",
 "serde_json",
@@ -1631,6 +1692,12 @@ version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"

+[[package]]
+name = "hex"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
+
 [[package]]
 name = "home"
 version = "0.5.9"
@@ -4090,6 +4157,12 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"

+[[package]]
+name = "unicode-xid"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
+
 [[package]]
 name = "unsafe-libyaml"
 version = "0.2.11"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,11 +2,13 @@
 members = [
    "crates/g3-cli",
    "crates/g3-core", 
+    "crates/g3-planner",
    "crates/g3-providers",
    "crates/g3-config",
    "crates/g3-execution",
    "crates/g3-computer-control",
-    "crates/g3-console"
+    "crates/g3-console",
+    "crates/g3-ensembles"
 ]
 resolver = "2"

@@ -43,3 +45,9 @@ license = "MIT"
 g3-cli = { path = "crates/g3-cli" }
 tokio = { workspace = true }
 anyhow = { workspace = true }
+g3-providers = { path = "crates/g3-providers" }
+serde_json = { workspace = true }
+
+[[example]]
+name = "verify_message_id"
+path = "examples/verify_message_id.rs"
--- a/README.md
+++ b/README.md
@@ -76,6 +76,7 @@ G3 includes robust error handling with automatic retry logic:
 G3's interactive CLI includes control commands for manual context management:
 - **`/compact`**: Manually trigger summarization to compact conversation history
 - **`/thinnify`**: Manually trigger context thinning to replace large tool results with file references
+- **`/skinnify`**: Manually trigger full context thinning (like `/thinnify` but processes the entire context window, not just the first third)
 - **`/readme`**: Reload README.md and AGENTS.md from disk without restarting
 - **`/stats`**: Show detailed context and performance statistics
 - **`/help`**: Display all available control commands
@@ -96,6 +97,7 @@ These commands give you fine-grained control over context management, allowing y
  - Window listing and identification
 - **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript, Go, Java, C, C++) - see [Code Search Guide](docs/CODE_SEARCH.md)
 - **Final Output**: Formatted result presentation
+- **Flock Mode**: Parallel multi-agent development for large projects - see [Flock Mode Guide](docs/FLOCK_MODE.md)

 ### Provider Flexibility
 - Support for multiple LLM providers through a unified interface
@@ -129,6 +131,7 @@ G3 is designed for:
 - API integration and testing
 - Documentation generation
 - Complex multi-step workflows
+- Parallel development of modular architectures
 - Desktop application automation and testing

 ## Getting Started
--- a/config.coach-player.example.toml
+++ b/config.coach-player.example.toml
@@ -11,14 +11,27 @@ model = "databricks-claude-sonnet-4"
 max_tokens = 4096
 temperature = 0.1
 use_oauth = true
+# cache_config = "ephemeral"  # Optional: Enable prompt caching for Claude models
+                              # Options: "ephemeral", "5minute", "1hour"
+                              # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
+                                # The cache control will be automatically applied to:
+                                # - The system prompt at the start of each session
+                                # - Assistant responses after every 10 tool calls
+                                # - 5minute costs $3/mtok, more details below
+                                # https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing

 [providers.anthropic]
 api_key = "your-anthropic-api-key"
-model = "claude-3-haiku-20240307"  # Using a faster model for player
+model = "claude-sonnet-4-5"
 max_tokens = 4096
 temperature = 0.3  # Slightly higher temperature for more creative implementations
+# cache_config = "ephemeral"  # Optional: Enable prompt caching
+                              # Options: "ephemeral", "5minute", "1hour"
+                              # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
+# enable_1m_context = true    # optional, more expensive

 [agent]
 fallback_default_max_tokens = 8192
 enable_streaming = true
-timeout_seconds = 60
+timeout_seconds = 60
+allow_multiple_tool_calls = true  # Enable multiple tool calls, will usually only work with Anthropic
--- a/config.example.toml
+++ b/config.example.toml
@@ -15,6 +15,19 @@ max_tokens = 4096  # Per-request output limit (how many tokens the model can gen
 temperature = 0.1
 use_oauth = true

+[providers.anthropic]
+api_key = "your-anthropic-api-key"
+model = "claude-sonnet-4-5"
+max_tokens = 4096
+temperature = 0.3  # Slightly higher temperature for more creative implementations
+# cache_config = "ephemeral"  # Optional: Enable prompt caching
+# Options: "ephemeral", "5minute", "1hour"
+# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
+# enable_1m_context = true    # optional, more expensive
+# thinking_budget_tokens = 10000  # Optional: Enable extended thinking mode with token budget
+# Allows the model to "think" before responding. Useful for complex reasoning tasks.
+
+
 # Multiple OpenAI-compatible providers can be configured with custom names
 # Each provider gets its own section under [providers.openai_compatible.<name>]
 # [providers.openai_compatible.openrouter]
@@ -46,6 +59,7 @@ timeout_seconds = 60
 # Retry configuration for recoverable errors (timeouts, rate limits, etc.)
 max_retry_attempts = 3              # Default mode retry attempts
 autonomous_max_retry_attempts = 6   # Autonomous mode retry attempts (higher for long-running tasks)
+allow_multiple_tool_calls = true  # Enable multiple tool calls

 [computer_control]
 enabled = false  # Set to true to enable computer control (requires OS permissions)
--- a/crates/g3-cli/Cargo.toml
+++ b/crates/g3-cli/Cargo.toml
@@ -7,7 +7,10 @@ description = "CLI interface for G3 AI coding agent"
 [dependencies]
 g3-core = { path = "../g3-core" }
 g3-config = { path = "../g3-config" }
+g3-planner = { path = "../g3-planner" }
+g3-providers = { path = "../g3-providers" }
 clap = { workspace = true }
+g3-ensembles = { path = "../g3-ensembles" }
 tokio = { workspace = true }
 anyhow = { workspace = true }
 tracing = { workspace = true }
@@ -17,8 +20,13 @@ serde_json = { workspace = true }
 rustyline = "17.0.1"
 dirs = "5.0"
 tokio-util = "0.7"
+sha2 = "0.10"
+hex = "0.4"
 indicatif = "0.17"
 chrono = { version = "0.4", features = ["serde"] }
 crossterm = "0.29.0"
 ratatui = "0.29"
 termimad = "0.34.0"
+
+[dev-dependencies]
+tempfile = "3.8"
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
--- a/crates/g3-cli/src/machine_ui_writer.rs
+++ b/crates/g3-cli/src/machine_ui_writer.rs
@@ -87,8 +87,27 @@ impl UiWriter for MachineUiWriter {
    fn flush(&self) {
        let _ = io::stdout().flush();
    }
-    
+
    fn wants_full_output(&self) -> bool {
-        true  // Machine mode wants complete, untruncated output
+        true // Machine mode wants complete, untruncated output
+    }
+
+    fn prompt_user_yes_no(&self, message: &str) -> bool {
+        // In machine mode, we can't interactively prompt, so we log the request and return true
+        // to allow automation to proceed.
+        println!("PROMPT_USER_YES_NO: {}", message);
+        true
+    }
+
+    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
+        println!("PROMPT_USER_CHOICE: {}", message);
+        println!("OPTIONS: {:?}", options);
+        // Default to first option (index 0) for automation
+        0
+    }
+
+    fn print_final_output(&self, summary: &str) {
+        println!("FINAL_OUTPUT:");
+        println!("{}", summary);
    }
 }
--- a/crates/g3-cli/src/simple_output.rs
+++ b/crates/g3-cli/src/simple_output.rs
@@ -1,11 +1,14 @@
 /// Simple output helper for printing messages
+#[derive(Clone)]
 pub struct SimpleOutput {
    machine_mode: bool,
 }

 impl SimpleOutput {
    pub fn new() -> Self {
-        SimpleOutput { machine_mode: false }
+        SimpleOutput {
+            machine_mode: false,
+        }
    }

    pub fn new_with_mode(machine_mode: bool) -> Self {
--- a/crates/g3-cli/src/ui_writer_impl.rs
+++ b/crates/g3-cli/src/ui_writer_impl.rs
@@ -1,77 +1,22 @@
 use g3_core::ui_writer::UiWriter;
 use std::io::{self, Write};
-use std::sync::Mutex;
+use termimad::MadSkin;

 /// Console implementation of UiWriter that prints to stdout
 pub struct ConsoleUiWriter {
-    current_tool_name: Mutex<Option<String>>,
-    current_tool_args: Mutex<Vec<(String, String)>>,
-    current_output_line: Mutex<Option<String>>,
-    output_line_printed: Mutex<bool>,
-    in_todo_tool: Mutex<bool>,
+    current_tool_name: std::sync::Mutex<Option<String>>,
+    current_tool_args: std::sync::Mutex<Vec<(String, String)>>,
+    current_output_line: std::sync::Mutex<Option<String>>,
+    output_line_printed: std::sync::Mutex<bool>,
 }

 impl ConsoleUiWriter {
    pub fn new() -> Self {
        Self {
-            current_tool_name: Mutex::new(None),
-            current_tool_args: Mutex::new(Vec::new()),
-            current_output_line: Mutex::new(None),
-            output_line_printed: Mutex::new(false),
-            in_todo_tool: Mutex::new(false),
-        }
-    }
-
-    fn print_todo_line(&self, line: &str) {
-        // Transform and print todo list lines elegantly
-        let trimmed = line.trim();
-        
-        // Skip the "📝 TODO list:" prefix line
-        if trimmed.starts_with("📝 TODO list:") || trimmed == "📝 TODO list is empty" {
-            return;
-        }
-        
-        // Handle empty lines
-        if trimmed.is_empty() {
-            println!();
-            return;
-        }
-        
-        // Detect indentation level
-        let indent_count = line.chars().take_while(|c| c.is_whitespace()).count();
-        let indent = "  ".repeat(indent_count / 2); // Convert spaces to visual indent
-        
-        // Format based on line type
-        if trimmed.starts_with("- [ ]") {
-            // Incomplete task
-            let task = trimmed.strip_prefix("- [ ]").unwrap_or(trimmed).trim();
-            println!("{}☐ {}", indent, task);
-        } else if trimmed.starts_with("- [x]") || trimmed.starts_with("- [X]") {
-            // Completed task
-            let task = trimmed.strip_prefix("- [x]")
-                .or_else(|| trimmed.strip_prefix("- [X]"))
-                .unwrap_or(trimmed)
-                .trim();
-            println!("{}\x1b[2m☑ {}\x1b[0m", indent, task);
-        } else if trimmed.starts_with("- ") {
-            // Regular bullet point
-            let item = trimmed.strip_prefix("- ").unwrap_or(trimmed).trim();
-            println!("{}• {}", indent, item);
-        } else if trimmed.starts_with("# ") {
-            // Heading
-            let heading = trimmed.strip_prefix("# ").unwrap_or(trimmed).trim();
-            println!("\n\x1b[1m{}\x1b[0m", heading);
-        } else if trimmed.starts_with("## ") {
-            // Subheading
-            let subheading = trimmed.strip_prefix("## ").unwrap_or(trimmed).trim();
-            println!("\n\x1b[1m{}\x1b[0m", subheading);
-        } else if trimmed.starts_with("**") && trimmed.ends_with("**") {
-            // Bold text (section marker)
-            let text = trimmed.trim_start_matches("**").trim_end_matches("**");
-            println!("{}\x1b[1m{}\x1b[0m", indent, text);
-        } else {
-            // Regular text or note
-            println!("{}{}", indent, trimmed);
+            current_tool_name: std::sync::Mutex::new(None),
+            current_tool_args: std::sync::Mutex::new(Vec::new()),
+            current_output_line: std::sync::Mutex::new(None),
+            output_line_printed: std::sync::Mutex::new(false),
        }
    }
 }
@@ -105,31 +50,31 @@ impl UiWriter for ConsoleUiWriter {
    fn print_context_thinning(&self, message: &str) {
        // Animated highlight for context thinning
        // Use bright cyan/green with a quick flash animation
-        
+
        // Flash animation: print with bright background, then normal
        let frames = vec![
-            "\x1b[1;97;46m",  // Frame 1: Bold white on cyan background
-            "\x1b[1;97;42m",  // Frame 2: Bold white on green background
-            "\x1b[1;96;40m",  // Frame 3: Bold cyan on black background
+            "\x1b[1;97;46m", // Frame 1: Bold white on cyan background
+            "\x1b[1;97;42m", // Frame 2: Bold white on green background
+            "\x1b[1;96;40m", // Frame 3: Bold cyan on black background
        ];
-        
+
        println!();
-        
+
        // Quick flash animation
        for frame in &frames {
            print!("\r{} ✨ {} ✨\x1b[0m", frame, message);
            let _ = io::stdout().flush();
            std::thread::sleep(std::time::Duration::from_millis(80));
        }
-        
+
        // Final display with bright cyan and sparkle emojis
        print!("\r\x1b[1;96m✨ {} ✨\x1b[0m", message);
        println!();
-        
+
        // Add a subtle "success" indicator line
        println!("\x1b[2;36m   └─ Context optimized successfully\x1b[0m");
        println!();
-        
+
        let _ = io::stdout().flush();
    }

@@ -137,14 +82,6 @@ impl UiWriter for ConsoleUiWriter {
        // Store the tool name and clear args for collection
        *self.current_tool_name.lock().unwrap() = Some(tool_name.to_string());
        self.current_tool_args.lock().unwrap().clear();
-        
-        // Check if this is a todo tool call
-        let is_todo = tool_name == "todo_read" || tool_name == "todo_write";
-        *self.in_todo_tool.lock().unwrap() = is_todo;
-        
-        // For todo tools, we'll skip the normal header and print a custom one later
-        if is_todo {
-        }
    }

    fn print_tool_arg(&self, key: &str, value: &str) {
@@ -167,13 +104,10 @@ impl UiWriter for ConsoleUiWriter {
    }

    fn print_tool_output_header(&self) {
-        // Skip normal header for todo tools
-        if *self.in_todo_tool.lock().unwrap() {
-            println!(); // Just add a newline
-            return;
-        }
-        
        println!();
+        // Reset output_line_printed at the start of a new tool output
+        // This ensures the header isn't cleared by update_tool_output_line
+        *self.output_line_printed.lock().unwrap() = false;
        // Now print the tool header with the most important arg in bold green
        if let Some(tool_name) = self.current_tool_name.lock().unwrap().as_ref() {
            let args = self.current_tool_args.lock().unwrap();
@@ -192,7 +126,8 @@ impl UiWriter for ConsoleUiWriter {
                // Truncate long values for display
                let display_value = if first_line.len() > 80 {
                    // Use char_indices to safely truncate at character boundary
-                    let truncate_at = first_line.char_indices()
+                    let truncate_at = first_line
+                        .char_indices()
                        .nth(77)
                        .map(|(i, _)| i)
                        .unwrap_or(first_line.len());
@@ -206,10 +141,18 @@ impl UiWriter for ConsoleUiWriter {
                    // Check if start or end parameters are present
                    let has_start = args.iter().any(|(k, _)| k == "start");
                    let has_end = args.iter().any(|(k, _)| k == "end");
-                    
+
                    if has_start || has_end {
-                        let start_val = args.iter().find(|(k, _)| k == "start").map(|(_, v)| v.as_str()).unwrap_or("0");
-                        let end_val = args.iter().find(|(k, _)| k == "end").map(|(_, v)| v.as_str()).unwrap_or("end");
+                        let start_val = args
+                            .iter()
+                            .find(|(k, _)| k == "start")
+                            .map(|(_, v)| v.as_str())
+                            .unwrap_or("0");
+                        let end_val = args
+                            .iter()
+                            .find(|(k, _)| k == "end")
+                            .map(|(_, v)| v.as_str())
+                            .unwrap_or("end");
                        format!(" [{}..{}]", start_val, end_val)
                    } else {
                        String::new()
@@ -219,7 +162,10 @@ impl UiWriter for ConsoleUiWriter {
                };

                // Print with bold green tool name, purple (non-bold) for pipe and args
-                println!("┌─\x1b[1;32m {}\x1b[0m\x1b[35m | {}{}\x1b[0m", tool_name, display_value, header_suffix);
+                println!(
+                    "┌─\x1b[1;32m {}\x1b[0m\x1b[35m | {}{}\x1b[0m",
+                    tool_name, display_value, header_suffix
+                );
            } else {
                // Print with bold green formatting using ANSI escape codes
                println!("┌─\x1b[1;32m {}\x1b[0m", tool_name);
@@ -247,21 +193,14 @@ impl UiWriter for ConsoleUiWriter {
    }

    fn print_tool_output_line(&self, line: &str) {
-        // Special handling for todo tools
-        if *self.in_todo_tool.lock().unwrap() {
-            self.print_todo_line(line);
+        // Skip the TODO list header line
+        if line.starts_with("📝 TODO list:") {
            return;
        }
-        
        println!("│ \x1b[2m{}\x1b[0m", line);
    }

    fn print_tool_output_summary(&self, count: usize) {
-        // Skip for todo tools
-        if *self.in_todo_tool.lock().unwrap() {
-            return;
-        }
-        
        println!(
            "│ \x1b[2m({} line{})\x1b[0m",
            count,
@@ -270,13 +209,6 @@ impl UiWriter for ConsoleUiWriter {
    }

    fn print_tool_timing(&self, duration_str: &str) {
-        // For todo tools, just print a simple completion message
-        if *self.in_todo_tool.lock().unwrap() {
-            println!();
-            *self.in_todo_tool.lock().unwrap() = false;
-            return;
-        }
-        
        // Parse the duration string to determine color
        // Format is like "1.5s", "500ms", "2m 30.0s"
        let color_code = if duration_str.ends_with("ms") {
@@ -343,5 +275,79 @@ impl UiWriter for ConsoleUiWriter {
    fn flush(&self) {
        let _ = io::stdout().flush();
    }
-}

+    fn prompt_user_yes_no(&self, message: &str) -> bool {
+        print!("{} [y/N] ", message);
+        let _ = io::stdout().flush();
+
+        let mut input = String::new();
+        if io::stdin().read_line(&mut input).is_ok() {
+            let trimmed = input.trim().to_lowercase();
+            trimmed == "y" || trimmed == "yes"
+        } else {
+            false
+        }
+    }
+
+    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
+        println!("{} ", message);
+        for (i, option) in options.iter().enumerate() {
+            println!("  [{}] {}", i + 1, option);
+        }
+        print!("Select an option (1-{}): ", options.len());
+        let _ = io::stdout().flush();
+
+        loop {
+            let mut input = String::new();
+            if io::stdin().read_line(&mut input).is_ok() {
+                if let Ok(choice) = input.trim().parse::<usize>() {
+                    if choice > 0 && choice <= options.len() {
+                        return choice - 1;
+                    }
+                }
+            }
+            print!("Invalid choice. Please select (1-{}): ", options.len());
+            let _ = io::stdout().flush();
+        }
+    }
+
+    fn print_final_output(&self, summary: &str) {
+        // Show spinner while "formatting"
+        let spinner_frames = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏'];
+        let message = "summarizing work done...";
+
+        // Brief spinner animation (about 0.5 seconds)
+        for i in 0..5 {
+            let frame = spinner_frames[i % spinner_frames.len()];
+            print!("\r\x1b[36m{} {}\x1b[0m", frame, message);
+            let _ = io::stdout().flush();
+            std::thread::sleep(std::time::Duration::from_millis(100));
+        }
+
+        // Clear the spinner line
+        print!("\r\x1b[2K");
+        let _ = io::stdout().flush();
+
+        // Create a styled markdown skin
+        let mut skin = MadSkin::default();
+        // Customize colors for better terminal appearance
+        skin.bold.set_fg(termimad::crossterm::style::Color::Green);
+        skin.italic.set_fg(termimad::crossterm::style::Color::Cyan);
+        skin.headers[0].set_fg(termimad::crossterm::style::Color::Magenta);
+        skin.headers[1].set_fg(termimad::crossterm::style::Color::Magenta);
+        skin.code_block.set_fg(termimad::crossterm::style::Color::Yellow);
+        skin.inline_code.set_fg(termimad::crossterm::style::Color::Yellow);
+
+        // Print a header separator
+        println!("\x1b[1;35m━━━ Summary ━━━\x1b[0m");
+        println!();
+
+        // Render the markdown
+        let rendered = skin.term_text(summary);
+        print!("{}", rendered);
+
+        // Print a footer separator
+        println!();
+        println!("\x1b[1;35m━━━━━━━━━━━━━━━\x1b[0m");
+    }
+}
--- a/crates/g3-cli/tests/coach_feedback_extraction_test.rs
+++ b/crates/g3-cli/tests/coach_feedback_extraction_test.rs
@@ -0,0 +1,336 @@
+use serde_json::json;
+use std::fs;
+use tempfile::TempDir;
+
+#[test]
+fn test_extract_coach_feedback_with_timing_message() {
+    // Create a temporary directory for logs
+    let temp_dir = TempDir::new().unwrap();
+    let logs_dir = temp_dir.path().join("logs");
+    fs::create_dir(&logs_dir).unwrap();
+
+    // Create a mock session log with the problematic conversation history
+    // where timing message appears after the tool result
+    let session_id = "test_session_123";
+    let log_file_path = logs_dir.join(format!("g3_session_{}.json", session_id));
+
+    let log_content = json!({
+        "session_id": session_id,
+        "context_window": {
+            "conversation_history": [
+                {
+                    "role": "assistant",
+                    "content": "{\"tool\": \"final_output\", \"args\": {\"summary\":\"IMPLEMENTATION_APPROVED\"}}"
+                },
+                {
+                    "role": "user",
+                    "content": "Tool result: IMPLEMENTATION_APPROVED"
+                },
+                {
+                    "role": "assistant",
+                    "content": "🕝 27.7s | 💭 7.5s"
+                }
+            ]
+        }
+    });
+
+    fs::write(&log_file_path, serde_json::to_string_pretty(&log_content).unwrap()).unwrap();
+
+    // Now test the extraction logic
+    let log_content_str = fs::read_to_string(&log_file_path).unwrap();
+    let log_json: serde_json::Value = serde_json::from_str(&log_content_str).unwrap();
+
+    if let Some(context_window) = log_json.get("context_window") {
+        if let Some(conversation_history) = context_window.get("conversation_history") {
+            if let Some(messages) = conversation_history.as_array() {
+                // This is the key logic we're testing - find the last USER message with "Tool result:"
+                let last_tool_result = messages.iter().rev().find(|msg| {
+                    if let Some(role) = msg.get("role") {
+                        if let Some(role_str) = role.as_str() {
+                            if role_str == "User" || role_str == "user" {
+                                if let Some(content) = msg.get("content") {
+                                    if let Some(content_str) = content.as_str() {
+                                        return content_str.starts_with("Tool result:");
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    false
+                });
+
+                // Verify we found the correct message
+                assert!(last_tool_result.is_some(), "Should find the tool result message");
+
+                if let Some(last_message) = last_tool_result {
+                    if let Some(content) = last_message.get("content") {
+                        if let Some(content_str) = content.as_str() {
+                            let feedback = if content_str.starts_with("Tool result: ") {
+                                content_str.strip_prefix("Tool result: ").unwrap_or(content_str)
+                            } else {
+                                content_str
+                            };
+
+                            // Verify we extracted the correct feedback
+                            assert_eq!(feedback, "IMPLEMENTATION_APPROVED", "Should extract the actual feedback, not timing");
+                            
+                            // Verify the feedback is NOT the timing message
+                            assert!(!feedback.contains("🕝"), "Feedback should not be the timing message");
+                            
+                            println!("✅ Successfully extracted coach feedback: {}", feedback);
+                            return;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    panic!("Failed to extract coach feedback");
+}
+
+#[test]
+fn test_extract_only_final_output_tool_results() {
+    // Test that we only extract tool results from final_output, not from other tools
+    let temp_dir = TempDir::new().unwrap();
+    let logs_dir = temp_dir.path().join("logs");
+    fs::create_dir(&logs_dir).unwrap();
+
+    let session_id = "test_session_final_output_only";
+    let log_file_path = logs_dir.join(format!("g3_session_{}.json", session_id));
+
+    let log_content = json!({
+        "session_id": session_id,
+        "context_window": {
+            "conversation_history": [
+                {
+                    "role": "assistant",
+                    "content": "{\"tool\": \"shell\", \"args\": {\"command\":\"ls\"}}"
+                },
+                {
+                    "role": "user",
+                    "content": "Tool result: file1.txt\nfile2.txt"
+                },
+                {
+                    "role": "assistant",
+                    "content": "{\"tool\": \"read_file\", \"args\": {\"file_path\":\"test.txt\"}}"
+                },
+                {
+                    "role": "user",
+                    "content": "Tool result: This is test content"
+                },
+                {
+                    "role": "assistant",
+                    "content": "{\"tool\": \"final_output\", \"args\": {\"summary\":\"APPROVED_RESULT\"}}"
+                },
+                {
+                    "role": "user",
+                    "content": "Tool result: APPROVED_RESULT"
+                },
+                {
+                    "role": "assistant",
+                    "content": "🕝 20.5s | 💭 5.2s"
+                }
+            ]
+        }
+    });
+
+    fs::write(&log_file_path, serde_json::to_string_pretty(&log_content).unwrap()).unwrap();
+
+    // Test the new extraction logic that verifies the tool is final_output
+    let log_content_str = fs::read_to_string(&log_file_path).unwrap();
+    let log_json: serde_json::Value = serde_json::from_str(&log_content_str).unwrap();
+
+    if let Some(context_window) = log_json.get("context_window") {
+        if let Some(conversation_history) = context_window.get("conversation_history") {
+            if let Some(messages) = conversation_history.as_array() {
+                // Go backwards through messages to find final_output tool result
+                for i in (0..messages.len()).rev() {
+                    let msg = &messages[i];
+                    
+                    if let Some(role) = msg.get("role") {
+                        if let Some(role_str) = role.as_str() {
+                            if role_str == "User" || role_str == "user" {
+                                if let Some(content) = msg.get("content") {
+                                    if let Some(content_str) = content.as_str() {
+                                        if content_str.starts_with("Tool result:") {
+                                            // Check if preceding message was final_output
+                                            if i > 0 {
+                                                let prev_msg = &messages[i - 1];
+                                                if let Some(prev_content) = prev_msg.get("content") {
+                                                    if let Some(prev_content_str) = prev_content.as_str() {
+                                                        if prev_content_str.contains("\"tool\": \"final_output\"") {
+                                                            let feedback = content_str.strip_prefix("Tool result: ").unwrap_or(content_str);
+                                                            assert_eq!(feedback, "APPROVED_RESULT", "Should extract only final_output result");
+                                                            println!("✅ Correctly extracted only final_output tool result: {}", feedback);
+                                                            return;
+                                                        }
+                                                    }
+                                                }
+                                            }
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    panic!("Failed to extract final_output tool result");
+}
+
+#[test]
+fn test_extract_coach_feedback_without_timing_message() {
+    // Create a temporary directory for logs
+    let temp_dir = TempDir::new().unwrap();
+    let logs_dir = temp_dir.path().join("logs");
+    fs::create_dir(&logs_dir).unwrap();
+
+    // Test the case where there's no timing message (backward compatibility)
+    let session_id = "test_session_456";
+    let log_file_path = logs_dir.join(format!("g3_session_{}.json", session_id));
+
+    let log_content = json!({
+        "session_id": session_id,
+        "context_window": {
+            "conversation_history": [
+                {
+                    "role": "assistant",
+                    "content": "{\"tool\": \"final_output\", \"args\": {\"summary\":\"TEST_FEEDBACK\"}}"
+                },
+                {
+                    "role": "user",
+                    "content": "Tool result: TEST_FEEDBACK"
+                }
+            ]
+        }
+    });
+
+    fs::write(&log_file_path, serde_json::to_string_pretty(&log_content).unwrap()).unwrap();
+
+    // Test extraction
+    let log_content_str = fs::read_to_string(&log_file_path).unwrap();
+    let log_json: serde_json::Value = serde_json::from_str(&log_content_str).unwrap();
+
+    if let Some(context_window) = log_json.get("context_window") {
+        if let Some(conversation_history) = context_window.get("conversation_history") {
+            if let Some(messages) = conversation_history.as_array() {
+                let last_tool_result = messages.iter().rev().find(|msg| {
+                    if let Some(role) = msg.get("role") {
+                        if let Some(role_str) = role.as_str() {
+                            if role_str == "User" || role_str == "user" {
+                                if let Some(content) = msg.get("content") {
+                                    if let Some(content_str) = content.as_str() {
+                                        return content_str.starts_with("Tool result:");
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    false
+                });
+
+                assert!(last_tool_result.is_some());
+
+                if let Some(last_message) = last_tool_result {
+                    if let Some(content) = last_message.get("content") {
+                        if let Some(content_str) = content.as_str() {
+                            let feedback = content_str.strip_prefix("Tool result: ").unwrap_or(content_str);
+                            assert_eq!(feedback, "TEST_FEEDBACK");
+                            println!("✅ Successfully extracted coach feedback without timing: {}", feedback);
+                            return;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    panic!("Failed to extract coach feedback");
+}
+
+#[test]
+fn test_extract_coach_feedback_with_multiple_tool_results() {
+    // Test that we get the LAST tool result when there are multiple
+    let temp_dir = TempDir::new().unwrap();
+    let logs_dir = temp_dir.path().join("logs");
+    fs::create_dir(&logs_dir).unwrap();
+
+    let session_id = "test_session_789";
+    let log_file_path = logs_dir.join(format!("g3_session_{}.json", session_id));
+
+    let log_content = json!({
+        "session_id": session_id,
+        "context_window": {
+            "conversation_history": [
+                {
+                    "role": "assistant",
+                    "content": "{\"tool\": \"shell\", \"args\": {\"command\":\"ls\"}}"
+                },
+                {
+                    "role": "user",
+                    "content": "Tool result: file1.txt\nfile2.txt"
+                },
+                {
+                    "role": "assistant",
+                    "content": "{\"tool\": \"final_output\", \"args\": {\"summary\":\"FINAL_RESULT\"}}"
+                },
+                {
+                    "role": "user",
+                    "content": "Tool result: FINAL_RESULT"
+                },
+                {
+                    "role": "assistant",
+                    "content": "🕝 15.2s | 💭 3.1s"
+                }
+            ]
+        }
+    });
+
+    fs::write(&log_file_path, serde_json::to_string_pretty(&log_content).unwrap()).unwrap();
+
+    // Test extraction
+    let log_content_str = fs::read_to_string(&log_file_path).unwrap();
+    let log_json: serde_json::Value = serde_json::from_str(&log_content_str).unwrap();
+
+    if let Some(context_window) = log_json.get("context_window") {
+        if let Some(conversation_history) = context_window.get("conversation_history") {
+            if let Some(messages) = conversation_history.as_array() {
+                let last_tool_result = messages.iter().rev().find(|msg| {
+                    if let Some(role) = msg.get("role") {
+                        if let Some(role_str) = role.as_str() {
+                            if role_str == "User" || role_str == "user" {
+                                if let Some(content) = msg.get("content") {
+                                    if let Some(content_str) = content.as_str() {
+                                        return content_str.starts_with("Tool result:");
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    false
+                });
+
+                assert!(last_tool_result.is_some());
+
+                if let Some(last_message) = last_tool_result {
+                    if let Some(content) = last_message.get("content") {
+                        if let Some(content_str) = content.as_str() {
+                            let feedback = content_str.strip_prefix("Tool result: ").unwrap_or(content_str);
+                            // Should get the LAST tool result (final_output), not the first one (shell)
+                            assert_eq!(feedback, "FINAL_RESULT", "Should extract the last tool result");
+                            assert!(!feedback.contains("file1.txt"), "Should not extract earlier tool results");
+                            println!("✅ Successfully extracted last tool result: {}", feedback);
+                            return;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    panic!("Failed to extract coach feedback");
+}
--- a/crates/g3-computer-control/build.rs
+++ b/crates/g3-computer-control/build.rs
@@ -34,18 +34,40 @@ fn main() {
        .expect("Failed to find .build/release directory");

    // Copy the dylib to the output directory so it can be found at runtime
-    let target_dir = manifest_dir.parent().unwrap().parent().unwrap().join("target");
+    let target_dir = manifest_dir
+        .parent()
+        .unwrap()
+        .parent()
+        .unwrap()
+        .join("target");
    let profile = env::var("PROFILE").unwrap_or_else(|_| "debug".to_string());
-    let output_dir = target_dir.join(&profile);
-    
+
+    // Determine the actual target directory (could be llvm-cov-target or regular target)
+    let target_dir_name =
+        env::var("CARGO_TARGET_DIR").unwrap_or_else(|_| target_dir.to_string_lossy().to_string());
+    let actual_target_dir = PathBuf::from(&target_dir_name);
+    let output_dir = actual_target_dir.join(&profile);
+
    let dylib_src = lib_path.join("libVisionBridge.dylib");
    let dylib_dst = output_dir.join("libVisionBridge.dylib");
-    
-    std::fs::copy(&dylib_src, &dylib_dst)
-        .expect(&format!("Failed to copy dylib from {} to {}", dylib_src.display(), dylib_dst.display()));
-    
-    println!("cargo:warning=Copied libVisionBridge.dylib to {}", dylib_dst.display());
-    
+
+    // Create output directory if it doesn't exist
+    std::fs::create_dir_all(&output_dir).expect(&format!(
+        "Failed to create output directory {}",
+        output_dir.display()
+    ));
+
+    std::fs::copy(&dylib_src, &dylib_dst).expect(&format!(
+        "Failed to copy dylib from {} to {}",
+        dylib_src.display(),
+        dylib_dst.display()
+    ));
+
+    println!(
+        "cargo:warning=Copied libVisionBridge.dylib to {}",
+        dylib_dst.display()
+    );
+
    // Add rpath so the dylib can be found at runtime
    println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path");
    println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
@@ -59,5 +81,8 @@ fn main() {
    println!("cargo:rustc-link-lib=framework=CoreGraphics");
    println!("cargo:rustc-link-lib=framework=CoreImage");

-    println!("cargo:warning=VisionBridge built successfully at {}", lib_path.display());
+    println!(
+        "cargo:warning=VisionBridge built successfully at {}",
+        lib_path.display()
+    );
 }
--- a/crates/g3-computer-control/examples/debug_screenshot.rs
+++ b/crates/g3-computer-control/examples/debug_screenshot.rs
@@ -3,19 +3,19 @@ use core_graphics::display::CGDisplay;
 fn main() {
    let display = CGDisplay::main();
    let image = display.image().expect("Failed to capture screen");
-    
+
    println!("CGImage properties:");
    println!("  Width: {}", image.width());
    println!("  Height: {}", image.height());
    println!("  Bits per component: {}", image.bits_per_component());
    println!("  Bits per pixel: {}", image.bits_per_pixel());
    println!("  Bytes per row: {}", image.bytes_per_row());
-    
+
    let data = image.data();
    let expected_size = image.width() * image.height() * 4;
    println!("  Data length: {}", data.len());
    println!("  Expected (w*h*4): {}", expected_size);
-    
+
    // Check if there's padding in rows
    let bytes_per_row = image.bytes_per_row();
    let width = image.width();
@@ -23,16 +23,25 @@ fn main() {
    println!("\nRow alignment:");
    println!("  Actual bytes per row: {}", bytes_per_row);
    println!("  Expected (width * 4): {}", expected_bytes_per_row);
-    println!("  Padding per row: {}", bytes_per_row - expected_bytes_per_row);
-    
+    println!(
+        "  Padding per row: {}",
+        bytes_per_row - expected_bytes_per_row
+    );
+
    // Sample some pixels from different locations
    println!("\nFirst 3 pixels (raw bytes):");
    for i in 0..3 {
        let offset = i * 4;
-        println!("  Pixel {}: [{:3}, {:3}, {:3}, {:3}]", 
-                 i, data[offset], data[offset+1], data[offset+2], data[offset+3]);
+        println!(
+            "  Pixel {}: [{:3}, {:3}, {:3}, {:3}]",
+            i,
+            data[offset],
+            data[offset + 1],
+            data[offset + 2],
+            data[offset + 3]
+        );
    }
-    
+
    // Check a pixel from the middle
    let mid_row = image.height() / 2;
    let mid_col = image.width() / 2;
@@ -40,7 +49,12 @@ fn main() {
    println!("\nMiddle pixel (row {}, col {}):", mid_row, mid_col);
    println!("  Offset: {}", mid_offset);
    if mid_offset + 3 < data.len() as usize {
-        println!("  Bytes: [{:3}, {:3}, {:3}, {:3}]", 
-                 data[mid_offset], data[mid_offset+1], data[mid_offset+2], data[mid_offset+3]);
+        println!(
+            "  Bytes: [{:3}, {:3}, {:3}, {:3}]",
+            data[mid_offset],
+            data[mid_offset + 1],
+            data[mid_offset + 2],
+            data[mid_offset + 3]
+        );
    }
 }
--- a/crates/g3-computer-control/examples/list_windows.rs
+++ b/crates/g3-computer-control/examples/list_windows.rs
@@ -1,34 +1,38 @@
-use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo};
+use core_foundation::base::{TCFType, ToVoid};
 use core_foundation::dictionary::CFDictionary;
 use core_foundation::string::CFString;
-use core_foundation::base::{TCFType, ToVoid};
+use core_graphics::window::{
+    kCGNullWindowID, kCGWindowListOptionOnScreenOnly, CGWindowListCopyWindowInfo,
+};

 fn main() {
    println!("Listing all on-screen windows...");
    println!("{:<10} {:<25} {}", "Window ID", "Owner", "Title");
    println!("{}", "-".repeat(80));
-    
+
    unsafe {
-        let window_list = CGWindowListCopyWindowInfo(
-            kCGWindowListOptionOnScreenOnly,
-            kCGNullWindowID
-        );
-        
-        let count = core_foundation::array::CFArray::<CFDictionary>::wrap_under_create_rule(window_list).len();
-        let array = core_foundation::array::CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
-        
+        let window_list =
+            CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID);
+
+        let count =
+            core_foundation::array::CFArray::<CFDictionary>::wrap_under_create_rule(window_list)
+                .len();
+        let array =
+            core_foundation::array::CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
+
        for i in 0..count {
            let dict = array.get(i).unwrap();
-            
+
            // Get window ID
            let window_id_key = CFString::from_static_string("kCGWindowNumber");
            let window_id: i64 = if let Some(value) = dict.find(window_id_key.to_void()) {
-                let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
+                let num: core_foundation::number::CFNumber =
+                    TCFType::wrap_under_get_rule(*value as *const _);
                num.to_i64().unwrap_or(0)
            } else {
                0
            };
-            
+
            // Get owner name
            let owner_key = CFString::from_static_string("kCGWindowOwnerName");
            let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
@@ -37,7 +41,7 @@ fn main() {
            } else {
                "Unknown".to_string()
            };
-            
+
            // Get window name/title
            let name_key = CFString::from_static_string("kCGWindowName");
            let title: String = if let Some(value) = dict.find(name_key.to_void()) {
@@ -46,7 +50,7 @@ fn main() {
            } else {
                "".to_string()
            };
-            
+
            // Show all windows
            if !owner.is_empty() {
                println!("{:<10} {:<25} {}", window_id, owner, title);
--- a/crates/g3-computer-control/examples/macax_demo.rs
+++ b/crates/g3-computer-control/examples/macax_demo.rs
@@ -11,11 +11,11 @@ use g3_computer_control::MacAxController;
 async fn main() -> Result<()> {
    println!("🍎 macOS Accessibility API Demo\n");
    println!("This demo shows how to control macOS applications using the Accessibility API.\n");
-    
+
    // Create controller
    let controller = MacAxController::new()?;
    println!("✅ MacAxController initialized\n");
-    
+
    // List running applications
    println!("📱 Listing running applications:");
    match controller.list_applications() {
@@ -30,7 +30,7 @@ async fn main() -> Result<()> {
        Err(e) => println!("  ❌ Error: {}", e),
    }
    println!();
-    
+
    // Get frontmost app
    println!("🎯 Getting frontmost application:");
    match controller.get_frontmost_app() {
@@ -38,16 +38,16 @@ async fn main() -> Result<()> {
        Err(e) => println!("  ❌ Error: {}", e),
    }
    println!();
-    
+
    // Example: Activate Finder and get its UI tree
    println!("📂 Activating Finder and inspecting UI:");
    match controller.activate_app("Finder") {
        Ok(_) => {
            println!("  ✅ Finder activated");
-            
+
            // Wait a moment for activation
            tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
-            
+
            // Get UI tree
            match controller.get_ui_tree("Finder", 2) {
                Ok(tree) => {
@@ -62,13 +62,13 @@ async fn main() -> Result<()> {
        Err(e) => println!("  ❌ Error: {}", e),
    }
    println!();
-    
+
    println!("✨ Demo complete!\n");
    println!("💡 Tips:");
    println!("  - Use --macax flag with g3 to enable these tools");
    println!("  - Grant accessibility permissions in System Preferences");
    println!("  - Add accessibility identifiers to your apps for easier automation");
    println!("  - See docs/macax-tools.md for full documentation\n");
-    
+
    Ok(())
 }
--- a/crates/g3-computer-control/examples/safari_demo.rs
+++ b/crates/g3-computer-control/examples/safari_demo.rs
@@ -1,64 +1,66 @@
-use g3_computer_control::SafariDriver;
-use g3_computer_control::webdriver::WebDriverController;
 use anyhow::Result;
+use g3_computer_control::webdriver::WebDriverController;
+use g3_computer_control::SafariDriver;

 #[tokio::main]
 async fn main() -> Result<()> {
    println!("Safari WebDriver Demo");
    println!("=====================\n");
-    
+
    println!("Make sure to:");
    println!("1. Enable 'Allow Remote Automation' in Safari's Develop menu");
    println!("2. Run: /usr/bin/safaridriver --enable");
    println!("3. Start safaridriver in another terminal: safaridriver --port 4444\n");
-    
+
    println!("Connecting to SafariDriver...");
    let mut driver = SafariDriver::new().await?;
    println!("✅ Connected!\n");
-    
+
    // Navigate to a website
    println!("Navigating to example.com...");
    driver.navigate("https://example.com").await?;
    println!("✅ Navigated\n");
-    
+
    // Get page title
    let title = driver.title().await?;
    println!("Page title: {}\n", title);
-    
+
    // Get current URL
    let url = driver.current_url().await?;
    println!("Current URL: {}\n", url);
-    
+
    // Find an element
    println!("Finding h1 element...");
    let h1 = driver.find_element("h1").await?;
    let h1_text = h1.text().await?;
    println!("H1 text: {}\n", h1_text);
-    
+
    // Find all paragraphs
    println!("Finding all paragraphs...");
    let paragraphs = driver.find_elements("p").await?;
    println!("Found {} paragraphs\n", paragraphs.len());
-    
+
    // Get page source
    println!("Getting page source...");
    let source = driver.page_source().await?;
    println!("Page source length: {} bytes\n", source.len());
-    
+
    // Execute JavaScript
    println!("Executing JavaScript...");
-    let result = driver.execute_script("return document.title", vec![]).await?;
+    let result = driver
+        .execute_script("return document.title", vec![])
+        .await?;
    println!("JS result: {:?}\n", result);
-    
+
    // Take a screenshot
    println!("Taking screenshot...");
    driver.screenshot("/tmp/safari_demo.png").await?;
    println!("✅ Screenshot saved to /tmp/safari_demo.png\n");
-    
+
    // Close the browser
    println!("Closing browser...");
    driver.quit().await?;
    println!("✅ Done!");
-    
+
    Ok(())
 }
--- a/crates/g3-computer-control/examples/test_permission_prompt.rs
+++ b/crates/g3-computer-control/examples/test_permission_prompt.rs
@@ -3,10 +3,13 @@ use g3_computer_control::create_controller;
 #[tokio::main]
 async fn main() {
    println!("Testing screenshot with permission prompt...");
-    
+
    let controller = create_controller().expect("Failed to create controller");
-    
-    match controller.take_screenshot("/tmp/test_with_prompt.png", None, None).await {
+
+    match controller
+        .take_screenshot("/tmp/test_with_prompt.png", None, None)
+        .await
+    {
        Ok(_) => {
            println!("\n✅ Screenshot saved to /tmp/test_with_prompt.png");
            println!("Opening screenshot...");
--- a/crates/g3-computer-control/examples/test_screencapture_direct.rs
+++ b/crates/g3-computer-control/examples/test_screencapture_direct.rs
@@ -2,29 +2,33 @@ use std::process::Command;

 fn main() {
    let path = "/tmp/rust_screencapture_test.png";
-    
+
    println!("Testing screencapture command from Rust...");
-    
+
    let mut cmd = Command::new("screencapture");
    cmd.arg("-x"); // No sound
    cmd.arg(path);
-    
+
    println!("Command: {:?}", cmd);
-    
+
    match cmd.output() {
        Ok(output) => {
            println!("Exit status: {}", output.status);
            println!("Stdout: {}", String::from_utf8_lossy(&output.stdout));
            println!("Stderr: {}", String::from_utf8_lossy(&output.stderr));
-            
+
            if output.status.success() {
                println!("\n✅ Screenshot saved to: {}", path);
-                
+
                // Check file exists and size
                if let Ok(metadata) = std::fs::metadata(path) {
-                    println!("File size: {} bytes ({:.1} MB)", metadata.len(), metadata.len() as f64 / 1_000_000.0);
+                    println!(
+                        "File size: {} bytes ({:.1} MB)",
+                        metadata.len(),
+                        metadata.len() as f64 / 1_000_000.0
+                    );
                }
-                
+
                // Open it
                let _ = Command::new("open").arg(path).spawn();
                println!("\nOpened screenshot - please verify it looks correct!");
--- a/crates/g3-computer-control/examples/test_screenshot_fix.rs
+++ b/crates/g3-computer-control/examples/test_screenshot_fix.rs
@@ -4,17 +4,23 @@ use image::{ImageBuffer, RgbaImage};
 fn main() {
    let display = CGDisplay::main();
    let image = display.image().expect("Failed to capture screen");
-    
+
    let width = image.width() as u32;
    let height = image.height() as u32;
    let bytes_per_row = image.bytes_per_row() as usize;
    let data = image.data();
-    
+
    println!("Testing screenshot fix...");
-    println!("Image: {}x{}, bytes_per_row: {}", width, height, bytes_per_row);
+    println!(
+        "Image: {}x{}, bytes_per_row: {}",
+        width, height, bytes_per_row
+    );
    println!("Expected bytes per row: {}", width * 4);
-    println!("Padding per row: {} bytes", bytes_per_row - (width as usize * 4));
-    
+    println!(
+        "Padding per row: {} bytes",
+        bytes_per_row - (width as usize * 4)
+    );
+
    // OLD METHOD (broken) - treating data as continuous
    println!("\n=== OLD METHOD (BROKEN) ===");
    let mut old_rgba = Vec::with_capacity(data.len() as usize);
@@ -26,14 +32,14 @@ fn main() {
    }
    println!("Converted {} pixels", old_rgba.len() / 4);
    println!("Expected {} pixels", width * height);
-    
+
    // NEW METHOD (fixed) - handling row padding
    println!("\n=== NEW METHOD (FIXED) ===");
    let mut new_rgba = Vec::with_capacity((width * height * 4) as usize);
    for row in 0..height as usize {
        let row_start = row * bytes_per_row;
        let row_end = row_start + (width as usize * 4);
-        
+
        for chunk in data[row_start..row_end].chunks_exact(4) {
            new_rgba.push(chunk[2]); // R
            new_rgba.push(chunk[1]); // G
@@ -43,26 +49,34 @@ fn main() {
    }
    println!("Converted {} pixels", new_rgba.len() / 4);
    println!("Expected {} pixels", width * height);
-    
+
    // Save a small crop from both methods
    let crop_size = 200;
-    
+
    // Old method crop
-    let old_crop: Vec<u8> = old_rgba.iter().take((crop_size * crop_size * 4) as usize).copied().collect();
+    let old_crop: Vec<u8> = old_rgba
+        .iter()
+        .take((crop_size * crop_size * 4) as usize)
+        .copied()
+        .collect();
    if let Some(old_img) = ImageBuffer::from_raw(crop_size, crop_size, old_crop) {
        let old_img: RgbaImage = old_img;
        old_img.save("/tmp/screenshot_old_method.png").unwrap();
        println!("\nSaved OLD method crop to: /tmp/screenshot_old_method.png");
    }
-    
+
    // New method crop
-    let new_crop: Vec<u8> = new_rgba.iter().take((crop_size * crop_size * 4) as usize).copied().collect();
+    let new_crop: Vec<u8> = new_rgba
+        .iter()
+        .take((crop_size * crop_size * 4) as usize)
+        .copied()
+        .collect();
    if let Some(new_img) = ImageBuffer::from_raw(crop_size, crop_size, new_crop) {
        let new_img: RgbaImage = new_img;
        new_img.save("/tmp/screenshot_new_method.png").unwrap();
        println!("Saved NEW method crop to: /tmp/screenshot_new_method.png");
    }
-    
+
    println!("\nOpen both images to compare:");
    println!("  open /tmp/screenshot_old_method.png /tmp/screenshot_new_method.png");
 }
--- a/crates/g3-computer-control/examples/test_type_text.rs
+++ b/crates/g3-computer-control/examples/test_type_text.rs
@@ -6,43 +6,43 @@ use g3_computer_control::MacAxController;
 #[tokio::main]
 async fn main() -> Result<()> {
    println!("🧪 Testing macax type_text functionality\n");
-    
+
    let controller = MacAxController::new()?;
    println!("✅ Controller initialized\n");
-    
+
    // Test 1: Type simple text
    println!("Test 1: Typing simple text into TextEdit");
    println!("  Please open TextEdit and create a new document...");
    std::thread::sleep(std::time::Duration::from_secs(3));
-    
+
    match controller.type_text("TextEdit", "Hello, World!") {
        Ok(_) => println!("  ✅ Successfully typed simple text\n"),
        Err(e) => println!("  ❌ Failed: {}\n", e),
    }
-    
+
    std::thread::sleep(std::time::Duration::from_secs(1));
-    
+
    // Test 2: Type unicode and emojis
    println!("Test 2: Typing unicode and emojis");
    match controller.type_text("TextEdit", "\n🌟 Unicode test: café, naïve, 日本語 🎉") {
        Ok(_) => println!("  ✅ Successfully typed unicode text\n"),
        Err(e) => println!("  ❌ Failed: {}\n", e),
    }
-    
+
    std::thread::sleep(std::time::Duration::from_secs(1));
-    
+
    // Test 3: Type special characters
    println!("Test 3: Typing special characters");
    match controller.type_text("TextEdit", "\nSpecial: @#$%^&*()_+-=[]{}|;':,.<>?/") {
        Ok(_) => println!("  ✅ Successfully typed special characters\n"),
        Err(e) => println!("  ❌ Failed: {}\n", e),
    }
-    
+
    println!("\n✨ Tests complete!");
    println!("\n💡 Now try with Things3:");
    println!("   1. Open Things3");
    println!("   2. Press Cmd+N to create a new task");
    println!("   3. Run: g3 --macax 'type \"🌟 My awesome task\" into Things'");
-    
+
    Ok(())
 }
--- a/crates/g3-computer-control/examples/test_vision.rs
+++ b/crates/g3-computer-control/examples/test_vision.rs
@@ -1,63 +1,67 @@
-use g3_computer_control::ocr::{OCREngine, DefaultOCR};
 use anyhow::Result;
+use g3_computer_control::ocr::{DefaultOCR, OCREngine};

 #[tokio::main]
 async fn main() -> Result<()> {
    println!("🧪 Testing Apple Vision OCR");
    println!("===========================\n");
-    
+
    // Initialize OCR engine
    println!("📦 Initializing OCR engine...");
    let ocr = DefaultOCR::new()?;
    println!("✅ OCR engine: {}\n", ocr.name());
-    
+
    // Check if test image exists
    let test_image = "/tmp/safari_test.png";
    if !std::path::Path::new(test_image).exists() {
        println!("⚠️  Test image not found: {}", test_image);
        println!("   Creating a screenshot...");
-        
+
        let status = std::process::Command::new("screencapture")
            .arg("-x")
            .arg("-R")
            .arg("0,0,1200,800")
            .arg(test_image)
            .status()?;
-        
+
        if !status.success() {
            anyhow::bail!("Failed to create screenshot");
        }
-        
+
        println!("✅ Screenshot created\n");
    }
-    
+
    // Run OCR
    println!("🔍 Running Apple Vision OCR on {}...", test_image);
    let start = std::time::Instant::now();
    let locations = ocr.extract_text_with_locations(test_image).await?;
    let duration = start.elapsed();
-    
+
    println!("✅ OCR completed in {:.3}s\n", duration.as_secs_f64());
-    
+
    // Display results
    println!("📊 Results:");
    println!("   Found {} text elements\n", locations.len());
-    
+
    if locations.is_empty() {
        println!("⚠️  No text found in image");
    } else {
        println!("   Top 20 results:");
-        println!("   {:<4} {:<40} {:<15} {:<12} {:<8}", "#", "Text", "Position", "Size", "Conf");
+        println!(
+            "   {:<4} {:<40} {:<15} {:<12} {:<8}",
+            "#", "Text", "Position", "Size", "Conf"
+        );
        println!("   {}", "-".repeat(85));
-        
+
        for (i, loc) in locations.iter().take(20).enumerate() {
            let text = if loc.text.len() > 37 {
                format!("{}...", &loc.text[..37])
            } else {
                loc.text.clone()
            };
-            
-            println!("   {:<4} {:<40} ({:>4},{:>4})    {:>4}x{:<4}  {:.2}",
+
+            println!(
+                "   {:<4} {:<40} ({:>4},{:>4})    {:>4}x{:<4}  {:.2}",
                i + 1,
                text,
                loc.x,
@@ -67,19 +71,22 @@ async fn main() -> Result<()> {
                loc.confidence
            );
        }
-        
+
        if locations.len() > 20 {
            println!("\n   ... and {} more", locations.len() - 20);
        }
-        
+
        // Performance comparison
        println!("\n📈 Performance:");
        println!("   OCR Speed: {:.3}s", duration.as_secs_f64());
        println!("   Text elements: {}", locations.len());
-        println!("   Avg per element: {:.1}ms", duration.as_millis() as f64 / locations.len() as f64);
+        println!(
+            "   Avg per element: {:.1}ms",
+            duration.as_millis() as f64 / locations.len() as f64
+        );
    }
-    
+
    println!("\n✅ Test complete!");
-    
+
    Ok(())
 }
--- a/crates/g3-computer-control/examples/test_window_capture.rs
+++ b/crates/g3-computer-control/examples/test_window_capture.rs
@@ -3,36 +3,46 @@ use g3_computer_control::create_controller;
 #[tokio::main]
 async fn main() {
    println!("Testing window-specific screenshot capture...");
-    
+
    let controller = create_controller().expect("Failed to create controller");
-    
+
    // Test 1: Capture iTerm2 window
    println!("\n1. Capturing iTerm2 window...");
-    match controller.take_screenshot("/tmp/iterm_window.png", None, Some("iTerm2")).await {
+    match controller
+        .take_screenshot("/tmp/iterm_window.png", None, Some("iTerm2"))
+        .await
+    {
        Ok(_) => {
            println!("   ✅ iTerm2 window captured to /tmp/iterm_window.png");
-            let _ = std::process::Command::new("open").arg("/tmp/iterm_window.png").spawn();
+            let _ = std::process::Command::new("open")
+                .arg("/tmp/iterm_window.png")
+                .spawn();
        }
        Err(e) => println!("   ❌ Failed: {}", e),
    }
-    
+
    // Wait a moment for the image to open
    tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
-    
+
    // Test 2: Full screen capture for comparison
    println!("\n2. Capturing full screen for comparison...");
-    match controller.take_screenshot("/tmp/fullscreen.png", None, None).await {
+    match controller
+        .take_screenshot("/tmp/fullscreen.png", None, None)
+        .await
+    {
        Ok(_) => {
            println!("   ✅ Full screen captured to /tmp/fullscreen.png");
-            let _ = std::process::Command::new("open").arg("/tmp/fullscreen.png").spawn();
+            let _ = std::process::Command::new("open")
+                .arg("/tmp/fullscreen.png")
+                .spawn();
        }
        Err(e) => println!("   ❌ Failed: {}", e),
    }
-    
+
    println!("\n=== Comparison ===");
    println!("iTerm window:  /tmp/iterm_window.png (should show ONLY iTerm window)");
    println!("Full screen:   /tmp/fullscreen.png (should show entire desktop)");
-    
+
    // Show file sizes
    if let Ok(meta1) = std::fs::metadata("/tmp/iterm_window.png") {
        if let Ok(meta2) = std::fs::metadata("/tmp/fullscreen.png") {
--- a/crates/g3-computer-control/src/lib.rs
+++ b/crates/g3-computer-control/src/lib.rs
@@ -1,17 +1,17 @@
 // Suppress warnings from objc crate macros
 #![allow(unexpected_cfgs)]

-pub mod types;
-pub mod platform;
-pub mod ocr;
-pub mod webdriver;
 pub mod macax;
+pub mod ocr;
+pub mod platform;
+pub mod types;
+pub mod webdriver;

 // Re-export webdriver types for convenience
-pub use webdriver::{WebDriverController, WebElement, safari::SafariDriver};
+pub use webdriver::{safari::SafariDriver, WebDriverController, WebElement};

 // Re-export macax types for convenience
-pub use macax::{MacAxController, AXElement, AXApplication};
+pub use macax::{AXApplication, AXElement, MacAxController};

 use anyhow::Result;
 use async_trait::async_trait;
@@ -20,14 +20,23 @@ use types::*;
 #[async_trait]
 pub trait ComputerController: Send + Sync {
    // Screen capture
-    async fn take_screenshot(&self, path: &str, region: Option<Rect>, window_id: Option<&str>) -> Result<()>;
-    
+    async fn take_screenshot(
+        &self,
+        path: &str,
+        region: Option<Rect>,
+        window_id: Option<&str>,
+    ) -> Result<()>;
+
    // OCR operations
    async fn extract_text_from_screen(&self, region: Rect, window_id: &str) -> Result<String>;
    async fn extract_text_from_image(&self, path: &str) -> Result<String>;
    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>>;
-    async fn find_text_in_app(&self, app_name: &str, search_text: &str) -> Result<Option<TextLocation>>;
-    
+    async fn find_text_in_app(
+        &self,
+        app_name: &str,
+        search_text: &str,
+    ) -> Result<Option<TextLocation>>;
+
    // Mouse operations
    fn move_mouse(&self, x: i32, y: i32) -> Result<()>;
    fn click_at(&self, x: i32, y: i32, app_name: Option<&str>) -> Result<()>;
@@ -37,13 +46,13 @@ pub trait ComputerController: Send + Sync {
 pub fn create_controller() -> Result<Box<dyn ComputerController>> {
    #[cfg(target_os = "macos")]
    return Ok(Box::new(platform::macos::MacOSController::new()?));
-    
+
    #[cfg(target_os = "linux")]
    return Ok(Box::new(platform::linux::LinuxController::new()?));
-    
+
    #[cfg(target_os = "windows")]
    return Ok(Box::new(platform::windows::WindowsController::new()?));
-    
+
    #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
    anyhow::bail!("Unsupported platform")
 }
--- a/crates/g3-computer-control/src/macax/controller.rs
+++ b/crates/g3-computer-control/src/macax/controller.rs
@@ -3,7 +3,9 @@ use anyhow::{Context, Result};
 use std::collections::HashMap;

 #[cfg(target_os = "macos")]
-use accessibility::{AXUIElement, AXUIElementAttributes, ElementFinder, TreeVisitor, TreeWalker, TreeWalkerFlow};
+use accessibility::{
+    AXUIElement, AXUIElementAttributes, ElementFinder, TreeVisitor, TreeWalker, TreeWalkerFlow,
+};

 #[cfg(target_os = "macos")]
 use core_foundation::base::TCFType;
@@ -23,46 +25,46 @@ impl MacAxController {
        {
            // Check if we have accessibility permissions by trying to get system-wide element
            let _system = AXUIElement::system_wide();
-            
+
            Ok(Self {
                app_cache: std::sync::Mutex::new(HashMap::new()),
            })
        }
-        
+
        #[cfg(not(target_os = "macos"))]
        {
            anyhow::bail!("macOS Accessibility API is only available on macOS")
        }
    }
-    
+
    /// List all running applications
    #[cfg(target_os = "macos")]
    pub fn list_applications(&self) -> Result<Vec<AXApplication>> {
        let apps = Self::get_running_applications()?;
        Ok(apps)
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn list_applications(&self) -> Result<Vec<AXApplication>> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    #[cfg(target_os = "macos")]
    fn get_running_applications() -> Result<Vec<AXApplication>> {
        use cocoa::appkit::NSApplicationActivationPolicy;
        use cocoa::base::{id, nil};
        use objc::{class, msg_send, sel, sel_impl};
-        
+
        unsafe {
            let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace];
            let running_apps: id = msg_send![workspace, runningApplications];
            let count: usize = msg_send![running_apps, count];
-            
+
            let mut apps = Vec::new();
-            
+
            for i in 0..count {
                let app: id = msg_send![running_apps, objectAtIndex: i];
-                
+
                // Get app name
                let localized_name: id = msg_send![app, localizedName];
                if localized_name == nil {
@@ -76,7 +78,7 @@ impl MacAxController {
                } else {
                    continue;
                };
-                
+
                // Get bundle ID
                let bundle_id_obj: id = msg_send![app, bundleIdentifier];
                let bundle_id = if bundle_id_obj != nil {
@@ -93,13 +95,15 @@ impl MacAxController {
                } else {
                    None
                };
-                
+
                // Get PID
                let pid: i32 = msg_send![app, processIdentifier];
-                
+
                // Skip background-only apps
                let activation_policy: i64 = msg_send![app, activationPolicy];
-                if activation_policy == NSApplicationActivationPolicy::NSApplicationActivationPolicyRegular as i64 {
+                if activation_policy
+                    == NSApplicationActivationPolicy::NSApplicationActivationPolicyRegular as i64
+                {
                    apps.push(AXApplication {
                        name,
                        bundle_id,
@@ -107,32 +111,32 @@ impl MacAxController {
                    });
                }
            }
-            
+
            Ok(apps)
        }
    }
-    
+
    /// Get the frontmost (active) application
    #[cfg(target_os = "macos")]
    pub fn get_frontmost_app(&self) -> Result<AXApplication> {
        use cocoa::base::{id, nil};
        use objc::{class, msg_send, sel, sel_impl};
-        
+
        unsafe {
            let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace];
            let frontmost_app: id = msg_send![workspace, frontmostApplication];
-            
+
            if frontmost_app == nil {
                anyhow::bail!("No frontmost application");
            }
-            
+
            // Get app name
            let localized_name: id = msg_send![frontmost_app, localizedName];
            let name_ptr: *const i8 = msg_send![localized_name, UTF8String];
            let name = std::ffi::CStr::from_ptr(name_ptr)
                .to_string_lossy()
                .to_string();
-            
+
            // Get bundle ID
            let bundle_id_obj: id = msg_send![frontmost_app, bundleIdentifier];
            let bundle_id = if bundle_id_obj != nil {
@@ -149,10 +153,10 @@ impl MacAxController {
            } else {
                None
            };
-            
+
            // Get PID
            let pid: i32 = msg_send![frontmost_app, processIdentifier];
-            
+
            Ok(AXApplication {
                name,
                bundle_id,
@@ -160,12 +164,12 @@ impl MacAxController {
            })
        }
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn get_frontmost_app(&self) -> Result<AXApplication> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    /// Get AXUIElement for an application by name or PID
    #[cfg(target_os = "macos")]
    fn get_app_element(&self, app_name: &str) -> Result<AXUIElement> {
@@ -176,79 +180,79 @@ impl MacAxController {
                return Ok(element.clone());
            }
        }
-        
+
        // Find the app by name
        let apps = Self::get_running_applications()?;
        let app = apps
            .iter()
            .find(|a| a.name == app_name)
            .ok_or_else(|| anyhow::anyhow!("Application '{}' not found", app_name))?;
-        
+
        // Create AXUIElement for the app
        let element = AXUIElement::application(app.pid);
-        
+
        // Cache it
        {
            let mut cache = self.app_cache.lock().unwrap();
            cache.insert(app_name.to_string(), element.clone());
        }
-        
+
        Ok(element)
    }
-    
+
    /// Activate (bring to front) an application
    #[cfg(target_os = "macos")]
    pub fn activate_app(&self, app_name: &str) -> Result<()> {
        use cocoa::base::id;
        use objc::{class, msg_send, sel, sel_impl};
-        
+
        // Find the app
        let apps = Self::get_running_applications()?;
        let app = apps
            .iter()
            .find(|a| a.name == app_name)
            .ok_or_else(|| anyhow::anyhow!("Application '{}' not found", app_name))?;
-        
+
        unsafe {
            let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace];
            let running_apps: id = msg_send![workspace, runningApplications];
            let count: usize = msg_send![running_apps, count];
-            
+
            for i in 0..count {
                let running_app: id = msg_send![running_apps, objectAtIndex: i];
                let pid: i32 = msg_send![running_app, processIdentifier];
-                
+
                if pid == app.pid {
                    let _: bool = msg_send![running_app, activateWithOptions: 0];
                    return Ok(());
                }
            }
        }
-        
+
        anyhow::bail!("Failed to activate application")
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn activate_app(&self, _app_name: &str) -> Result<()> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    /// Get the UI hierarchy of an application
    #[cfg(target_os = "macos")]
    pub fn get_ui_tree(&self, app_name: &str, max_depth: usize) -> Result<String> {
        let app_element = self.get_app_element(app_name)?;
        let mut output = format!("Application: {}\n", app_name);
-        
+
        Self::build_ui_tree(&app_element, &mut output, 0, max_depth)?;
-        
+
        Ok(output)
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn get_ui_tree(&self, _app_name: &str, _max_depth: usize) -> Result<String> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    #[cfg(target_os = "macos")]
    fn build_ui_tree(
        element: &AXUIElement,
@@ -259,21 +263,22 @@ impl MacAxController {
        if depth >= max_depth {
            return Ok(());
        }
-        
+
        let indent = "  ".repeat(depth);
-        
+
        // Get role
-        let role = element.role().ok().map(|s| s.to_string())
+        let role = element
+            .role()
+            .ok()
+            .map(|s| s.to_string())
            .unwrap_or_else(|| "Unknown".to_string());
-        
+
        // Get title
-        let title = element.title().ok()
-            .map(|s| s.to_string());
-        
+        let title = element.title().ok().map(|s| s.to_string());
+
        // Get identifier
-        let identifier = element.identifier().ok()
-            .map(|s| s.to_string());
-        
+        let identifier = element.identifier().ok().map(|s| s.to_string());
+
        // Format output
        output.push_str(&format!("{}Role: {}", indent, role));
        if let Some(t) = title {
@@ -283,7 +288,7 @@ impl MacAxController {
            output.push_str(&format!(", ID: {}", id));
        }
        output.push('\n');
-        
+
        // Get children
        if let Ok(children) = element.children() {
            for i in 0..children.len() {
@@ -292,10 +297,10 @@ impl MacAxController {
                }
            }
        }
-        
+
        Ok(())
    }
-    
+
    /// Find UI elements in an application
    #[cfg(target_os = "macos")]
    pub fn find_elements(
@@ -307,7 +312,7 @@ impl MacAxController {
    ) -> Result<Vec<AXElement>> {
        let app_element = self.get_app_element(app_name)?;
        let mut found_elements = Vec::new();
-        
+
        let visitor = ElementCollector {
            role_filter: role.map(|s| s.to_string()),
            title_filter: title.map(|s| s.to_string()),
@@ -315,13 +320,13 @@ impl MacAxController {
            results: std::cell::RefCell::new(&mut found_elements),
            depth: std::cell::Cell::new(0),
        };
-        
+
        let walker = TreeWalker::new();
        walker.walk(&app_element, &visitor);
-        
+
        Ok(found_elements)
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn find_elements(
        &self,
@@ -332,7 +337,7 @@ impl MacAxController {
    ) -> Result<Vec<AXElement>> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    /// Find a single element (helper for click, set_value, etc.)
    #[cfg(target_os = "macos")]
    fn find_element(
@@ -343,19 +348,17 @@ impl MacAxController {
        identifier: Option<&str>,
    ) -> Result<AXUIElement> {
        let app_element = self.get_app_element(app_name)?;
-        
+
        let role_str = role.to_string();
        let title_str = title.map(|s| s.to_string());
        let identifier_str = identifier.map(|s| s.to_string());
-        
+
        let finder = ElementFinder::new(
            &app_element,
            move |element| {
                // Check role
-                let elem_role = element.role()
-                    .ok()
-                    .map(|s| s.to_string());
-                
+                let elem_role = element.role().ok().map(|s| s.to_string());
+
                if let Some(r) = elem_role {
                    if !r.contains(&role_str) {
                        return false;
@@ -363,13 +366,11 @@ impl MacAxController {
                } else {
                    return false;
                }
-                
+
                // Check title if specified
                if let Some(ref title_filter) = title_str {
-                    let elem_title = element.title()
-                        .ok()
-                        .map(|s| s.to_string());
-                    
+                    let elem_title = element.title().ok().map(|s| s.to_string());
+
                    if let Some(t) = elem_title {
                        if !t.contains(title_filter) {
                            return false;
@@ -378,13 +379,11 @@ impl MacAxController {
                        return false;
                    }
                }
-                
+
                // Check identifier if specified
                if let Some(ref id_filter) = identifier_str {
-                    let elem_id = element.identifier()
-                        .ok()
-                        .map(|s| s.to_string());
-                    
+                    let elem_id = element.identifier().ok().map(|s| s.to_string());
+
                    if let Some(id) = elem_id {
                        if !id.contains(id_filter) {
                            return false;
@@ -393,15 +392,15 @@ impl MacAxController {
                        return false;
                    }
                }
-                
+
                true
            },
            Some(std::time::Duration::from_secs(2)),
        );
-        
+
        finder.find().context("Element not found")
    }
-    
+
    /// Click on a UI element
    #[cfg(target_os = "macos")]
    pub fn click_element(
@@ -412,16 +411,16 @@ impl MacAxController {
        identifier: Option<&str>,
    ) -> Result<()> {
        let element = self.find_element(app_name, role, title, identifier)?;
-        
+
        // Perform the press action
        let action_name = CFString::new("AXPress");
        element
            .perform_action(&action_name)
            .map_err(|e| anyhow::anyhow!("Failed to perform press action: {:?}", e))?;
-        
+
        Ok(())
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn click_element(
        &self,
@@ -432,7 +431,7 @@ impl MacAxController {
    ) -> Result<()> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    /// Set the value of a UI element
    #[cfg(target_os = "macos")]
    pub fn set_value(
@@ -444,16 +443,17 @@ impl MacAxController {
        identifier: Option<&str>,
    ) -> Result<()> {
        let element = self.find_element(app_name, role, title, identifier)?;
-        
+
        // Set the value - convert CFString to CFType
        let cf_value = CFString::new(value);
-        
-        element.set_value(cf_value.as_CFType())
+
+        element
+            .set_value(cf_value.as_CFType())
            .map_err(|e| anyhow::anyhow!("Failed to set value: {:?}", e))?;
-        
+
        Ok(())
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn set_value(
        &self,
@@ -465,7 +465,7 @@ impl MacAxController {
    ) -> Result<()> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    /// Get the value of a UI element
    #[cfg(target_os = "macos")]
    pub fn get_value(
@@ -476,11 +476,12 @@ impl MacAxController {
        identifier: Option<&str>,
    ) -> Result<String> {
        let element = self.find_element(app_name, role, title, identifier)?;
-        
+
        // Get the value
-        let value_type = element.value()
+        let value_type = element
+            .value()
            .map_err(|e| anyhow::anyhow!("Failed to get value: {:?}", e))?;
-        
+
        // Try to downcast to CFString
        if let Some(cf_string) = value_type.downcast::<CFString>() {
            Ok(cf_string.to_string())
@@ -489,7 +490,7 @@ impl MacAxController {
            Ok(format!("<non-string value>"))
        }
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn get_value(
        &self,
@@ -500,52 +501,52 @@ impl MacAxController {
    ) -> Result<String> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    /// Type text into the currently focused element (uses system text input)
    #[cfg(target_os = "macos")]
    pub fn type_text(&self, app_name: &str, text: &str) -> Result<()> {
        use cocoa::base::{id, nil};
        use cocoa::foundation::NSString;
        use objc::{class, msg_send, sel, sel_impl};
-        
+
        // First, make sure the app is active
        self.activate_app(app_name)?;
-        
+
        // Wait for app to fully activate
        std::thread::sleep(std::time::Duration::from_millis(500));
-        
+
        // Send a Tab key to try to focus on a text field
        // This helps ensure something is focused before we paste
        let _ = self.press_key(app_name, "tab", vec![]);
        std::thread::sleep(std::time::Duration::from_millis(800));
-        
+
        // Save old clipboard, set new content, paste, then restore
        let old_content: id;
        unsafe {
            // Get the general pasteboard
            let pasteboard: id = msg_send![class!(NSPasteboard), generalPasteboard];
-            
+
            // Save current clipboard content
            let ns_string_type = NSString::alloc(nil).init_str("public.utf8-plain-text");
            old_content = msg_send![pasteboard, stringForType: ns_string_type];
-            
+
            // Clear and set new content
            let _: () = msg_send![pasteboard, clearContents];
-            
+
            let ns_string = NSString::alloc(nil).init_str(text);
            let ns_type = NSString::alloc(nil).init_str("public.utf8-plain-text");
            let _: bool = msg_send![pasteboard, setString:ns_string forType:ns_type];
        }
-        
+
        // Wait a moment for clipboard to update
        std::thread::sleep(std::time::Duration::from_millis(200));
-        
+
        // Paste using Cmd+V (outside unsafe block)
        self.press_key(app_name, "v", vec!["command"])?;
-        
+
        // Wait for paste to complete
        std::thread::sleep(std::time::Duration::from_millis(300));
-        
+
        // Restore old clipboard content if it existed
        unsafe {
            if old_content != nil {
@@ -555,15 +556,15 @@ impl MacAxController {
                let _: bool = msg_send![pasteboard, setString:old_content forType:ns_type];
            }
        }
-        
+
        Ok(())
    }
-    
+
    #[cfg(not(target_os = "macos"))]
    pub fn type_text(&self, _app_name: &str, _text: &str) -> Result<()> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    /// Focus on a text field or text area element
    #[cfg(target_os = "macos")]
    pub fn focus_element(
@@ -574,40 +575,34 @@ impl MacAxController {
        identifier: Option<&str>,
    ) -> Result<()> {
        let element = self.find_element(app_name, role, title, identifier)?;
-        
+
        // Set focused attribute to true
        use core_foundation::boolean::CFBoolean;
        let cf_true = CFBoolean::true_value();
-        
-        element.set_attribute(&accessibility::AXAttribute::focused(), cf_true)
+
+        element
+            .set_attribute(&accessibility::AXAttribute::focused(), cf_true)
            .map_err(|e| anyhow::anyhow!("Failed to focus element: {:?}", e))?;
-        
+
        Ok(())
    }
-    
+
    /// Press a keyboard shortcut
    #[cfg(target_os = "macos")]
-    pub fn press_key(
-        &self,
-        app_name: &str,
-        key: &str,
-        modifiers: Vec<&str>,
-    ) -> Result<()> {
-        use core_graphics::event::{
-            CGEvent, CGEventFlags, CGEventTapLocation,
-        };
+    pub fn press_key(&self, app_name: &str, key: &str, modifiers: Vec<&str>) -> Result<()> {
+        use core_graphics::event::{CGEvent, CGEventFlags, CGEventTapLocation};
        use core_graphics::event_source::{CGEventSource, CGEventSourceStateID};
-        
+
        // First, make sure the app is active
        self.activate_app(app_name)?;
-        
+
        // Wait a bit for activation
        std::thread::sleep(std::time::Duration::from_millis(100));
-        
+
        // Map key string to key code
-        let key_code = Self::key_to_keycode(key)
-            .ok_or_else(|| anyhow::anyhow!("Unknown key: {}", key))?;
-        
+        let key_code =
+            Self::key_to_keycode(key).ok_or_else(|| anyhow::anyhow!("Unknown key: {}", key))?;
+
        // Map modifiers to flags
        let mut flags = CGEventFlags::CGEventFlagNull;
        for modifier in modifiers {
@@ -619,39 +614,37 @@ impl MacAxController {
                _ => {}
            }
        }
-        
+
        // Create event source
        let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState)
-            .ok().context("Failed to create event source")?;
-        
+            .ok()
+            .context("Failed to create event source")?;
+
        // Create key down event
        let key_down = CGEvent::new_keyboard_event(source.clone(), key_code, true)
-            .ok().context("Failed to create key down event")?;
+            .ok()
+            .context("Failed to create key down event")?;
        key_down.set_flags(flags);
-        
+
        // Create key up event
        let key_up = CGEvent::new_keyboard_event(source, key_code, false)
-            .ok().context("Failed to create key up event")?;
+            .ok()
+            .context("Failed to create key up event")?;
        key_up.set_flags(flags);
-        
+
        // Post events
        key_down.post(CGEventTapLocation::HID);
        std::thread::sleep(std::time::Duration::from_millis(50));
        key_up.post(CGEventTapLocation::HID);
-        
+
        Ok(())
    }
-    
+
    #[cfg(not(target_os = "macos"))]
-    pub fn press_key(
-        &self,
-        _app_name: &str,
-        _key: &str,
-        _modifiers: Vec<&str>,
-    ) -> Result<()> {
+    pub fn press_key(&self, _app_name: &str, _key: &str, _modifiers: Vec<&str>) -> Result<()> {
        anyhow::bail!("Not supported on this platform")
    }
-    
+
    #[cfg(target_os = "macos")]
    fn key_to_keycode(key: &str) -> Option<u16> {
        // Map common keys to keycodes
@@ -743,62 +736,55 @@ struct ElementCollector<'a> {
 impl<'a> TreeVisitor for ElementCollector<'a> {
    fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow {
        self.depth.set(self.depth.get() + 1);
-        
+
        if self.depth.get() > 20 {
            return TreeWalkerFlow::SkipSubtree;
        }
-        
+
        // Get element properties
-        let role = element.role()
+        let role = element
+            .role()
            .ok()
            .map(|s| s.to_string())
            .unwrap_or_else(|| "Unknown".to_string());
-        
-        let title = element.title()
-            .ok()
-            .map(|s| s.to_string());
-        
-        let identifier = element.identifier()
-            .ok()
-            .map(|s| s.to_string());
-        
+
+        let title = element.title().ok().map(|s| s.to_string());
+
+        let identifier = element.identifier().ok().map(|s| s.to_string());
+
        // Check if this element matches the filters
        let role_matches = self.role_filter.as_ref().map_or(true, |r| role.contains(r));
        let title_matches = self.title_filter.as_ref().map_or(true, |t| {
-            title.as_ref().map_or(false, |title_str| title_str.contains(t))
+            title
+                .as_ref()
+                .map_or(false, |title_str| title_str.contains(t))
        });
        let identifier_matches = self.identifier_filter.as_ref().map_or(true, |id| {
-            identifier.as_ref().map_or(false, |id_str| id_str.contains(id))
+            identifier
+                .as_ref()
+                .map_or(false, |id_str| id_str.contains(id))
        });
-        
+
        if role_matches && title_matches && identifier_matches {
            // Get additional properties
-            let value = element.value()
+            let value = element
+                .value()
                .ok()
-                .and_then(|v| {
-                    v.downcast::<CFString>().map(|s| s.to_string())
-                });
-            
-            let label = element.description()
-                .ok()
-                .map(|s| s.to_string());
-            
-            let enabled = element.enabled()
-                .ok()
-                .map(|b| b.into())
-                .unwrap_or(false);
-            
-            let focused = element.focused()
-                .ok()
-                .map(|b| b.into())
-                .unwrap_or(false);
-            
+                .and_then(|v| v.downcast::<CFString>().map(|s| s.to_string()));
+
+            let label = element.description().ok().map(|s| s.to_string());
+
+            let enabled = element.enabled().ok().map(|b| b.into()).unwrap_or(false);
+
+            let focused = element.focused().ok().map(|b| b.into()).unwrap_or(false);
+
            // Count children
-            let children_count = element.children()
+            let children_count = element
+                .children()
                .ok()
                .map(|arr| arr.len() as usize)
                .unwrap_or(0);
-            
+
            self.results.borrow_mut().push(AXElement {
                role,
                title,
@@ -812,10 +798,10 @@ impl<'a> TreeVisitor for ElementCollector<'a> {
                children_count,
            });
        }
-        
+
        TreeWalkerFlow::Continue
    }
-    
+
    fn exit_element(&self, _element: &AXUIElement) {
        self.depth.set(self.depth.get() - 1);
    }
--- a/crates/g3-computer-control/src/macax/mod.rs
+++ b/crates/g3-computer-control/src/macax/mod.rs
@@ -34,7 +34,7 @@ impl AXElement {
    /// Convert to a human-readable string representation
    pub fn to_string(&self) -> String {
        let mut parts = vec![format!("Role: {}", self.role)];
-        
+
        if let Some(ref title) = self.title {
            parts.push(format!("Title: {}", title));
        }
@@ -47,19 +47,19 @@ impl AXElement {
        if let Some(ref id) = self.identifier {
            parts.push(format!("ID: {}", id));
        }
-        
+
        parts.push(format!("Enabled: {}", self.enabled));
        parts.push(format!("Focused: {}", self.focused));
-        
+
        if let Some((x, y)) = self.position {
            parts.push(format!("Position: ({:.0}, {:.0})", x, y));
        }
        if let Some((w, h)) = self.size {
            parts.push(format!("Size: ({:.0}, {:.0})", w, h));
        }
-        
+
        parts.push(format!("Children: {}", self.children_count));
-        
+
        parts.join(", ")
    }
 }
--- a/crates/g3-computer-control/src/ocr/mod.rs
+++ b/crates/g3-computer-control/src/ocr/mod.rs
@@ -7,7 +7,7 @@ use async_trait::async_trait;
 pub trait OCREngine: Send + Sync {
    /// Extract text with locations from an image file
    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>>;
-    
+
    /// Get the name of the OCR engine
    fn name(&self) -> &str;
 }
--- a/crates/g3-computer-control/src/ocr/tesseract.rs
+++ b/crates/g3-computer-control/src/ocr/tesseract.rs
@@ -12,16 +12,18 @@ impl TesseractOCR {
        let tesseract_check = std::process::Command::new("which")
            .arg("tesseract")
            .output();
-        
+
        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
+            anyhow::bail!(
+                "Tesseract OCR is not installed on your system.\n\n\
                To install tesseract:\n  macOS:   brew install tesseract\n  \
                Linux:   sudo apt-get install tesseract-ocr (Ubuntu/Debian)\n           \
                sudo yum install tesseract (RHEL/CentOS)\n  \
                Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki\n\n\
-                After installation, restart your terminal and try again.");
+                After installation, restart your terminal and try again."
+            );
        }
-        
+
        Ok(Self)
    }
 }
@@ -36,18 +38,23 @@ impl OCREngine for TesseractOCR {
            .arg("tsv")
            .output()
            .map_err(|e| anyhow::anyhow!("Failed to run tesseract: {}", e))?;
-        
+
        if !output.status.success() {
-            anyhow::bail!("Tesseract failed: {}", String::from_utf8_lossy(&output.stderr));
+            anyhow::bail!(
+                "Tesseract failed: {}",
+                String::from_utf8_lossy(&output.stderr)
+            );
        }
-        
+
        let tsv_text = String::from_utf8_lossy(&output.stdout);
        let mut locations = Vec::new();
-        
+
        // Parse TSV output (skip header line)
        for (i, line) in tsv_text.lines().enumerate() {
-            if i == 0 { continue; } // Skip header
-            
+            if i == 0 {
+                continue;
+            } // Skip header
+
            let parts: Vec<&str> = line.split('\t').collect();
            if parts.len() >= 12 {
                // TSV format: level, page_num, block_num, par_num, line_num, word_num,
@@ -74,10 +81,10 @@ impl OCREngine for TesseractOCR {
                }
            }
        }
-        
+
        Ok(locations)
    }
-    
+
    fn name(&self) -> &str {
        "Tesseract OCR"
    }
--- a/crates/g3-computer-control/src/ocr/vision.rs
+++ b/crates/g3-computer-control/src/ocr/vision.rs
@@ -1,6 +1,6 @@
 use super::OCREngine;
 use crate::types::TextLocation;
-use anyhow::{Result, Context};
+use anyhow::{Context, Result};
 use async_trait::async_trait;
 use std::ffi::{CStr, CString};
 use std::os::raw::{c_char, c_float, c_uint};
@@ -24,7 +24,7 @@ extern "C" {
        out_boxes: *mut *mut std::ffi::c_void,
        out_count: *mut c_uint,
    ) -> bool;
-    
+
    fn vision_free_boxes(boxes: *mut std::ffi::c_void, count: c_uint);
 }

@@ -41,12 +41,11 @@ impl AppleVisionOCR {
 impl OCREngine for AppleVisionOCR {
    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>> {
        // Convert path to C string
-        let c_path = CString::new(path)
-            .context("Failed to convert path to C string")?;
-        
+        let c_path = CString::new(path).context("Failed to convert path to C string")?;
+
        let mut boxes_ptr: *mut std::ffi::c_void = std::ptr::null_mut();
        let mut count: c_uint = 0;
-        
+
        // Call Swift Vision API
        let success = unsafe {
            vision_recognize_text(
@@ -56,28 +55,26 @@ impl OCREngine for AppleVisionOCR {
                &mut count,
            )
        };
-        
+
        if !success || boxes_ptr.is_null() {
            anyhow::bail!("Apple Vision OCR failed");
        }
-        
+
        // Convert C array to Rust Vec
        let mut locations = Vec::new();
-        
+
        unsafe {
            let typed_boxes = boxes_ptr as *const VisionTextBox;
            let boxes_slice = std::slice::from_raw_parts(typed_boxes, count as usize);
-            
+
            for box_data in boxes_slice {
                // Convert C string to Rust String
                let text = if !box_data.text.is_null() {
-                    CStr::from_ptr(box_data.text)
-                        .to_string_lossy()
-                        .into_owned()
+                    CStr::from_ptr(box_data.text).to_string_lossy().into_owned()
                } else {
                    String::new()
                };
-                
+
                if !text.is_empty() {
                    locations.push(TextLocation {
                        text,
@@ -89,14 +86,14 @@ impl OCREngine for AppleVisionOCR {
                    });
                }
            }
-            
+
            // Free the C array
            vision_free_boxes(boxes_ptr, count);
        }
-        
+
        Ok(locations)
    }
-    
+
    fn name(&self) -> &str {
        "Apple Vision Framework"
    }
--- a/crates/g3-computer-control/src/platform/linux.rs
+++ b/crates/g3-computer-control/src/platform/linux.rs
@@ -1,4 +1,4 @@
-use crate::{ComputerController, types::*};
+use crate::{types::*, ComputerController};
 use anyhow::Result;
 use async_trait::async_trait;
 use tesseract::Tesseract;
@@ -21,48 +21,53 @@ impl ComputerController for LinuxController {
    async fn move_mouse(&self, _x: i32, _y: i32) -> Result<()> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn click(&self, _button: MouseButton) -> Result<()> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn double_click(&self, _button: MouseButton) -> Result<()> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn type_text(&self, _text: &str) -> Result<()> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn press_key(&self, _key: &str) -> Result<()> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn list_windows(&self) -> Result<Vec<Window>> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn focus_window(&self, _window_id: &str) -> Result<()> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn get_window_bounds(&self, _window_id: &str) -> Result<Rect> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn find_element(&self, _selector: &ElementSelector) -> Result<Option<UIElement>> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn get_element_text(&self, _element_id: &str) -> Result<String> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn get_element_bounds(&self, _element_id: &str) -> Result<Rect> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
-    async fn take_screenshot(&self, _path: &str, _region: Option<Rect>, _window_id: Option<&str>) -> Result<()> {
+
+    async fn take_screenshot(
+        &self,
+        _path: &str,
+        _region: Option<Rect>,
+        _window_id: Option<&str>,
+    ) -> Result<()> {
        // Enforce that window_id must be provided
        if _window_id.is_none() {
            anyhow::bail!("window_id is required. You must specify which window to capture (e.g., 'Firefox', 'Terminal', 'gedit'). Use list_windows to see available windows.");
@@ -70,94 +75,111 @@ impl ComputerController for LinuxController {

        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn extract_text_from_screen(&self, _region: Rect, _window_id: &str) -> Result<String> {
        anyhow::bail!("Linux implementation not yet available")
    }
-    
+
    async fn extract_text_from_image(&self, _path: &str) -> Result<OCRResult> {
        // Check if tesseract is available on the system
        let tesseract_check = std::process::Command::new("which")
            .arg("tesseract")
            .output();
-        
+
        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
+            anyhow::bail!(
+                "Tesseract OCR is not installed on your system.\n\n\
                To install tesseract:\n  \
                Ubuntu/Debian: sudo apt-get install tesseract-ocr\n  \
                RHEL/CentOS:   sudo yum install tesseract\n  \
                Arch Linux:    sudo pacman -S tesseract\n\n\
-                After installation, restart your terminal and try again.");
+                After installation, restart your terminal and try again."
+            );
        }
-        
+
        // Initialize Tesseract
-        let tess = Tesseract::new(None, Some("eng"))
-            .map_err(|e| {
-                anyhow::anyhow!("Failed to initialize Tesseract: {}\n\n\
+        let tess = Tesseract::new(None, Some("eng")).map_err(|e| {
+            anyhow::anyhow!(
+                "Failed to initialize Tesseract: {}\n\n\
                    This usually means:\n1. Tesseract is not properly installed\n\
                    2. Language data files are missing\n\nTo fix:\n  \
                    Ubuntu/Debian: sudo apt-get install tesseract-ocr-eng\n  \
                    RHEL/CentOS:   sudo yum install tesseract-langpack-eng\n  \
-                    Arch Linux:    sudo pacman -S tesseract-data-eng", e)
-            })?;
-        
-        let text = tess.set_image(_path)
+                    Arch Linux:    sudo pacman -S tesseract-data-eng",
+                e
+            )
+        })?;
+
+        let text = tess
+            .set_image(_path)
            .map_err(|e| anyhow::anyhow!("Failed to load image '{}': {}", _path, e))?
            .get_text()
            .map_err(|e| anyhow::anyhow!("Failed to extract text from image: {}", e))?;
-        
+
        // Get confidence (simplified - would need more complex API calls for per-word confidence)
        let confidence = 0.85; // Placeholder
-        
+
        Ok(OCRResult {
            text,
            confidence,
-            bounds: Rect { x: 0, y: 0, width: 0, height: 0 }, // Would need image dimensions
+            bounds: Rect {
+                x: 0,
+                y: 0,
+                width: 0,
+                height: 0,
+            }, // Would need image dimensions
        })
    }
-    
+
    async fn find_text_on_screen(&self, _text: &str) -> Result<Option<Point>> {
        // Check if tesseract is available on the system
        let tesseract_check = std::process::Command::new("which")
            .arg("tesseract")
            .output();
-        
+
        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
+            anyhow::bail!(
+                "Tesseract OCR is not installed on your system.\n\n\
                To install tesseract:\n  \
                Ubuntu/Debian: sudo apt-get install tesseract-ocr\n  \
                RHEL/CentOS:   sudo yum install tesseract\n  \
                Arch Linux:    sudo pacman -S tesseract\n\n\
-                After installation, restart your terminal and try again.");
+                After installation, restart your terminal and try again."
+            );
        }
-        
+
        // Take full screen screenshot
        let temp_path = format!("/tmp/g3_ocr_search_{}.png", uuid::Uuid::new_v4());
        self.take_screenshot(&temp_path, None, None).await?;
-        
+
        // Use Tesseract to find text with bounding boxes
-        let tess = Tesseract::new(None, Some("eng"))
-            .map_err(|e| {
-                anyhow::anyhow!("Failed to initialize Tesseract: {}\n\n\
+        let tess = Tesseract::new(None, Some("eng")).map_err(|e| {
+            anyhow::anyhow!(
+                "Failed to initialize Tesseract: {}\n\n\
                    This usually means:\n1. Tesseract is not properly installed\n\
                    2. Language data files are missing\n\nTo fix:\n  \
                    Ubuntu/Debian: sudo apt-get install tesseract-ocr-eng\n  \
                    RHEL/CentOS:   sudo yum install tesseract-langpack-eng\n  \
-                    Arch Linux:    sudo pacman -S tesseract-data-eng", e)
-            })?;
-        
-        let full_text = tess.set_image(temp_path.as_str())
+                    Arch Linux:    sudo pacman -S tesseract-data-eng",
+                e
+            )
+        })?;
+
+        let full_text = tess
+            .set_image(temp_path.as_str())
            .map_err(|e| anyhow::anyhow!("Failed to load screenshot: {}", e))?
            .get_text()
            .map_err(|e| anyhow::anyhow!("Failed to extract text from screen: {}", e))?;
-        
+
        // Clean up temp file
        let _ = std::fs::remove_file(&temp_path);
-        
+
        // Simple text search - full implementation would use get_component_images
        // to get bounding boxes for each word
        if full_text.contains(_text) {
-            tracing::warn!("Text found but precise coordinates not available in simplified implementation");
+            tracing::warn!(
+                "Text found but precise coordinates not available in simplified implementation"
+            );
            Ok(Some(Point { x: 0, y: 0 }))
        } else {
            Ok(None)
--- a/crates/g3-computer-control/src/platform/macos.rs
+++ b/crates/g3-computer-control/src/platform/macos.rs
@@ -1,13 +1,18 @@
-use crate::{ComputerController, types::{Rect, TextLocation}};
-use crate::ocr::{OCREngine, DefaultOCR};
-use anyhow::{Result, Context};
+use crate::ocr::{DefaultOCR, OCREngine};
+use crate::{
+    types::{Rect, TextLocation},
+    ComputerController,
+};
+use anyhow::{Context, Result};
 use async_trait::async_trait;
-use std::path::Path;
-use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo};
+use core_foundation::array::CFArray;
+use core_foundation::base::{TCFType, ToVoid};
 use core_foundation::dictionary::CFDictionary;
 use core_foundation::string::CFString;
-use core_foundation::base::{TCFType, ToVoid};
-use core_foundation::array::CFArray;
+use core_graphics::window::{
+    kCGNullWindowID, kCGWindowListOptionOnScreenOnly, CGWindowListCopyWindowInfo,
+};
+use std::path::Path;

 pub struct MacOSController {
    ocr_engine: Box<dyn OCREngine>,
@@ -20,13 +25,21 @@ impl MacOSController {
        let ocr = Box::new(DefaultOCR::new()?);
        let ocr_name = ocr.name().to_string();
        tracing::info!("Initialized macOS controller with OCR engine: {}", ocr_name);
-        Ok(Self { ocr_engine: ocr, ocr_name })
+        Ok(Self {
+            ocr_engine: ocr,
+            ocr_name,
+        })
    }
 }

 #[async_trait]
 impl ComputerController for MacOSController {
-    async fn take_screenshot(&self, path: &str, region: Option<Rect>, window_id: Option<&str>) -> Result<()> {
+    async fn take_screenshot(
+        &self,
+        path: &str,
+        region: Option<Rect>,
+        window_id: Option<&str>,
+    ) -> Result<()> {
        // Enforce that window_id must be provided
        if window_id.is_none() {
            return Err(anyhow::anyhow!("window_id is required. You must specify which window to capture (e.g., 'Safari', 'Terminal', 'Google Chrome'). Use list_windows to see available windows."));
@@ -36,40 +49,38 @@ impl ComputerController for MacOSController {
        let temp_dir = std::env::var("TMPDIR")
            .or_else(|_| std::env::var("HOME").map(|h| format!("{}/tmp", h)))
            .unwrap_or_else(|_| "/tmp".to_string());
-        
+
        // Ensure temp directory exists
        std::fs::create_dir_all(&temp_dir)?;
-        
+
        // If path is relative or doesn't specify a directory, use temp_dir
        let final_path = if path.starts_with('/') {
            path.to_string()
        } else {
            format!("{}/{}", temp_dir.trim_end_matches('/'), path)
        };
-        
+
        let path_obj = Path::new(&final_path);
        if let Some(parent) = path_obj.parent() {
            std::fs::create_dir_all(parent)?;
        }
-        
+
        let app_name = window_id.unwrap(); // Safe because we checked is_none() above
-        
+
        // Get the window ID for the specified application
        let cg_window_id = unsafe {
-            let window_list = CGWindowListCopyWindowInfo(
-                kCGWindowListOptionOnScreenOnly,
-                kCGNullWindowID
-            );
-            
+            let window_list =
+                CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID);
+
            let array = CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
            let count = array.len();
-            
+
            let mut found_window_id: Option<(u32, String)> = None; // (id, owner)
            let app_name_lower = app_name.to_lowercase();
-            
+
            for i in 0..count {
                let dict = array.get(i).unwrap();
-                
+
                // Get owner name
                let owner_key = CFString::from_static_string("kCGWindowOwnerName");
                let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
@@ -78,57 +89,68 @@ impl ComputerController for MacOSController {
                } else {
                    continue;
                };
-                
-                tracing::debug!("Checking window: owner='{}', looking for '{}'", owner, app_name);
+
+                tracing::debug!(
+                    "Checking window: owner='{}', looking for '{}'",
+                    owner,
+                    app_name
+                );
                let owner_lower = owner.to_lowercase();
-                
+
                // Normalize by removing spaces for exact matching
                let app_name_normalized = app_name_lower.replace(" ", "");
                let owner_normalized = owner_lower.replace(" ", "");
-                
+
                // ONLY accept exact matches (case-insensitive, with or without spaces)
                // This prevents "Goose" from matching "GooseStudio"
-                let is_match = owner_lower == app_name_lower || owner_normalized == app_name_normalized;
-                
+                let is_match =
+                    owner_lower == app_name_lower || owner_normalized == app_name_normalized;
+
                if is_match {
                    // Get window ID
                    let window_id_key = CFString::from_static_string("kCGWindowNumber");
                    if let Some(value) = dict.find(window_id_key.to_void()) {
-                        let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
+                        let num: core_foundation::number::CFNumber =
+                            TCFType::wrap_under_get_rule(*value as *const _);
                        if let Some(id) = num.to_i64() {
                            // Get window layer to filter out menu bar windows
                            let layer_key = CFString::from_static_string("kCGWindowLayer");
                            let layer: i32 = if let Some(value) = dict.find(layer_key.to_void()) {
-                                let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
+                                let num: core_foundation::number::CFNumber =
+                                    TCFType::wrap_under_get_rule(*value as *const _);
                                num.to_i32().unwrap_or(0)
                            } else {
                                0
                            };
-                            
+
                            // Get window bounds to verify it's a real window
                            let bounds_key = CFString::from_static_string("kCGWindowBounds");
-                            let has_real_bounds = if let Some(value) = dict.find(bounds_key.to_void()) {
-                                let bounds_dict: CFDictionary = TCFType::wrap_under_get_rule(*value as *const _);
-                                let width_key = CFString::from_static_string("Width");
-                                let height_key = CFString::from_static_string("Height");
-                                
-                                if let (Some(w_val), Some(h_val)) = (
-                                    bounds_dict.find(width_key.to_void()),
-                                    bounds_dict.find(height_key.to_void()),
-                                ) {
-                                    let w_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*w_val as *const _);
-                                    let h_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*h_val as *const _);
-                                    let width = w_num.to_f64().unwrap_or(0.0);
-                                    let height = h_num.to_f64().unwrap_or(0.0);
-                                    // Real windows should be at least 100x100 pixels
-                                    width >= 100.0 && height >= 100.0
+                            let has_real_bounds =
+                                if let Some(value) = dict.find(bounds_key.to_void()) {
+                                    let bounds_dict: CFDictionary =
+                                        TCFType::wrap_under_get_rule(*value as *const _);
+                                    let width_key = CFString::from_static_string("Width");
+                                    let height_key = CFString::from_static_string("Height");
+
+                                    if let (Some(w_val), Some(h_val)) = (
+                                        bounds_dict.find(width_key.to_void()),
+                                        bounds_dict.find(height_key.to_void()),
+                                    ) {
+                                        let w_num: core_foundation::number::CFNumber =
+                                            TCFType::wrap_under_get_rule(*w_val as *const _);
+                                        let h_num: core_foundation::number::CFNumber =
+                                            TCFType::wrap_under_get_rule(*h_val as *const _);
+                                        let width = w_num.to_f64().unwrap_or(0.0);
+                                        let height = h_num.to_f64().unwrap_or(0.0);
+                                        // Real windows should be at least 100x100 pixels
+                                        width >= 100.0 && height >= 100.0
+                                    } else {
+                                        false
+                                    }
                                } else {
                                    false
-                                }
-                            } else {
-                                false
-                            };
-                            
+                                };
+
                            // Only accept windows that are:
                            // 1. At layer 0 (normal windows, not menu bar)
                            // 2. Have real bounds (width and height >= 100)
@@ -137,189 +159,222 @@ impl ComputerController for MacOSController {
                                found_window_id = Some((id as u32, owner.clone()));
                                break;
                            } else {
-                                tracing::debug!("Skipping window ID {} for '{}': layer={}, has_real_bounds={}", id, owner, layer, has_real_bounds);
+                                tracing::debug!(
+                                    "Skipping window ID {} for '{}': layer={}, has_real_bounds={}",
+                                    id,
+                                    owner,
+                                    layer,
+                                    has_real_bounds
+                                );
                            }
                        }
                    }
                }
            }
-            
+
            found_window_id
        };
-        
+
        let (cg_window_id, matched_owner) = cg_window_id.ok_or_else(|| {
            anyhow::anyhow!("Could not find window for application '{}'. Use list_windows to see available windows.", app_name)
        })?;
-            tracing::info!("Taking screenshot of window ID {} for app '{}'", cg_window_id, matched_owner);
-        
+        tracing::info!(
+            "Taking screenshot of window ID {} for app '{}'",
+            cg_window_id,
+            matched_owner
+        );
+
        // Use screencapture with the window ID for now
        // TODO: Implement direct CGWindowListCreateImage approach with proper image saving
        let mut cmd = std::process::Command::new("screencapture");
        cmd.arg("-x"); // No sound
        cmd.arg("-l");
        cmd.arg(cg_window_id.to_string());
-        
+
        if let Some(region) = region {
            cmd.arg("-R");
-            cmd.arg(format!("{},{},{},{}", region.x, region.y, region.width, region.height));
+            cmd.arg(format!(
+                "{},{},{},{}",
+                region.x, region.y, region.width, region.height
+            ));
        }
-        
+
        cmd.arg(&final_path);
-        
+
        let screenshot_result = cmd.output()?;
-        
+
        if !screenshot_result.status.success() {
            let stderr = String::from_utf8_lossy(&screenshot_result.stderr);
-            return Err(anyhow::anyhow!("screencapture failed for window {}: {}", cg_window_id, stderr));
+            return Err(anyhow::anyhow!(
+                "screencapture failed for window {}: {}",
+                cg_window_id,
+                stderr
+            ));
        }
-        
+
        Ok(())
    }
-    
+
    async fn extract_text_from_screen(&self, region: Rect, window_id: &str) -> Result<String> {
        // Take screenshot of region first
        let temp_path = format!("/tmp/g3_ocr_{}.png", uuid::Uuid::new_v4());
-        self.take_screenshot(&temp_path, Some(region), Some(window_id)).await?;
-        
+        self.take_screenshot(&temp_path, Some(region), Some(window_id))
+            .await?;
+
        // Extract text from the screenshot
        let result = self.extract_text_from_image(&temp_path).await?;
-        
+
        // Clean up temp file
        let _ = std::fs::remove_file(&temp_path);
-        
+
        Ok(result)
    }
-    
+
    async fn extract_text_from_image(&self, path: &str) -> Result<String> {
        // Extract all text and concatenate
        let locations = self.ocr_engine.extract_text_with_locations(path).await?;
-        Ok(locations.iter().map(|loc| loc.text.as_str()).collect::<Vec<_>>().join(" "))
+        Ok(locations
+            .iter()
+            .map(|loc| loc.text.as_str())
+            .collect::<Vec<_>>()
+            .join(" "))
    }
-    
+
    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>> {
        // Use the OCR engine
        self.ocr_engine.extract_text_with_locations(path).await
    }
-    
-    async fn find_text_in_app(&self, app_name: &str, search_text: &str) -> Result<Option<TextLocation>> {
+
+    async fn find_text_in_app(
+        &self,
+        app_name: &str,
+        search_text: &str,
+    ) -> Result<Option<TextLocation>> {
        // Take screenshot of specific app window
        let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
-        let temp_path = format!("{}/tmp/g3_find_text_{}_{}.png", home, app_name, uuid::Uuid::new_v4());
-        self.take_screenshot(&temp_path, None, Some(app_name)).await?;
-        
+        let temp_path = format!(
+            "{}/tmp/g3_find_text_{}_{}.png",
+            home,
+            app_name,
+            uuid::Uuid::new_v4()
+        );
+        self.take_screenshot(&temp_path, None, Some(app_name))
+            .await?;
+
        // Get screenshot dimensions before we delete it
        let screenshot_dims = get_image_dimensions(&temp_path)?;
-        
+
        // Extract all text with locations
        let locations = self.extract_text_with_locations(&temp_path).await?;
-        
+
        // Get window bounds to calculate coordinate transformation
        let window_bounds = self.get_window_bounds(app_name)?;
-        
+
        // Clean up temp file
        let _ = std::fs::remove_file(&temp_path);
-        
+
        // Find matching text (case-insensitive)
        let search_lower = search_text.to_lowercase();
        for location in locations {
            if location.text.to_lowercase().contains(&search_lower) {
                // Transform coordinates from screenshot space to screen space
-                let transformed = transform_screenshot_to_screen_coords(
-                    location,
-                    window_bounds,
-                    screenshot_dims,
-                );
+                let transformed =
+                    transform_screenshot_to_screen_coords(location, window_bounds, screenshot_dims);
                return Ok(Some(transformed));
            }
        }
-        
+
        Ok(None)
    }
-    
+
    fn move_mouse(&self, x: i32, y: i32) -> Result<()> {
-        use core_graphics::event::{
-            CGEvent, CGEventTapLocation, CGEventType, CGMouseButton,
-        };
-        use core_graphics::event_source::{
-            CGEventSource, CGEventSourceStateID,
-        };
+        use core_graphics::event::{CGEvent, CGEventTapLocation, CGEventType, CGMouseButton};
+        use core_graphics::event_source::{CGEventSource, CGEventSourceStateID};
        use core_graphics::geometry::CGPoint;
-        
+
        let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState)
-            .ok().context("Failed to create event source")?;
-        
+            .ok()
+            .context("Failed to create event source")?;
+
        let event = CGEvent::new_mouse_event(
            source,
            CGEventType::MouseMoved,
            CGPoint::new(x as f64, y as f64),
            CGMouseButton::Left,
-        ).ok().context("Failed to create mouse event")?;
-        
+        )
+        .ok()
+        .context("Failed to create mouse event")?;
+
        event.post(CGEventTapLocation::HID);
-        
+
        Ok(())
    }
-    
+
    fn click_at(&self, x: i32, y: i32, _app_name: Option<&str>) -> Result<()> {
-        use core_graphics::event::{
-            CGEvent, CGEventTapLocation, CGEventType, CGMouseButton,
-        };
-        use core_graphics::event_source::{
-            CGEventSource, CGEventSourceStateID,
-        };
-        use core_graphics::geometry::CGPoint;
        use core_graphics::display::CGDisplay;
-        
+        use core_graphics::event::{CGEvent, CGEventTapLocation, CGEventType, CGMouseButton};
+        use core_graphics::event_source::{CGEventSource, CGEventSourceStateID};
+        use core_graphics::geometry::CGPoint;
+
        // IMPORTANT: Coordinates passed here are in NSScreen/CGWindowListCopyWindowInfo space
        // (Y=0 at BOTTOM, increases UPWARD)
        // But CGEvent uses a different coordinate system (Y=0 at TOP, increases DOWNWARD)
        // We need to convert: CGEvent.y = screenHeight - NSScreen.y
-        
+
        let screen_height = CGDisplay::main().pixels_high() as i32;
        let cgevent_x = x;
        let cgevent_y = screen_height - y;
-        
-        tracing::debug!("click_at: NSScreen coords ({}, {}) -> CGEvent coords ({}, {}) [screen_height={}]",
-            x, y, cgevent_x, cgevent_y, screen_height);
-        
+
+        tracing::debug!(
+            "click_at: NSScreen coords ({}, {}) -> CGEvent coords ({}, {}) [screen_height={}]",
+            x,
+            y,
+            cgevent_x,
+            cgevent_y,
+            screen_height
+        );
+
        let (global_x, global_y) = (cgevent_x, cgevent_y);
-        
+
        let point = CGPoint::new(global_x as f64, global_y as f64);
-        
+
        let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState)
-            .ok().context("Failed to create event source")?;
-        
+            .ok()
+            .context("Failed to create event source")?;
+
        // Move mouse to position first
        let move_event = CGEvent::new_mouse_event(
            source.clone(),
            CGEventType::MouseMoved,
            point,
            CGMouseButton::Left,
-        ).ok().context("Failed to create mouse move event")?;
+        )
+        .ok()
+        .context("Failed to create mouse move event")?;
        move_event.post(CGEventTapLocation::HID);
-        
+
        std::thread::sleep(std::time::Duration::from_millis(100));
-        
+
        // Mouse down
        let mouse_down = CGEvent::new_mouse_event(
            source.clone(),
            CGEventType::LeftMouseDown,
            point,
            CGMouseButton::Left,
-        ).ok().context("Failed to create mouse down event")?;
+        )
+        .ok()
+        .context("Failed to create mouse down event")?;
        mouse_down.post(CGEventTapLocation::HID);
-        
+
        std::thread::sleep(std::time::Duration::from_millis(50));
-        
+
        // Mouse up
-        let mouse_up = CGEvent::new_mouse_event(
-            source,
-            CGEventType::LeftMouseUp,
-            point,
-            CGMouseButton::Left,
-        ).ok().context("Failed to create mouse up event")?;
+        let mouse_up =
+            CGEvent::new_mouse_event(source, CGEventType::LeftMouseUp, point, CGMouseButton::Left)
+                .ok()
+                .context("Failed to create mouse up event")?;
        mouse_up.post(CGEventTapLocation::HID);
-        
+
        Ok(())
    }
 }
@@ -328,19 +383,17 @@ impl MacOSController {
    /// Get window bounds for an application (helper method)
    fn get_window_bounds(&self, app_name: &str) -> Result<(i32, i32, i32, i32)> {
        unsafe {
-            let window_list = CGWindowListCopyWindowInfo(
-                kCGWindowListOptionOnScreenOnly,
-                kCGNullWindowID
-            );
-            
+            let window_list =
+                CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID);
+
            let array = CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
            let count = array.len();
-            
+
            let app_name_lower = app_name.to_lowercase();
-            
+
            for i in 0..count {
                let dict = array.get(i).unwrap();
-                
+
                // Get owner name
                let owner_key = CFString::from_static_string("kCGWindowOwnerName");
                let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
@@ -349,65 +402,81 @@ impl MacOSController {
                } else {
                    continue;
                };
-                
+
                let owner_lower = owner.to_lowercase();
-                
+
                // Normalize by removing spaces for exact matching
                let app_name_normalized = app_name_lower.replace(" ", "");
                let owner_normalized = owner_lower.replace(" ", "");
-                
+
                // ONLY accept exact matches (case-insensitive, with or without spaces)
                // This prevents "Goose" from matching "GooseStudio"
-                let is_match = owner_lower == app_name_lower || owner_normalized == app_name_normalized;
-                
+                let is_match =
+                    owner_lower == app_name_lower || owner_normalized == app_name_normalized;
+
                if is_match {
                    // Get window layer to filter out menu bar windows
                    let layer_key = CFString::from_static_string("kCGWindowLayer");
                    let layer: i32 = if let Some(value) = dict.find(layer_key.to_void()) {
-                        let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
+                        let num: core_foundation::number::CFNumber =
+                            TCFType::wrap_under_get_rule(*value as *const _);
                        num.to_i32().unwrap_or(0)
                    } else {
                        0
                    };
-                    
+
                    // Skip menu bar windows (layer >= 20)
                    if layer >= 20 {
-                        tracing::debug!("Skipping window for '{}' at layer {} (menu bar)", owner, layer);
+                        tracing::debug!(
+                            "Skipping window for '{}' at layer {} (menu bar)",
+                            owner,
+                            layer
+                        );
                        continue;
                    }
-                    
+
                    // Get window bounds to verify it's a real window
                    let bounds_key = CFString::from_static_string("kCGWindowBounds");
                    if let Some(value) = dict.find(bounds_key.to_void()) {
-                        let bounds_dict: CFDictionary = TCFType::wrap_under_get_rule(*value as *const _);
-                        
+                        let bounds_dict: CFDictionary =
+                            TCFType::wrap_under_get_rule(*value as *const _);
+
                        let x_key = CFString::from_static_string("X");
                        let y_key = CFString::from_static_string("Y");
                        let width_key = CFString::from_static_string("Width");
                        let height_key = CFString::from_static_string("Height");
-                        
+
                        if let (Some(x_val), Some(y_val), Some(w_val), Some(h_val)) = (
                            bounds_dict.find(x_key.to_void()),
                            bounds_dict.find(y_key.to_void()),
                            bounds_dict.find(width_key.to_void()),
                            bounds_dict.find(height_key.to_void()),
                        ) {
-                            let x_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*x_val as *const _);
-                            let y_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*y_val as *const _);
-                            let w_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*w_val as *const _);
-                            let h_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*h_val as *const _);
-                            
+                            let x_num: core_foundation::number::CFNumber =
+                                TCFType::wrap_under_get_rule(*x_val as *const _);
+                            let y_num: core_foundation::number::CFNumber =
+                                TCFType::wrap_under_get_rule(*y_val as *const _);
+                            let w_num: core_foundation::number::CFNumber =
+                                TCFType::wrap_under_get_rule(*w_val as *const _);
+                            let h_num: core_foundation::number::CFNumber =
+                                TCFType::wrap_under_get_rule(*h_val as *const _);
+
                            let x: i32 = x_num.to_i64().unwrap_or(0) as i32;
                            let y: i32 = y_num.to_i64().unwrap_or(0) as i32;
                            let w: i32 = w_num.to_i64().unwrap_or(0) as i32;
                            let h: i32 = h_num.to_i64().unwrap_or(0) as i32;
-                            
+
                            // Only accept windows with real bounds (>= 100x100 pixels)
                            if w >= 100 && h >= 100 {
                                tracing::info!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer);
                                return Ok((x, y, w, h));
                            } else {
-                                tracing::debug!("Skipping window for '{}': too small ({}x{})", owner, w, h);
+                                tracing::debug!(
+                                    "Skipping window for '{}': too small ({}x{})",
+                                    owner,
+                                    w,
+                                    h
+                                );
                                continue;
                            }
                        } else {
@@ -417,8 +486,11 @@ impl MacOSController {
                }
            }
        }
-        
-        Err(anyhow::anyhow!("Could not find window bounds for '{}'", app_name))
+
+        Err(anyhow::anyhow!(
+            "Could not find window bounds for '{}'",
+            app_name
+        ))
    }
 }

@@ -426,72 +498,118 @@ impl MacOSController {
 fn get_image_dimensions(path: &str) -> Result<(i32, i32)> {
    use std::fs::File;
    use std::io::Read;
-    
+
    let mut file = File::open(path)?;
    let mut buffer = vec![0u8; 24];
    file.read_exact(&mut buffer)?;
-    
+
    // PNG signature check
    if &buffer[0..8] != b"\x89PNG\r\n\x1a\n" {
        anyhow::bail!("Not a valid PNG file");
    }
-    
+
    // Read IHDR chunk (width and height are at bytes 16-23)
    let width = u32::from_be_bytes([buffer[16], buffer[17], buffer[18], buffer[19]]) as i32;
    let height = u32::from_be_bytes([buffer[20], buffer[21], buffer[22], buffer[23]]) as i32;
-    
+
    Ok((width, height))
 }

 /// Transform coordinates from screenshot space to screen space
-/// 
+///
 /// The screenshot is taken of a window, and Vision OCR returns coordinates
 /// relative to the screenshot image. We need to transform these to actual
 /// screen coordinates for clicking.
-/// 
+///
 /// On Retina displays, screenshots are taken at 2x resolution, so we need
 /// to account for this scaling factor.
 fn transform_screenshot_to_screen_coords(
    location: TextLocation,
    window_bounds: (i32, i32, i32, i32), // (x, y, width, height) in screen space
-    screenshot_dims: (i32, i32), // (width, height) in pixels
+    screenshot_dims: (i32, i32),         // (width, height) in pixels
 ) -> TextLocation {
    let (win_x, win_y, win_width, win_height) = window_bounds;
    let (screenshot_width, screenshot_height) = screenshot_dims;
-    
+
    // Calculate scale factors
    // On Retina displays, screenshot is typically 2x the window size
    let scale_x = win_width as f64 / screenshot_width as f64;
    let scale_y = win_height as f64 / screenshot_height as f64;
-    
-    tracing::debug!("Transform: screenshot={}x{}, window={}x{} at ({},{}), scale=({:.2},{:.2})",
-        screenshot_width, screenshot_height, win_width, win_height, win_x, win_y, scale_x, scale_y);
-    
+
+    tracing::debug!(
+        "Transform: screenshot={}x{}, window={}x{} at ({},{}), scale=({:.2},{:.2})",
+        screenshot_width,
+        screenshot_height,
+        win_width,
+        win_height,
+        win_x,
+        win_y,
+        scale_x,
+        scale_y
+    );
+
    // Transform coordinates from image space to screen space
    // IMPORTANT: macOS screen coordinates have origin at BOTTOM-LEFT (Y increases upward)
    // Image coordinates have origin at TOP-LEFT (Y increases downward)
    // win_y is the BOTTOM of the window in screen coordinates
    // So we need to: (win_y + win_height) to get window TOP, then subtract screenshot_y
    let window_top_y = win_y + win_height;
-    
-    tracing::debug!("[transform] Input location in image space: x={}, y={}, width={}, height={}",
-        location.x, location.y, location.width, location.height);
-    tracing::debug!("[transform] Scale factors: scale_x={:.4}, scale_y={:.4}", scale_x, scale_y);
-    
+
+    tracing::debug!(
+        "[transform] Input location in image space: x={}, y={}, width={}, height={}",
+        location.x,
+        location.y,
+        location.width,
+        location.height
+    );
+    tracing::debug!(
+        "[transform] Scale factors: scale_x={:.4}, scale_y={:.4}",
+        scale_x,
+        scale_y
+    );
+
    let transformed_x = win_x + (location.x as f64 * scale_x) as i32;
    let transformed_y = window_top_y - (location.y as f64 * scale_y) as i32;
    let transformed_width = (location.width as f64 * scale_x) as i32;
    let transformed_height = (location.height as f64 * scale_y) as i32;
-    
+
    tracing::debug!("[transform] Calculation details:");
-    tracing::debug!("  - transformed_x = {} + ({} * {:.4}) = {} + {:.2} = {}", win_x, location.x, scale_x, win_x, location.x as f64 * scale_x, transformed_x);
-    tracing::debug!("  - transformed_width = ({} * {:.4}) = {:.2} -> {}", location.width, scale_x, location.width as f64 * scale_x, transformed_width);
-    tracing::debug!("  - transformed_height = ({} * {:.4}) = {:.2} -> {}", location.height, scale_y, location.height as f64 * scale_y, transformed_height);
-    
-    tracing::debug!("Transformed location: screenshot=({},{}) {}x{} -> screen=({},{}) {}x{}",
-        location.x, location.y, location.width, location.height,
-        transformed_x, transformed_y, transformed_width, transformed_height);
-    
+    tracing::debug!(
+        "  - transformed_x = {} + ({} * {:.4}) = {} + {:.2} = {}",
+        win_x,
+        location.x,
+        scale_x,
+        win_x,
+        location.x as f64 * scale_x,
+        transformed_x
+    );
+    tracing::debug!(
+        "  - transformed_width = ({} * {:.4}) = {:.2} -> {}",
+        location.width,
+        scale_x,
+        location.width as f64 * scale_x,
+        transformed_width
+    );
+    tracing::debug!(
+        "  - transformed_height = ({} * {:.4}) = {:.2} -> {}",
+        location.height,
+        scale_y,
+        location.height as f64 * scale_y,
+        transformed_height
+    );
+
+    tracing::debug!(
+        "Transformed location: screenshot=({},{}) {}x{} -> screen=({},{}) {}x{}",
+        location.x,
+        location.y,
+        location.width,
+        location.height,
+        transformed_x,
+        transformed_y,
+        transformed_width,
+        transformed_height
+    );
+
    TextLocation {
        text: location.text,
        x: transformed_x,
@@ -504,4 +622,4 @@ fn transform_screenshot_to_screen_coords(

 #[path = "macos_window_matching_test.rs"]
 #[cfg(test)]
-mod tests;
+mod tests;
--- a/crates/g3-computer-control/src/platform/macos_window_matching_test.rs
+++ b/crates/g3-computer-control/src/platform/macos_window_matching_test.rs
@@ -1,11 +1,11 @@
 #[cfg(test)]
 mod window_matching_tests {
    /// Test that window name matching handles spaces correctly
-    /// 
+    ///
    /// Issue: When a user requests a screenshot of "Goose Studio" but the actual
    /// application name is "GooseStudio" (no space), the fuzzy matching should
    /// still find the window.
-    /// 
+    ///
    /// The fix normalizes both names by removing spaces before comparing.
    #[test]
    fn test_space_normalization() {
@@ -16,25 +16,25 @@ mod window_matching_tests {
            ("Visual Studio Code", "VisualStudioCode", true),
            ("Google Chrome", "Google Chrome", true),
            ("Safari", "Safari", true),
-            ("iTerm", "iTerm2", true), // fuzzy match
+            ("iTerm", "iTerm2", true),            // fuzzy match
            ("Code", "Visual Studio Code", true), // fuzzy match
        ];

        for (user_input, app_name, should_match) in test_cases {
            let user_lower = user_input.to_lowercase();
            let app_lower = app_name.to_lowercase();
-            
+
            let user_normalized = user_lower.replace(" ", "");
            let app_normalized = app_lower.replace(" ", "");
-            
+
            let is_exact = app_lower == user_lower || app_normalized == user_normalized;
-            let is_fuzzy = app_lower.contains(&user_lower) 
+            let is_fuzzy = app_lower.contains(&user_lower)
                || user_lower.contains(&app_lower)
                || app_normalized.contains(&user_normalized)
                || user_normalized.contains(&app_normalized);
-            
+
            let matches = is_exact || is_fuzzy;
-            
+
            assert_eq!(
                matches, should_match,
                "Expected '{}' vs '{}' to match={}, but got match={}",
--- a/crates/g3-computer-control/src/platform/windows.rs
+++ b/crates/g3-computer-control/src/platform/windows.rs
@@ -1,4 +1,4 @@
-use crate::{ComputerController, types::*};
+use crate::{types::*, ComputerController};
 use anyhow::Result;
 use async_trait::async_trait;
 use tesseract::Tesseract;
@@ -20,48 +20,53 @@ impl ComputerController for WindowsController {
    async fn move_mouse(&self, _x: i32, _y: i32) -> Result<()> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn click(&self, _button: MouseButton) -> Result<()> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn double_click(&self, _button: MouseButton) -> Result<()> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn type_text(&self, _text: &str) -> Result<()> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn press_key(&self, _key: &str) -> Result<()> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn list_windows(&self) -> Result<Vec<Window>> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn focus_window(&self, _window_id: &str) -> Result<()> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn get_window_bounds(&self, _window_id: &str) -> Result<Rect> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn find_element(&self, _selector: &ElementSelector) -> Result<Option<UIElement>> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn get_element_text(&self, _element_id: &str) -> Result<String> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn get_element_bounds(&self, _element_id: &str) -> Result<Rect> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
-    async fn take_screenshot(&self, _path: &str, _region: Option<Rect>, _window_id: Option<&str>) -> Result<()> {
+
+    async fn take_screenshot(
+        &self,
+        _path: &str,
+        _region: Option<Rect>,
+        _window_id: Option<&str>,
+    ) -> Result<()> {
        // Enforce that window_id must be provided
        if _window_id.is_none() {
            anyhow::bail!("window_id is required. You must specify which window to capture (e.g., 'Chrome', 'Terminal', 'Notepad'). Use list_windows to see available windows.");
@@ -69,96 +74,113 @@ impl ComputerController for WindowsController {

        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn extract_text_from_screen(&self, _region: Rect, _window_id: &str) -> Result<String> {
        anyhow::bail!("Windows implementation not yet available")
    }
-    
+
    async fn extract_text_from_image(&self, _path: &str) -> Result<OCRResult> {
        // Check if tesseract is available on the system
        let tesseract_check = std::process::Command::new("where")
            .arg("tesseract")
            .output();
-        
+
        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
+            anyhow::bail!(
+                "Tesseract OCR is not installed on your system.\n\n\
                To install tesseract on Windows:\n  \
                1. Download the installer from: https://github.com/UB-Mannheim/tesseract/wiki\n  \
                2. Run the installer and follow the instructions\n  \
                3. Add tesseract to your PATH environment variable\n  \
                4. Restart your terminal/command prompt\n\n\
-                After installation, restart your terminal and try again.");
+                After installation, restart your terminal and try again."
+            );
        }
-        
+
        // Initialize Tesseract
-        let tess = Tesseract::new(None, Some("eng"))
-            .map_err(|e| {
-                anyhow::anyhow!("Failed to initialize Tesseract: {}\n\n\
+        let tess = Tesseract::new(None, Some("eng")).map_err(|e| {
+            anyhow::anyhow!(
+                "Failed to initialize Tesseract: {}\n\n\
                    This usually means:\n1. Tesseract is not properly installed\n\
                    2. Language data files are missing\n\nTo fix:\n  \
                    1. Reinstall tesseract from https://github.com/UB-Mannheim/tesseract/wiki\n  \
                    2. Make sure to select 'Additional language data' during installation\n  \
-                    3. Ensure tesseract is in your PATH", e)
-            })?;
-        
-        let text = tess.set_image(_path)
+                    3. Ensure tesseract is in your PATH",
+                e
+            )
+        })?;
+
+        let text = tess
+            .set_image(_path)
            .map_err(|e| anyhow::anyhow!("Failed to load image '{}': {}", _path, e))?
            .get_text()
            .map_err(|e| anyhow::anyhow!("Failed to extract text from image: {}", e))?;
-        
+
        // Get confidence (simplified - would need more complex API calls for per-word confidence)
        let confidence = 0.85; // Placeholder
-        
+
        Ok(OCRResult {
            text,
            confidence,
-            bounds: Rect { x: 0, y: 0, width: 0, height: 0 }, // Would need image dimensions
+            bounds: Rect {
+                x: 0,
+                y: 0,
+                width: 0,
+                height: 0,
+            }, // Would need image dimensions
        })
    }
-    
+
    async fn find_text_on_screen(&self, _text: &str) -> Result<Option<Point>> {
        // Check if tesseract is available on the system
        let tesseract_check = std::process::Command::new("where")
            .arg("tesseract")
            .output();
-        
+
        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
+            anyhow::bail!(
+                "Tesseract OCR is not installed on your system.\n\n\
                To install tesseract on Windows:\n  \
                1. Download the installer from: https://github.com/UB-Mannheim/tesseract/wiki\n  \
                2. Run the installer and follow the instructions\n  \
                3. Add tesseract to your PATH environment variable\n  \
                4. Restart your terminal/command prompt\n\n\
-                After installation, restart your terminal and try again.");
+                After installation, restart your terminal and try again."
+            );
        }
-        
+
        // Take full screen screenshot
        let temp_path = format!("C:\\\\Temp\\\\g3_ocr_search_{}.png", uuid::Uuid::new_v4());
        self.take_screenshot(&temp_path, None, None).await?;
-        
+
        // Use Tesseract to find text with bounding boxes
-        let tess = Tesseract::new(None, Some("eng"))
-            .map_err(|e| {
-                anyhow::anyhow!("Failed to initialize Tesseract: {}\n\n\
+        let tess = Tesseract::new(None, Some("eng")).map_err(|e| {
+            anyhow::anyhow!(
+                "Failed to initialize Tesseract: {}\n\n\
                    This usually means:\n1. Tesseract is not properly installed\n\
                    2. Language data files are missing\n\nTo fix:\n  \
                    1. Reinstall tesseract from https://github.com/UB-Mannheim/tesseract/wiki\n  \
                    2. Make sure to select 'Additional language data' during installation\n  \
-                    3. Ensure tesseract is in your PATH", e)
-            })?;
-        
-        let full_text = tess.set_image(temp_path.as_str())
+                    3. Ensure tesseract is in your PATH",
+                e
+            )
+        })?;
+
+        let full_text = tess
+            .set_image(temp_path.as_str())
            .map_err(|e| anyhow::anyhow!("Failed to load screenshot: {}", e))?
            .get_text()
            .map_err(|e| anyhow::anyhow!("Failed to extract text from screen: {}", e))?;
-        
+
        // Clean up temp file
        let _ = std::fs::remove_file(&temp_path);
-        
+
        // Simple text search - full implementation would use get_component_images
        // to get bounding boxes for each word
        if full_text.contains(_text) {
-            tracing::warn!("Text found but precise coordinates not available in simplified implementation");
+            tracing::warn!(
+                "Text found but precise coordinates not available in simplified implementation"
+            );
            Ok(Some(Point { x: 0, y: 0 }))
        } else {
            Ok(None)
--- a/crates/g3-computer-control/src/webdriver/mod.rs
+++ b/crates/g3-computer-control/src/webdriver/mod.rs
@@ -9,31 +9,31 @@ use serde_json::Value;
 pub trait WebDriverController: Send + Sync {
    /// Navigate to a URL
    async fn navigate(&mut self, url: &str) -> Result<()>;
-    
+
    /// Get the current URL
    async fn current_url(&self) -> Result<String>;
-    
+
    /// Get the page title
    async fn title(&self) -> Result<String>;
-    
+
    /// Find an element by CSS selector
    async fn find_element(&mut self, selector: &str) -> Result<WebElement>;
-    
+
    /// Find multiple elements by CSS selector
    async fn find_elements(&mut self, selector: &str) -> Result<Vec<WebElement>>;
-    
+
    /// Execute JavaScript in the browser
    async fn execute_script(&mut self, script: &str, args: Vec<Value>) -> Result<Value>;
-    
+
    /// Get the page source (HTML)
    async fn page_source(&self) -> Result<String>;
-    
+
    /// Take a screenshot and save to path
    async fn screenshot(&mut self, path: &str) -> Result<()>;
-    
+
    /// Close the current window/tab
    async fn close(&mut self) -> Result<()>;
-    
+
    /// Quit the browser session
    async fn quit(self) -> Result<()>;
 }
@@ -49,63 +49,69 @@ impl WebElement {
        self.inner.click().await?;
        Ok(())
    }
-    
+
    /// Send keys/text to the element
    pub async fn send_keys(&mut self, text: &str) -> Result<()> {
        self.inner.send_keys(text).await?;
        Ok(())
    }
-    
+
    /// Clear the element's content (for input fields)
    pub async fn clear(&mut self) -> Result<()> {
        self.inner.clear().await?;
        Ok(())
    }
-    
+
    /// Get the element's text content
    pub async fn text(&self) -> Result<String> {
        Ok(self.inner.text().await?)
    }
-    
+
    /// Get an attribute value
    pub async fn attr(&self, name: &str) -> Result<Option<String>> {
        Ok(self.inner.attr(name).await?)
    }
-    
+
    /// Get a property value
    pub async fn prop(&self, name: &str) -> Result<Option<String>> {
        Ok(self.inner.prop(name).await?)
    }
-    
+
    /// Get the element's HTML
    pub async fn html(&self, inner: bool) -> Result<String> {
        Ok(self.inner.html(inner).await?)
    }
-    
+
    /// Check if element is displayed
    pub async fn is_displayed(&self) -> Result<bool> {
        Ok(self.inner.is_displayed().await?)
    }
-    
+
    /// Check if element is enabled
    pub async fn is_enabled(&self) -> Result<bool> {
        Ok(self.inner.is_enabled().await?)
    }
-    
+
    /// Check if element is selected (for checkboxes/radio buttons)
    pub async fn is_selected(&self) -> Result<bool> {
        Ok(self.inner.is_selected().await?)
    }
-    
+
    /// Find a child element by CSS selector
    pub async fn find_element(&mut self, selector: &str) -> Result<WebElement> {
        let elem = self.inner.find(fantoccini::Locator::Css(selector)).await?;
        Ok(WebElement { inner: elem })
    }
-    
+
    /// Find multiple child elements by CSS selector
    pub async fn find_elements(&mut self, selector: &str) -> Result<Vec<WebElement>> {
-        let elems = self.inner.find_all(fantoccini::Locator::Css(selector)).await?;
-        Ok(elems.into_iter().map(|inner| WebElement { inner }).collect())
+        let elems = self
+            .inner
+            .find_all(fantoccini::Locator::Css(selector))
+            .await?;
+        Ok(elems
+            .into_iter()
+            .map(|inner| WebElement { inner })
+            .collect())
    }
 }
--- a/crates/g3-computer-control/src/webdriver/safari.rs
+++ b/crates/g3-computer-control/src/webdriver/safari.rs
@@ -12,10 +12,10 @@ pub struct SafariDriver {

 impl SafariDriver {
    /// Create a new SafariDriver instance
-    /// 
+    ///
    /// This will connect to SafariDriver running on the default port (4444).
    /// Make sure to enable "Allow Remote Automation" in Safari's Develop menu first.
-    /// 
+    ///
    /// You can start SafariDriver manually with:
    /// ```bash
    /// /usr/bin/safaridriver --enable
@@ -23,125 +23,134 @@ impl SafariDriver {
    pub async fn new() -> Result<Self> {
        Self::with_port(4444).await
    }
-    
+
    /// Create a new SafariDriver instance with a custom port
    pub async fn with_port(port: u16) -> Result<Self> {
        let url = format!("http://localhost:{}", port);
-        
+
        let mut caps = serde_json::Map::new();
-        caps.insert("browserName".to_string(), Value::String("safari".to_string()));
-        
+        caps.insert(
+            "browserName".to_string(),
+            Value::String("safari".to_string()),
+        );
+
        let client = ClientBuilder::native()
            .capabilities(caps)
            .connect(&url)
            .await
            .context("Failed to connect to SafariDriver. Make sure SafariDriver is running and 'Allow Remote Automation' is enabled in Safari's Develop menu.")?;
-        
+
        Ok(Self { client })
    }
-    
+
    /// Go back in browser history
    pub async fn back(&mut self) -> Result<()> {
        self.client.back().await?;
        Ok(())
    }
-    
+
    /// Go forward in browser history
    pub async fn forward(&mut self) -> Result<()> {
        self.client.forward().await?;
        Ok(())
    }
-    
+
    /// Refresh the current page
    pub async fn refresh(&mut self) -> Result<()> {
        self.client.refresh().await?;
        Ok(())
    }
-    
+
    /// Get all window handles
    pub async fn window_handles(&mut self) -> Result<Vec<String>> {
        let handles = self.client.windows().await?;
-        Ok(handles.into_iter()
-            .map(|h| h.into())
-            .collect())
+        Ok(handles.into_iter().map(|h| h.into()).collect())
    }
-    
+
    /// Switch to a window by handle
    pub async fn switch_to_window(&mut self, handle: &str) -> Result<()> {
        let window_handle: fantoccini::wd::WindowHandle = handle.to_string().try_into()?;
        self.client.switch_to_window(window_handle).await?;
        Ok(())
    }
-    
+
    /// Get the current window handle
    pub async fn current_window_handle(&mut self) -> Result<String> {
        Ok(self.client.window().await?.into())
    }
-    
+
    /// Close the current window
    pub async fn close_window(&mut self) -> Result<()> {
        self.client.close_window().await?;
        Ok(())
    }
-    
+
    /// Create a new window/tab
    pub async fn new_window(&mut self, is_tab: bool) -> Result<String> {
        let window_type = if is_tab { "tab" } else { "window" };
        let response = self.client.new_window(window_type == "tab").await?;
        Ok(response.handle.into())
    }
-    
+
    /// Get cookies
    pub async fn get_cookies(&mut self) -> Result<Vec<fantoccini::cookies::Cookie<'static>>> {
        Ok(self.client.get_all_cookies().await?)
    }
-    
+
    /// Add a cookie
    pub async fn add_cookie(&mut self, cookie: fantoccini::cookies::Cookie<'static>) -> Result<()> {
        self.client.add_cookie(cookie).await?;
        Ok(())
    }
-    
+
    /// Delete all cookies
    pub async fn delete_all_cookies(&mut self) -> Result<()> {
        self.client.delete_all_cookies().await?;
        Ok(())
    }
-    
+
    /// Wait for an element to appear (with timeout)
-    pub async fn wait_for_element(&mut self, selector: &str, timeout: Duration) -> Result<WebElement> {
+    pub async fn wait_for_element(
+        &mut self,
+        selector: &str,
+        timeout: Duration,
+    ) -> Result<WebElement> {
        let start = std::time::Instant::now();
        let poll_interval = Duration::from_millis(100);
-        
+
        loop {
            if let Ok(elem) = self.find_element(selector).await {
                return Ok(elem);
            }
-            
+
            if start.elapsed() >= timeout {
                anyhow::bail!("Timeout waiting for element: {}", selector);
            }
-            
+
            tokio::time::sleep(poll_interval).await;
        }
    }
-    
+
    /// Wait for an element to be visible (with timeout)
-    pub async fn wait_for_visible(&mut self, selector: &str, timeout: Duration) -> Result<WebElement> {
+    pub async fn wait_for_visible(
+        &mut self,
+        selector: &str,
+        timeout: Duration,
+    ) -> Result<WebElement> {
        let start = std::time::Instant::now();
        let poll_interval = Duration::from_millis(100);
-        
+
        loop {
            if let Ok(elem) = self.find_element(selector).await {
                if elem.is_displayed().await.unwrap_or(false) {
                    return Ok(elem);
                }
            }
-            
+
            if start.elapsed() >= timeout {
                anyhow::bail!("Timeout waiting for element to be visible: {}", selector);
            }
-            
+
            tokio::time::sleep(poll_interval).await;
        }
    }
@@ -153,58 +162,69 @@ impl WebDriverController for SafariDriver {
        self.client.goto(url).await?;
        Ok(())
    }
-    
+
    async fn current_url(&self) -> Result<String> {
        Ok(self.client.current_url().await?.to_string())
    }
-    
+
    async fn title(&self) -> Result<String> {
        Ok(self.client.title().await?)
    }
-    
+
    async fn find_element(&mut self, selector: &str) -> Result<WebElement> {
-        let elem = self.client.find(fantoccini::Locator::Css(selector)).await
-            .context(format!("Failed to find element with selector: {}", selector))?;
+        let elem = self
+            .client
+            .find(fantoccini::Locator::Css(selector))
+            .await
+            .context(format!(
+                "Failed to find element with selector: {}",
+                selector
+            ))?;
        Ok(WebElement { inner: elem })
    }
-    
+
    async fn find_elements(&mut self, selector: &str) -> Result<Vec<WebElement>> {
-        let elems = self.client.find_all(fantoccini::Locator::Css(selector)).await?;
-        Ok(elems.into_iter().map(|inner| WebElement { inner }).collect())
+        let elems = self
+            .client
+            .find_all(fantoccini::Locator::Css(selector))
+            .await?;
+        Ok(elems
+            .into_iter()
+            .map(|inner| WebElement { inner })
+            .collect())
    }
-    
+
    async fn execute_script(&mut self, script: &str, args: Vec<Value>) -> Result<Value> {
        Ok(self.client.execute(script, args).await?)
    }
-    
+
    async fn page_source(&self) -> Result<String> {
        Ok(self.client.source().await?)
    }
-    
+
    async fn screenshot(&mut self, path: &str) -> Result<()> {
        let screenshot_data = self.client.screenshot().await?;
-        
+
        // Expand tilde in path
        let expanded_path = shellexpand::tilde(path);
        let path_str = expanded_path.as_ref();
-        
+
        // Create parent directories if needed
        if let Some(parent) = std::path::Path::new(path_str).parent() {
            std::fs::create_dir_all(parent)
                .context("Failed to create parent directories for screenshot")?;
        }
-        
-        std::fs::write(path_str, screenshot_data)
-            .context("Failed to write screenshot to file")?;
-        
+
+        std::fs::write(path_str, screenshot_data).context("Failed to write screenshot to file")?;
+
        Ok(())
    }
-    
+
    async fn close(&mut self) -> Result<()> {
        self.client.close_window().await?;
        Ok(())
    }
-    
+
    async fn quit(mut self) -> Result<()> {
        self.client.close().await?;
        Ok(())
--- a/crates/g3-computer-control/tests/integration_test.rs
+++ b/crates/g3-computer-control/tests/integration_test.rs
@@ -3,29 +3,35 @@ use g3_computer_control::*;
 #[tokio::test]
 async fn test_screenshot() {
    let controller = create_controller().expect("Failed to create controller");
-    
+
    // Test that screenshot without window_id fails with appropriate error
    let path = "/tmp/test_screenshot.png";
    let result = controller.take_screenshot(path, None, None).await;
-    assert!(result.is_err(), "Expected error when window_id is not provided");
-    
+    assert!(
+        result.is_err(),
+        "Expected error when window_id is not provided"
+    );
+
    let error_msg = result.unwrap_err().to_string();
-    assert!(error_msg.contains("window_id is required"), 
-        "Expected error message about window_id being required, got: {}", error_msg);
+    assert!(
+        error_msg.contains("window_id is required"),
+        "Expected error message about window_id being required, got: {}",
+        error_msg
+    );
 }

 #[tokio::test]
 async fn test_screenshot_with_window() {
    let controller = create_controller().expect("Failed to create controller");
-    
+
    // Take screenshot of Finder (should always be available on macOS)
    let path = "/tmp/test_screenshot_finder.png";
    let result = controller.take_screenshot(path, None, Some("Finder")).await;
-    
+
    // This test may fail if Finder is not running, so we just check it doesn't panic
    // and returns a proper Result
    let _ = result; // Don't assert success since Finder might not be visible
-    
+
    // Clean up
    let _ = std::fs::remove_file(path);
 }
--- a/crates/g3-config/Cargo.toml
+++ b/crates/g3-config/Cargo.toml
@@ -15,3 +15,4 @@ dirs = "5.0"

 [dev-dependencies]
 tempfile = "3.8"
+serde_json = { workspace = true }
--- a/crates/g3-config/src/lib.rs
+++ b/crates/g3-config/src/lib.rs
@@ -1,5 +1,5 @@
-use serde::{Deserialize, Serialize};
 use anyhow::Result;
+use serde::{Deserialize, Serialize};
 use std::path::Path;

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -21,7 +21,7 @@ pub struct ProvidersConfig {
    pub databricks: Option<DatabricksConfig>,
    pub embedded: Option<EmbeddedConfig>,
    pub default_provider: String,
-    pub coach: Option<String>,  // Provider to use for coach in autonomous mode
+    pub coach: Option<String>, // Provider to use for coach in autonomous mode
    pub player: Option<String>, // Provider to use for player in autonomous mode
 }

@@ -40,6 +40,9 @@ pub struct AnthropicConfig {
    pub model: String,
    pub max_tokens: Option<u32>,
    pub temperature: Option<f32>,
+    pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable
+    pub enable_1m_context: Option<bool>, // Enable 1m context window (costs extra)
+    pub thinking_budget_tokens: Option<u32>, // Budget tokens for extended thinking
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -68,10 +71,17 @@ pub struct AgentConfig {
    pub max_context_length: Option<u32>,
    pub fallback_default_max_tokens: usize,
    pub enable_streaming: bool,
+    pub allow_multiple_tool_calls: bool,
    pub timeout_seconds: u64,
    pub auto_compact: bool,
    pub max_retry_attempts: u32,
    pub autonomous_max_retry_attempts: u32,
+    #[serde(default = "default_check_todo_staleness")]
+    pub check_todo_staleness: bool,
+}
+
+fn default_check_todo_staleness() -> bool {
+    true
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -94,9 +104,7 @@ pub struct MacAxConfig {

 impl Default for MacAxConfig {
    fn default() -> Self {
-        Self {
-            enabled: false,
-        }
+        Self { enabled: false }
    }
 }

@@ -143,10 +151,12 @@ impl Default for Config {
                max_context_length: None,
                fallback_default_max_tokens: 8192,
                enable_streaming: true,
+                allow_multiple_tool_calls: false,
                timeout_seconds: 60,
                auto_compact: true,
                max_retry_attempts: 3,
                autonomous_max_retry_attempts: 6,
+                check_todo_staleness: true,
            },
            computer_control: ComputerControlConfig::default(),
            webdriver: WebDriverConfig::default(),
@@ -162,22 +172,18 @@ impl Config {
            Path::new(path).exists()
        } else {
            // Check default locations
-            let default_paths = [
-                "./g3.toml",
-                "~/.config/g3/config.toml",
-                "~/.g3.toml",
-            ];
-            
+            let default_paths = ["./g3.toml", "~/.config/g3/config.toml", "~/.g3.toml"];
+
            default_paths.iter().any(|path| {
                let expanded_path = shellexpand::tilde(path);
                Path::new(expanded_path.as_ref()).exists()
            })
        };
-        
+
        // If no config exists, create and save a default Databricks config
        if !config_exists {
            let databricks_config = Self::default();
-            
+
            // Save to default location
            let config_dir = dirs::home_dir()
                .map(|mut path| {
@@ -186,26 +192,29 @@ impl Config {
                    path
                })
                .unwrap_or_else(|| std::path::PathBuf::from("."));
-            
+
            // Create directory if it doesn't exist
            std::fs::create_dir_all(&config_dir).ok();
-            
+
            let config_file = config_dir.join("config.toml");
            if let Err(e) = databricks_config.save(config_file.to_str().unwrap()) {
                eprintln!("Warning: Could not save default config: {}", e);
            } else {
-                println!("Created default Databricks configuration at: {}", config_file.display());
+                println!(
+                    "Created default Databricks configuration at: {}",
+                    config_file.display()
+                );
            }
-            
+
            return Ok(databricks_config);
        }
-        
+
        // Existing config loading logic
        let mut settings = config::Config::builder();
-        
+
        // Load default configuration
        settings = settings.add_source(config::Config::try_from(&Config::default())?);
-        
+
        // Load from config file if provided
        if let Some(path) = config_path {
            if Path::new(path).exists() {
@@ -213,12 +222,8 @@ impl Config {
            }
        } else {
            // Try to load from default locations
-            let default_paths = [
-                "./g3.toml",
-                "~/.config/g3/config.toml",
-                "~/.g3.toml",
-            ];
-            
+            let default_paths = ["./g3.toml", "~/.config/g3/config.toml", "~/.g3.toml"];
+
            for path in &default_paths {
                let expanded_path = shellexpand::tilde(path);
                if Path::new(expanded_path.as_ref()).exists() {
@@ -227,13 +232,10 @@ impl Config {
                }
            }
        }
-        
+
        // Override with environment variables
-        settings = settings.add_source(
-            config::Environment::with_prefix("G3")
-                .separator("_")
-        );
-        
+        settings = settings.add_source(config::Environment::with_prefix("G3").separator("_"));
+
        let config = settings.build()?.try_deserialize()?;
        Ok(config)
    }
@@ -249,7 +251,7 @@ impl Config {
                embedded: Some(EmbeddedConfig {
                    model_path: "~/.cache/g3/models/qwen2.5-7b-instruct-q3_k_m.gguf".to_string(),
                    model_type: "qwen".to_string(),
-                    context_length: Some(32768),  // Qwen2.5 supports 32k context
+                    context_length: Some(32768), // Qwen2.5 supports 32k context
                    max_tokens: Some(2048),
                    temperature: Some(0.1),
                    gpu_layers: Some(32),
@@ -263,23 +265,25 @@ impl Config {
                max_context_length: None,
                fallback_default_max_tokens: 8192,
                enable_streaming: true,
+                allow_multiple_tool_calls: false,
                timeout_seconds: 60,
                auto_compact: true,
                max_retry_attempts: 3,
                autonomous_max_retry_attempts: 6,
+                check_todo_staleness: true,
            },
            computer_control: ComputerControlConfig::default(),
            webdriver: WebDriverConfig::default(),
            macax: MacAxConfig::default(),
        }
    }
-    
+
    pub fn save(&self, path: &str) -> Result<()> {
        let toml_string = toml::to_string_pretty(self)?;
        std::fs::write(path, toml_string)?;
        Ok(())
    }
-    
+
    pub fn load_with_overrides(
        config_path: Option<&str>,
        provider_override: Option<String>,
@@ -287,12 +291,12 @@ impl Config {
    ) -> Result<Self> {
        // Load the base configuration
        let mut config = Self::load(config_path)?;
-        
+
        // Apply provider override
        if let Some(provider) = provider_override {
            config.providers.default_provider = provider;
        }
-        
+
        // Apply model override to the active provider
        if let Some(model) = model_override {
            match config.providers.default_provider.as_str() {
@@ -332,28 +336,34 @@ impl Config {
                        ));
                    }
                }
-                _ => return Err(anyhow::anyhow!("Unknown provider: {}", 
-                    config.providers.default_provider)),
+                _ => {
+                    return Err(anyhow::anyhow!(
+                        "Unknown provider: {}",
+                        config.providers.default_provider
+                    ))
+                }
            }
        }
-        
+
        Ok(config)
    }
-    
+
    /// Get the provider to use for coach mode in autonomous execution
    pub fn get_coach_provider(&self) -> &str {
-        self.providers.coach
+        self.providers
+            .coach
            .as_deref()
            .unwrap_or(&self.providers.default_provider)
    }
-    
+
    /// Get the provider to use for player mode in autonomous execution
    pub fn get_player_provider(&self) -> &str {
-        self.providers.player
+        self.providers
+            .player
            .as_deref()
            .unwrap_or(&self.providers.default_provider)
    }
-    
+
    /// Create a copy of the config with a different default provider
    pub fn with_provider_override(&self, provider: &str) -> Result<Self> {
        // Validate that the provider is configured
@@ -384,17 +394,17 @@ impl Config {
            }
            _ => {} // Provider is configured or unknown (will be caught later)
        }
-        
+
        let mut config = self.clone();
        config.providers.default_provider = provider.to_string();
        Ok(config)
    }
-    
+
    /// Create a copy of the config for coach mode in autonomous execution
    pub fn for_coach(&self) -> Result<Self> {
        self.with_provider_override(self.get_coach_provider())
    }
-    
+
    /// Create a copy of the config for player mode in autonomous execution
    pub fn for_player(&self) -> Result<Self> {
        self.with_provider_override(self.get_player_provider())
--- a/crates/g3-config/src/tests.rs
+++ b/crates/g3-config/src/tests.rs
@@ -9,7 +9,7 @@ mod tests {
        // Create a temporary directory for the test config
        let temp_dir = TempDir::new().unwrap();
        let config_path = temp_dir.path().join("test_config.toml");
-        
+
        // Write a test configuration with coach and player providers
        let config_content = r#"
 [providers]
@@ -35,32 +35,32 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 "#;
-        
+
        fs::write(&config_path, config_content).unwrap();
-        
+
        // Load the configuration
        let config = Config::load(Some(config_path.to_str().unwrap())).unwrap();
-        
+
        // Test that the providers are correctly identified
        assert_eq!(config.providers.default_provider, "databricks");
        assert_eq!(config.get_coach_provider(), "anthropic");
        assert_eq!(config.get_player_provider(), "embedded");
-        
+
        // Test creating coach config
        let coach_config = config.for_coach().unwrap();
        assert_eq!(coach_config.providers.default_provider, "anthropic");
-        
+
        // Test creating player config
        let player_config = config.for_player().unwrap();
        assert_eq!(player_config.providers.default_provider, "embedded");
    }
-    
+
    #[test]
    fn test_coach_player_fallback_to_default() {
        // Create a temporary directory for the test config
        let temp_dir = TempDir::new().unwrap();
        let config_path = temp_dir.path().join("test_config.toml");
-        
+
        // Write a test configuration WITHOUT coach and player providers
        let config_content = r#"
 [providers]
@@ -76,31 +76,31 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 "#;
-        
+
        fs::write(&config_path, config_content).unwrap();
-        
+
        // Load the configuration
        let config = Config::load(Some(config_path.to_str().unwrap())).unwrap();
-        
+
        // Test that coach and player fall back to default provider
        assert_eq!(config.get_coach_provider(), "databricks");
        assert_eq!(config.get_player_provider(), "databricks");
-        
+
        // Test creating coach config (should use default)
        let coach_config = config.for_coach().unwrap();
        assert_eq!(coach_config.providers.default_provider, "databricks");
-        
+
        // Test creating player config (should use default)
        let player_config = config.for_player().unwrap();
        assert_eq!(player_config.providers.default_provider, "databricks");
    }
-    
+
    #[test]
    fn test_invalid_provider_error() {
        // Create a temporary directory for the test config
        let temp_dir = TempDir::new().unwrap();
        let config_path = temp_dir.path().join("test_config.toml");
-        
+
        // Write a test configuration with an unconfigured provider
        let config_content = r#"
 [providers]
@@ -117,15 +117,15 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 "#;
-        
+
        fs::write(&config_path, config_content).unwrap();
-        
+
        // Load the configuration
        let config = Config::load(Some(config_path.to_str().unwrap())).unwrap();
-        
+
        // Test that trying to create a coach config with unconfigured provider fails
        let result = config.for_coach();
        assert!(result.is_err());
        assert!(result.unwrap_err().to_string().contains("not configured"));
    }
-}
+}
--- a/crates/g3-config/tests/test_multiple_tool_calls.rs
+++ b/crates/g3-config/tests/test_multiple_tool_calls.rs
@@ -0,0 +1,40 @@
+#[cfg(test)]
+mod test_multiple_tool_calls {
+    use g3_config::{AgentConfig, Config};
+
+    #[test]
+    fn test_config_has_multiple_tool_calls_field() {
+        let config = Config::default();
+
+        // Test that the field exists and defaults to false
+        assert_eq!(config.agent.allow_multiple_tool_calls, false);
+
+        // Test that we can create a config with the field set to true
+        let mut custom_config = Config::default();
+        custom_config.agent.allow_multiple_tool_calls = true;
+        assert_eq!(custom_config.agent.allow_multiple_tool_calls, true);
+    }
+
+    #[test]
+    fn test_agent_config_serialization() {
+        let agent_config = AgentConfig {
+            max_context_length: Some(100000),
+            fallback_default_max_tokens: 8192,
+            enable_streaming: true,
+            allow_multiple_tool_calls: true,
+            timeout_seconds: 60,
+            auto_compact: true,
+            max_retry_attempts: 3,
+            autonomous_max_retry_attempts: 6,
+            check_todo_staleness: true,
+        };
+
+        // Test serialization
+        let json = serde_json::to_string(&agent_config).unwrap();
+        assert!(json.contains("\"allow_multiple_tool_calls\":true"));
+
+        // Test deserialization
+        let deserialized: AgentConfig = serde_json::from_str(&json).unwrap();
+        assert_eq!(deserialized.allow_multiple_tool_calls, true);
+    }
+}
--- a/crates/g3-console/Cargo.toml
+++ b/crates/g3-console/Cargo.toml
@@ -6,6 +6,9 @@ authors = ["G3 Team"]
 description = "Web console for monitoring and managing g3 instances"
 license = "MIT"

+[lib]
+path = "src/lib.rs"
+
 [[bin]]
 name = "g3-console"
 path = "src/main.rs"
--- a/crates/g3-console/examples/debug_detector.rs
+++ b/crates/g3-console/examples/debug_detector.rs
@@ -1,19 +1,19 @@
-use sysinfo::{System, Pid};
+use sysinfo::{Pid, System};

 fn main() {
    let mut sys = System::new_all();
    sys.refresh_processes();
-    
+
    println!("Looking for g3 processes...");
-    
+
    for (pid, process) in sys.processes() {
        let cmd = process.cmd();
        if cmd.is_empty() {
            continue;
        }
-        
+
        let cmd_str = cmd.join(" ");
-        
+
        // Check if this contains 'g3'
        if cmd_str.contains("g3") {
            println!("\nFound potential g3 process:");
@@ -21,15 +21,15 @@ fn main() {
            println!("  Name: {}", process.name());
            println!("  Cmd[0]: {:?}", cmd.get(0));
            println!("  Full cmd: {:?}", cmd);
-            
+
            // Check detection logic
            let is_g3_binary = cmd.get(0).map(|s| s.ends_with("g3")).unwrap_or(false);
            let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false)
                && cmd.iter().any(|s| s == "run" || s.contains("g3"));
-            
+
            println!("  is_g3_binary: {}", is_g3_binary);
            println!("  is_cargo_run: {}", is_cargo_run);
-            
+
            // Check workspace
            let has_workspace = cmd.iter().any(|s| s == "--workspace" || s == "-w");
            println!("  has_workspace: {}", has_workspace);
--- a/crates/g3-console/examples/test_api.rs
+++ b/crates/g3-console/examples/test_api.rs
@@ -3,13 +3,15 @@ use g3_console::process::ProcessDetector;

 fn main() {
    let mut detector = ProcessDetector::new();
-    
+
    match detector.detect_instances() {
        Ok(instances) => {
            println!("Found {} instances:", instances.len());
            for instance in instances {
-                println!("  - PID: {}, Workspace: {:?}, Type: {:?}", 
-                    instance.pid, instance.workspace, instance.instance_type);
+                println!(
+                    "  - PID: {}, Workspace: {:?}, Type: {:?}",
+                    instance.pid, instance.workspace, instance.instance_type
+                );
            }
        }
        Err(e) => {
--- a/crates/g3-console/examples/test_detector.rs
+++ b/crates/g3-console/examples/test_detector.rs
@@ -1,12 +1,12 @@
-use sysinfo::{System, Pid};
+use sysinfo::{Pid, System};

 fn main() {
    let mut sys = System::new_all();
    sys.refresh_processes();
-    
+
    // Test with known PIDs
    let pids = vec![68123, 72749];
-    
+
    for pid_num in pids {
        let pid = Pid::from_u32(pid_num);
        if let Some(process) = sys.process(pid) {
--- a/crates/g3-console/src/api/control.rs
+++ b/crates/g3-console/src/api/control.rs
@@ -19,7 +19,7 @@ pub async fn kill_instance(
        .ok_or(StatusCode::BAD_REQUEST)?;

    let mut controller = controller.lock().await;
-    
+
    match controller.kill_process(pid) {
        Ok(_) => {
            info!("Successfully killed process {}", pid);
@@ -39,35 +39,38 @@ pub async fn restart_instance(
    axum::extract::Path(id): axum::extract::Path<String>,
 ) -> Result<Json<LaunchResponse>, StatusCode> {
    info!("Restarting instance: {}", id);
-    
+
    // Extract PID from instance ID (format: pid_timestamp)
    let pid: u32 = id
        .split('_')
        .next()
        .and_then(|s| s.parse().ok())
        .ok_or(StatusCode::BAD_REQUEST)?;
-    
+
    let mut controller = controller.lock().await;
-    
+
    // Get stored launch params
-    let params = controller.get_launch_params(pid)
+    let params = controller
+        .get_launch_params(pid)
        .ok_or(StatusCode::NOT_FOUND)?;
-    
+
    // Launch new instance with same parameters
-    let new_pid = controller.launch_g3(
-        params.workspace.to_str().unwrap(),
-        &params.provider,
-        &params.model,
-        &params.prompt,
-        params.autonomous,
-        params.g3_binary_path.as_deref(),
-    ).map_err(|e| {
-        error!("Failed to restart instance: {}", e);
-        StatusCode::INTERNAL_SERVER_ERROR
-    })?;
-    
+    let new_pid = controller
+        .launch_g3(
+            params.workspace.to_str().unwrap(),
+            &params.provider,
+            &params.model,
+            &params.prompt,
+            params.autonomous,
+            params.g3_binary_path.as_deref(),
+        )
+        .map_err(|e| {
+            error!("Failed to restart instance: {}", e);
+            StatusCode::INTERNAL_SERVER_ERROR
+        })?;
+
    let new_id = format!("{}_{}", new_pid, chrono::Utc::now().timestamp());
-    
+
    Ok(Json(LaunchResponse {
        id: new_id,
        status: "starting".to_string(),
@@ -79,7 +82,7 @@ pub async fn launch_instance(
    Json(request): Json<LaunchRequest>,
 ) -> Result<Json<LaunchResponse>, (StatusCode, Json<serde_json::Value>)> {
    info!("Launching new g3 instance: {:?}", request);
-    
+
    // Validate binary path if provided
    if let Some(ref binary_path) = request.g3_binary_path {
        // Expand relative paths and resolve to absolute
@@ -90,16 +93,19 @@ pub async fn launch_instance(
        } else {
            std::path::PathBuf::from(binary_path)
        };
-        
+
        // Check if file exists
        if !path.exists() {
            error!("G3 binary not found: {}", binary_path);
-            return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
-                "error": "G3 binary not found",
-                "message": format!("The specified g3 binary does not exist: {}", binary_path)
-            }))));
+            return Err((
+                StatusCode::BAD_REQUEST,
+                Json(serde_json::json!({
+                    "error": "G3 binary not found",
+                    "message": format!("The specified g3 binary does not exist: {}", binary_path)
+                })),
+            ));
        }
-        
+
        // Check if file is executable (Unix only)
        #[cfg(unix)]
        {
@@ -107,26 +113,32 @@ pub async fn launch_instance(
            if let Ok(metadata) = std::fs::metadata(path) {
                if metadata.permissions().mode() & 0o111 == 0 {
                    error!("G3 binary is not executable: {}", binary_path);
-                    return Err((StatusCode::BAD_REQUEST, Json(serde_json::json!({
-                        "error": "G3 binary is not executable",
-                        "message": format!("The specified g3 binary is not executable: {}", binary_path)
-                    }))));
+                    return Err((
+                        StatusCode::BAD_REQUEST,
+                        Json(serde_json::json!({
+                            "error": "G3 binary is not executable",
+                            "message": format!("The specified g3 binary is not executable: {}", binary_path)
+                        })),
+                    ));
                }
            }
        }
    }
-    
+
    let workspace = request.workspace.to_str().ok_or_else(|| {
-        (StatusCode::BAD_REQUEST, Json(serde_json::json!({
-            "error": "Invalid workspace path",
-            "message": "The workspace path contains invalid characters"
-        })))
+        (
+            StatusCode::BAD_REQUEST,
+            Json(serde_json::json!({
+                "error": "Invalid workspace path",
+                "message": "The workspace path contains invalid characters"
+            })),
+        )
    })?;
    let autonomous = request.mode == LaunchMode::Ensemble;
    let g3_binary_path = request.g3_binary_path.as_deref();
-    
+
    let mut controller = controller.lock().await;
-    
+
    match controller.launch_g3(
        workspace,
        &request.provider,
@@ -145,10 +157,13 @@ pub async fn launch_instance(
        }
        Err(e) => {
            error!("Failed to launch g3 instance: {}", e);
-            Err((StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({
-                "error": "Failed to launch instance",
-                "message": format!("Error: {}", e)
-            }))))
+            Err((
+                StatusCode::INTERNAL_SERVER_ERROR,
+                Json(serde_json::json!({
+                    "error": "Failed to launch instance",
+                    "message": format!("Error: {}", e)
+                })),
+            ))
        }
    }
 }
--- a/crates/g3-console/src/api/instances.rs
+++ b/crates/g3-console/src/api/instances.rs
@@ -1,7 +1,11 @@
 use crate::logs::{LogParser, StatsAggregator};
 use crate::models::*;
 use crate::process::ProcessDetector;
-use axum::{extract::{Query, State}, http::StatusCode, Json};
+use axum::{
+    extract::{Query, State},
+    http::StatusCode,
+    Json,
+};
 use serde::Deserialize;
 use std::sync::Arc;
 use tokio::sync::Mutex;
@@ -13,11 +17,11 @@ pub async fn list_instances(
    State(detector): State<AppState>,
 ) -> Result<Json<Vec<InstanceDetail>>, StatusCode> {
    let mut detector = detector.lock().await;
-    
+
    match detector.detect_instances() {
        Ok(instances) => {
            let mut details = Vec::new();
-            
+
            for instance in instances {
                match get_instance_detail(&instance) {
                    Ok(detail) => details.push(detail),
@@ -27,7 +31,7 @@ pub async fn list_instances(
                    }
                }
            }
-            
+
            Ok(Json(details))
        }
        Err(e) => {
@@ -42,7 +46,7 @@ pub async fn get_instance(
    axum::extract::Path(id): axum::extract::Path<String>,
 ) -> Result<Json<InstanceDetail>, StatusCode> {
    let mut detector = detector.lock().await;
-    
+
    match detector.detect_instances() {
        Ok(instances) => {
            if let Some(instance) = instances.into_iter().find(|i| i.id == id) {
@@ -69,30 +73,36 @@ fn get_instance_detail(instance: &Instance) -> anyhow::Result<InstanceDetail> {
    let log_entries = match LogParser::parse_logs(&instance.workspace) {
        Ok(entries) => entries,
        Err(e) => {
-            warn!("Failed to parse logs for instance {}: {}. Instance may be newly started.", instance.id, e);
+            warn!(
+                "Failed to parse logs for instance {}: {}. Instance may be newly started.",
+                instance.id, e
+            );
            Vec::new()
        }
    };
-    
+
    // Aggregate stats
    let is_ensemble = instance.instance_type == crate::models::InstanceType::Ensemble;
    let stats = StatsAggregator::aggregate_stats(&log_entries, instance.start_time, is_ensemble);
-    
+
    // Get latest message
    let latest_message = StatsAggregator::get_latest_message(&log_entries);
-    
+
    // Get git status - don't fail if not a git repo
    let git_status = match get_git_status(&instance.workspace) {
        Some(status) => Some(status),
        None => {
-            debug!("No git status available for workspace: {:?}", instance.workspace);
+            debug!(
+                "No git status available for workspace: {:?}",
+                instance.workspace
+            );
            None
        }
    };
-    
+
    // Get project files
    let project_files = get_project_files(&instance.workspace);
-    
+
    Ok(InstanceDetail {
        instance: instance.clone(),
        stats,
@@ -104,7 +114,7 @@ fn get_instance_detail(instance: &Instance) -> anyhow::Result<InstanceDetail> {

 fn get_git_status(workspace: &std::path::Path) -> Option<GitStatus> {
    use std::process::Command;
-    
+
    // Get current branch
    let branch = Command::new("git")
        .arg("-C")
@@ -115,7 +125,7 @@ fn get_git_status(workspace: &std::path::Path) -> Option<GitStatus> {
        .ok()
        .and_then(|output| String::from_utf8(output.stdout).ok())
        .map(|s| s.trim().to_string())?;
-    
+
    // Get status
    let status_output = Command::new("git")
        .arg("-C")
@@ -125,19 +135,19 @@ fn get_git_status(workspace: &std::path::Path) -> Option<GitStatus> {
        .output()
        .ok()
        .and_then(|output| String::from_utf8(output.stdout).ok())?;
-    
+
    let mut modified_files = Vec::new();
    let mut added_files = Vec::new();
    let mut deleted_files = Vec::new();
-    
+
    for line in status_output.lines() {
        if line.len() < 4 {
            continue;
        }
-        
+
        let status = &line[0..2];
        let file = line[3..].trim();
-        
+
        match status.trim() {
            "M" | "MM" => modified_files.push(file.to_string()),
            "A" | "AM" => added_files.push(file.to_string()),
@@ -145,9 +155,9 @@ fn get_git_status(workspace: &std::path::Path) -> Option<GitStatus> {
            _ => modified_files.push(file.to_string()),
        }
    }
-    
+
    let uncommitted_changes = modified_files.len() + added_files.len() + deleted_files.len();
-    
+
    Some(GitStatus {
        branch,
        uncommitted_changes,
@@ -161,7 +171,7 @@ fn get_project_files(workspace: &std::path::Path) -> ProjectFiles {
    let requirements = read_file_snippet(workspace, "requirements.md");
    let readme = read_file_snippet(workspace, "README.md");
    let agents = read_file_snippet(workspace, "AGENTS.md");
-    
+
    ProjectFiles {
        requirements,
        readme,
@@ -171,22 +181,16 @@ fn get_project_files(workspace: &std::path::Path) -> ProjectFiles {

 fn read_file_snippet(workspace: &std::path::Path, filename: &str) -> Option<String> {
    use std::fs;
-    
+
    let path = workspace.join(filename);
    if !path.exists() {
        return None;
    }
-    
-    fs::read_to_string(&path)
-        .ok()
-        .map(|content| {
-            // Return first 10 lines
-            content
-                .lines()
-                .take(10)
-                .collect::<Vec<_>>()
-                .join("\n")
-        })
+
+    fs::read_to_string(&path).ok().map(|content| {
+        // Return first 10 lines
+        content.lines().take(10).collect::<Vec<_>>().join("\n")
+    })
 }

 #[derive(Deserialize)]
@@ -200,20 +204,25 @@ pub async fn get_file_content(
    State(detector): State<AppState>,
 ) -> Result<Json<serde_json::Value>, StatusCode> {
    let mut detector = detector.lock().await;
-    
+
    // Find the instance
-    let instances = detector.detect_instances().map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-    let instance = instances.iter().find(|i| i.id == id).ok_or(StatusCode::NOT_FOUND)?;
-    
+    let instances = detector
+        .detect_instances()
+        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+    let instance = instances
+        .iter()
+        .find(|i| i.id == id)
+        .ok_or(StatusCode::NOT_FOUND)?;
+
    // Read the full file
    let file_path = instance.workspace.join(&query.name);
    if !file_path.exists() {
        return Err(StatusCode::NOT_FOUND);
    }
-    
-    let content = std::fs::read_to_string(&file_path)
-        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
-    
+
+    let content =
+        std::fs::read_to_string(&file_path).map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+
    Ok(Json(serde_json::json!({
        "name": query.name,
        "content": content,
--- a/crates/g3-console/src/api/logs.rs
+++ b/crates/g3-console/src/api/logs.rs
@@ -12,7 +12,7 @@ pub async fn get_instance_logs(
    axum::extract::Path(id): axum::extract::Path<String>,
 ) -> Result<Json<serde_json::Value>, StatusCode> {
    let mut detector = detector.lock().await;
-    
+
    match detector.detect_instances() {
        Ok(instances) => {
            if let Some(instance) = instances.into_iter().find(|i| i.id == id) {
@@ -20,7 +20,7 @@ pub async fn get_instance_logs(
                    Ok(entries) => {
                        let messages = LogParser::extract_chat_messages(&entries);
                        let tool_calls = LogParser::extract_tool_calls(&entries);
-                        
+
                        Ok(Json(serde_json::json!({
                            "messages": messages,
                            "tool_calls": tool_calls,
--- a/crates/g3-console/src/api/mod.rs
+++ b/crates/g3-console/src/api/mod.rs
@@ -1,4 +1,4 @@
-pub mod instances;
 pub mod control;
+pub mod instances;
 pub mod logs;
 pub mod state;
--- a/crates/g3-console/src/api/state.rs
+++ b/crates/g3-console/src/api/state.rs
@@ -1,8 +1,8 @@
 use crate::launch::ConsoleState;
 use axum::{http::StatusCode, Json};
 use serde::{Deserialize, Serialize};
-use std::path::PathBuf;
 use std::os::unix::fs::PermissionsExt;
+use std::path::PathBuf;
 use tracing::{error, info};

 pub async fn get_state() -> Result<Json<ConsoleState>, StatusCode> {
@@ -52,24 +52,26 @@ pub async fn browse_filesystem(
    Json(request): Json<BrowseRequest>,
 ) -> Result<Json<BrowseResponse>, StatusCode> {
    use std::fs;
-    
+
    let path = if let Some(p) = request.path {
        PathBuf::from(p)
    } else {
        std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
    };
-    
-    let current_path = path.canonicalize()
+
+    let current_path = path
+        .canonicalize()
        .map_err(|_| StatusCode::BAD_REQUEST)?
        .to_string_lossy()
        .to_string();
-    
-    let parent_path = path.parent()
+
+    let parent_path = path
+        .parent()
        .and_then(|p| p.to_str())
        .map(|s| s.to_string());
-    
+
    let mut entries = Vec::new();
-    
+
    if let Ok(read_dir) = fs::read_dir(&path) {
        for entry in read_dir.flatten() {
            if let Ok(metadata) = entry.metadata() {
@@ -82,15 +84,13 @@ pub async fn browse_filesystem(
            }
        }
    }
-    
-    entries.sort_by(|a, b| {
-        match (a.is_dir, b.is_dir) {
-            (true, false) => std::cmp::Ordering::Less,
-            (false, true) => std::cmp::Ordering::Greater,
-            _ => a.name.cmp(&b.name),
-        }
+
+    entries.sort_by(|a, b| match (a.is_dir, b.is_dir) {
+        (true, false) => std::cmp::Ordering::Less,
+        (false, true) => std::cmp::Ordering::Greater,
+        _ => a.name.cmp(&b.name),
    });
-    
+
    Ok(Json(BrowseResponse {
        current_path,
        parent_path,
--- a/crates/g3-console/src/launch.rs
+++ b/crates/g3-console/src/launch.rs
@@ -27,7 +27,7 @@ impl Default for ConsoleState {
 impl ConsoleState {
    pub fn load() -> Self {
        let config_path = Self::config_path();
-        
+
        if config_path.exists() {
            if let Ok(content) = fs::read_to_string(&config_path) {
                return serde_json::from_str(&content).unwrap_or_else(|e| {
@@ -36,31 +36,29 @@ impl ConsoleState {
                });
            }
        }
-        
+
        Self::default()
    }
-    
+
    pub fn save(&self) -> anyhow::Result<()> {
        let config_path = Self::config_path();
        info!("Saving console state to: {:?}", config_path);
-        
+
        // Create parent directory if it doesn't exist
        if let Some(parent) = config_path.parent() {
            fs::create_dir_all(parent)?;
        }
-        
+
        let content = serde_json::to_string_pretty(self)?;
        fs::write(&config_path, content)?;
        info!("Console state saved successfully to: {:?}", config_path);
-        
+
        Ok(())
    }
-    
+
    fn config_path() -> PathBuf {
        // Use explicit ~/.config/g3/console.json path as per requirements
        let home = dirs::home_dir().unwrap_or_else(|| PathBuf::from("."));
-        home.join(".config")
-            .join("g3")
-            .join("console.json")
+        home.join(".config").join("g3").join("console.json")
    }
 }
--- a/crates/g3-console/src/lib.rs
+++ b/crates/g3-console/src/lib.rs
@@ -0,0 +1,5 @@
+pub mod api;
+pub mod launch;
+pub mod logs;
+pub mod models;
+pub mod process;
--- a/crates/g3-console/src/logs.rs
+++ b/crates/g3-console/src/logs.rs
@@ -36,7 +36,7 @@ impl LogParser {
    /// Parse logs from a workspace directory
    pub fn parse_logs(workspace: &Path) -> Result<Vec<LogEntry>> {
        let logs_dir = workspace.join("logs");
-        
+
        if !logs_dir.exists() {
            return Ok(Vec::new());
        }
@@ -47,7 +47,7 @@ impl LogParser {
        for entry in fs::read_dir(&logs_dir).context("Failed to read logs directory")? {
            let entry = entry?;
            let path = entry.path();
-            
+
            if path.extension().and_then(|s| s.to_str()) == Some("json") {
                if let Ok(content) = fs::read_to_string(&path) {
                    if let Ok(json) = serde_json::from_str::<Value>(&content) {
@@ -55,17 +55,21 @@ impl LogParser {
                        if let Some(messages) = json.get("messages").and_then(|m| m.as_array()) {
                            for msg in messages {
                                entries.push(LogEntry {
-                                    timestamp: msg.get("timestamp")
+                                    timestamp: msg
+                                        .get("timestamp")
                                        .and_then(|t| t.as_str())
                                        .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                                        .map(|dt| dt.with_timezone(&Utc)),
-                                    role: msg.get("role")
+                                    role: msg
+                                        .get("role")
                                        .and_then(|r| r.as_str())
                                        .map(String::from),
-                                    content: msg.get("content")
+                                    content: msg
+                                        .get("content")
                                        .and_then(|c| c.as_str())
                                        .map(String::from),
-                                    tool_calls: msg.get("tool_calls")
+                                    tool_calls: msg
+                                        .get("tool_calls")
                                        .and_then(|tc| tc.as_array())
                                        .map(|arr| arr.clone()),
                                    raw: msg.clone(),
@@ -78,13 +82,11 @@ impl LogParser {
        }

        // Sort by timestamp
-        entries.sort_by(|a, b| {
-            match (&a.timestamp, &b.timestamp) {
-                (Some(t1), Some(t2)) => t1.cmp(t2),
-                (Some(_), None) => std::cmp::Ordering::Less,
-                (None, Some(_)) => std::cmp::Ordering::Greater,
-                (None, None) => std::cmp::Ordering::Equal,
-            }
+        entries.sort_by(|a, b| match (&a.timestamp, &b.timestamp) {
+            (Some(t1), Some(t2)) => t1.cmp(t2),
+            (Some(_), None) => std::cmp::Ordering::Less,
+            (None, Some(_)) => std::cmp::Ordering::Greater,
+            (None, None) => std::cmp::Ordering::Equal,
        });

        Ok(entries)
@@ -97,7 +99,7 @@ impl LogParser {
            .filter_map(|entry| {
                let role = entry.role.clone()?;
                let content = entry.content.clone()?;
-                
+
                Some(ChatMessage {
                    role,
                    content,
@@ -117,10 +119,12 @@ impl LogParser {
                    if let Some(name) = call.get("name").and_then(|n| n.as_str()) {
                        tool_calls.push(ToolCall {
                            name: name.to_string(),
-                            parameters: call.get("parameters")
+                            parameters: call
+                                .get("parameters")
                                .cloned()
                                .unwrap_or(Value::Object(serde_json::Map::new())),
-                            result: call.get("result")
+                            result: call
+                                .get("result")
                                .and_then(|r| r.as_str())
                                .map(String::from),
                            timestamp: entry.timestamp,
@@ -146,7 +150,7 @@ impl StatsAggregator {
        let total_tokens = Self::count_tokens(entries);
        let tool_calls = Self::count_tool_calls(entries);
        let errors = Self::count_errors(entries);
-        
+
        let duration_secs = if let Some(last_entry) = entries.last() {
            if let Some(last_time) = last_entry.timestamp {
                (last_time - start_time).num_seconds().max(0) as u64
@@ -193,7 +197,9 @@ impl StatsAggregator {
        entries
            .iter()
            .filter_map(|entry| {
-                entry.raw.get("usage")
+                entry
+                    .raw
+                    .get("usage")
                    .and_then(|u| u.get("total_tokens"))
                    .and_then(|t| t.as_u64())
            })
@@ -213,7 +219,11 @@ impl StatsAggregator {
            .iter()
            .filter(|entry| {
                entry.raw.get("error").is_some()
-                    || entry.content.as_ref().map(|c| c.to_lowercase().contains("error")).unwrap_or(false)
+                    || entry
+                        .content
+                        .as_ref()
+                        .map(|c| c.to_lowercase().contains("error"))
+                        .unwrap_or(false)
            })
            .count() as u64
    }
--- a/crates/g3-console/src/main.rs
+++ b/crates/g3-console/src/main.rs
@@ -1,13 +1,11 @@
-mod api;
-mod logs;
-mod models;
-mod process;
-mod launch;
+use g3_console::api;
+use g3_console::launch;
+use g3_console::process;

 use api::control::{kill_instance, launch_instance, restart_instance};
-use api::instances::{get_instance, get_file_content, list_instances};
+use api::instances::{get_file_content, get_instance, list_instances};
 use api::logs::get_instance_logs;
-use api::state::{get_state, save_state, browse_filesystem};
+use api::state::{browse_filesystem, get_state, save_state};
 use axum::{
    routing::{get, post},
    Router,
@@ -41,9 +39,7 @@ struct Args {
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
    // Initialize tracing
-    tracing_subscriber::fmt()
-        .with_max_level(Level::INFO)
-        .init();
+    tracing_subscriber::fmt().with_max_level(Level::INFO).init();

    let args = Args::parse();

--- a/crates/g3-console/src/models/instance.rs
+++ b/crates/g3-console/src/models/instance.rs
@@ -1,6 +1,6 @@
+use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
 use std::path::PathBuf;
-use chrono::{DateTime, Utc};

 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Instance {
--- a/crates/g3-console/src/models/message.rs
+++ b/crates/g3-console/src/models/message.rs
@@ -1,5 +1,5 @@
-use serde::{Deserialize, Serialize};
 use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};

 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ChatMessage {
--- a/crates/g3-console/src/process/controller.rs
+++ b/crates/g3-console/src/process/controller.rs
@@ -1,12 +1,12 @@
-use anyhow::{anyhow, Context, Result};
-use std::process::{Command, Stdio};
-use std::os::unix::process::CommandExt;
-use std::collections::HashMap;
-use std::sync::Mutex;
-use std::path::PathBuf;
-use sysinfo::{Pid, Signal, System, Process};
-use tracing::{debug, info};
 use crate::models::LaunchParams;
+use anyhow::{anyhow, Context, Result};
+use std::collections::HashMap;
+use std::os::unix::process::CommandExt;
+use std::path::PathBuf;
+use std::process::{Command, Stdio};
+use std::sync::Mutex;
+use sysinfo::{Pid, Process, Signal, System};
+use tracing::{debug, info};

 pub struct ProcessController {
    system: System,
@@ -27,15 +27,15 @@ impl ProcessController {

        if let Some(process) = self.system.process(sysinfo_pid) {
            info!("Killing process {} ({})", pid, process.name());
-            
+
            // Try SIGTERM first
            if process.kill_with(Signal::Term).is_some() {
                debug!("Sent SIGTERM to process {}", pid);
-                
+
                // Wait a bit and check if it's still running
                std::thread::sleep(std::time::Duration::from_secs(2));
                self.system.refresh_processes();
-                
+
                if self.system.process(sysinfo_pid).is_some() {
                    // Still running, send SIGKILL
                    if let Some(proc) = self.system.process(sysinfo_pid) {
@@ -43,7 +43,7 @@ impl ProcessController {
                        debug!("Sent SIGKILL to process {}", pid);
                    }
                }
-                
+
                Ok(())
            } else {
                Err(anyhow!("Failed to send signal to process {}", pid))
@@ -64,7 +64,7 @@ impl ProcessController {
        g3_binary_path: Option<&str>,
    ) -> Result<u32> {
        let binary = g3_binary_path.unwrap_or("g3");
-        
+
        let mut cmd = Command::new(binary);
        cmd.arg("--workspace")
            .arg(workspace)
@@ -108,36 +108,41 @@ impl ProcessController {
        }

        info!("Launching g3: {:?}", cmd);
-        
+
        // Spawn and wait for the intermediate process to exit
        let mut child = cmd.spawn().context("Failed to spawn g3 process")?;
        let intermediate_pid = child.id();
-        
+
        // Wait for intermediate process (it will exit immediately after forking)
-        child.wait().context("Failed to wait for intermediate process")?;
-        
+        child
+            .wait()
+            .context("Failed to wait for intermediate process")?;
+
        // The actual g3 process is now running as orphan
        // We need to scan for it by matching workspace and recent start time
-        info!("Scanning for newly launched g3 process in workspace: {}", workspace);
-        
+        info!(
+            "Scanning for newly launched g3 process in workspace: {}",
+            workspace
+        );
+
        // Wait even longer for the process to fully start and appear in process list
        std::thread::sleep(std::time::Duration::from_millis(2500));
-        
+
        // Refresh and scan for the process
        self.system.refresh_processes();
        let workspace_path = PathBuf::from(workspace);
        let mut found_pid = None;
-        
+
        for (pid, process) in self.system.processes() {
            let cmd = process.cmd();
            let cmd_str = cmd.join(" ");
-            
+
            // Check if this is a g3 process
            let is_g3 = process.name().contains("g3") || cmd_str.contains("g3");
            if !is_g3 {
                continue;
            }
-            
+
            // Check if it has our workspace
            let has_workspace = cmd.iter().any(|arg| {
                if let Ok(path) = PathBuf::from(arg).canonicalize() {
@@ -147,11 +152,12 @@ impl ProcessController {
                }
                false
            });
-            
+
            if has_workspace {
                // Check if it's recent (started within last 10 seconds)
                let now = std::time::SystemTime::now();
-                let start_time = std::time::UNIX_EPOCH + std::time::Duration::from_secs(process.start_time());
+                let start_time =
+                    std::time::UNIX_EPOCH + std::time::Duration::from_secs(process.start_time());
                if let Ok(duration) = now.duration_since(start_time) {
                    if duration.as_secs() < 10 {
                        found_pid = Some(pid.as_u32());
@@ -160,7 +166,7 @@ impl ProcessController {
                }
            }
        }
-        
+
        let pid = if let Some(found) = found_pid {
            found
        } else {
@@ -168,18 +174,18 @@ impl ProcessController {
            info!("Process not found on first scan, trying again...");
            std::thread::sleep(std::time::Duration::from_millis(2000));
            self.system.refresh_processes();
-            
+
            // Try the scan again with full logic
            let mut retry_found = None;
            for (pid, process) in self.system.processes() {
                let cmd = process.cmd();
                let cmd_str = cmd.join(" ");
-                
+
                let is_g3 = process.name().contains("g3") || cmd_str.contains("g3");
                if !is_g3 {
                    continue;
                }
-                
+
                let has_workspace = cmd.iter().any(|arg| {
                    if let Ok(path) = PathBuf::from(arg).canonicalize() {
                        if let Ok(ws) = workspace_path.canonicalize() {
@@ -188,18 +194,18 @@ impl ProcessController {
                    }
                    false
                });
-                
+
                if has_workspace {
                    retry_found = Some(pid.as_u32());
                    break;
                }
            }
-            
+
            retry_found.unwrap_or(intermediate_pid)
        };

        info!("Launched g3 process with PID {}", pid);
-        
+
        // Store launch params for restart
        let params = LaunchParams {
            workspace: workspace.into(),
@@ -209,14 +215,14 @@ impl ProcessController {
            autonomous,
            g3_binary_path: g3_binary_path.map(|s| s.to_string()),
        };
-        
+
        if let Ok(mut map) = self.launch_params.lock() {
            map.insert(pid, params);
        }
-        
+
        Ok(pid)
    }
-    
+
    pub fn get_launch_params(&mut self, pid: u32) -> Option<LaunchParams> {
        // First check if we have stored params (for console-launched instances)
        if let Ok(map) = self.launch_params.lock() {
@@ -224,19 +230,19 @@ impl ProcessController {
                return Some(params.clone());
            }
        }
-        
+
        // If not found, try to parse from process command line (for detected instances)
        self.system.refresh_processes();
        let sysinfo_pid = Pid::from_u32(pid);
-        
+
        if let Some(process) = self.system.process(sysinfo_pid) {
            let cmd = process.cmd();
            return self.parse_launch_params_from_cmd(cmd);
        }
-        
+
        None
    }
-    
+
    fn parse_launch_params_from_cmd(&self, cmd: &[String]) -> Option<LaunchParams> {
        let mut workspace = None;
        let mut provider = None;
@@ -244,7 +250,7 @@ impl ProcessController {
        let mut prompt = None;
        let mut autonomous = false;
        let mut g3_binary_path = None;
-        
+
        let mut i = 0;
        while i < cmd.len() {
            match cmd[i].as_str() {
@@ -273,7 +279,7 @@ impl ProcessController {
                }
            }
        }
-        
+
        // Try to determine binary path from cmd[0]
        if !cmd.is_empty() {
            let first = &cmd[0];
@@ -281,9 +287,10 @@ impl ProcessController {
                g3_binary_path = Some(first.clone());
            }
        }
-        
+
        // Only return params if we have the minimum required fields
-        if let (Some(ws), Some(prov), Some(mdl), Some(prmt)) = (workspace, provider, model, prompt) {
+        if let (Some(ws), Some(prov), Some(mdl), Some(prmt)) = (workspace, provider, model, prompt)
+        {
            Some(LaunchParams {
                workspace: ws,
                provider: prov,
--- a/crates/g3-console/src/process/detector.rs
+++ b/crates/g3-console/src/process/detector.rs
@@ -2,7 +2,7 @@ use crate::models::{ExecutionMethod, Instance, InstanceStatus, InstanceType};
 use anyhow::Result;
 use chrono::{DateTime, Utc};
 use std::path::PathBuf;
-use sysinfo::{System, Pid, Process};
+use sysinfo::{Pid, Process, System};
 use tracing::{debug, info, warn};

 pub struct ProcessDetector {
@@ -41,36 +41,37 @@ impl ProcessDetector {
        Ok(instances)
    }

-    fn parse_g3_process(
-        &self,
-        pid: Pid,
-        process: &Process,
-        cmd: &[String],
-    ) -> Option<Instance> {
+    fn parse_g3_process(&self, pid: Pid, process: &Process, cmd: &[String]) -> Option<Instance> {
        let cmd_str = cmd.join(" ");
-        
+
        // Exclude g3-console itself
        if cmd_str.contains("g3-console") {
            return None;
        }
-        
+
        // Check if this is a g3 binary (more comprehensive check)
-        let is_g3_binary = cmd.get(0).map(|s| {
-            (s.ends_with("g3") || s.ends_with("/g3") || s.contains("/target/release/g3") || s.contains("/target/debug/g3"))
-            && !s.contains("g3-") // Exclude other g3-* binaries
-        }).unwrap_or(false);
-        
+        let is_g3_binary = cmd
+            .get(0)
+            .map(|s| {
+                (s.ends_with("g3")
+                    || s.ends_with("/g3")
+                    || s.contains("/target/release/g3")
+                    || s.contains("/target/debug/g3"))
+                    && !s.contains("g3-") // Exclude other g3-* binaries
+            })
+            .unwrap_or(false);
+
        // Check if this is cargo run with g3 (not g3-console or other variants)
-        let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false) 
+        let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false)
            && cmd.iter().any(|s| s == "run")
            && !cmd_str.contains("g3-console");
-        
+
        // Also check if command line has g3-specific flags
        let has_g3_flags = cmd_str.contains("--workspace") || cmd_str.contains("--autonomous");
-        
+
        // Accept if it's a g3 binary or cargo run with g3, and has typical g3 patterns
        let is_g3_process = is_g3_binary || (is_cargo_run && has_g3_flags);
-        
+
        if !is_g3_process {
            return None;
        }
@@ -97,8 +98,8 @@ impl ProcessDetector {
        let model = self.extract_flag_value(cmd, "--model");

        // Get start time
-        let start_time = DateTime::from_timestamp(process.start_time() as i64, 0)
-            .unwrap_or_else(Utc::now);
+        let start_time =
+            DateTime::from_timestamp(process.start_time() as i64, 0).unwrap_or_else(Utc::now);

        // Generate instance ID from PID and start time
        let id = format!("{}_{}", pid, start_time.timestamp());
@@ -139,7 +140,7 @@ impl ProcessDetector {
                return Some(cwd);
            }
        }
-        
+
        #[cfg(target_os = "macos")]
        {
            // On macOS, use lsof to get the current working directory
@@ -156,9 +157,12 @@ impl ProcessDetector {
                }
            }
        }
-        
+
        // Final fallback: use current directory of console
-        warn!("Could not determine workspace for PID {}, using current directory", pid);
+        warn!(
+            "Could not determine workspace for PID {}, using current directory",
+            pid
+        );
        std::env::current_dir().ok()
    }

@@ -173,7 +177,7 @@ impl ProcessDetector {

    pub fn get_process_status(&mut self, pid: u32) -> Option<InstanceStatus> {
        self.system.refresh_all();
-        
+
        let sysinfo_pid = Pid::from_u32(pid);
        if self.system.process(sysinfo_pid).is_some() {
            Some(InstanceStatus::Running)
--- a/crates/g3-console/src/process/mod.rs
+++ b/crates/g3-console/src/process/mod.rs
@@ -1,5 +1,5 @@
-pub mod detector;
 pub mod controller;
+pub mod detector;

-pub use detector::*;
 pub use controller::*;
+pub use detector::*;
--- a/crates/g3-core/Cargo.toml
+++ b/crates/g3-core/Cargo.toml
@@ -43,6 +43,8 @@ tree-sitter-scheme = "0.24"
 streaming-iterator = "0.1"
 walkdir = "2.4"

+const_format = "0.2"
+
 [dev-dependencies]
 tempfile = "3.8"
 serial_test = "3.0"
--- a/crates/g3-core/examples/inspect_ast.rs
+++ b/crates/g3-core/examples/inspect_ast.rs
@@ -1,6 +1,6 @@
 //! Inspect tree-sitter AST structure for Rust code

-use tree_sitter::{Parser, Language};
+use tree_sitter::{Language, Parser};

 fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
    let indent_str = "  ".repeat(indent);
@@ -10,7 +10,7 @@ fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
    } else {
        node_text.to_string()
    };
-    
+
    println!(
        "{}{} [{}:{}] '{}'",
        indent_str,
@@ -19,7 +19,7 @@ fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
        node.start_position().column + 1,
        preview.replace('\n', "\\n")
    );
-    
+
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        print_tree(child, source, indent + 1);
@@ -48,7 +48,7 @@ pub async fn another_async(x: i32) -> Result<(), ()> {
    println!("{}\n", "=".repeat(80));

    let mut parser = Parser::new();
-    let language: Language = tree_sitter_rust::language().into();
+    let language: Language = tree_sitter_rust::LANGUAGE.into();
    parser.set_language(&language)?;

    let tree = parser.parse(source_code, None).unwrap();
--- a/crates/g3-core/examples/inspect_python_ast.rs
+++ b/crates/g3-core/examples/inspect_python_ast.rs
@@ -1,6 +1,6 @@
 //! Inspect tree-sitter AST structure for Python code

-use tree_sitter::{Parser, Language};
+use tree_sitter::{Language, Parser};

 fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
    let indent_str = "  ".repeat(indent);
@@ -10,7 +10,7 @@ fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
    } else {
        node_text.to_string()
    };
-    
+
    println!(
        "{}{} [{}:{}] '{}'",
        indent_str,
@@ -19,7 +19,7 @@ fn print_tree(node: tree_sitter::Node, source: &str, indent: usize) {
        node.start_position().column + 1,
        preview.replace('\n', "\\n")
    );
-    
+
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        print_tree(child, source, indent + 1);
@@ -46,7 +46,7 @@ class MyClass:
    println!("{}\n", "=".repeat(80));

    let mut parser = Parser::new();
-    let language: Language = tree_sitter_python::language().into();
+    let language: Language = tree_sitter_python::LANGUAGE.into();
    parser.set_language(&language)?;

    let tree = parser.parse(source_code, None).unwrap();
--- a/crates/g3-core/examples/test_python_query.rs
+++ b/crates/g3-core/examples/test_python_query.rs
@@ -1,6 +1,7 @@
 //! Test Python async query

-use tree_sitter::{Parser, Query, QueryCursor, Language};
+use streaming_iterator::StreamingIterator;
+use tree_sitter::{Language, Parser, Query, QueryCursor};

 fn main() -> anyhow::Result<()> {
    let source_code = r#"
@@ -12,7 +13,7 @@ async def async_function():
 "#;

    let mut parser = Parser::new();
-    let language: Language = tree_sitter_python::language().into();
+    let language: Language = tree_sitter_python::LANGUAGE.into();
    parser.set_language(&language)?;

    let tree = parser.parse(source_code, None).unwrap();
--- a/crates/g3-core/src/code_search/searcher.rs
+++ b/crates/g3-core/src/code_search/searcher.rs
@@ -3,8 +3,8 @@ use anyhow::{anyhow, Result};
 use std::collections::HashMap;
 use std::fs;
 use std::path::Path;
-use tree_sitter::{Language, Parser, Query, QueryCursor};
 use streaming_iterator::StreamingIterator;
+use tree_sitter::{Language, Parser, Query, QueryCursor};
 use walkdir::WalkDir;

 pub struct TreeSitterSearcher {
@@ -47,10 +47,11 @@ impl TreeSitterSearcher {
                .set_language(&language)
                .map_err(|e| anyhow!("Failed to set JavaScript language: {}", e))?;
            parsers.insert("javascript".to_string(), parser);
-            
+
            // Create separate parser for "js" alias
            let mut parser_js = Parser::new();
-            parser_js.set_language(&language)
+            parser_js
+                .set_language(&language)
                .map_err(|e| anyhow!("Failed to set JavaScript language: {}", e))?;
            parsers.insert("js".to_string(), parser_js);
            languages.insert("javascript".to_string(), language.clone());
@@ -65,10 +66,11 @@ impl TreeSitterSearcher {
                .set_language(&language)
                .map_err(|e| anyhow!("Failed to set TypeScript language: {}", e))?;
            parsers.insert("typescript".to_string(), parser);
-            
+
            // Create separate parser for "ts" alias
            let mut parser_ts = Parser::new();
-            parser_ts.set_language(&language)
+            parser_ts
+                .set_language(&language)
                .map_err(|e| anyhow!("Failed to set TypeScript language: {}", e))?;
            parsers.insert("ts".to_string(), parser_ts);
            languages.insert("typescript".to_string(), language.clone());
@@ -215,8 +217,8 @@ impl TreeSitterSearcher {
            .ok_or_else(|| anyhow!("Language not found: {}", spec.language))?;

        // Parse query
-        let query = Query::new(language, &spec.query)
-            .map_err(|e| anyhow!("Invalid query: {}", e))?;
+        let query =
+            Query::new(language, &spec.query).map_err(|e| anyhow!("Invalid query: {}", e))?;

        let mut matches = Vec::new();
        let mut files_searched = 0;
@@ -255,11 +257,8 @@ impl TreeSitterSearcher {
                if let Ok(source_code) = fs::read_to_string(path) {
                    if let Some(tree) = parser.parse(&source_code, None) {
                        let mut cursor = QueryCursor::new();
-                        let mut query_matches = cursor.matches(
-                            &query,
-                            tree.root_node(),
-                            source_code.as_bytes(),
-                        );
+                        let mut query_matches =
+                            cursor.matches(&query, tree.root_node(), source_code.as_bytes());

                        query_matches.advance();
                        while let Some(query_match) = query_matches.get() {
@@ -308,7 +307,7 @@ impl TreeSitterSearcher {
                                captures: captures_map,
                                context,
                            });
-                            
+
                            query_matches.advance();
                        }
                    }
--- a/crates/g3-core/src/error_handling.rs
+++ b/crates/g3-core/src/error_handling.rs
@@ -106,15 +106,15 @@ impl ErrorContext {
        error!("Session ID: {:?}", self.session_id);
        error!("Context Tokens: {}", self.context_tokens);
        error!("Last Prompt: {}", self.last_prompt);
-        
+
        if let Some(ref req) = self.raw_request {
            error!("Raw Request: {}", req);
        }
-        
+
        if let Some(ref resp) = self.raw_response {
            error!("Raw Response: {}", resp);
        }
-        
+
        error!("Stack Trace:\n{}", self.stack_trace);
        error!("=== END ERROR DETAILS ===");

@@ -191,23 +191,36 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
    let error_str = error.to_string().to_lowercase();

    // Check for recoverable error patterns
-    if error_str.contains("rate limit") || error_str.contains("rate_limit") || error_str.contains("429") {
+    if error_str.contains("rate limit")
+        || error_str.contains("rate_limit")
+        || error_str.contains("429")
+    {
        return ErrorType::Recoverable(RecoverableError::RateLimit);
    }

-    if error_str.contains("network") || error_str.contains("connection") || 
-       error_str.contains("dns") || error_str.contains("refused") {
+    if error_str.contains("network")
+        || error_str.contains("connection")
+        || error_str.contains("dns")
+        || error_str.contains("refused")
+    {
        return ErrorType::Recoverable(RecoverableError::NetworkError);
    }

-    if error_str.contains("500") || error_str.contains("502") || 
-       error_str.contains("503") || error_str.contains("504") ||
-       error_str.contains("server error") || error_str.contains("internal error") {
+    if error_str.contains("500")
+        || error_str.contains("502")
+        || error_str.contains("503")
+        || error_str.contains("504")
+        || error_str.contains("server error")
+        || error_str.contains("internal error")
+    {
        return ErrorType::Recoverable(RecoverableError::ServerError);
    }

-    if error_str.contains("busy") || error_str.contains("overloaded") || 
-       error_str.contains("capacity") || error_str.contains("unavailable") {
+    if error_str.contains("busy")
+        || error_str.contains("overloaded")
+        || error_str.contains("capacity")
+        || error_str.contains("unavailable")
+    {
        return ErrorType::Recoverable(RecoverableError::ModelBusy);
    }

@@ -216,18 +229,24 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
       error_str.contains("timed out") || 
       error_str.contains("operation timed out") ||
       error_str.contains("request or response body error") ||  // Common timeout pattern
-       error_str.contains("stream error") && error_str.contains("timed out") {
+       error_str.contains("stream error") && error_str.contains("timed out")
+    {
        return ErrorType::Recoverable(RecoverableError::Timeout);
    }

    // Check for context length exceeded errors (HTTP 400 with specific messages)
-    if (error_str.contains("400") || error_str.contains("bad request")) &&
-       (error_str.contains("context length") || error_str.contains("prompt is too long") ||
-        error_str.contains("maximum context length") || error_str.contains("context_length_exceeded")) {
+    if (error_str.contains("400") || error_str.contains("bad request"))
+        && (error_str.contains("context length")
+            || error_str.contains("prompt is too long")
+            || error_str.contains("maximum context length")
+            || error_str.contains("context_length_exceeded"))
+    {
        return ErrorType::Recoverable(RecoverableError::ContextLengthExceeded);
    }

-    if error_str.contains("token") && (error_str.contains("limit") || error_str.contains("exceeded")) {
+    if error_str.contains("token")
+        && (error_str.contains("limit") || error_str.contains("exceeded"))
+    {
        return ErrorType::Recoverable(RecoverableError::TokenLimit);
    }

@@ -239,12 +258,14 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
 fn calculate_autonomous_retry_delay(attempt: u32) -> Duration {
    use rand::Rng;
    let mut rng = rand::thread_rng();
-    
+
    // Distribute 6 retries over 10 minutes (600 seconds)
    // Base delays: 10s, 30s, 60s, 120s, 180s, 200s = 600s total
    let base_delays_ms = [10000, 30000, 60000, 120000, 180000, 200000];
-    let base_delay = base_delays_ms.get(attempt.saturating_sub(1) as usize).unwrap_or(&200000);
-    
+    let base_delay = base_delays_ms
+        .get(attempt.saturating_sub(1) as usize)
+        .unwrap_or(&200000);
+
    // Add jitter of ±30% to prevent thundering herd
    let jitter = (*base_delay as f64 * 0.3 * rng.gen::<f64>()) as u64;
    let final_delay = if rng.gen_bool(0.5) {
@@ -252,7 +273,7 @@ fn calculate_autonomous_retry_delay(attempt: u32) -> Duration {
    } else {
        base_delay.saturating_sub(jitter)
    };
-    
+
    Duration::from_millis(final_delay)
 }

@@ -261,14 +282,18 @@ pub fn calculate_retry_delay(attempt: u32, is_autonomous: bool) -> Duration {
    if is_autonomous {
        return calculate_autonomous_retry_delay(attempt);
    }
-    
+
    use rand::Rng;
-    let max_retry_delay_ms = if is_autonomous { AUTONOMOUS_MAX_RETRY_DELAY_MS } else { DEFAULT_MAX_RETRY_DELAY_MS };
-    
+    let max_retry_delay_ms = if is_autonomous {
+        AUTONOMOUS_MAX_RETRY_DELAY_MS
+    } else {
+        DEFAULT_MAX_RETRY_DELAY_MS
+    };
+
    // Exponential backoff: delay = base * 2^attempt
    let base_delay = BASE_RETRY_DELAY_MS * (2_u64.pow(attempt.saturating_sub(1)));
    let capped_delay = base_delay.min(max_retry_delay_ms);
-    
+
    // Add jitter to prevent thundering herd
    let mut rng = rand::thread_rng();
    let jitter = (capped_delay as f64 * JITTER_FACTOR * rng.gen::<f64>()) as u64;
@@ -277,7 +302,7 @@ pub fn calculate_retry_delay(attempt: u32, is_autonomous: bool) -> Duration {
    } else {
        capped_delay.saturating_sub(jitter)
    };
-    
+
    Duration::from_millis(final_delay)
 }

@@ -298,7 +323,7 @@ where

    loop {
        attempt += 1;
-        
+
        match operation().await {
            Ok(result) => {
                if attempt > 1 {
@@ -321,19 +346,19 @@ where
                            context.clone().log_error(&error);
                            return Err(error);
                        }
-                        
+
                        let delay = calculate_retry_delay(attempt, is_autonomous);
                        warn!(
                            "Recoverable error ({:?}) in '{}' (attempt {}/{}). Retrying in {:?}...",
                            recoverable_type, operation_name, attempt, max_attempts, delay
                        );
                        warn!("Error details: {}", error);
-                        
+
                        // Special handling for token limit errors
                        if matches!(recoverable_type, RecoverableError::TokenLimit) {
                            info!("Token limit error detected. Consider triggering summarization.");
                        }
-                        
+
                        tokio::time::sleep(delay).await;
                        _last_error = Some(error);
                    }
@@ -359,18 +384,22 @@ fn truncate_for_logging(s: &str, max_len: usize) -> String {
        // Find a safe UTF-8 boundary to truncate at
        // We need to ensure we don't cut in the middle of a multi-byte character
        let mut truncate_at = max_len;
-        
+
        // Walk backwards from max_len to find a character boundary
        while truncate_at > 0 && !s.is_char_boundary(truncate_at) {
            truncate_at -= 1;
        }
-        
+
        // If we couldn't find a boundary (shouldn't happen), use a safe default
        if truncate_at == 0 {
            truncate_at = max_len.min(s.len());
        }
-        
-        format!("{}... (truncated, {} total bytes)", &s[..truncate_at], s.len())
+
+        format!(
+            "{}... (truncated, {} total bytes)",
+            &s[..truncate_at],
+            s.len()
+        )
    }
 }

@@ -398,42 +427,69 @@ mod tests {
    fn test_error_classification() {
        // Rate limit errors
        let error = anyhow!("Rate limit exceeded");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::RateLimit));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::RateLimit)
+        );
+
        let error = anyhow!("HTTP 429 Too Many Requests");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::RateLimit));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::RateLimit)
+        );
+
        // Network errors
        let error = anyhow!("Network connection failed");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::NetworkError));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::NetworkError)
+        );
+
        // Server errors
        let error = anyhow!("HTTP 503 Service Unavailable");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ServerError));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::ServerError)
+        );
+
        // Model busy
        let error = anyhow!("Model is busy, please try again");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ModelBusy));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::ModelBusy)
+        );
+
        // Timeout
        let error = anyhow!("Request timed out");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::Timeout));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::Timeout)
+        );
+
        // Token limit
        let error = anyhow!("Token limit exceeded");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::TokenLimit));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::TokenLimit)
+        );
+
        // Context length exceeded
        let error = anyhow!("HTTP 400 Bad Request: context length exceeded");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ContextLengthExceeded));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
+        );
+
        let error = anyhow!("Error 400: prompt is too long");
-        assert_eq!(classify_error(&error), ErrorType::Recoverable(RecoverableError::ContextLengthExceeded));
-        
+        assert_eq!(
+            classify_error(&error),
+            ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
+        );
+
        // Non-recoverable
        let error = anyhow!("Invalid API key");
        assert_eq!(classify_error(&error), ErrorType::NonRecoverable);
-        
+
        let error = anyhow!("Malformed request");
        assert_eq!(classify_error(&error), ErrorType::NonRecoverable);
    }
@@ -444,17 +500,17 @@ mod tests {
        let delay1 = calculate_retry_delay(1, false);
        let delay2 = calculate_retry_delay(2, false);
        let delay3 = calculate_retry_delay(3, false);
-        
+
        // Due to jitter, we can't test exact values, but the base should increase
        assert!(delay1.as_millis() >= (BASE_RETRY_DELAY_MS as f64 * 0.7) as u128);
        assert!(delay1.as_millis() <= (BASE_RETRY_DELAY_MS as f64 * 1.3) as u128);
-        
+
        // Delay 2 should be roughly 2x delay 1 (minus jitter)
        assert!(delay2.as_millis() >= delay1.as_millis());
-        
+
        // Delay 3 should be roughly 2x delay 2 (minus jitter)
        assert!(delay3.as_millis() >= delay2.as_millis());
-        
+
        // Test max cap
        let delay_max = calculate_retry_delay(10, false);
        assert!(delay_max.as_millis() <= (DEFAULT_MAX_RETRY_DELAY_MS as f64 * 1.3) as u128);
@@ -469,7 +525,7 @@ mod tests {
        let delay4 = calculate_retry_delay(4, true);
        let delay5 = calculate_retry_delay(5, true);
        let delay6 = calculate_retry_delay(6, true);
-        
+
        // Base delays should be around: 10s, 30s, 60s, 120s, 180s, 200s
        // With ±30% jitter
        assert!(delay1.as_millis() >= 7000 && delay1.as_millis() <= 13000);
@@ -484,14 +540,14 @@ mod tests {
    fn test_truncate_for_logging() {
        let short_text = "Hello, world!";
        assert_eq!(truncate_for_logging(short_text, 20), "Hello, world!");
-        
+
        let long_text = "This is a very long text that should be truncated for logging purposes";
        let truncated = truncate_for_logging(long_text, 20);
        assert!(truncated.starts_with("This is a very long "));
        assert!(truncated.contains("truncated"));
        assert!(truncated.contains("total bytes"));
    }
-    
+
    #[test]
    fn test_truncate_with_multibyte_chars() {
        // Test with multi-byte UTF-8 characters
@@ -499,7 +555,7 @@ mod tests {
        let truncated = truncate_for_logging(text_with_emoji, 10);
        // Should truncate at a valid UTF-8 boundary
        assert!(truncated.starts_with("Hello "));
-        
+
        // Test with box-drawing characters like the one causing the panic
        let text_with_box = "Some text ┌─────┐ more text";
        let truncated = truncate_for_logging(text_with_box, 12);
--- a/crates/g3-core/src/error_handling_test.rs
+++ b/crates/g3-core/src/error_handling_test.rs
@@ -17,7 +17,7 @@ mod tests {
            "test prompt".to_string(),
            None,
            100,
-            false,  // quiet parameter
+            false, // quiet parameter
        );

        let result = retry_with_backoff(
@@ -57,7 +57,7 @@ mod tests {
            "test prompt".to_string(),
            None,
            100,
-            false,  // quiet parameter
+            false, // quiet parameter
        );

        let result: Result<&str, _> = retry_with_backoff(
@@ -91,7 +91,7 @@ mod tests {
            "test prompt".to_string(),
            None,
            100,
-            false,  // quiet parameter
+            false, // quiet parameter
        );

        let result: Result<&str, _> = retry_with_backoff(
@@ -124,7 +124,7 @@ mod tests {
            long_prompt,
            None,
            100,
-            false,  // quiet parameter
+            false, // quiet parameter
        );

        // The prompt should be truncated to 1000 chars
--- a/crates/g3-core/src/fixed_filter_json.rs
+++ b/crates/g3-core/src/fixed_filter_json.rs
@@ -5,7 +5,7 @@
 // 4. Return everything else as the final filtered string

 //! JSON tool call filtering for streaming LLM responses.
-//! 
+//!
 //! This module filters out JSON tool calls from LLM output streams while preserving
 //! regular text content. It uses a state machine to handle streaming chunks.

@@ -29,7 +29,7 @@ struct FixedJsonToolState {
    brace_depth: i32,
    buffer: String,
    json_start_in_buffer: Option<usize>, // Position where confirmed JSON tool call starts
-    content_returned_up_to: usize, // Track how much content we've already returned
+    content_returned_up_to: usize,       // Track how much content we've already returned
    potential_json_start: Option<usize>, // Where the potential JSON started
 }

--- a/crates/g3-core/src/fixed_filter_tests.rs
+++ b/crates/g3-core/src/fixed_filter_tests.rs
@@ -358,8 +358,8 @@ More text"#;
        // 2. Then the same complete JSON appears
        let chunks = vec![
            "Some text\n",
-            r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli"#,  // Truncated
-            r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli/src/lib.rs"}}"#,  // Complete
+            r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli"#, // Truncated
+            r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli/src/lib.rs"}}"#, // Complete
            "\nMore text",
        ];

--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
--- a/crates/g3-core/src/project.rs
+++ b/crates/g3-core/src/project.rs
@@ -7,19 +7,19 @@ use std::path::{Path, PathBuf};
 pub struct Project {
    /// The workspace directory for the project
    pub workspace_dir: PathBuf,
-    
+
    /// Path to the requirements document (for autonomous mode)
    pub requirements_path: Option<PathBuf>,
-    
+
    /// Override requirements text (takes precedence over requirements_path)
    pub requirements_text: Option<String>,
-    
+
    /// Whether the project is in autonomous mode
    pub autonomous: bool,
-    
+
    /// Project name (derived from workspace directory name)
    pub name: String,
-    
+
    /// Session ID for tracking
    pub session_id: Option<String>,
 }
@@ -32,7 +32,7 @@ impl Project {
            .and_then(|n| n.to_str())
            .unwrap_or("unnamed")
            .to_string();
-        
+
        Self {
            workspace_dir,
            requirements_path: None,
@@ -42,33 +42,36 @@ impl Project {
            session_id: None,
        }
    }
-    
+
    /// Create a project for autonomous mode
    pub fn new_autonomous(workspace_dir: PathBuf) -> Result<Self> {
        let mut project = Self::new(workspace_dir.clone());
        project.autonomous = true;
-        
+
        // Look for requirements.md in the workspace directory
        let requirements_path = workspace_dir.join("requirements.md");
        if requirements_path.exists() {
            project.requirements_path = Some(requirements_path);
        }
-        
+
        Ok(project)
    }
-    
+
    /// Create a project for autonomous mode with requirements text override
-    pub fn new_autonomous_with_requirements(workspace_dir: PathBuf, requirements_text: String) -> Result<Self> {
+    pub fn new_autonomous_with_requirements(
+        workspace_dir: PathBuf,
+        requirements_text: String,
+    ) -> Result<Self> {
        let mut project = Self::new(workspace_dir.clone());
        project.autonomous = true;
        project.requirements_text = Some(requirements_text);
-        
+
        // Don't look for requirements.md file when text is provided
        // The text override takes precedence
-        
+
        Ok(project)
    }
-    
+
    /// Set the workspace directory and update related paths
    pub fn set_workspace(&mut self, workspace_dir: PathBuf) {
        self.workspace_dir = workspace_dir.clone();
@@ -77,7 +80,7 @@ impl Project {
            .and_then(|n| n.to_str())
            .unwrap_or("unnamed")
            .to_string();
-        
+
        // Update requirements path if in autonomous mode
        if self.autonomous {
            let requirements_path = workspace_dir.join("requirements.md");
@@ -86,61 +89,18 @@ impl Project {
            }
        }
    }
-    
+
    /// Get the workspace directory
    pub fn workspace(&self) -> &Path {
        &self.workspace_dir
    }
-    
+
    /// Check if requirements file exists
    pub fn has_requirements(&self) -> bool {
        // Has requirements if either text override is provided or requirements file exists
        self.requirements_text.is_some() || self.requirements_path.is_some()
    }
-    
-    /// Check if implementation files exist in the workspace
-    pub fn has_implementation_files(&self) -> bool {
-        self.check_dir_for_implementation_files(&self.workspace_dir)
-    }
-    
-    /// Recursively check a directory for implementation files
-    #[allow(clippy::only_used_in_recursion)]
-    fn check_dir_for_implementation_files(&self, dir: &Path) -> bool {
-        // Common source file extensions
-        let extensions = vec![
-            "swift", "rs", "py", "js", "ts", "java", "cpp", "c",
-            "go", "rb", "php", "cs", "kt", "scala", "m", "h"
-        ];
-        
-        if let Ok(entries) = std::fs::read_dir(dir) {
-            for entry in entries.flatten() {
-                let path = entry.path();
-                
-                if path.is_file() {
-                    // Check if it's a source file
-                    if let Some(ext) = path.extension() {
-                        if let Some(ext_str) = ext.to_str() {
-                            if extensions.contains(&ext_str) {
-                                return true;
-                            }
-                        }
-                    }
-                } else if path.is_dir() {
-                    // Skip hidden directories and common non-source directories
-                    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
-                        if !name.starts_with('.') && name != "logs" && name != "target" && name != "node_modules" {
-                            // Recursively check subdirectories
-                            if self.check_dir_for_implementation_files(&path) {
-                                return true;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-        false
-    }
-    
+
    /// Read the requirements file content
    pub fn read_requirements(&self) -> Result<Option<String>> {
        // Prioritize requirements text override
@@ -153,7 +113,7 @@ impl Project {
            Ok(None)
        }
    }
-    
+
    /// Create the workspace directory if it doesn't exist
    pub fn ensure_workspace_exists(&self) -> Result<()> {
        if !self.workspace_dir.exists() {
@@ -161,18 +121,18 @@ impl Project {
        }
        Ok(())
    }
-    
+
    /// Change to the workspace directory
    pub fn enter_workspace(&self) -> Result<()> {
        std::env::set_current_dir(&self.workspace_dir)?;
        Ok(())
    }
-    
+
    /// Get the logs directory for the project
    pub fn logs_dir(&self) -> PathBuf {
        self.workspace_dir.join("logs")
    }
-    
+
    /// Ensure the logs directory exists
    pub fn ensure_logs_dir(&self) -> Result<()> {
        let logs_dir = self.logs_dir();
@@ -181,4 +141,4 @@ impl Project {
        }
        Ok(())
    }
-}
+}
--- a/crates/g3-core/src/prompts.rs
+++ b/crates/g3-core/src/prompts.rs
@@ -0,0 +1,374 @@
+use const_format::concatcp;
+const CODING_STYLE: &'static str = "# IMPORTANT FOR CODING:
+It is very important that you adhere to these principles when writing code. I will use a code quality tool to assess the code you have generated.
+
+### Most important for coding: Specific guideline for code design:
+
+- Functions and methods should be short - at most 80 lines, ideally under 40.
+- Classes should be modular and composable. They should not have more than 20 methods.
+- Do not write deeply nested (above 6 levels deep) ‘if’, ‘match’ or ‘case’ statements, rather refactor into separate logical sections or functions.
+- Code should be written such that it is maintainable and testable.
+- For Rust code write *ALL* test code into a ‘tests’ directory that is a peer to the ‘src’ of each crate, and is for testing code in that crate.
+- For Python code write *ALL* test code into a top level ‘tests’ directory.
+- Each non-trivial function should have test coverage. DO NOT WRITE TESTS FOR INDIVIDUAL FUNCTIONS / METHODS / CLASSES unless they are large and important. Instead write something
+at a higher level of abstraction, closer to an integration test.
+- Write tests in separate files, where the filename should match the main implementation and adding a “_test” suffix.
+
+### Important for coding: General guidelines for code design:
+
+Keep the code as simple as possible, with few if any external dependencies.
+DRY (Don’t repeat yourself) - each small piece code may only occur exactly once in the entire system.
+KISS (Keep it simple, stupid!) - keep each small piece of software simple and unnecessary complexity should be avoided.
+YAGNI (You ain’t gonna need it) - Always implement things when you actually need them never implements things before you need them.
+
+Use Descriptive Names for Code Elements. - As a rule of thumb, use more descriptive names for larger scopes. e.g., name a loop counter variable “i” is good when the scope of the loop is a single line. But don’t name some class field or method parameter “i”.
+
+When modifying an existing code base, do not unnecessarily refactor or modify code that is not directly relevant to the current coding task. It is fine to do so if new code calls/is called by the new functionality, or you prevent code duplication when new functionality is added.
+If possible constrain the side-effects on other pieces of code if possible, this is part of the principle of modularity.
+
+### Important for coding: General advice on designing algorithms:
+
+If possible, consider the \"Gang of Four\" design patterns when writing code.
+
+The Gang of Four (GOF) patterns are set of 23 common software design patterns introduced in the book
+\"Design Patterns: Elements of Reusable Object-Oriented Software\".
+
+These patterns categorize into three main groups:
+
+1. Creational Patterns
+2. Structural Patterns
+3. Behavioral Patterns
+
+These patterns provide solutions to common design problems and help make software systems more modular, flexible and maintainable. Consider using these patterns in your code design.";
+
+const SYSTEM_NATIVE_TOOL_CALLS: &'static str =
+"You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You analyze given tasks and write code to achieve goals.
+
+You have access to tools. When you need to accomplish a task, you MUST use the appropriate tool. Do not just describe what you would do - actually use the tools.
+
+IMPORTANT: You must call tools to achieve goals. When you receive a request:
+1. Analyze and identify what needs to be done
+2. Call the appropriate tool with the required parameters
+3. Continue or complete the task based on the result
+4. If you repeatedly try something and it fails, try a different approach
+5. Call the final_output tool with a detailed summary when done.
+
+For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\".
+If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
+
+# Task Management with TODO Tools
+
+**REQUIRED for multi-step tasks.** Use TODO tools when your task involves ANY of:
+- Multiple files to create/modify (2+)
+- Multiple distinct steps (3+)
+- Dependencies between steps
+- Testing or verification needed
+- Uncertainty about approach
+
+## Workflow
+
+Every multi-step task follows this pattern:
+1. **Start**: Call todo_read, then todo_write to create your plan
+2. **During**: Execute steps, then todo_read and todo_write to mark progress
+3. **End**: Call todo_read to verify all items complete
+    
+Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists persist across g3 sessions in the workspace directory.
+
+IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
+`{{Based on the requirements file with SHA256: <SHA>}}`
+This ensures the TODO list is tracked against the specific version of requirements it was generated from.
+
+## Examples
+
+**Example 1: Feature Implementation**
+User asks: \"Add user authentication with tests\"
+
+First action:
+{\"tool\": \"todo_read\", \"args\": {}}
+
+Then create plan:
+{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n  - [ ] Create User struct\\n  - [ ] Add login endpoint\\n  - [ ] Add password hashing\\n  - [ ] Write unit tests\\n  - [ ] Write integration tests\"}}
+
+After completing User struct:
+{\"tool\": \"todo_read\", \"args\": {}}
+{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n  - [x] Create User struct\\n  - [ ] Add login endpoint\\n  - [ ] Add password hashing\\n  - [ ] Write unit tests\\n  - [ ] Write integration tests\"}}
+
+**Example 2: Bug Fix**
+User asks: \"Fix the memory leak in cache module\"
+
+{\"tool\": \"todo_read\", \"args\": {}}
+{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Fix memory leak\\n  - [ ] Review cache.rs\\n  - [ ] Check for unclosed resources\\n  - [ ] Add drop implementation\\n  - [ ] Write test to verify fix\"}}
+
+**Example 3: Refactoring**
+User asks: \"Refactor database layer to use async/await\"
+
+{\"tool\": \"todo_read\", \"args\": {}}
+{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Refactor to async\\n  - [ ] Update function signatures\\n  - [ ] Replace blocking calls\\n  - [ ] Update all callers\\n  - [ ] Update tests\"}}
+
+## Format
+
+Use markdown checkboxes:
+- \"- [ ]\" for incomplete tasks
+- \"- [x]\" for completed tasks
+- Indent with 2 spaces for subtasks
+
+Keep items short, specific, and action-oriented.
+
+## Benefits
+
+✓ Prevents missed steps
+✓ Makes progress visible
+✓ Helps recover from interruptions
+✓ Creates better summaries
+
+## When NOT to Use
+
+Skip TODO tools for simple single-step tasks:
+- \"List files\" → just use shell
+- \"Read config.json\" → just use read_file
+- \"Search for functions\" → just use code_search
+
+If you can complete it with 1-2 tool calls, skip TODO.
+
+# Code Search Guidelines
+
+IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg.
+If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
+
+# Code Search Guidelines
+
+IMPORTANT: When searching for code constructs (functions, classes, methods, structs, etc.), ALWAYS use `code_search` instead of shell grep/rg.
+It's syntax-aware and finds actual code, not comments or strings. Only use shell grep for:
+  - Searching non-code files (logs, markdown, text)
+  - Simple string searches across all file types
+  - When you need regex for text content (not code structure)
+
+Common code_search query patterns:
+
+**Rust:**
+  - All functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"functions\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\"}]}}
+  - Async functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"async_fns\", \"query\": \"(function_item (function_modifiers) name: (identifier) @name)\", \"language\": \"rust\"}]}}
+  - Structs: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
+  - Enums: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"enums\", \"query\": \"(enum_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
+  - Impl blocks: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"impls\", \"query\": \"(impl_item type: (type_identifier) @name)\", \"language\": \"rust\"}]}}
+
+**Python:**
+  - Functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"functions\", \"query\": \"(function_definition name: (identifier) @name)\", \"language\": \"python\"}]}}
+  - Classes: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"classes\", \"query\": \"(class_definition name: (identifier) @name)\", \"language\": \"python\"}]}}
+
+**JavaScript/TypeScript:**
+  - Functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"functions\", \"query\": \"(function_declaration name: (identifier) @name)\", \"language\": \"javascript\"}]}}
+  - Classes: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"classes\", \"query\": \"(class_declaration name: (identifier) @name)\", \"language\": \"javascript\"}]}}
+  - Arrow functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"arrow_fns\", \"query\": \"(arrow_function) @fn\", \"language\": \"javascript\"}]}}
+
+**Go:**
+  - Functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"functions\", \"query\": \"(function_declaration name: (identifier) @name)\", \"language\": \"go\"}]}}
+  - Methods: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"methods\", \"query\": \"(method_declaration name: (field_identifier) @name)\", \"language\": \"go\"}]}}
+
+**Java/C++:**
+  - Classes: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"classes\", \"query\": \"(class_declaration name: (identifier) @name)\", \"language\": \"java\"}]}}
+  - Methods: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"methods\", \"query\": \"(method_declaration name: (identifier) @name)\", \"language\": \"java\"}]}}
+
+**Advanced features:**
+  - Multiple searches: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\"}, {\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
+  - With context: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"context_lines\": 3}]}}
+  - Specific paths: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"paths\": [\"src/core\"]}]}}
+
+
+IMPORTANT: If the user asks you to just respond with text (like \"just say hello\" or \"tell me about X\"), do NOT use tools. Simply respond with the requested text directly. Only use tools when you need to execute commands or complete tasks that require action.
+
+When taking screenshots of specific windows (like \"my Safari window\" or \"my terminal\"), ALWAYS use list_windows first to identify the correct window ID, then use take_screenshot with the window_id parameter.
+
+Do not explain what you're going to do - just do it by calling the tools.
+
+
+# Response Guidelines
+
+- Use Markdown formatting for all responses except tool calls.
+- Whenever taking actions, use the pronoun 'I'
+";
+
+pub const SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE: &'static str =
+    concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);
+
+/// Generate system prompt based on whether multiple tool calls are allowed
+pub fn get_system_prompt_for_native(allow_multiple: bool) -> String {
+    if allow_multiple {
+        // Replace the "ONE tool" instruction with multiple tools instruction
+        let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string();
+        base.replace(
+            "2. Call the appropriate tool with the required parameters",
+            "2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate. 
+              <use_parallel_tool_calls>
+  For maximum efficiency, whenever you perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like `ls` or `list_dir`, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially.
+  </use_parallel_tool_calls>
+"
+        )
+    } else {
+        SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
+    }
+}
+
+const SYSTEM_NON_NATIVE_TOOL_USE: &'static str =
+"You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems by writing code.
+
+You have access to tools. When you need to accomplish a task, you MUST use the appropriate tool. Do not just describe what you would do - actually use the tools.
+
+# Tool Call Format
+
+When you need to execute a tool, write ONLY the JSON tool call on a new line:
+
+{\"tool\": \"tool_name\", \"args\": {\"param\": \"value\"}
+
+The tool will execute immediately and you'll receive the result (success or error) to continue with.
+
+# Available Tools
+
+Short description for providers without native calling specs:
+
+- **shell**: Execute shell commands
+  - Format: {\"tool\": \"shell\", \"args\": {\"command\": \"your_command_here\"}
+  - Example: {\"tool\": \"shell\", \"args\": {\"command\": \"ls ~/Downloads\"}
+
+- **read_file**: Read the contents of a file (supports partial reads via start/end)
+  - Format: {\"tool\": \"read_file\", \"args\": {\"file_path\": \"path/to/file\", \"start\": 0, \"end\": 100}
+  - Example: {\"tool\": \"read_file\", \"args\": {\"file_path\": \"src/main.rs\"}
+  - Example (partial): {\"tool\": \"read_file\", \"args\": {\"file_path\": \"large.log\", \"start\": 0, \"end\": 1000}
+
+- **write_file**: Write content to a file (creates or overwrites)
+  - Format: {\"tool\": \"write_file\", \"args\": {\"file_path\": \"path/to/file\", \"content\": \"file content\"}
+  - Example: {\"tool\": \"write_file\", \"args\": {\"file_path\": \"src/lib.rs\", \"content\": \"pub fn hello() {}\"}
+
+- **str_replace**: Replace text in a file using a diff
+  - Format: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"path/to/file\", \"diff\": \"--- old\\n-old text\\n+++ new\\n+new text\"}
+  - Example: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"src/main.rs\", \"diff\": \"--- old\\n-old_code();\\n+++ new\\n+new_code();\"}
+
+- **final_output**: Signal task completion with a detailed summary of work done in markdown format
+  - Format: {\"tool\": \"final_output\", \"args\": {\"summary\": \"what_was_accomplished\"}
+
+- **todo_read**: Read the entire TODO list from todo.g3.md file in workspace directory
+  - Format: {\"tool\": \"todo_read\", \"args\": {}}
+  - Example: {\"tool\": \"todo_read\", \"args\": {}}
+
+- **todo_write**: Write or overwrite the entire todo.g3.md file (WARNING: overwrites completely, always read first)
+  - Format: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Task 1\\n- [ ] Task 2\"}}
+  - Example: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Implement feature\\n  - [ ] Write tests\\n  - [ ] Run tests\"}}
+
+- **code_search**: Syntax-aware code search using tree-sitter. Supports Rust, Python, JavaScript, TypeScript.
+  - Format: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"label\", \"query\": \"tree-sitter query\", \"language\": \"rust|python|javascript|typescript\", \"paths\": [\"src/\"], \"context_lines\": 0}]}}
+  - Find functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_functions\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"paths\": [\"src/\"]}]}}
+  - Find async functions: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"find_async\", \"query\": \"(function_item (function_modifiers) name: (identifier) @name)\", \"language\": \"rust\"}]}}
+  - Find structs: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
+  - Multiple searches: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\"}, {\"name\": \"structs\", \"query\": \"(struct_item name: (type_identifier) @name)\", \"language\": \"rust\"}]}}
+  - With context lines: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"funcs\", \"query\": \"(function_item name: (identifier) @name)\", \"language\": \"rust\", \"context_lines\": 3}]}}
+       - \"context\": 3 (show surrounding lines),
+       - \"json_style\": \"stream\" (for large results)
+
+# Instructions
+
+1. Analyze the request and break down into smaller tasks if appropriate
+2. Execute ONE tool at a time. An exception exists for when you're writing files. See below.
+3. STOP when the original request was satisfied
+4. Call the final_output tool when done
+
+For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense.
+
+Exception to using ONE tool at a time:
+If all you’re doing is WRITING files, and you don’t need to do anything else between each step.
+You can issue MULTIPLE write_file tool calls in a request, however you may ONLY make a SINGLE write_file call for any file in that request.
+For example you may call:
+[START OF REQUEST]
+write_file(\"helper.rs\", \"...\")
+write_file(\"file2.txt\", \"...\")
+[DONE]
+
+But NOT:
+[START OF REQUEST]
+write_file(\"helper.rs\", \"...\")
+write_file(\"file2.txt\", \"...\")
+write_file(\"helper.rs\", \"...\")
+[DONE]
+
+# Task Management with TODO Tools
+
+**REQUIRED for multi-step tasks.** Use TODO tools when your task involves ANY of:
+- Multiple files to create/modify (2+)
+- Multiple distinct steps (3+)
+- Dependencies between steps
+- Testing or verification needed
+- Uncertainty about approach
+
+## Workflow
+
+Every multi-step task follows this pattern:
+1. **Start**: Call todo_read, then todo_write to create your plan
+2. **During**: Execute steps, then todo_read and todo_write to mark progress
+3. **End**: Call todo_read to verify all items complete
+
+Note: todo_write replaces the entire list, so always read first to preserve content.
+
+IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
+`{{Based on the requirements file with SHA256: <SHA>}}`
+This ensures the TODO list is tracked against the specific version of requirements it was generated from.
+
+## Examples
+
+**Example 1: Feature Implementation**
+User asks: \"Add user authentication with tests\"
+
+First action:
+{\"tool\": \"todo_read\", \"args\": {}}
+
+Then create plan:
+{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n  - [ ] Create User struct\\n  - [ ] Add login endpoint\\n  - [ ] Add password hashing\\n  - [ ] Write unit tests\\n  - [ ] Write integration tests\"}}
+
+After completing User struct:
+{\"tool\": \"todo_read\", \"args\": {}}
+{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n  - [x] Create User struct\\n  - [ ] Add login endpoint\\n  - [ ] Add password hashing\\n  - [ ] Write unit tests\\n  - [ ] Write integration tests\"}}
+
+**Example 2: Bug Fix**
+User asks: \"Fix the memory leak in cache module\"
+
+{\"tool\": \"todo_read\", \"args\": {}}
+{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Fix memory leak\\n  - [ ] Review cache.rs\\n  - [ ] Check for unclosed resources\\n  - [ ] Add drop implementation\\n  - [ ] Write test to verify fix\"}}
+
+**Example 3: Refactoring**
+User asks: \"Refactor database layer to use async/await\"
+
+{\"tool\": \"todo_read\", \"args\": {}}
+{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Refactor to async\\n  - [ ] Update function signatures\\n  - [ ] Replace blocking calls\\n  - [ ] Update all callers\\n  - [ ] Update tests\"}}
+
+## Format
+
+Use markdown checkboxes:
+- \"- [ ]\" for incomplete tasks
+- \"- [x]\" for completed tasks
+- Indent with 2 spaces for subtasks
+
+Keep items short, specific, and action-oriented.
+
+## Benefits
+
+✓ Prevents missed steps
+✓ Makes progress visible
+✓ Helps recover from interruptions
+✓ Creates better summaries
+
+## When NOT to Use
+
+Skip TODO tools for simple single-step tasks:
+- \"List files\" → just use shell
+- \"Read config.json\" → just use read_file
+- \"Search for functions\" → just use code_search
+
+If you can complete it with 1-2 tool calls, skip TODO.
+
+
+# Response Guidelines
+
+- Use Markdown formatting for all responses except tool calls.
+- Whenever taking actions, use the pronoun 'I'
+";
+
+pub const SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE: &'static str =
+    concatcp!(SYSTEM_NON_NATIVE_TOOL_USE, CODING_STYLE);
--- a/crates/g3-core/src/task_result.rs
+++ b/crates/g3-core/src/task_result.rs
@@ -30,7 +30,7 @@ impl TaskResult {
        // Look for the final_output marker pattern
        // The final_output content typically appears after the tool is called
        // and is the substantive content that follows
-        
+
        // First, try to find if there's a clear final_output section
        // This would be the content after the last tool execution
        if let Some(final_output_pos) = content_without_timing.rfind("final_output") {
@@ -39,7 +39,7 @@ impl TaskResult {
            if let Some(content_start) = content_without_timing[final_output_pos..].find('\n') {
                let start_pos = final_output_pos + content_start + 1;
                let final_content = &content_without_timing[start_pos..];
-                
+
                // Trim and return the complete content
                let trimmed = final_content.trim();
                if !trimmed.is_empty() {
@@ -47,7 +47,7 @@ impl TaskResult {
                }
            }
        }
-        
+
        // Fallback to the original extract_last_block behavior if we can't find final_output
        // This maintains backward compatibility
        self.extract_last_block()
@@ -62,12 +62,13 @@ impl TaskResult {
        } else {
            &self.response
        };
-        
+
        // Split by double newlines to find the last substantial block
        let blocks: Vec<&str> = content_without_timing.split("\n\n").collect();
-        
+
        // Find the last non-empty block that isn't just whitespace
-        blocks.iter()
+        blocks
+            .iter()
            .rev()
            .find(|block| !block.trim().is_empty())
            .map(|block| block.trim().to_string())
@@ -79,7 +80,8 @@ impl TaskResult {

    /// Check if the response contains an approval (for autonomous mode)
    pub fn is_approved(&self) -> bool {
-        self.extract_final_output().contains("IMPLEMENTATION_APPROVED")
+        self.extract_final_output()
+            .contains("IMPLEMENTATION_APPROVED")
    }
 }

@@ -91,20 +93,21 @@ mod tests {
    fn test_extract_last_block() {
        // Test case 1: Response with timing info
        let context_window = ContextWindow::new(1000);
-        let response_with_timing = "Some initial content\n\nFinal block content\n\n⏱️ 2.3s | 💭 1.2s".to_string();
+        let response_with_timing =
+            "Some initial content\n\nFinal block content\n\n⏱️ 2.3s | 💭 1.2s".to_string();
        let result = TaskResult::new(response_with_timing, context_window.clone());
        assert_eq!(result.extract_last_block(), "Final block content");
-        
+
        // Test case 2: Response without timing
        let response_no_timing = "Some initial content\n\nFinal block content".to_string();
        let result = TaskResult::new(response_no_timing, context_window.clone());
        assert_eq!(result.extract_last_block(), "Final block content");
-        
+
        // Test case 3: Response with IMPLEMENTATION_APPROVED
        let response_approved = "Some content\n\nIMPLEMENTATION_APPROVED".to_string();
        let result = TaskResult::new(response_approved, context_window.clone());
        assert!(result.is_approved());
-        
+
        // Test case 4: Response without approval
        let response_not_approved = "Some content\n\nNeeds more work".to_string();
        let result = TaskResult::new(response_not_approved, context_window);
@@ -114,17 +117,17 @@ mod tests {
    #[test]
    fn test_extract_last_block_edge_cases() {
        let context_window = ContextWindow::new(1000);
-        
+
        // Test empty response
        let empty_response = "".to_string();
        let result = TaskResult::new(empty_response, context_window.clone());
        assert_eq!(result.extract_last_block(), "");
-        
+
        // Test single block
        let single_block = "Just one block".to_string();
        let result = TaskResult::new(single_block, context_window.clone());
        assert_eq!(result.extract_last_block(), "Just one block");
-        
+
        // Test multiple empty blocks
        let multiple_empty = "\n\n\n\nSome content\n\n\n\n".to_string();
        let result = TaskResult::new(multiple_empty, context_window);
@@ -134,18 +137,22 @@ mod tests {
    #[test]
    fn test_extract_final_output() {
        let context_window = ContextWindow::new(1000);
-        
+
        // Test case 1: Response with final_output tool call
        let response_with_final_output = "Analyzing files...\n\nCalling final_output\n\nThis is the complete feedback\nwith multiple lines\nand important details\n\n⏱️ 2.3s".to_string();
        let result = TaskResult::new(response_with_final_output, context_window.clone());
-        assert_eq!(result.extract_final_output(), "This is the complete feedback\nwith multiple lines\nand important details");
-        
+        assert_eq!(
+            result.extract_final_output(),
+            "This is the complete feedback\nwith multiple lines\nand important details"
+        );
+
        // Test case 2: Response with IMPLEMENTATION_APPROVED in final_output
-        let response_approved = "Review complete\n\nfinal_output called\n\nIMPLEMENTATION_APPROVED".to_string();
+        let response_approved =
+            "Review complete\n\nfinal_output called\n\nIMPLEMENTATION_APPROVED".to_string();
        let result = TaskResult::new(response_approved, context_window.clone());
        assert_eq!(result.extract_final_output(), "IMPLEMENTATION_APPROVED");
        assert!(result.is_approved());
-        
+
        // Test case 3: Response with detailed feedback in final_output
        let response_feedback = "Checking implementation...\n\nfinal_output\n\nThe following issues need to be addressed:\n1. Missing error handling in main.rs\n2. Tests are not comprehensive\n3. Documentation needs improvement\n\nPlease fix these issues.".to_string();
        let result = TaskResult::new(response_feedback, context_window.clone());
@@ -154,12 +161,12 @@ mod tests {
        assert!(extracted.contains("1. Missing error handling"));
        assert!(extracted.contains("Please fix these issues."));
        assert!(!result.is_approved());
-        
+
        // Test case 4: Response without final_output (fallback to extract_last_block)
        let response_no_final_output = "Some analysis\n\nFinal thoughts here".to_string();
        let result = TaskResult::new(response_no_final_output, context_window.clone());
        assert_eq!(result.extract_final_output(), "Final thoughts here");
-        
+
        // Test case 5: Empty response
        let empty_response = "".to_string();
        let result = TaskResult::new(empty_response, context_window);
--- a/crates/g3-core/src/task_result_comprehensive_tests.rs
+++ b/crates/g3-core/src/task_result_comprehensive_tests.rs
@@ -6,19 +6,19 @@ use std::sync::Arc;
 fn test_task_result_basic_functionality() {
    // Create a context window with some messages
    let mut context = ContextWindow::new(10000);
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: "Test message 1".to_string(),
-    });
-    context.add_message(Message {
-        role: MessageRole::Assistant,
-        content: "Response 1".to_string(),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::User,
+        "Test message 1".to_string(),
+    ));
+    context.add_message(Message::new(
+        MessageRole::Assistant,
+        "Response 1".to_string(),
+    ));
+
    // Create a TaskResult
    let response = "This is the response\n\nFinal output block".to_string();
    let result = TaskResult::new(response.clone(), context.clone());
-    
+
    // Test basic properties
    assert_eq!(result.response, response);
    assert_eq!(result.context_window.conversation_history.len(), 2);
@@ -28,32 +28,32 @@ fn test_task_result_basic_functionality() {
 #[test]
 fn test_extract_last_block_various_formats() {
    let context = ContextWindow::new(1000);
-    
+
    // Test 1: Standard format with multiple blocks
    let response1 = "First block\n\nSecond block\n\nThird block".to_string();
    let result1 = TaskResult::new(response1, context.clone());
    assert_eq!(result1.extract_last_block(), "Third block");
-    
+
    // Test 2: With timing information
    let response2 = "Content\n\nFinal block\n\n⏱️ 2.3s | 💭 1.2s".to_string();
    let result2 = TaskResult::new(response2, context.clone());
    assert_eq!(result2.extract_last_block(), "Final block");
-    
+
    // Test 3: Single line response
    let response3 = "Single line response".to_string();
    let result3 = TaskResult::new(response3, context.clone());
    assert_eq!(result3.extract_last_block(), "Single line response");
-    
+
    // Test 4: Empty response
    let response4 = "".to_string();
    let result4 = TaskResult::new(response4, context.clone());
    assert_eq!(result4.extract_last_block(), "");
-    
+
    // Test 5: Only whitespace
    let response5 = "\n\n\n   \n\n".to_string();
    let result5 = TaskResult::new(response5, context.clone());
    assert_eq!(result5.extract_last_block(), "");
-    
+
    // Test 6: Multiple blocks with empty ones
    let response6 = "First\n\n\n\n\n\nLast block here".to_string();
    let result6 = TaskResult::new(response6, context.clone());
@@ -63,7 +63,7 @@ fn test_extract_last_block_various_formats() {
 #[test]
 fn test_is_approved_detection() {
    let context = ContextWindow::new(1000);
-    
+
    // Test approved cases
    let approved_responses = vec![
        "Analysis complete\n\nIMPLEMENTATION_APPROVED",
@@ -71,12 +71,16 @@ fn test_is_approved_detection() {
        "IMPLEMENTATION_APPROVED",
        "Review done\n\n✅ IMPLEMENTATION_APPROVED - All tests pass",
    ];
-    
+
    for response in approved_responses {
        let result = TaskResult::new(response.to_string(), context.clone());
-        assert!(result.is_approved(), "Failed to detect approval in: {}", response);
+        assert!(
+            result.is_approved(),
+            "Failed to detect approval in: {}",
+            response
+        );
    }
-    
+
    // Test not approved cases
    let not_approved_responses = vec![
        "Needs more work",
@@ -85,10 +89,14 @@ fn test_is_approved_detection() {
        "Almost there but not APPROVED",
        "",
    ];
-    
+
    for response in not_approved_responses {
        let result = TaskResult::new(response.to_string(), context.clone());
-        assert!(!result.is_approved(), "Incorrectly detected approval in: {}", response);
+        assert!(
+            !result.is_approved(),
+            "Incorrectly detected approval in: {}",
+            response
+        );
    }
 }

@@ -97,36 +105,46 @@ fn test_context_window_preservation() {
    // Create a context window with specific state
    let mut context = ContextWindow::new(5000);
    context.used_tokens = 1234;
-    
+
    // Add some messages
    for i in 0..5 {
-        context.add_message(Message {
-            role: if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant },
-            content: format!("Message {}", i),
-        });
+        context.add_message(Message::new(
+            if i % 2 == 0 {
+                MessageRole::User
+            } else {
+                MessageRole::Assistant
+            },
+            format!("Message {}", i),
+        ));
    }
-    
+
    // Create TaskResult
    let result = TaskResult::new("Response".to_string(), context.clone());
-    
+
    // Verify context is preserved
    assert_eq!(result.context_window.total_tokens, 5000);
    assert!(result.context_window.used_tokens > 1234); // Should have increased
    assert_eq!(result.context_window.conversation_history.len(), 5);
-    
+
    // Verify messages are preserved correctly
    for i in 0..5 {
-        let is_user = matches!(result.context_window.conversation_history[i].role, MessageRole::User);
+        let is_user = matches!(
+            result.context_window.conversation_history[i].role,
+            MessageRole::User
+        );
        let expected_is_user = i % 2 == 0;
        assert_eq!(is_user, expected_is_user, "Message {} has wrong role", i);
-        assert_eq!(result.context_window.conversation_history[i].content, format!("Message {}", i));
+        assert_eq!(
+            result.context_window.conversation_history[i].content,
+            format!("Message {}", i)
+        );
    }
 }

 #[test]
 fn test_coach_feedback_extraction_scenarios() {
    let context = ContextWindow::new(1000);
-    
+
    // Scenario 1: Coach feedback with file operations and analysis
    let coach_response = r#"Reading file: src/main.rs
 📄 File content (23 lines):
@@ -140,13 +158,13 @@ The implementation needs the following fixes:
 1. Add error handling
 2. Implement missing functions
 3. Add tests"#;
-    
+
    let result = TaskResult::new(coach_response.to_string(), context.clone());
    let feedback = result.extract_last_block();
    assert!(feedback.contains("Add error handling"));
    assert!(feedback.contains("Implement missing functions"));
    assert!(feedback.contains("Add tests"));
-    
+
    // Scenario 2: Coach approval
    let approval_response = r#"Checking compilation...
 ✅ Build successful
@@ -155,11 +173,11 @@ Running tests...
 ✅ All tests pass

 IMPLEMENTATION_APPROVED"#;
-    
+
    let result = TaskResult::new(approval_response.to_string(), context.clone());
    assert!(result.is_approved());
    assert_eq!(result.extract_last_block(), "IMPLEMENTATION_APPROVED");
-    
+
    // Scenario 3: Complex feedback with timing
    let complex_response = r#"Tool execution log...

@@ -170,7 +188,7 @@ The following issues were found:
 - Missing input validation

 ⏱️ 5.2s | 💭 2.1s"#;
-    
+
    let result = TaskResult::new(complex_response.to_string(), context.clone());
    let feedback = result.extract_last_block();
    assert!(feedback.contains("Memory leak"));
@@ -181,17 +199,18 @@ The following issues were found:
 #[test]
 fn test_edge_cases_and_special_characters() {
    let context = ContextWindow::new(1000);
-    
+
    // Test with special characters and emojis
    let response_with_emojis = "First part 🚀\n\n✅ Final part with emojis 🎉".to_string();
    let result = TaskResult::new(response_with_emojis, context.clone());
    assert_eq!(result.extract_last_block(), "✅ Final part with emojis 🎉");
-    
+
    // Test with code blocks
-    let response_with_code = "Explanation\n\n```rust\nfn main() {}\n```\n\nFinal comment".to_string();
+    let response_with_code =
+        "Explanation\n\n```rust\nfn main() {}\n```\n\nFinal comment".to_string();
    let result = TaskResult::new(response_with_code, context.clone());
    assert_eq!(result.extract_last_block(), "Final comment");
-    
+
    // Test with mixed newlines
    let mixed_newlines = "Part 1\r\n\r\nPart 2\n\nPart 3".to_string();
    let result = TaskResult::new(mixed_newlines, context.clone());
@@ -201,30 +220,33 @@ fn test_edge_cases_and_special_characters() {
 #[test]
 fn test_large_response_handling() {
    let context = ContextWindow::new(100000);
-    
+
    // Create a large response
    let mut large_response = String::new();
    for i in 0..100 {
        large_response.push_str(&format!("Block {} with some content\n\n", i));
    }
    large_response.push_str("This is the final block after 100 other blocks");
-    
+
    let result = TaskResult::new(large_response, context);
-    assert_eq!(result.extract_last_block(), "This is the final block after 100 other blocks");
+    assert_eq!(
+        result.extract_last_block(),
+        "This is the final block after 100 other blocks"
+    );
 }

 #[test]
 fn test_concurrent_access() {
    use std::thread;
-    
+
    let context = ContextWindow::new(1000);
    let result = Arc::new(TaskResult::new(
        "Concurrent test\n\nFinal block".to_string(),
        context,
    ));
-    
+
    let mut handles = vec![];
-    
+
    // Spawn multiple threads to access the TaskResult
    for _ in 0..10 {
        let result_clone = Arc::clone(&result);
@@ -232,16 +254,15 @@ fn test_concurrent_access() {
            // Each thread extracts the last block
            let block = result_clone.extract_last_block();
            assert_eq!(block, "Final block");
-            
+
            // Check approval status
            assert!(!result_clone.is_approved());
        });
        handles.push(handle);
    }
-    
+
    // Wait for all threads to complete
    for handle in handles {
        handle.join().unwrap();
    }
 }
-
--- a/crates/g3-core/src/tilde_expansion_tests.rs
+++ b/crates/g3-core/src/tilde_expansion_tests.rs
@@ -7,10 +7,10 @@ mod tilde_expansion_tests {
        // Test that shellexpand works
        let path_with_tilde = "~/test.txt";
        let expanded = shellexpand::tilde(path_with_tilde);
-        
+
        // Get the actual home directory
        let home = env::var("HOME").expect("HOME environment variable not set");
-        
+
        // Verify expansion happened
        assert_eq!(expanded.as_ref(), format!("{}/test.txt", home));
        assert!(!expanded.contains("~"));
@@ -20,9 +20,9 @@ mod tilde_expansion_tests {
    fn test_tilde_expansion_with_subdirs() {
        let path_with_tilde = "~/Documents/test.txt";
        let expanded = shellexpand::tilde(path_with_tilde);
-        
+
        let home = env::var("HOME").expect("HOME environment variable not set");
-        
+
        assert_eq!(expanded.as_ref(), format!("{}/Documents/test.txt", home));
    }

@@ -30,7 +30,7 @@ mod tilde_expansion_tests {
    fn test_no_tilde_unchanged() {
        let path_without_tilde = "/absolute/path/test.txt";
        let expanded = shellexpand::tilde(path_without_tilde);
-        
+
        assert_eq!(expanded.as_ref(), path_without_tilde);
    }
 }
--- a/crates/g3-core/src/ui_writer.rs
+++ b/crates/g3-core/src/ui_writer.rs
@@ -4,58 +4,71 @@
 pub trait UiWriter: Send + Sync {
    /// Print a simple message
    fn print(&self, message: &str);
-    
+
    /// Print a message with a newline
    fn println(&self, message: &str);
-    
+
    /// Print without newline (for progress indicators)
    fn print_inline(&self, message: &str);
-    
+
    /// Print a system prompt section
    fn print_system_prompt(&self, prompt: &str);
-    
+
    /// Print a context window status message
    fn print_context_status(&self, message: &str);
-    
+
    /// Print a context thinning success message with highlight and animation
    fn print_context_thinning(&self, message: &str);
-    
+
    /// Print a tool execution header
    fn print_tool_header(&self, tool_name: &str);
-    
+
    /// Print a tool argument
    fn print_tool_arg(&self, key: &str, value: &str);
-    
+
    /// Print tool output header
    fn print_tool_output_header(&self);
-    
+
    /// Update the current tool output line (replaces previous line)
    fn update_tool_output_line(&self, line: &str);
-    
+
    /// Print a tool output line
    fn print_tool_output_line(&self, line: &str);
-    
+
    /// Print tool output summary (when output is truncated)
    fn print_tool_output_summary(&self, hidden_count: usize);
-    
+
    /// Print tool execution timing
    fn print_tool_timing(&self, duration_str: &str);
-    
+
    /// Print the agent prompt indicator
    fn print_agent_prompt(&self);
-    
+
    /// Print agent response inline (for streaming)
    fn print_agent_response(&self, content: &str);
-    
+
    /// Notify that an SSE event was received (including pings)
    fn notify_sse_received(&self);
-    
+
    /// Flush any buffered output
    fn flush(&self);
-    
+
    /// Returns true if this UI writer wants full, untruncated output
    /// Default is false (truncate for human readability)
-    fn wants_full_output(&self) -> bool { false }
+    fn wants_full_output(&self) -> bool {
+        false
+    }
+
+    /// Prompt the user for a yes/no confirmation
+    fn prompt_user_yes_no(&self, message: &str) -> bool;
+
+    /// Prompt the user to choose from a list of options
+    /// Returns the index of the selected option
+    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize;
+
+    /// Print the final output summary with markdown formatting
+    /// Shows a spinner while formatting, then renders the markdown
+    fn print_final_output(&self, summary: &str);
 }

 /// A no-op implementation for when UI output is not needed
@@ -79,5 +92,16 @@ impl UiWriter for NullUiWriter {
    fn print_agent_response(&self, _content: &str) {}
    fn notify_sse_received(&self) {}
    fn flush(&self) {}
-    fn wants_full_output(&self) -> bool { false }
-}
+    fn wants_full_output(&self) -> bool {
+        false
+    }
+    fn prompt_user_yes_no(&self, _message: &str) -> bool {
+        true
+    }
+    fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize {
+        0
+    }
+    fn print_final_output(&self, _summary: &str) {
+        // No-op for null writer
+    }
+}
--- a/crates/g3-core/tests/code_search_test.rs
+++ b/crates/g3-core/tests/code_search_test.rs
@@ -8,7 +8,7 @@ async fn test_find_async_functions() {
    // Create a temporary test file
    let test_dir = std::env::temp_dir().join("g3_test_code_search");
    fs::create_dir_all(&test_dir).unwrap();
-    
+
    let test_file = test_dir.join("test.rs");
    fs::write(
        &test_file,
@@ -47,7 +47,10 @@ pub async fn another_async(x: i32) -> Result<(), ()> {
    assert_eq!(response.searches.len(), 1);
    let search_result = &response.searches[0];
    assert_eq!(search_result.name, "find_async_functions");
-    assert_eq!(search_result.match_count, 2, "Should find 2 async functions");
+    assert_eq!(
+        search_result.match_count, 2,
+        "Should find 2 async functions"
+    );
    assert!(search_result.error.is_none());

    // Check that we found the right functions
@@ -69,7 +72,7 @@ async fn test_find_all_functions() {
    // Create a temporary test file
    let test_dir = std::env::temp_dir().join("g3_test_code_search_2");
    fs::create_dir_all(&test_dir).unwrap();
-    
+
    let test_file = test_dir.join("test.rs");
    fs::write(
        &test_file,
@@ -107,7 +110,10 @@ pub async fn another_async(x: i32) -> Result<(), ()> {
    assert_eq!(response.searches.len(), 1);
    let search_result = &response.searches[0];
    assert_eq!(search_result.name, "find_all_functions");
-    assert_eq!(search_result.match_count, 3, "Should find 3 functions total");
+    assert_eq!(
+        search_result.match_count, 3,
+        "Should find 3 functions total"
+    );
    assert!(search_result.error.is_none());

    // Check that we found all functions
@@ -130,7 +136,7 @@ async fn test_find_structs() {
    // Create a temporary test file
    let test_dir = std::env::temp_dir().join("g3_test_code_search_3");
    fs::create_dir_all(&test_dir).unwrap();
-    
+
    let test_file = test_dir.join("test.rs");
    fs::write(
        &test_file,
@@ -188,7 +194,7 @@ async fn test_context_lines() {
    // Create a temporary test file
    let test_dir = std::env::temp_dir().join("g3_test_code_search_4");
    fs::create_dir_all(&test_dir).unwrap();
-    
+
    let test_file = test_dir.join("test.rs");
    fs::write(
        &test_file,
@@ -223,16 +229,22 @@ pub fn target_function() {
    assert_eq!(response.searches.len(), 1);
    let search_result = &response.searches[0];
    assert_eq!(search_result.match_count, 1);
-    
+
    let match_result = &search_result.matches[0];
    assert!(match_result.context.is_some());
-    
+
    let context = match_result.context.as_ref().unwrap();
    assert!(context.contains("Line 2"), "Should include 2 lines before");
-    assert!(context.contains("target_function"), "Should include the function");
+    assert!(
+        context.contains("target_function"),
+        "Should include the function"
+    );
    // Note: context_lines=2 means 2 lines before and after the match line (line 4)
    // So we get lines 2-6, which includes up to println but not the closing brace
-    assert!(context.contains("println"), "Should include 2 lines after the match");
+    assert!(
+        context.contains("println"),
+        "Should include 2 lines after the match"
+    );

    // Cleanup
    fs::remove_dir_all(&test_dir).ok();
@@ -243,7 +255,7 @@ async fn test_multiple_searches() {
    // Create a temporary test file
    let test_dir = std::env::temp_dir().join("g3_test_code_search_5");
    fs::create_dir_all(&test_dir).unwrap();
-    
+
    let test_file = test_dir.join("test.rs");
    fs::write(
        &test_file,
@@ -301,7 +313,7 @@ async fn test_python_search() {
    // Create a temporary Python test file
    let test_dir = std::env::temp_dir().join("g3_test_code_search_python");
    fs::create_dir_all(&test_dir).unwrap();
-    
+
    let test_file = test_dir.join("test.py");
    fs::write(
        &test_file,
@@ -338,14 +350,17 @@ class MyClass:

    assert_eq!(response.searches.len(), 1);
    let search_result = &response.searches[0];
-    assert_eq!(search_result.match_count, 3, "Should find 3 functions in Python (2 regular + 1 async + 1 method)");
-    
+    assert_eq!(
+        search_result.match_count, 3,
+        "Should find 3 functions in Python (2 regular + 1 async + 1 method)"
+    );
+
    let function_names: Vec<String> = search_result
        .matches
        .iter()
        .filter_map(|m| m.captures.get("name").cloned())
        .collect();
-    
+
    assert!(function_names.contains(&"regular_function".to_string()));
    assert!(function_names.contains(&"async_function".to_string()));
    assert!(function_names.contains(&"method".to_string()));
@@ -359,7 +374,7 @@ async fn test_javascript_search() {
    // Create a temporary JavaScript test file
    let test_dir = std::env::temp_dir().join("g3_test_code_search_js");
    fs::create_dir_all(&test_dir).unwrap();
-    
+
    let test_file = test_dir.join("test.js");
    fs::write(
        &test_file,
@@ -396,14 +411,17 @@ class MyClass {

    assert_eq!(response.searches.len(), 1);
    let search_result = &response.searches[0];
-    assert_eq!(search_result.match_count, 2, "Should find 2 functions in JavaScript");
-    
+    assert_eq!(
+        search_result.match_count, 2,
+        "Should find 2 functions in JavaScript"
+    );
+
    let function_names: Vec<String> = search_result
        .matches
        .iter()
        .filter_map(|m| m.captures.get("name").cloned())
        .collect();
-    
+
    assert!(function_names.contains(&"regularFunction".to_string()));
    assert!(function_names.contains(&"asyncFunction".to_string()));

@@ -420,7 +438,7 @@ async fn test_go_search() {
        .and_then(|p| p.parent())
        .unwrap();
    let test_code_path = workspace_root.join("examples/test_code");
-    
+
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
            name: "go_functions".to_string(),
@@ -435,14 +453,19 @@ async fn test_go_search() {

    let response = execute_code_search(request).await.unwrap();
    assert_eq!(response.searches.len(), 1);
-    
+
    eprintln!("Go search result: {:?}", response.searches[0]);
    eprintln!("Match count: {}", response.searches[0].matches.len());
    eprintln!("Error: {:?}", response.searches[0].error);
-    assert!(response.searches[0].matches.len() > 0, "No matches found for Go search");
-    
+    assert!(
+        response.searches[0].matches.len() > 0,
+        "No matches found for Go search"
+    );
+
    // Should find main and greet functions
-    let names: Vec<&str> = response.searches[0].matches.iter()
+    let names: Vec<&str> = response.searches[0]
+        .matches
+        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    assert!(names.contains(&"main"));
@@ -458,7 +481,7 @@ async fn test_java_search() {
        .and_then(|p| p.parent())
        .unwrap();
    let test_code_path = workspace_root.join("examples/test_code");
-    
+
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
            name: "java_classes".to_string(),
@@ -474,9 +497,11 @@ async fn test_java_search() {
    let response = execute_code_search(request).await.unwrap();
    assert_eq!(response.searches.len(), 1);
    assert!(response.searches[0].matches.len() > 0);
-    
+
    // Should find Example class
-    let names: Vec<&str> = response.searches[0].matches.iter()
+    let names: Vec<&str> = response.searches[0]
+        .matches
+        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    assert!(names.contains(&"Example"));
@@ -491,7 +516,7 @@ async fn test_c_search() {
        .and_then(|p| p.parent())
        .unwrap();
    let test_code_path = workspace_root.join("examples/test_code");
-    
+
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
            name: "c_functions".to_string(),
@@ -507,9 +532,11 @@ async fn test_c_search() {
    let response = execute_code_search(request).await.unwrap();
    assert_eq!(response.searches.len(), 1);
    assert!(response.searches[0].matches.len() > 0);
-    
+
    // Should find greet, add, and main functions
-    let names: Vec<&str> = response.searches[0].matches.iter()
+    let names: Vec<&str> = response.searches[0]
+        .matches
+        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    assert!(names.contains(&"greet"));
@@ -526,7 +553,7 @@ async fn test_cpp_search() {
        .and_then(|p| p.parent())
        .unwrap();
    let test_code_path = workspace_root.join("examples/test_code");
-    
+
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
            name: "cpp_classes".to_string(),
@@ -542,15 +569,18 @@ async fn test_cpp_search() {
    let response = execute_code_search(request).await.unwrap();
    assert_eq!(response.searches.len(), 1);
    assert!(response.searches[0].matches.len() > 0);
-    
+
    // Should find Person class
-    let names: Vec<&str> = response.searches[0].matches.iter()
+    let names: Vec<&str> = response.searches[0]
+        .matches
+        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    assert!(names.contains(&"Person"));
 }

 #[tokio::test]
+#[ignore]
 async fn test_kotlin_search() {
    let request = CodeSearchRequest {
        searches: vec![SearchSpec {
@@ -567,9 +597,11 @@ async fn test_kotlin_search() {
    let response = execute_code_search(request).await.unwrap();
    assert_eq!(response.searches.len(), 1);
    assert!(response.searches[0].matches.len() > 0);
-    
+
    // Should find Person class
-    let names: Vec<&str> = response.searches[0].matches.iter()
+    let names: Vec<&str> = response.searches[0]
+        .matches
+        .iter()
        .filter_map(|m| m.captures.get("name").map(|s| s.as_str()))
        .collect();
    assert!(names.contains(&"Person"));
--- a/crates/g3-core/tests/test_context_thinning.rs
+++ b/crates/g3-core/tests/test_context_thinning.rs
@@ -4,35 +4,35 @@ use g3_providers::{Message, MessageRole};
 #[test]
 fn test_thinning_thresholds() {
    let mut context = ContextWindow::new(10000);
-    
+
    // At 0%, should not thin
    assert!(!context.should_thin());
-    
+
    // Simulate reaching 50% usage
    context.used_tokens = 5000;
    assert!(context.should_thin());
-    
+
    // After thinning at 50%, should not thin again until next threshold
    context.last_thinning_percentage = 50;
    assert!(!context.should_thin());
-    
+
    // At 60%, should thin again
    context.used_tokens = 6000;
    assert!(context.should_thin());
-    
+
    // After thinning at 60%, should not thin
    context.last_thinning_percentage = 60;
    assert!(!context.should_thin());
-    
+
    // At 70%, should thin
    context.used_tokens = 7000;
    assert!(context.should_thin());
-    
+
    // At 80%, should thin
    context.last_thinning_percentage = 70;
    context.used_tokens = 8000;
    assert!(context.should_thin());
-    
+
    // After 80%, should not thin (compaction takes over)
    context.last_thinning_percentage = 80;
    context.used_tokens = 8500;
@@ -42,14 +42,14 @@ fn test_thinning_thresholds() {
 #[test]
 fn test_thin_context_basic() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add some messages to the first third
    for i in 0..9 {
        if i % 2 == 0 {
-            context.add_message(Message {
-                role: MessageRole::Assistant,
-                content: format!("Assistant message {}", i),
-            });
+            context.add_message(Message::new(
+                MessageRole::Assistant,
+                format!("Assistant message {}", i),
+            ));
        } else {
            // Add tool results with varying sizes
            let content = if i == 1 {
@@ -62,24 +62,25 @@ fn test_thin_context_basic() {
                // Small tool result (< 1000 chars)
                format!("Tool result: small result {}", i)
            };
-            
-            context.add_message(Message {
-                role: MessageRole::User,
-                content,
-            });
+
+            context.add_message(Message::new(MessageRole::User, content));
        }
    }
-    
+
    // Trigger thinning at 50%
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();
-    
+
    println!("Thinning summary: {}", summary);
-    
+
    // Should have thinned at least 1 large tool result in the first third
-    assert!(summary.contains("1 tool result"), "Summary was: {}", summary);
+    assert!(
+        summary.contains("1 tool result"),
+        "Summary was: {}",
+        summary
+    );
    assert!(summary.contains("50%"));
-    
+
    // Check that the large tool results were replaced
    let first_third_end = context.conversation_history.len() / 3;
    for i in 0..first_third_end {
@@ -96,46 +97,46 @@ fn test_thin_context_basic() {
 #[test]
 fn test_thin_write_file_tool_calls() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add some messages including a write_file tool call with large content
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: "Please create a large file".to_string(),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::User,
+        "Please create a large file".to_string(),
+    ));
+
    // Add an assistant message with a write_file tool call containing large content
    let large_content = "x".repeat(1500);
    let tool_call_json = format!(
        r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
        large_content
    );
-    context.add_message(Message {
-        role: MessageRole::Assistant,
-        content: format!("I'll create that file.\n\n{}", tool_call_json),
-    });
-    
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::Assistant,
+        format!("I'll create that file.\n\n{}", tool_call_json),
+    ));
+
+    context.add_message(Message::new(
+        MessageRole::User,
+        "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
+    ));
+
    // Add more messages to ensure we have enough for "first third" logic
    for i in 0..6 {
-        context.add_message(Message {
-            role: MessageRole::Assistant,
-            content: format!("Response {}", i),
-        });
+        context.add_message(Message::new(
+            MessageRole::Assistant,
+            format!("Response {}", i),
+        ));
    }
-    
+
    // Trigger thinning at 50%
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();
-    
+
    println!("Thinning summary: {}", summary);
-    
+
    // Should have thinned the write_file tool call
    assert!(summary.contains("tool call") || summary.contains("chars saved"));
-    
+
    // Check that the large content was replaced with a file reference
    let first_third_end = context.conversation_history.len() / 3;
    for i in 0..first_third_end {
@@ -152,46 +153,50 @@ fn test_thin_write_file_tool_calls() {
 #[test]
 fn test_thin_str_replace_tool_calls() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add some messages including a str_replace tool call with large diff
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: "Please update the file".to_string(),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::User,
+        "Please update the file".to_string(),
+    ));
+
    // Add an assistant message with a str_replace tool call containing large diff
-    let large_diff = format!("--- old\n{}\n+++ new\n{}", "-old line\n".repeat(100), "+new line\n".repeat(100));
+    let large_diff = format!(
+        "--- old\n{}\n+++ new\n{}",
+        "-old line\n".repeat(100),
+        "+new line\n".repeat(100)
+    );
    let tool_call_json = format!(
        r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
        large_diff.replace('\n', "\\n")
    );
-    context.add_message(Message {
-        role: MessageRole::Assistant,
-        content: format!("I'll update that file.\n\n{}", tool_call_json),
-    });
-    
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: "Tool result: ✅ applied unified diff".to_string(),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::Assistant,
+        format!("I'll update that file.\n\n{}", tool_call_json),
+    ));
+
+    context.add_message(Message::new(
+        MessageRole::User,
+        "Tool result: ✅ applied unified diff".to_string(),
+    ));
+
    // Add more messages to ensure we have enough for "first third" logic
    for i in 0..6 {
-        context.add_message(Message {
-            role: MessageRole::Assistant,
-            content: format!("Response {}", i),
-        });
+        context.add_message(Message::new(
+            MessageRole::Assistant,
+            format!("Response {}", i),
+        ));
    }
-    
+
    // Trigger thinning at 50%
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();
-    
+
    println!("Thinning summary: {}", summary);
-    
+
    // Should have thinned the str_replace tool call
    assert!(summary.contains("tool call") || summary.contains("chars saved"));
-    
+
    // Check that the large diff was replaced with a file reference
    let first_third_end = context.conversation_history.len() / 3;
    for i in 0..first_third_end {
@@ -209,18 +214,18 @@ fn test_thin_str_replace_tool_calls() {
 #[test]
 fn test_thin_context_no_large_results() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add only small messages
    for i in 0..9 {
-        context.add_message(Message {
-            role: MessageRole::User,
-            content: format!("Tool result: small {}", i),
-        });
+        context.add_message(Message::new(
+            MessageRole::User,
+            format!("Tool result: small {}", i),
+        ));
    }
-    
+
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();
-    
+
    // Should report no large results found
    assert!(summary.contains("no large tool results or tool calls found"));
 }
@@ -228,7 +233,7 @@ fn test_thin_context_no_large_results() {
 #[test]
 fn test_thin_context_only_affects_first_third() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add 12 messages (first third = 4 messages)
    for i in 0..12 {
        let content = if i % 2 == 1 {
@@ -237,23 +242,23 @@ fn test_thin_context_only_affects_first_third() {
        } else {
            format!("Assistant message {}", i)
        };
-        
+
        let role = if i % 2 == 1 {
            MessageRole::User
        } else {
            MessageRole::Assistant
        };
-        
-        context.add_message(Message { role, content });
+
+        context.add_message(Message::new(role, content));
    }
-    
+
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();
-    
+
    // First third is 4 messages (indices 0-3), so only indices 1 and 3 should be thinned
    // That's 2 tool results
    assert!(summary.contains("2 tool results"));
-    
+
    // Check that messages after the first third are NOT thinned
    let first_third_end = context.conversation_history.len() / 3;
    for i in first_third_end..context.conversation_history.len() {
@@ -261,8 +266,11 @@ fn test_thin_context_only_affects_first_third() {
            if matches!(msg.role, MessageRole::User) && msg.content.starts_with("Tool result:") {
                // These should still be large (not thinned)
                if i % 2 == 1 {
-                    assert!(msg.content.len() > 1000, 
-                        "Message at index {} should not have been thinned", i);
+                    assert!(
+                        msg.content.len() > 1000,
+                        "Message at index {} should not have been thinned",
+                        i
+                    );
                }
            }
        }
--- a/crates/g3-core/tests/test_preflight_max_tokens.rs
+++ b/crates/g3-core/tests/test_preflight_max_tokens.rs
@@ -0,0 +1,188 @@
+//! Tests for the pre-flight max_tokens validation with thinking.budget_tokens constraint
+//!
+//! These tests verify that when using Anthropic with extended thinking enabled,
+//! the max_tokens calculation properly accounts for the budget_tokens constraint.
+
+use g3_config::Config;
+use g3_core::ContextWindow;
+
+/// Helper function to create a minimal config for testing
+fn create_test_config_with_thinking(thinking_budget: Option<u32>) -> Config {
+    let mut config = Config::default();
+    
+    // Set up Anthropic provider with optional thinking budget
+    config.providers.anthropic = Some(g3_config::AnthropicConfig {
+        api_key: "test-key".to_string(),
+        model: "claude-sonnet-4-5".to_string(),
+        max_tokens: Some(16000),
+        temperature: Some(0.1),
+        cache_config: None,
+        enable_1m_context: None,
+        thinking_budget_tokens: thinking_budget,
+    });
+    
+    config.providers.default_provider = "anthropic".to_string();
+    config
+}
+
+/// Test that when thinking is disabled, max_tokens passes through unchanged
+#[test]
+fn test_no_thinking_budget_passes_through() {
+    let config = create_test_config_with_thinking(None);
+    
+    // Without thinking budget, any max_tokens should be fine
+    let proposed_max = 5000;
+    
+    // The constraint check would return (proposed_max, false)
+    // since there's no thinking_budget_tokens configured
+    assert!(config.providers.anthropic.as_ref().unwrap().thinking_budget_tokens.is_none());
+}
+
+/// Test that when max_tokens > budget_tokens + buffer, no reduction is needed
+#[test]
+fn test_sufficient_max_tokens_no_reduction_needed() {
+    let config = create_test_config_with_thinking(Some(10000));
+    let budget_tokens = config.providers.anthropic.as_ref().unwrap().thinking_budget_tokens.unwrap();
+    
+    // minimum_required = budget_tokens + 1024 = 11024
+    let minimum_required = budget_tokens + 1024;
+    
+    // If proposed_max >= minimum_required, no reduction is needed
+    let proposed_max = 15000;
+    assert!(proposed_max >= minimum_required);
+}
+
+/// Test that when max_tokens < budget_tokens + buffer, reduction is needed
+#[test]
+fn test_insufficient_max_tokens_needs_reduction() {
+    let config = create_test_config_with_thinking(Some(10000));
+    let budget_tokens = config.providers.anthropic.as_ref().unwrap().thinking_budget_tokens.unwrap();
+    
+    // minimum_required = budget_tokens + 1024 = 11024
+    let minimum_required = budget_tokens + 1024;
+    
+    // If proposed_max < minimum_required, reduction IS needed
+    let proposed_max = 5000;
+    assert!(proposed_max < minimum_required);
+}
+
+/// Test the minimum required calculation
+#[test]
+fn test_minimum_required_calculation() {
+    // For a budget of 10000, we need at least 11024 tokens
+    let budget_tokens = 10000u32;
+    let output_buffer = 1024u32;
+    let minimum_required = budget_tokens + output_buffer;
+    
+    assert_eq!(minimum_required, 11024);
+    
+    // For a larger budget
+    let budget_tokens = 32000u32;
+    let minimum_required = budget_tokens + output_buffer;
+    assert_eq!(minimum_required, 33024);
+}
+
+/// Test context window usage calculation for summary max_tokens
+#[test]
+fn test_context_window_available_tokens() {
+    let mut context = ContextWindow::new(200000); // 200k context window
+    
+    // Simulate heavy usage
+    context.used_tokens = 180000; // 90% used
+    
+    let model_limit = context.total_tokens;
+    let current_usage = context.used_tokens;
+    
+    // 2.5% buffer calculation
+    let buffer = (model_limit / 40).clamp(1000, 10000);
+    assert_eq!(buffer, 5000); // 200000/40 = 5000
+    
+    let available = model_limit
+        .saturating_sub(current_usage)
+        .saturating_sub(buffer);
+    
+    // 200000 - 180000 - 5000 = 15000
+    assert_eq!(available, 15000);
+    
+    // Capped at 10000 for summary
+    let summary_max = available.min(10_000);
+    assert_eq!(summary_max, 10000);
+}
+
+/// Test that when context is nearly full, available tokens may be below thinking budget
+#[test]
+fn test_context_nearly_full_triggers_reduction() {
+    let mut context = ContextWindow::new(200000);
+    
+    // Very heavy usage - 98% used
+    context.used_tokens = 196000;
+    
+    let model_limit = context.total_tokens;
+    let current_usage = context.used_tokens;
+    let buffer = (model_limit / 40).clamp(1000, 10000); // 5000
+    
+    let available = model_limit
+        .saturating_sub(current_usage)
+        .saturating_sub(buffer);
+    
+    // 200000 - 196000 - 5000 = -1000 -> saturates to 0
+    assert_eq!(available, 0);
+    
+    // With thinking_budget of 10000, this would definitely need reduction
+    let thinking_budget = 10000u32;
+    let minimum_required = thinking_budget + 1024;
+    assert!(available < minimum_required);
+}
+
+/// Test the hard-coded fallback value
+#[test]
+fn test_hardcoded_fallback_value() {
+    // When all else fails, we use 5000 as the hard-coded max_tokens
+    let hardcoded_fallback = 5000u32;
+    
+    // This should be a reasonable value that Anthropic will accept
+    // even with thinking enabled (though output will be limited)
+    assert!(hardcoded_fallback > 0);
+    
+    // Note: With a 10000 thinking budget, 5000 is still below the
+    // minimum required (11024), but we send it anyway as a "last resort"
+    // hoping the API might still work for basic operations
+}
+
+/// Test provider-specific caps
+#[test]
+fn test_provider_specific_caps() {
+    // Anthropic/Databricks: cap at 10000
+    let anthropic_cap = 10000u32;
+    let proposed = 15000u32;
+    assert_eq!(proposed.min(anthropic_cap), 10000);
+    
+    // Embedded: cap at 3000
+    let embedded_cap = 3000u32;
+    let proposed = 5000u32;
+    assert_eq!(proposed.min(embedded_cap), 3000);
+    
+    // Default: cap at 5000
+    let default_cap = 5000u32;
+    let proposed = 8000u32;
+    assert_eq!(proposed.min(default_cap), 5000);
+}
+
+/// Test that the error message mentions the thinking budget constraint
+#[test]
+fn test_error_message_content() {
+    // Verify the warning message format contains useful information
+    let proposed_max_tokens = 5000u32;
+    let budget_tokens = 10000u32;
+    let minimum_required = budget_tokens + 1024;
+    
+    let warning = format!(
+        "max_tokens ({}) is below required minimum ({}) for thinking.budget_tokens ({}). Context reduction needed.",
+        proposed_max_tokens, minimum_required, budget_tokens
+    );
+    
+    assert!(warning.contains("5000"));
+    assert!(warning.contains("11024"));
+    assert!(warning.contains("10000"));
+    assert!(warning.contains("Context reduction needed"));
+}
--- a/crates/g3-core/tests/test_reset_with_summary.rs
+++ b/crates/g3-core/tests/test_reset_with_summary.rs
@@ -0,0 +1,159 @@
+//! Tests for reset_with_summary to ensure system prompt is preserved after compaction
+
+use g3_core::ContextWindow;
+use g3_providers::{Message, MessageRole};
+
+/// Test that reset_with_summary preserves the original system prompt
+#[test]
+fn test_reset_with_summary_preserves_system_prompt() {
+    let mut context = ContextWindow::new(10000);
+
+    // Add the system prompt as the first message (simulating agent initialization)
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Add some conversation history
+    context.add_message(Message::new(MessageRole::User, "Task: Write a function".to_string()));
+    context.add_message(Message::new(MessageRole::Assistant, "I'll help you write that function.".to_string()));
+    context.add_message(Message::new(MessageRole::User, "Thanks, now add tests".to_string()));
+    context.add_message(Message::new(MessageRole::Assistant, "Here are the tests.".to_string()));
+
+    // Verify we have 5 messages before reset
+    assert_eq!(context.conversation_history.len(), 5);
+
+    // Reset with summary
+    let summary = "We discussed writing a function and adding tests.".to_string();
+    let latest_user_msg = Some("Continue with the implementation".to_string());
+    context.reset_with_summary(summary, latest_user_msg);
+
+    // Verify the first message is still the system prompt
+    assert!(!context.conversation_history.is_empty(), "Conversation history should not be empty");
+    
+    let first_message = &context.conversation_history[0];
+    assert!(
+        matches!(first_message.role, MessageRole::System),
+        "First message should be a System message, got {:?}",
+        first_message.role
+    );
+    assert!(
+        first_message.content.contains("You are G3"),
+        "First message should contain the system prompt 'You are G3', got: {}",
+        &first_message.content[..first_message.content.len().min(100)]
+    );
+
+    // Verify the summary was added as a separate system message
+    let has_summary = context.conversation_history.iter().any(|m| {
+        matches!(m.role, MessageRole::System) && m.content.contains("Previous conversation summary")
+    });
+    assert!(has_summary, "Should have a summary message");
+
+    // Verify the latest user message was added
+    let has_user_msg = context.conversation_history.iter().any(|m| {
+        matches!(m.role, MessageRole::User) && m.content.contains("Continue with the implementation")
+    });
+    assert!(has_user_msg, "Should have the latest user message");
+}
+
+/// Test that reset_with_summary preserves README message if present
+#[test]
+fn test_reset_with_summary_preserves_readme() {
+    let mut context = ContextWindow::new(10000);
+
+    // Add the system prompt as the first message
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Add README as second system message
+    let readme_content = "# Project README\n\nThis is a test project.";
+    context.add_message(Message::new(MessageRole::System, readme_content.to_string()));
+
+    // Add some conversation history
+    context.add_message(Message::new(MessageRole::User, "Task: Write a function".to_string()));
+    context.add_message(Message::new(MessageRole::Assistant, "Done.".to_string()));
+
+    // Verify we have 4 messages before reset
+    assert_eq!(context.conversation_history.len(), 4);
+
+    // Reset with summary
+    let summary = "We wrote a function.".to_string();
+    context.reset_with_summary(summary, None);
+
+    // Verify the first message is still the system prompt
+    let first_message = &context.conversation_history[0];
+    assert!(
+        first_message.content.contains("You are G3"),
+        "First message should be the system prompt"
+    );
+
+    // Verify the README was preserved as the second message
+    let second_message = &context.conversation_history[1];
+    assert!(
+        matches!(second_message.role, MessageRole::System),
+        "Second message should be a System message"
+    );
+    assert!(
+        second_message.content.contains("Project README"),
+        "Second message should be the README"
+    );
+}
+
+/// Test that reset_with_summary works when there's no README
+#[test]
+fn test_reset_with_summary_without_readme() {
+    let mut context = ContextWindow::new(10000);
+
+    // Add only the system prompt (no README)
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Add conversation without README
+    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
+    context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
+
+    // Reset with summary
+    let summary = "Greeted the user.".to_string();
+    context.reset_with_summary(summary, None);
+
+    // Verify the first message is still the system prompt
+    let first_message = &context.conversation_history[0];
+    assert!(
+        first_message.content.contains("You are G3"),
+        "First message should be the system prompt"
+    );
+
+    // Verify we have system prompt + summary (no README)
+    // The second message should be the summary, not a README
+    let second_message = &context.conversation_history[1];
+    assert!(
+        second_message.content.contains("Previous conversation summary"),
+        "Second message should be the summary when no README exists"
+    );
+}
+
+/// Test that reset_with_summary handles Agent Configuration in addition to README
+#[test]
+fn test_reset_with_summary_preserves_agent_configuration() {
+    let mut context = ContextWindow::new(10000);
+
+    // Add the system prompt as the first message
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Add Agent Configuration as second system message
+    let agents_content = "# Agent Configuration\n\nSpecial instructions for this project.";
+    context.add_message(Message::new(MessageRole::System, agents_content.to_string()));
+
+    // Add some conversation history
+    context.add_message(Message::new(MessageRole::User, "Task: Do something".to_string()));
+
+    // Reset with summary
+    let summary = "Did something.".to_string();
+    context.reset_with_summary(summary, None);
+
+    // Verify the Agent Configuration was preserved
+    let second_message = &context.conversation_history[1];
+    assert!(
+        second_message.content.contains("Agent Configuration"),
+        "Second message should be the Agent Configuration"
+    );
+}
--- a/crates/g3-core/tests/test_system_message_loading.rs
+++ b/crates/g3-core/tests/test_system_message_loading.rs
@@ -0,0 +1,263 @@
+//! Tests for verifying system message loading with README content
+//!
+//! This test verifies that when a README is present, the system message
+//! is correctly loaded and structured in the context window.
+
+use g3_core::ContextWindow;
+use g3_providers::{Message, MessageRole};
+
+/// Test that system prompt is always the first message
+#[test]
+fn test_system_prompt_is_first_message() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization: system prompt first
+    let system_prompt = "You are G3, an AI programming agent of the same skill level...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Verify the first message is the system prompt
+    assert!(!context.conversation_history.is_empty());
+    let first_message = &context.conversation_history[0];
+    assert!(
+        matches!(first_message.role, MessageRole::System),
+        "First message should be a System message"
+    );
+    assert!(
+        first_message.content.contains("You are G3"),
+        "First message should contain the system prompt"
+    );
+}
+
+/// Test that README is added as the second system message after the system prompt
+#[test]
+fn test_readme_is_second_message_after_system_prompt() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization: system prompt first
+    let system_prompt = "You are G3, an AI programming agent of the same skill level...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Add README as second system message (simulating what Agent::new_with_readme does)
+    let readme_content = "📚 Project README (from README.md):\n\n# My Project\n\nThis is a test project.";
+    context.add_message(Message::new(MessageRole::System, readme_content.to_string()));
+
+    // Verify we have 2 messages
+    assert_eq!(context.conversation_history.len(), 2);
+
+    // Verify the first message is the system prompt
+    let first_message = &context.conversation_history[0];
+    assert!(
+        matches!(first_message.role, MessageRole::System),
+        "First message should be a System message"
+    );
+    assert!(
+        first_message.content.contains("You are G3"),
+        "First message should contain the system prompt"
+    );
+
+    // Verify the second message is the README
+    let second_message = &context.conversation_history[1];
+    assert!(
+        matches!(second_message.role, MessageRole::System),
+        "Second message should be a System message"
+    );
+    assert!(
+        second_message.content.contains("Project README"),
+        "Second message should contain the README content"
+    );
+    assert!(
+        second_message.content.contains("My Project"),
+        "Second message should contain the actual README content"
+    );
+}
+
+/// Test that system prompt and README are separate messages (not combined)
+#[test]
+fn test_system_prompt_and_readme_are_separate() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    let readme_content = "📚 Project README (from README.md):\n\n# Test Project";
+    context.add_message(Message::new(MessageRole::System, readme_content.to_string()));
+
+    // Verify they are separate messages
+    assert_eq!(context.conversation_history.len(), 2);
+
+    // First message should NOT contain README
+    let first_message = &context.conversation_history[0];
+    assert!(
+        !first_message.content.contains("Project README"),
+        "System prompt should not contain README content"
+    );
+
+    // Second message should NOT contain system prompt
+    let second_message = &context.conversation_history[1];
+    assert!(
+        !second_message.content.contains("You are G3"),
+        "README message should not contain system prompt"
+    );
+}
+
+/// Test that TODO is added as third message after system prompt and README
+#[test]
+fn test_todo_is_third_message_after_readme() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization order:
+    // 1. System prompt
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // 2. README
+    let readme_content = "📚 Project README (from README.md):\n\n# Test Project";
+    context.add_message(Message::new(MessageRole::System, readme_content.to_string()));
+
+    // 3. TODO (if present)
+    let todo_content = "📋 Existing TODO list (from todo.g3.md):\n\n- [ ] Task 1\n- [x] Task 2";
+    context.add_message(Message::new(MessageRole::System, todo_content.to_string()));
+
+    // Verify we have 3 messages
+    assert_eq!(context.conversation_history.len(), 3);
+
+    // Verify order
+    assert!(
+        context.conversation_history[0].content.contains("You are G3"),
+        "First message should be system prompt"
+    );
+    assert!(
+        context.conversation_history[1].content.contains("Project README"),
+        "Second message should be README"
+    );
+    assert!(
+        context.conversation_history[2].content.contains("TODO list"),
+        "Third message should be TODO"
+    );
+}
+
+/// Test that AGENTS.md content is combined with README in the same message
+#[test]
+fn test_agents_and_readme_combined() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Combined AGENTS.md and README.md content (as done in g3-cli)
+    let combined_content = "# Agent Configuration\n\nSpecial instructions.\n\n# Project README\n\nProject description.";
+    context.add_message(Message::new(MessageRole::System, combined_content.to_string()));
+
+    // Verify we have 2 messages
+    assert_eq!(context.conversation_history.len(), 2);
+
+    // Verify the second message contains both AGENTS and README
+    let second_message = &context.conversation_history[1];
+    assert!(
+        second_message.content.contains("Agent Configuration"),
+        "Combined message should contain AGENTS.md content"
+    );
+    assert!(
+        second_message.content.contains("Project README"),
+        "Combined message should contain README content"
+    );
+}
+
+/// Test that user messages come after system messages
+#[test]
+fn test_user_messages_after_system_messages() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    let readme_content = "📚 Project README (from README.md):\n\n# Test Project";
+    context.add_message(Message::new(MessageRole::System, readme_content.to_string()));
+
+    // Add user message
+    let user_message = "Please help me with this task.";
+    context.add_message(Message::new(MessageRole::User, user_message.to_string()));
+
+    // Verify order
+    assert_eq!(context.conversation_history.len(), 3);
+    assert!(matches!(context.conversation_history[0].role, MessageRole::System));
+    assert!(matches!(context.conversation_history[1].role, MessageRole::System));
+    assert!(matches!(context.conversation_history[2].role, MessageRole::User));
+}
+
+/// Test that empty README content is not added
+#[test]
+fn test_empty_readme_not_added() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Try to add empty README (should be skipped due to empty content check)
+    let empty_readme = "   "; // whitespace only
+    context.add_message(Message::new(MessageRole::System, empty_readme.to_string()));
+
+    // Verify only system prompt was added (empty message should be skipped)
+    assert_eq!(
+        context.conversation_history.len(),
+        1,
+        "Empty README should not be added to conversation history"
+    );
+}
+
+/// Test the reload_readme detection logic
+#[test]
+fn test_readme_detection_for_reload() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Add README with expected markers
+    let readme_content = "# Project README\n\nThis is the project description.";
+    context.add_message(Message::new(MessageRole::System, readme_content.to_string()));
+
+    // Check if the second message (index 1) is a README
+    let has_readme = context
+        .conversation_history
+        .get(1)
+        .map(|m| {
+            matches!(m.role, MessageRole::System)
+                && (m.content.contains("Project README")
+                    || m.content.contains("Agent Configuration"))
+        })
+        .unwrap_or(false);
+
+    assert!(has_readme, "Should detect README at index 1");
+}
+
+/// Test that README detection fails when no README is present
+#[test]
+fn test_readme_detection_without_readme() {
+    let mut context = ContextWindow::new(10000);
+
+    // Simulate agent initialization without README
+    let system_prompt = "You are G3, an AI programming agent...";
+    context.add_message(Message::new(MessageRole::System, system_prompt.to_string()));
+
+    // Add a user message directly (no README)
+    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
+
+    // Check if the second message (index 1) is a README
+    let has_readme = context
+        .conversation_history
+        .get(1)
+        .map(|m| {
+            matches!(m.role, MessageRole::System)
+                && (m.content.contains("Project README")
+                    || m.content.contains("Agent Configuration"))
+        })
+        .unwrap_or(false);
+
+    assert!(!has_readme, "Should not detect README when none exists");
+}
--- a/crates/g3-core/tests/test_todo_completion.rs
+++ b/crates/g3-core/tests/test_todo_completion.rs
@@ -0,0 +1,78 @@
+//! Tests for TODO completion detection and file deletion behavior
+
+/// Helper to check if all TODOs are complete (same logic as in lib.rs)
+fn all_todos_complete(content: &str) -> bool {
+    let has_incomplete = content.lines().any(|line| {
+        let trimmed = line.trim();
+        trimmed.starts_with("- [ ]")
+    });
+    
+    !has_incomplete && (content.contains("- [x]") || content.contains("- [X]"))
+}
+
+#[test]
+fn test_all_complete_lowercase() {
+    let content = "# Test\n\n- [x] Done 1\n- [x] Done 2";
+    assert!(all_todos_complete(content));
+}
+
+#[test]
+fn test_all_complete_uppercase() {
+    let content = "# Test\n\n- [X] Done 1\n- [X] Done 2";
+    assert!(all_todos_complete(content));
+}
+
+#[test]
+fn test_all_complete_mixed_case() {
+    let content = "# Test\n\n- [x] Done 1\n- [X] Done 2";
+    assert!(all_todos_complete(content));
+}
+
+#[test]
+fn test_has_incomplete() {
+    let content = "# Test\n\n- [x] Done 1\n- [ ] Not done";
+    assert!(!all_todos_complete(content));
+}
+
+#[test]
+fn test_all_incomplete() {
+    let content = "# Test\n\n- [ ] Not done 1\n- [ ] Not done 2";
+    assert!(!all_todos_complete(content));
+}
+
+#[test]
+fn test_no_checkboxes() {
+    let content = "# Just a header\n\nSome text without checkboxes";
+    assert!(!all_todos_complete(content));
+}
+
+#[test]
+fn test_nested_complete() {
+    let content = "# Test\n\n- [x] Parent\n  - [x] Child 1\n  - [x] Child 2";
+    assert!(all_todos_complete(content));
+}
+
+#[test]
+fn test_nested_incomplete() {
+    let content = "# Test\n\n- [x] Parent\n  - [x] Child 1\n  - [ ] Child 2";
+    assert!(!all_todos_complete(content));
+}
+
+#[test]
+fn test_indented_incomplete() {
+    // Indented incomplete items should still be detected
+    let content = "# Test\n\n- [x] Done\n    - [ ] Indented incomplete";
+    assert!(!all_todos_complete(content));
+}
+
+#[test]
+fn test_empty_content() {
+    let content = "";
+    assert!(!all_todos_complete(content));
+}
+
+#[test]
+fn test_whitespace_only() {
+    let content = "   \n\n   ";
+    assert!(!all_todos_complete(content));
+}
--- a/crates/g3-core/tests/test_todo_context_thinning.rs
+++ b/crates/g3-core/tests/test_todo_context_thinning.rs
@@ -6,37 +6,34 @@ use serial_test::serial;
 #[serial]
 fn test_todo_read_results_not_thinned() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add a todo_read tool call
-    context.add_message(Message {
-        role: MessageRole::Assistant,
-        content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::Assistant,
+        r#"{"tool": "todo_read", "args": {}}"#.to_string(),
+    ));
+
    // Add a large TODO result (> 500 chars)
    let large_todo_result = format!(
        "Tool result: 📝 TODO list:\n{}",
        "- [ ] Task with long description\n".repeat(50)
    );
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: large_todo_result.clone(),
-    });
-    
+    context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
+
    // Add more messages to ensure we have enough for "first third" logic
    for i in 0..6 {
-        context.add_message(Message {
-            role: MessageRole::Assistant,
-            content: format!("Response {}", i),
-        });
+        context.add_message(Message::new(
+            MessageRole::Assistant,
+            format!("Response {}", i),
+        ))
    }
-    
+
    // Trigger thinning at 50%
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();
-    
+
    println!("Thinning summary: {}", summary);
-    
+
    // Check that the TODO result was NOT thinned
    let first_third_end = context.conversation_history.len() / 3;
    for i in 0..first_third_end {
@@ -62,38 +59,38 @@ fn test_todo_read_results_not_thinned() {
 #[serial]
 fn test_todo_write_results_not_thinned() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add a todo_write tool call
    let large_content = "- [ ] Task\n".repeat(100);
-    context.add_message(Message {
-        role: MessageRole::Assistant,
-        content: format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::Assistant,
+        format!(
+            r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#,
+            large_content
+        ),
+    ));
+
    // Add a large TODO write result
    let large_todo_result = format!(
        "Tool result: ✅ TODO list updated ({} chars) and saved to todo.g3.md",
        large_content.len()
    );
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: large_todo_result.clone(),
-    });
-    
+    context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
+
    // Add more messages
    for i in 0..6 {
-        context.add_message(Message {
-            role: MessageRole::Assistant,
-            content: format!("Response {}", i),
-        });
+        context.add_message(Message::new(
+            MessageRole::Assistant,
+            format!("Response {}", i),
+        ))
    }
-    
+
    // Trigger thinning at 50%
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();
-    
+
    println!("Thinning summary: {}", summary);
-    
+
    // Check that the TODO write result was NOT thinned
    let first_third_end = context.conversation_history.len() / 3;
    for i in 0..first_third_end {
@@ -117,40 +114,37 @@ fn test_todo_write_results_not_thinned() {
 #[serial]
 fn test_non_todo_results_still_thinned() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add a non-TODO tool call (e.g., read_file)
-    context.add_message(Message {
-        role: MessageRole::Assistant,
-        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::Assistant,
+        r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
+    ));
+
    // Add a large read_file result (> 500 chars)
    let large_result = format!("Tool result: {}", "x".repeat(1500));
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: large_result,
-    });
-    
+    context.add_message(Message::new(MessageRole::User, large_result));
+
    // Add more messages
    for i in 0..6 {
-        context.add_message(Message {
-            role: MessageRole::Assistant,
-            content: format!("Response {}", i),
-        });
+        context.add_message(Message::new(
+            MessageRole::Assistant,
+            format!("Response {}", i),
+        ))
    }
-    
+
    // Trigger thinning at 50%
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();
-    
+
    println!("Thinning summary: {}", summary);
-    
+
    // Should have thinned the non-TODO result
    assert!(
        summary.contains("1 tool result") || summary.contains("chars saved"),
        "Non-TODO results should be thinned"
    );
-    
+
    // Check that the result was actually thinned
    let first_third_end = context.conversation_history.len() / 3;
    for i in 0..first_third_end {
@@ -170,35 +164,29 @@ fn test_non_todo_results_still_thinned() {
 #[serial]
 fn test_todo_read_with_spaces_in_tool_name() {
    let mut context = ContextWindow::new(10000);
-    
+
    // Add a todo_read tool call with spaces (JSON formatting variation)
-    context.add_message(Message {
-        role: MessageRole::Assistant,
-        content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
-    });
-    
+    context.add_message(Message::new(
+        MessageRole::Assistant,
+        r#"{"tool": "todo_read", "args": {}}"#.to_string(),
+    ));
+
    // Add a large TODO result
-    let large_todo_result = format!(
-        "Tool result: 📝 TODO list:\n{}",
-        "- [ ] Task\n".repeat(50)
-    );
-    context.add_message(Message {
-        role: MessageRole::User,
-        content: large_todo_result.clone(),
-    });
-    
+    let large_todo_result = format!("Tool result: 📝 TODO list:\n{}", "- [ ] Task\n".repeat(50));
+    context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
+
    // Add more messages
    for i in 0..6 {
-        context.add_message(Message {
-            role: MessageRole::Assistant,
-            content: format!("Response {}", i),
-        });
+        context.add_message(Message::new(
+            MessageRole::Assistant,
+            format!("Response {}", i),
+        ))
    }
-    
+
    // Trigger thinning
    context.used_tokens = 5000;
    let (_summary, _chars_saved) = context.thin_context();
-    
+
    // Verify TODO result was not thinned
    let first_third_end = context.conversation_history.len() / 3;
    for i in 0..first_third_end {
--- a/crates/g3-core/tests/test_todo_persistence.rs
+++ b/crates/g3-core/tests/test_todo_persistence.rs
@@ -1,20 +1,19 @@
-use g3_core::Agent;
 use g3_core::ui_writer::NullUiWriter;
+use g3_core::Agent;
 use serial_test::serial;
 use std::fs;
 use std::path::PathBuf;
 use tempfile::TempDir;

-
 /// Helper to create a test agent in a temporary directory
 async fn create_test_agent_in_dir(temp_dir: &TempDir) -> Agent<NullUiWriter> {
    // Change to temp directory
    std::env::set_current_dir(temp_dir.path()).unwrap();
-    
+
    // Create a minimal config
    let config = g3_config::Config::default();
    let ui_writer = NullUiWriter;
-    
+
    Agent::new(config, ui_writer).await.unwrap()
 }

@@ -27,12 +26,12 @@ fn get_todo_path(temp_dir: &TempDir) -> PathBuf {
 #[serial]
 async fn test_todo_write_creates_file() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
-    
+
    // Initially, todo.g3.md should not exist
    assert!(!todo_path.exists(), "todo.g3.md should not exist initially");
-    
+
    // Create a tool call to write TODO
    let tool_call = g3_core::ToolCall {
        tool: "todo_write".to_string(),
@@ -40,17 +39,21 @@ async fn test_todo_write_creates_file() {
            "content": "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3"
        }),
    };
-    
+
    // Execute the tool
    let result = agent.execute_tool(&tool_call).await.unwrap();
-    
+
    // Should report success
    assert!(result.contains("✅"), "Should report success: {}", result);
-    assert!(result.contains("todo.g3.md"), "Should mention todo.g3.md: {}", result);
-    
+    assert!(
+        result.contains("todo.g3.md"),
+        "Should mention todo.g3.md: {}",
+        result
+    );
+
    // File should now exist
    assert!(todo_path.exists(), "todo.g3.md should exist after write");
-    
+
    // File should contain the correct content
    let content = fs::read_to_string(&todo_path).unwrap();
    assert_eq!(content, "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3");
@@ -61,44 +64,56 @@ async fn test_todo_write_creates_file() {
 async fn test_todo_read_from_file() {
    let temp_dir = TempDir::new().unwrap();
    let todo_path = get_todo_path(&temp_dir);
-    
+
    // Pre-create a todo.g3.md file
    let test_content = "# My TODO\n\n- [ ] First task\n- [x] Completed task";
    fs::write(&todo_path, test_content).unwrap();
-    
+
    // Create agent (should load from file)
-    let agent = create_test_agent_in_dir(&temp_dir).await;
-    
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
+
    // Create a tool call to read TODO
    let tool_call = g3_core::ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
-    
+
    // Execute the tool
    let result = agent.execute_tool(&tool_call).await.unwrap();
-    
+
    // Should contain the TODO content
-    assert!(result.contains("📝 TODO list:"), "Should have TODO list header: {}", result);
-    assert!(result.contains("First task"), "Should contain first task: {}", result);
-    assert!(result.contains("Completed task"), "Should contain completed task: {}", result);
+    assert!(
+        result.contains("📝 TODO list:"),
+        "Should have TODO list header: {}",
+        result
+    );
+    assert!(
+        result.contains("First task"),
+        "Should contain first task: {}",
+        result
+    );
+    assert!(
+        result.contains("Completed task"),
+        "Should contain completed task: {}",
+        result
+    );
 }

 #[tokio::test]
 #[serial]
 async fn test_todo_read_empty_file() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
-    
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
+
    // Create a tool call to read TODO (file doesn't exist)
    let tool_call = g3_core::ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
-    
+
    // Execute the tool
    let result = agent.execute_tool(&tool_call).await.unwrap();
-    
+
    // Should report empty
    assert!(result.contains("empty"), "Should report empty: {}", result);
 }
@@ -108,10 +123,10 @@ async fn test_todo_read_empty_file() {
 async fn test_todo_persistence_across_agents() {
    let temp_dir = TempDir::new().unwrap();
    let todo_path = get_todo_path(&temp_dir);
-    
+
    // Agent 1: Write TODO
    {
-        let agent = create_test_agent_in_dir(&temp_dir).await;
+        let mut agent = create_test_agent_in_dir(&temp_dir).await;
        let tool_call = g3_core::ToolCall {
            tool: "todo_write".to_string(),
            args: serde_json::json!({
@@ -120,22 +135,33 @@ async fn test_todo_persistence_across_agents() {
        };
        agent.execute_tool(&tool_call).await.unwrap();
    }
-    
+
    // Verify file exists
-    assert!(todo_path.exists(), "todo.g3.md should persist after agent drops");
-    
+    assert!(
+        todo_path.exists(),
+        "todo.g3.md should persist after agent drops"
+    );
+
    // Agent 2: Read TODO (new agent instance)
    {
-        let agent = create_test_agent_in_dir(&temp_dir).await;
+        let mut agent = create_test_agent_in_dir(&temp_dir).await;
        let tool_call = g3_core::ToolCall {
            tool: "todo_read".to_string(),
            args: serde_json::json!({}),
        };
        let result = agent.execute_tool(&tool_call).await.unwrap();
-        
+
        // Should read the persisted content
-        assert!(result.contains("Persistent task"), "Should read persisted task: {}", result);
-        assert!(result.contains("Done task"), "Should read done task: {}", result);
+        assert!(
+            result.contains("Persistent task"),
+            "Should read persisted task: {}",
+            result
+        );
+        assert!(
+            result.contains("Done task"),
+            "Should read done task: {}",
+            result
+        );
    }
 }

@@ -143,9 +169,9 @@ async fn test_todo_persistence_across_agents() {
 #[serial]
 async fn test_todo_update_preserves_file() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
-    
+
    // Write initial TODO
    let write_call = g3_core::ToolCall {
        tool: "todo_write".to_string(),
@@ -154,7 +180,7 @@ async fn test_todo_update_preserves_file() {
        }),
    };
    agent.execute_tool(&write_call).await.unwrap();
-    
+
    // Update TODO
    let update_call = g3_core::ToolCall {
        tool: "todo_write".to_string(),
@@ -163,7 +189,7 @@ async fn test_todo_update_preserves_file() {
        }),
    };
    agent.execute_tool(&update_call).await.unwrap();
-    
+
    // Verify file has updated content
    let content = fs::read_to_string(&todo_path).unwrap();
    assert_eq!(content, "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3");
@@ -173,25 +199,32 @@ async fn test_todo_update_preserves_file() {
 #[serial]
 async fn test_todo_handles_large_content() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
-    
+
    // Create a large TODO (but under the 50k limit)
    let mut large_content = String::from("# Large TODO\n\n");
    for i in 0..100 {
-        large_content.push_str(&format!("- [ ] Task {} with a long description that exceeds normal line lengths\n", i));
+        large_content.push_str(&format!(
+            "- [ ] Task {} with a long description that exceeds normal line lengths\n",
+            i
+        ));
    }
-    
+
    let tool_call = g3_core::ToolCall {
        tool: "todo_write".to_string(),
        args: serde_json::json!({
            "content": large_content
        }),
    };
-    
+
    let result = agent.execute_tool(&tool_call).await.unwrap();
-    assert!(result.contains("✅"), "Should handle large content: {}", result);
-    
+    assert!(
+        result.contains("✅"),
+        "Should handle large content: {}",
+        result
+    );
+
    // Verify file contains all content
    let file_content = fs::read_to_string(&todo_path).unwrap();
    assert_eq!(file_content, large_content);
@@ -202,23 +235,31 @@ async fn test_todo_handles_large_content() {
 #[serial]
 async fn test_todo_respects_size_limit() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
-    
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
+
    // Create content that exceeds the default 50k limit
    let huge_content = "x".repeat(60_000);
-    
+
    let tool_call = g3_core::ToolCall {
        tool: "todo_write".to_string(),
        args: serde_json::json!({
            "content": huge_content
        }),
    };
-    
+
    let result = agent.execute_tool(&tool_call).await.unwrap();
-    
+
    // Should reject content that's too large
-    assert!(result.contains("❌"), "Should reject oversized content: {}", result);
-    assert!(result.contains("too large"), "Should mention size limit: {}", result);
+    assert!(
+        result.contains("❌"),
+        "Should reject oversized content: {}",
+        result
+    );
+    assert!(
+        result.contains("too large"),
+        "Should mention size limit: {}",
+        result
+    );
 }

 #[tokio::test]
@@ -226,66 +267,78 @@ async fn test_todo_respects_size_limit() {
 async fn test_todo_agent_initialization_loads_file() {
    let temp_dir = TempDir::new().unwrap();
    let todo_path = get_todo_path(&temp_dir);
-    
+
    // Pre-create todo.g3.md before agent initialization
    let initial_content = "- [ ] Pre-existing task";
    fs::write(&todo_path, initial_content).unwrap();
-    
+
    // Create agent - should load the file during initialization
-    let agent = create_test_agent_in_dir(&temp_dir).await;
-    
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
+
    // Read TODO - should return the pre-existing content
    let tool_call = g3_core::ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
-    
+
    let result = agent.execute_tool(&tool_call).await.unwrap();
-    assert!(result.contains("Pre-existing task"), "Should load file on init: {}", result);
+    assert!(
+        result.contains("Pre-existing task"),
+        "Should load file on init: {}",
+        result
+    );
 }

 #[tokio::test]
 #[serial]
 async fn test_todo_handles_unicode_content() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
-    
+
    // Create TODO with unicode characters
    let unicode_content = "- [ ] 日本語タスク\n- [ ] Émoji task 🚀\n- [x] Ελληνικά task";
-    
+
    let tool_call = g3_core::ToolCall {
        tool: "todo_write".to_string(),
        args: serde_json::json!({
            "content": unicode_content
        }),
    };
-    
+
    agent.execute_tool(&tool_call).await.unwrap();
-    
+
    // Verify file preserves unicode
    let file_content = fs::read_to_string(&todo_path).unwrap();
    assert_eq!(file_content, unicode_content);
-    
+
    // Verify reading back works
    let read_call = g3_core::ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
-    
+
    let result = agent.execute_tool(&read_call).await.unwrap();
-    assert!(result.contains("日本語"), "Should preserve Japanese: {}", result);
+    assert!(
+        result.contains("日本語"),
+        "Should preserve Japanese: {}",
+        result
+    );
    assert!(result.contains("🚀"), "Should preserve emoji: {}", result);
-    assert!(result.contains("Ελληνικά"), "Should preserve Greek: {}", result);
+    assert!(
+        result.contains("Ελληνικά"),
+        "Should preserve Greek: {}",
+        result
+    );
 }

 #[tokio::test]
 #[serial]
 async fn test_todo_empty_content_creates_empty_file() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
-    
+
    // Write empty TODO
    let tool_call = g3_core::ToolCall {
        tool: "todo_write".to_string(),
@@ -293,9 +346,9 @@ async fn test_todo_empty_content_creates_empty_file() {
            "content": ""
        }),
    };
-    
+
    agent.execute_tool(&tool_call).await.unwrap();
-    
+
    // File should exist but be empty
    assert!(todo_path.exists(), "Empty todo.g3.md should create file");
    let content = fs::read_to_string(&todo_path).unwrap();
@@ -306,8 +359,8 @@ async fn test_todo_empty_content_creates_empty_file() {
 #[serial]
 async fn test_todo_whitespace_only_content() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
-    
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
+
    // Write whitespace-only TODO
    let tool_call = g3_core::ToolCall {
        tool: "todo_write".to_string(),
@@ -315,17 +368,21 @@ async fn test_todo_whitespace_only_content() {
            "content": "   \n\n  \t  \n"
        }),
    };
-    
+
    agent.execute_tool(&tool_call).await.unwrap();
-    
+
    // Read it back
    let read_call = g3_core::ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
-    
+
    let result = agent.execute_tool(&read_call).await.unwrap();
-    
+
    // Should report as empty (whitespace is trimmed)
-    assert!(result.contains("empty"), "Whitespace-only should be empty: {}", result);
+    assert!(
+        result.contains("empty"),
+        "Whitespace-only should be empty: {}",
+        result
+    );
 }
--- a/crates/g3-core/tests/test_token_counting.rs
+++ b/crates/g3-core/tests/test_token_counting.rs
@@ -1,94 +1,170 @@
 use g3_core::ContextWindow;
-use g3_providers::Usage;
+use g3_providers::{Message, MessageRole, Usage};

+/// Test that used_tokens is tracked via add_message, not update_usage_from_response.
+/// This is critical for the 80% summarization threshold to work correctly.
 #[test]
-fn test_token_accumulation() {
+fn test_used_tokens_tracked_via_messages() {
    let mut window = ContextWindow::new(10000);
+
+    // Add a user message - this should update used_tokens
+    let user_msg = Message::new(MessageRole::User, "Hello, how are you?".to_string());
+    window.add_message(user_msg);
    
-    // First API call: 100 prompt + 50 completion = 150 total
-    let usage1 = Usage {
+    // used_tokens should be non-zero after adding a message
+    assert!(window.used_tokens > 0, "used_tokens should increase after add_message");
+    let tokens_after_user_msg = window.used_tokens;
+
+    // Add an assistant message
+    let assistant_msg = Message::new(MessageRole::Assistant, "I'm doing well, thank you!".to_string());
+    window.add_message(assistant_msg);
+    
+    // used_tokens should increase further
+    assert!(window.used_tokens > tokens_after_user_msg, "used_tokens should increase after adding assistant message");
+}
+
+/// Test that update_usage_from_response only updates cumulative_tokens, not used_tokens.
+/// This prevents double-counting which was causing the 80% threshold to be reached at 200%+.
+#[test]
+fn test_update_usage_only_affects_cumulative() {
+    let mut window = ContextWindow::new(10000);
+
+    // Initial state
+    assert_eq!(window.used_tokens, 0);
+    assert_eq!(window.cumulative_tokens, 0);
+
+    // Simulate API response with usage data
+    let usage = Usage {
        prompt_tokens: 100,
        completion_tokens: 50,
        total_tokens: 150,
    };
-    window.update_usage_from_response(&usage1);
-    assert_eq!(window.used_tokens, 150, "First call should have 150 tokens");
-    assert_eq!(window.cumulative_tokens, 150, "Cumulative should be 150");
+    window.update_usage_from_response(&usage);

-    // Second API call: 200 prompt + 75 completion = 275 total
+    // used_tokens should NOT change - it's tracked via add_message
+    assert_eq!(window.used_tokens, 0, "used_tokens should not be updated by update_usage_from_response");
+    
+    // cumulative_tokens SHOULD be updated for API usage tracking
+    assert_eq!(window.cumulative_tokens, 150, "cumulative_tokens should track total API usage");
+
+    // Another API call
    let usage2 = Usage {
        prompt_tokens: 200,
        completion_tokens: 75,
        total_tokens: 275,
    };
    window.update_usage_from_response(&usage2);
-    assert_eq!(window.used_tokens, 425, "Second call should accumulate to 425 tokens");
-    assert_eq!(window.cumulative_tokens, 425, "Cumulative should be 425");

-    // Third API call with SMALLER token count: 50 prompt + 25 completion = 75 total
-    let usage3 = Usage {
-        prompt_tokens: 50,
-        completion_tokens: 25,
-        total_tokens: 75,
-    };
-    window.update_usage_from_response(&usage3);
-    assert_eq!(window.used_tokens, 500, "Third call should accumulate to 500 tokens");
-    assert_eq!(window.cumulative_tokens, 500, "Cumulative should be 500");
+    // used_tokens still unchanged
+    assert_eq!(window.used_tokens, 0, "used_tokens should remain unchanged");
    
-    // Verify tokens never decrease
-    assert!(window.used_tokens >= 425, "Token count should never decrease!");
+    // cumulative_tokens accumulates
+    assert_eq!(window.cumulative_tokens, 425, "cumulative_tokens should accumulate");
 }

+/// Test that add_streaming_tokens only updates cumulative_tokens.
+/// The assistant message will be added via add_message which tracks used_tokens.
 #[test]
-fn test_add_streaming_tokens() {
+fn test_add_streaming_tokens_only_affects_cumulative() {
    let mut window = ContextWindow::new(10000);
-    
-    // Add some streaming tokens
+
+    // Add streaming tokens (fallback when no usage data available)
    window.add_streaming_tokens(100);
-    assert_eq!(window.used_tokens, 100);
-    assert_eq!(window.cumulative_tokens, 100);
    
-    // Add more
+    // used_tokens should NOT change
+    assert_eq!(window.used_tokens, 0, "used_tokens should not be updated by add_streaming_tokens");
+    
+    // cumulative_tokens SHOULD be updated
+    assert_eq!(window.cumulative_tokens, 100, "cumulative_tokens should be updated");
+
+    // Add more streaming tokens
    window.add_streaming_tokens(50);
-    assert_eq!(window.used_tokens, 150);
+    assert_eq!(window.used_tokens, 0);
    assert_eq!(window.cumulative_tokens, 150);
-    
-    // Now update from provider response
-    let usage = Usage {
-        prompt_tokens: 80,
-        completion_tokens: 40,
-        total_tokens: 120,
-    };
-    window.update_usage_from_response(&usage);
-    
-    // Should ADD to existing, not replace
-    assert_eq!(window.used_tokens, 270, "Should add 120 to existing 150");
-    assert_eq!(window.cumulative_tokens, 270);
 }

+/// Test percentage calculation is based on used_tokens (actual context content).
 #[test]
-fn test_percentage_calculation() {
+fn test_percentage_based_on_used_tokens() {
    let mut window = ContextWindow::new(1000);
+
+    // Initially 0%
+    assert_eq!(window.percentage_used(), 0.0);
+    assert_eq!(window.remaining_tokens(), 1000);
+
+    // Add messages to increase used_tokens
+    // A message with ~100 chars should be roughly 25-30 tokens
+    let msg = Message::new(MessageRole::User, "x".repeat(400)); // ~100 tokens estimated
+    window.add_message(msg);
    
-    // Add tokens via provider response
+    // Percentage should be based on used_tokens
+    let percentage = window.percentage_used();
+    assert!(percentage > 0.0, "percentage should be > 0 after adding message");
+    assert!(percentage < 100.0, "percentage should be < 100");
+    
+    // remaining_tokens should decrease
+    assert!(window.remaining_tokens() < 1000, "remaining tokens should decrease");
+}
+
+/// Test that the 80% summarization threshold works correctly.
+/// This was the original bug - used_tokens was being double/triple counted.
+#[test]
+fn test_should_summarize_threshold() {
+    let mut window = ContextWindow::new(1000);
+
+    // Add messages until we approach 80%
+    // Each message of ~320 chars is roughly 80 tokens (at 4 chars/token)
+    for _ in 0..9 {
+        let msg = Message::new(MessageRole::User, "x".repeat(320));
+        window.add_message(msg);
+    }
+
+    // Should be around 720 tokens (72%) - not yet at threshold
+    // Note: actual token count depends on estimation algorithm
+    let percentage = window.percentage_used();
+    println!("After 9 messages: {}% used ({} tokens)", percentage, window.used_tokens);
+
+    // Add one more message to push over 80%
+    let msg = Message::new(MessageRole::User, "x".repeat(320));
+    window.add_message(msg);
+    
+    let percentage_after = window.percentage_used();
+    println!("After 10 messages: {}% used ({} tokens)", percentage_after, window.used_tokens);
+
+    // Now should_summarize should return true if we're at 80%+
+    if percentage_after >= 80.0 {
+        assert!(window.should_summarize(), "should_summarize should be true at 80%+");
+    }
+}
+
+/// Test that cumulative_tokens and used_tokens are independent.
+#[test]
+fn test_cumulative_vs_used_independence() {
+    let mut window = ContextWindow::new(10000);
+
+    // Add a message (affects used_tokens)
+    let msg = Message::new(MessageRole::User, "Hello world".to_string());
+    window.add_message(msg);
+    let used_after_msg = window.used_tokens;
+    let cumulative_after_msg = window.cumulative_tokens;
+    
+    // Both should be equal at this point (message adds to both)
+    assert_eq!(used_after_msg, cumulative_after_msg);
+
+    // Now simulate API response (only affects cumulative_tokens)
    let usage = Usage {
-        prompt_tokens: 150,
-        completion_tokens: 100,
-        total_tokens: 250,
+        prompt_tokens: 500,
+        completion_tokens: 200,
+        total_tokens: 700,
    };
    window.update_usage_from_response(&usage);
+
+    // used_tokens unchanged
+    assert_eq!(window.used_tokens, used_after_msg, "used_tokens should not change from API response");
    
-    assert_eq!(window.percentage_used(), 25.0);
-    assert_eq!(window.remaining_tokens(), 750);
+    // cumulative_tokens increased
+    assert_eq!(window.cumulative_tokens, cumulative_after_msg + 700, "cumulative_tokens should increase");
    
-    // Add more tokens
-    let usage2 = Usage {
-        prompt_tokens: 300,
-        completion_tokens: 200,
-        total_tokens: 500,
-    };
-    window.update_usage_from_response(&usage2);
-    
-    assert_eq!(window.percentage_used(), 75.0);
-    assert_eq!(window.remaining_tokens(), 250);
+    // They should now be different
+    assert!(window.cumulative_tokens > window.used_tokens, "cumulative should be greater than used");
 }
--- a/crates/g3-core/tests/todo_staleness_test.rs
+++ b/crates/g3-core/tests/todo_staleness_test.rs
@@ -0,0 +1,219 @@
+use g3_config::Config;
+use g3_core::ui_writer::UiWriter;
+use g3_core::{Agent, ToolCall};
+use serial_test::serial;
+use std::sync::{Arc, Mutex};
+use tempfile::TempDir;
+
+// Mock UI Writer for testing
+#[derive(Clone)]
+struct MockUiWriter {
+    output: Arc<Mutex<Vec<String>>>,
+    prompt_responses: Arc<Mutex<Vec<bool>>>,
+    choice_responses: Arc<Mutex<Vec<usize>>>,
+}
+
+impl MockUiWriter {
+    fn new() -> Self {
+        Self {
+            output: Arc::new(Mutex::new(Vec::new())),
+            prompt_responses: Arc::new(Mutex::new(Vec::new())),
+            choice_responses: Arc::new(Mutex::new(Vec::new())),
+        }
+    }
+
+    fn set_prompt_response(&self, response: bool) {
+        self.prompt_responses.lock().unwrap().push(response);
+    }
+
+    fn set_choice_response(&self, response: usize) {
+        self.choice_responses.lock().unwrap().push(response);
+    }
+
+    fn get_output(&self) -> Vec<String> {
+        self.output.lock().unwrap().clone()
+    }
+}
+
+impl UiWriter for MockUiWriter {
+    fn print(&self, message: &str) {
+        self.output.lock().unwrap().push(message.to_string());
+    }
+    fn println(&self, message: &str) {
+        self.output.lock().unwrap().push(message.to_string());
+    }
+    fn print_inline(&self, message: &str) {
+        self.output.lock().unwrap().push(message.to_string());
+    }
+    fn print_system_prompt(&self, _prompt: &str) {}
+    fn print_context_status(&self, message: &str) {
+        self.output
+            .lock()
+            .unwrap()
+            .push(format!("STATUS: {}", message));
+    }
+    fn print_context_thinning(&self, _message: &str) {}
+    fn print_tool_header(&self, _tool_name: &str) {}
+    fn print_tool_arg(&self, _key: &str, _value: &str) {}
+    fn print_tool_output_header(&self) {}
+    fn update_tool_output_line(&self, _line: &str) {}
+    fn print_tool_output_line(&self, _line: &str) {}
+    fn print_tool_output_summary(&self, _hidden_count: usize) {}
+    fn print_tool_timing(&self, _duration_str: &str) {}
+    fn print_agent_prompt(&self) {}
+    fn print_agent_response(&self, _content: &str) {}
+    fn notify_sse_received(&self) {}
+    fn flush(&self) {}
+    fn wants_full_output(&self) -> bool {
+        false
+    }
+    fn prompt_user_yes_no(&self, message: &str) -> bool {
+        self.output
+            .lock()
+            .unwrap()
+            .push(format!("PROMPT: {}", message));
+        self.prompt_responses.lock().unwrap().pop().unwrap_or(true)
+    }
+    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
+        self.output
+            .lock()
+            .unwrap()
+            .push(format!("CHOICE: {} Options: {:?}", message, options));
+        self.choice_responses.lock().unwrap().pop().unwrap_or(0)
+    }
+    fn print_final_output(&self, summary: &str) {
+        self.output.lock().unwrap().push(format!("FINAL: {}", summary));
+    }
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_staleness_check_matching_sha() {
+    let temp_dir = TempDir::new().unwrap();
+    let todo_path = temp_dir.path().join("todo.g3.md");
+    std::env::set_current_dir(&temp_dir).unwrap();
+
+    let sha = "abc123hash";
+    let content = format!(
+        "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
+        sha
+    );
+    std::fs::write(&todo_path, content).unwrap();
+
+    let mut config = Config::default();
+    config.agent.check_todo_staleness = true;
+
+    let ui_writer = MockUiWriter::new();
+    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
+    agent.set_requirements_sha(sha.to_string());
+
+    let tool_call = ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+
+    assert!(result.contains("📝 TODO list:"));
+    assert!(!result.contains("⚠️ TODO list is stale"));
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_staleness_check_mismatch_sha_ignore() {
+    let temp_dir = TempDir::new().unwrap();
+    let todo_path = temp_dir.path().join("todo.g3.md");
+    std::env::set_current_dir(&temp_dir).unwrap();
+
+    let sha_file = "old_sha";
+    let sha_req = "new_sha";
+    let content = format!(
+        "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
+        sha_file
+    );
+    std::fs::write(&todo_path, content).unwrap();
+
+    let mut config = Config::default();
+    config.agent.check_todo_staleness = true;
+
+    let ui_writer = MockUiWriter::new();
+    ui_writer.set_choice_response(0); // Ignore
+
+    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
+    agent.set_requirements_sha(sha_req.to_string());
+
+    let tool_call = ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+
+    assert!(result.contains("📝 TODO list:"));
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_staleness_check_mismatch_sha_mark_stale() {
+    let temp_dir = TempDir::new().unwrap();
+    let todo_path = temp_dir.path().join("todo.g3.md");
+    std::env::set_current_dir(&temp_dir).unwrap();
+
+    let sha_file = "old_sha";
+    let sha_req = "new_sha";
+    let content = format!(
+        "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
+        sha_file
+    );
+    std::fs::write(&todo_path, content).unwrap();
+
+    let mut config = Config::default();
+    config.agent.check_todo_staleness = true;
+
+    let ui_writer = MockUiWriter::new();
+    ui_writer.set_choice_response(1); // Mark as Stale
+
+    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
+    agent.set_requirements_sha(sha_req.to_string());
+
+    let tool_call = ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+
+    assert!(result.contains("⚠️ TODO list is stale"));
+    assert!(result.contains("Please regenerate"));
+}
+
+// Note: We cannot easily test "Quit" (index 2) because it calls std::process::exit(0)
+// which would kill the test runner. We skip that test case here.
+
+#[tokio::test]
+#[serial]
+async fn test_todo_staleness_check_disabled() {
+    let temp_dir = TempDir::new().unwrap();
+    let todo_path = temp_dir.path().join("todo.g3.md");
+    std::env::set_current_dir(&temp_dir).unwrap();
+
+    let sha_file = "old_sha";
+    let sha_req = "new_sha";
+    let content = format!(
+        "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
+        sha_file
+    );
+    std::fs::write(&todo_path, content).unwrap();
+
+    let mut config = Config::default();
+    config.agent.check_todo_staleness = false;
+
+    let ui_writer = MockUiWriter::new();
+    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
+    agent.set_requirements_sha(sha_req.to_string());
+
+    let tool_call = ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+
+    assert!(result.contains("📝 TODO list:"));
+}
--- a/crates/g3-ensembles/Cargo.toml
+++ b/crates/g3-ensembles/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "g3-ensembles"
+version = "0.1.0"
+edition = "2021"
+description = "Multi-agent ensemble functionality for G3"
+
+[dependencies]
+g3-core = { path = "../g3-core" }
+g3-config = { path = "../g3-config" }
+clap = { workspace = true }
+tokio = { workspace = true }
+anyhow = { workspace = true }
+tracing = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+chrono = { version = "0.4", features = ["serde"] }
+uuid = { workspace = true }
+
+[dev-dependencies]
+tempfile = "3.8"
--- a/crates/g3-ensembles/TESTING.md
+++ b/crates/g3-ensembles/TESTING.md
@@ -0,0 +1,422 @@
+# G3 Ensembles Testing Documentation
+
+This document describes the comprehensive test suite for the g3-ensembles crate (Flock Mode).
+
+## Test Coverage
+
+### Unit Tests (`src/tests.rs`)
+
+Unit tests cover the core data structures and logic:
+
+#### Status Module Tests
+
+1. **`test_segment_state_display`**
+   - Verifies that `SegmentState` enum displays correctly with emojis
+   - Tests all states: Pending, Running, Completed, Failed, Cancelled
+
+2. **`test_flock_status_creation`**
+   - Tests creation of `FlockStatus` with correct initial values
+   - Verifies session ID, segment count, and zero metrics
+
+3. **`test_segment_status_update`**
+   - Tests updating a single segment's status
+   - Verifies metrics are correctly aggregated
+
+4. **`test_multiple_segment_updates`**
+   - Tests updating multiple segments
+   - Verifies aggregate metrics (tokens, tool calls, errors) are summed correctly
+
+5. **`test_is_complete`**
+   - Tests the completion detection logic
+   - Verifies that flock is only complete when all segments are in terminal states
+   - Tests various scenarios: no segments, partial completion, full completion
+
+6. **`test_count_by_state`**
+   - Tests counting segments by their state
+   - Verifies correct counts for each state type
+
+7. **`test_status_serialization`**
+   - Tests JSON serialization and deserialization
+   - Verifies round-trip conversion preserves all data
+
+8. **`test_report_generation`**
+   - Tests the comprehensive report generation
+   - Verifies all expected sections are present
+   - Checks that metrics are correctly displayed
+
+**Run unit tests:**
+```bash
+cargo test -p g3-ensembles --lib
+```
+
+### Integration Tests (`tests/integration_tests.rs`)
+
+Integration tests verify end-to-end functionality with real file system and git operations:
+
+#### Configuration Tests
+
+1. **`test_flock_config_validation`**
+   - Tests validation of project directory requirements
+   - Verifies error messages for:
+     - Non-existent directory
+     - Non-git repository
+     - Missing flock-requirements.md
+   - Verifies successful creation with valid inputs
+
+2. **`test_flock_config_builder`**
+   - Tests the builder pattern for `FlockConfig`
+   - Verifies `with_max_turns()` and `with_g3_binary()` methods
+
+3. **`test_workspace_creation`**
+   - Tests creation of `FlockMode` instance
+   - Verifies project structure is valid
+
+#### Git Operations Tests
+
+4. **`test_git_clone_functionality`**
+   - Tests git cloning of project repository
+   - Verifies cloned repository structure:
+     - `.git` directory exists
+     - All files are present
+     - Git history is preserved
+
+5. **`test_multiple_segment_clones`**
+   - Tests cloning multiple segments (2 segments)
+   - Verifies each segment is independent
+   - Tests that modifications in one segment don't affect others
+
+6. **`test_git_repo_independence`**
+   - Comprehensive test of segment independence
+   - Creates commits in different segments
+   - Verifies git histories diverge correctly
+   - Ensures files in one segment don't appear in others
+
+#### Segment Management Tests
+
+7. **`test_segment_requirements_creation`**
+   - Tests creation of `segment-requirements.md` files
+   - Verifies content is written correctly
+
+8. **`test_requirements_file_content`**
+   - Tests the structure of flock-requirements.md
+   - Verifies content contains expected sections
+
+#### Status File Tests
+
+9. **`test_status_file_operations`**
+   - Tests saving and loading `flock-status.json`
+   - Verifies JSON serialization to file
+   - Tests deserialization from file
+
+#### JSON Processing Tests
+
+10. **`test_json_extraction`**
+    - Tests extraction of JSON arrays from text output
+    - Verifies handling of various formats:
+      - Plain JSON
+      - JSON in markdown code blocks
+      - JSON with surrounding text
+      - Invalid input (no JSON)
+
+11. **`test_partition_json_parsing`**
+    - Tests parsing of partition JSON structure
+    - Verifies module names, requirements, and dependencies are extracted correctly
+
+**Run integration tests:**
+```bash
+cargo test -p g3-ensembles --test integration_tests
+```
+
+### End-to-End Test Script (`scripts/test-flock-mode.sh`)
+
+A comprehensive bash script that tests the complete flock mode workflow:
+
+#### Test Scenarios
+
+1. **Project Creation**
+   - Creates a temporary test project
+   - Initializes git repository
+   - Creates flock-requirements.md with realistic content
+   - Makes initial commit
+
+2. **Project Structure Validation**
+   - Verifies `.git` directory exists
+   - Verifies `flock-requirements.md` exists
+
+3. **Git Operations**
+   - Tests cloning project to segment directories
+   - Verifies cloned repositories are valid
+   - Tests git log to ensure history is preserved
+
+4. **Segment Independence**
+   - Creates two segments
+   - Modifies one segment
+   - Verifies other segment is unaffected
+
+5. **Segment Requirements**
+   - Creates `segment-requirements.md` in segments
+   - Verifies content is written correctly
+
+6. **Status File Operations**
+   - Creates `flock-status.json`
+   - Validates JSON structure (if `jq` is available)
+
+**Run end-to-end test:**
+```bash
+./scripts/test-flock-mode.sh
+```
+
+## Test Results
+
+### Current Status
+
+✅ **All tests passing**
+
+- **Unit tests**: 8/8 passed
+- **Integration tests**: 11/11 passed
+- **End-to-end test**: All scenarios passed
+
+### Test Execution Time
+
+- Unit tests: ~0.01s
+- Integration tests: ~0.35s (includes git operations)
+- End-to-end test: ~1-2s (includes cleanup)
+
+## Running All Tests
+
+### Run all tests for g3-ensembles:
+```bash
+cargo test -p g3-ensembles
+```
+
+### Run with verbose output:
+```bash
+cargo test -p g3-ensembles -- --nocapture
+```
+
+### Run specific test:
+```bash
+cargo test -p g3-ensembles test_git_clone_functionality
+```
+
+### Run tests with coverage (requires cargo-tarpaulin):
+```bash
+cargo tarpaulin -p g3-ensembles
+```
+
+## Test Helpers
+
+### `create_test_project(name: &str) -> TempDir`
+
+Helper function in integration tests that creates a complete test project:
+- Initializes git repository
+- Configures git user
+- Creates flock-requirements.md with two modules
+- Creates README.md
+- Makes initial commit
+- Returns `TempDir` that auto-cleans on drop
+
+**Usage:**
+```rust
+let project_dir = create_test_project("my-test");
+// Use project_dir.path() to access the directory
+// Automatically cleaned up when project_dir goes out of scope
+```
+
+### `extract_json_array(output: &str) -> Option<String>`
+
+Helper function that extracts JSON arrays from text output:
+- Finds first `[` and last `]`
+- Returns content between them
+- Returns `None` if no valid JSON array found
+
+## Test Data
+
+### Sample Requirements
+
+The test suite uses realistic requirements for a calculator project:
+
+**Module A: Core Library**
+- Arithmetic operations (add, sub, mul, div)
+- Error handling for division by zero
+- Unit tests
+- Documentation
+
+**Module B: CLI Application**
+- Command-line interface using clap
+- Subcommands for each operation
+- User-friendly output
+- Error handling
+
+This structure tests the partitioning logic with:
+- Clear module boundaries
+- Dependency relationship (CLI depends on Core)
+- Realistic implementation requirements
+
+## Continuous Integration
+
+To integrate these tests into CI/CD:
+
+### GitHub Actions Example
+
+```yaml
+name: Test G3 Ensembles
+
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions-rs/toolchain@v1
+        with:
+          toolchain: stable
+      - name: Run unit tests
+        run: cargo test -p g3-ensembles --lib
+      - name: Run integration tests
+        run: cargo test -p g3-ensembles --test integration_tests
+      - name: Run end-to-end test
+        run: ./scripts/test-flock-mode.sh
+```
+
+## Test Coverage Goals
+
+### Current Coverage
+
+- ✅ Status data structures: 100%
+- ✅ Configuration validation: 100%
+- ✅ Git operations: 100%
+- ✅ Segment independence: 100%
+- ✅ JSON processing: 100%
+- ⚠️  Full flock execution: Requires LLM access (tested manually)
+
+### Future Test Additions
+
+1. **Mock LLM Tests**
+   - Mock the partitioning agent response
+   - Test full flock workflow without real LLM calls
+
+2. **Performance Tests**
+   - Test with large numbers of segments (10+)
+   - Measure memory usage
+   - Test concurrent segment execution
+
+3. **Error Handling Tests**
+   - Test behavior when git operations fail
+   - Test behavior when segments fail
+   - Test recovery scenarios
+
+4. **Edge Cases**
+   - Empty requirements file
+   - Single segment (degenerate case)
+   - Very large requirements file
+   - Binary files in project
+
+## Debugging Tests
+
+### Enable debug logging:
+```bash
+RUST_LOG=debug cargo test -p g3-ensembles -- --nocapture
+```
+
+### Keep test artifacts:
+```bash
+# Modify test to not cleanup
+# Or inspect TEST_DIR before cleanup in end-to-end test
+export TEST_DIR=/tmp/my-test
+./scripts/test-flock-mode.sh
+ls -la $TEST_DIR
+```
+
+### Run single test with backtrace:
+```bash
+RUST_BACKTRACE=1 cargo test -p g3-ensembles test_git_clone_functionality -- --nocapture
+```
+
+## Contributing Tests
+
+When adding new features to g3-ensembles:
+
+1. **Add unit tests** for new data structures and logic
+2. **Add integration tests** for new file/git operations
+3. **Update end-to-end test** if workflow changes
+4. **Document tests** in this file
+5. **Ensure all tests pass** before submitting PR
+
+### Test Naming Convention
+
+- Unit tests: `test_<functionality>`
+- Integration tests: `test_<feature>_<scenario>`
+- Use descriptive names that explain what is being tested
+
+### Test Structure
+
+```rust
+#[test]
+fn test_feature_name() {
+    // Arrange: Set up test data
+    let data = create_test_data();
+    
+    // Act: Perform the operation
+    let result = perform_operation(data);
+    
+    // Assert: Verify the result
+    assert_eq!(result, expected_value);
+    assert!(result.is_ok());
+}
+```
+
+## Troubleshooting
+
+### Tests fail with "git not found"
+
+**Solution**: Install git:
+```bash
+# macOS
+brew install git
+
+# Ubuntu/Debian
+sudo apt-get install git
+
+# Windows
+choco install git
+```
+
+### Tests fail with permission errors
+
+**Solution**: Ensure test directories are writable:
+```bash
+chmod -R u+w /tmp
+```
+
+### Integration tests are slow
+
+**Cause**: Git operations and file I/O take time
+
+**Solution**: Run only unit tests for quick feedback:
+```bash
+cargo test -p g3-ensembles --lib
+```
+
+### Test artifacts not cleaned up
+
+**Cause**: Test panicked before cleanup
+
+**Solution**: Manually clean temp directories:
+```bash
+rm -rf /tmp/tmp.*
+```
+
+## Summary
+
+The g3-ensembles test suite provides comprehensive coverage of:
+- ✅ Core data structures and logic
+- ✅ Configuration validation
+- ✅ Git repository operations
+- ✅ Segment independence
+- ✅ Status tracking and reporting
+- ✅ JSON processing
+- ✅ End-to-end workflow
+
+All tests are automated, fast, and reliable. The test suite ensures that flock mode works correctly across different scenarios and edge cases.
--- a/crates/g3-ensembles/src/flock.rs
+++ b/crates/g3-ensembles/src/flock.rs
@@ -0,0 +1,966 @@
+//! Flock mode implementation - parallel multi-agent development
+
+use anyhow::{Context, Result};
+use chrono::Utc;
+use g3_config::Config;
+use std::path::{Path, PathBuf};
+use std::process::Stdio;
+use tokio::io::{AsyncBufReadExt, BufReader};
+use tokio::process::Command;
+use tracing::{debug, error, info, warn};
+use uuid::Uuid;
+
+use crate::status::{FlockStatus, SegmentState, SegmentStatus};
+
+/// Configuration for flock mode
+#[derive(Debug, Clone)]
+pub struct FlockConfig {
+    /// Project directory (must be a git repo with flock-requirements.md)
+    pub project_dir: PathBuf,
+
+    /// Flock workspace directory where segments will be created
+    pub flock_workspace: PathBuf,
+
+    /// Number of segments to partition work into
+    pub num_segments: usize,
+
+    /// Maximum turns per segment (for autonomous mode)
+    pub max_turns: usize,
+
+    /// G3 configuration to use for agents
+    pub g3_config: Config,
+
+    /// Path to g3 binary (defaults to current executable)
+    pub g3_binary: Option<PathBuf>,
+}
+
+impl FlockConfig {
+    /// Create a new flock configuration
+    pub fn new(
+        project_dir: PathBuf,
+        flock_workspace: PathBuf,
+        num_segments: usize,
+    ) -> Result<Self> {
+        // Validate project directory
+        if !project_dir.exists() {
+            anyhow::bail!(
+                "Project directory does not exist: {}",
+                project_dir.display()
+            );
+        }
+
+        // Check if it's a git repo
+        if !project_dir.join(".git").exists() {
+            anyhow::bail!(
+                "Project directory must be a git repository: {}",
+                project_dir.display()
+            );
+        }
+
+        // Check for flock-requirements.md
+        let requirements_path = project_dir.join("flock-requirements.md");
+        if !requirements_path.exists() {
+            anyhow::bail!(
+                "Project directory must contain flock-requirements.md: {}",
+                project_dir.display()
+            );
+        }
+
+        // Load default config
+        let g3_config = Config::load(None)?;
+
+        Ok(Self {
+            project_dir,
+            flock_workspace,
+            num_segments,
+            max_turns: 5, // Default
+            g3_config,
+            g3_binary: None,
+        })
+    }
+
+    /// Set maximum turns per segment
+    pub fn with_max_turns(mut self, max_turns: usize) -> Self {
+        self.max_turns = max_turns;
+        self
+    }
+
+    /// Set custom g3 binary path
+    pub fn with_g3_binary(mut self, binary: PathBuf) -> Self {
+        self.g3_binary = Some(binary);
+        self
+    }
+
+    /// Set custom g3 config
+    pub fn with_config(mut self, config: Config) -> Self {
+        self.g3_config = config;
+        self
+    }
+}
+
+/// Flock mode orchestrator
+pub struct FlockMode {
+    config: FlockConfig,
+    status: FlockStatus,
+    session_id: String,
+}
+
+impl FlockMode {
+    /// Create a new flock mode instance
+    pub fn new(config: FlockConfig) -> Result<Self> {
+        let session_id = Uuid::new_v4().to_string();
+
+        let status = FlockStatus::new(
+            session_id.clone(),
+            config.project_dir.clone(),
+            config.flock_workspace.clone(),
+            config.num_segments,
+        );
+
+        Ok(Self {
+            config,
+            status,
+            session_id,
+        })
+    }
+
+    /// Run flock mode
+    pub async fn run(&mut self) -> Result<()> {
+        info!(
+            "Starting flock mode with {} segments",
+            self.config.num_segments
+        );
+
+        // Step 1: Partition requirements
+        println!(
+            "\n🧠 Step 1: Partitioning requirements into {} segments...",
+            self.config.num_segments
+        );
+        let partitions = self.partition_requirements().await?;
+
+        // Step 2: Create segment workspaces
+        println!("\n📁 Step 2: Creating segment workspaces...");
+        self.create_segment_workspaces(&partitions).await?;
+
+        // Step 3: Run segments in parallel
+        println!(
+            "\n🚀 Step 3: Running {} segments in parallel...",
+            self.config.num_segments
+        );
+        self.run_segments_parallel().await?;
+
+        // Step 4: Generate final report
+        println!("\n📊 Step 4: Generating final report...");
+        self.status.completed_at = Some(Utc::now());
+        self.save_status()?;
+
+        let report = self.status.generate_report();
+        println!("{}", report);
+
+        Ok(())
+    }
+
+    /// Partition requirements using an AI agent
+    async fn partition_requirements(&mut self) -> Result<Vec<String>> {
+        let requirements_path = self.config.project_dir.join("flock-requirements.md");
+        let requirements_content = std::fs::read_to_string(&requirements_path)
+            .context("Failed to read flock-requirements.md")?;
+
+        // Create a temporary workspace for the partitioning agent
+        let partition_workspace = self.config.flock_workspace.join("_partition");
+        std::fs::create_dir_all(&partition_workspace)?;
+
+        // Create the partitioning prompt
+        let partition_prompt = format!(
+            "You are a software architect tasked with partitioning project requirements into {} logical, \
+            largely non-overlapping modules that can grow into separate architectural components \
+            (e.g., crates, services, or packages).\n\n\
+            REQUIREMENTS:\n{}\n\n\
+            INSTRUCTIONS:\n\
+            1. Analyze the requirements carefully\n\
+            2. Identify {} distinct architectural modules that:\n\
+               - Have minimal overlap and dependencies\n\
+               - Can be developed largely independently\n\
+               - Represent logical architectural boundaries\n\
+               - Could eventually become separate crates or services\n\
+            3. For each module, provide:\n\
+               - A clear module name\n\
+               - The specific requirements that belong to this module\n\
+               - Any dependencies on other modules\n\n\
+            4. Return your final partitioning exactly once, prefixed by the marker '{{PARTITION JSON}}' followed by a fenced code block that starts with \"```json\" and ends with \"```\". Place only the JSON array inside the fence.\n\
+            5. Use the final_output tool to provide your partitioning as a JSON array of objects, where each object has:\n\
+               - \"module_name\": string\n\
+               - \"requirements\": string (the requirements text for this module)\n\
+               - \"dependencies\": array of strings (names of other modules this depends on)\n\n\
+            Example format:\n\
+            {{{{PARTITION JSON}}}}\n\
+            ```json\n\
+            [\n\
+              {{\n\
+                \"module_name\": \"core-engine\",\n\
+                \"requirements\": \"Implement the core processing engine...\",\n\
+                \"dependencies\": []\n\
+              }},\n\
+              {{\n\
+                \"module_name\": \"api-server\",\n\
+                \"requirements\": \"Create REST API endpoints...\",\n\
+                \"dependencies\": [\"core-engine\"]\n\
+              }}\n\
+            ]\n\
+            ```\n\n\
+            Be thoughtful and strategic in your partitioning. The goal is to enable parallel development.",
+            self.config.num_segments,
+            requirements_content,
+            self.config.num_segments
+        );
+
+        // Get g3 binary path
+        let g3_binary = self.get_g3_binary()?;
+
+        // Run g3 in single-shot mode to partition requirements
+        println!("   Analyzing requirements and creating partitions...");
+        let output = Command::new(&g3_binary)
+            .arg("--workspace")
+            .arg(&partition_workspace)
+            .arg("--quiet") // Disable logging for partitioning agent
+            .arg(&partition_prompt)
+            .output()
+            .await
+            .context("Failed to run g3 for partitioning")?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            anyhow::bail!("Partitioning agent failed: {}", stderr);
+        }
+
+        let stdout = String::from_utf8_lossy(&output.stdout);
+        debug!("Partitioning agent output: {}", stdout);
+
+        // Extract JSON from the output
+        let partitions_json = Self::extract_json_from_output(&stdout)
+            .context("Failed to extract partition JSON from agent output")?;
+
+        // Parse the partitions
+        let partitions: Vec<serde_json::Value> =
+            serde_json::from_str(&partitions_json).context("Failed to parse partition JSON")?;
+
+        if partitions.len() != self.config.num_segments {
+            warn!(
+                "Expected {} partitions but got {}. Adjusting...",
+                self.config.num_segments,
+                partitions.len()
+            );
+        }
+
+        // Extract requirements text from each partition
+        let mut partition_texts = Vec::new();
+        for (i, partition) in partitions.iter().enumerate() {
+            let default_name = format!("module-{}", i + 1);
+            let module_name = partition["module_name"].as_str().unwrap_or(&default_name);
+            let requirements = partition["requirements"]
+                .as_str()
+                .context("Missing requirements field in partition")?;
+            let dependencies = partition["dependencies"]
+                .as_array()
+                .map(|arr| {
+                    arr.iter()
+                        .filter_map(|v| v.as_str())
+                        .collect::<Vec<_>>()
+                        .join(", ")
+                })
+                .unwrap_or_default();
+
+            let partition_text = format!(
+                "# Module: {}\n\n## Dependencies\n{}\n\n## Requirements\n\n{}",
+                module_name,
+                if dependencies.is_empty() {
+                    "None".to_string()
+                } else {
+                    dependencies
+                },
+                requirements
+            );
+
+            partition_texts.push(partition_text);
+            println!("   ✓ Created partition {}: {}", i + 1, module_name);
+        }
+
+        Ok(partition_texts)
+    }
+
+    /// Extract JSON from agent output (looks for JSON array in output)
+    fn extract_json_from_output(output: &str) -> Result<String> {
+        // Try to find all occurrences of partition markers and extract valid JSON
+        const MARKERS: &[&str] = &["{{PARTITION JSON}}", "{PARTITION JSON}"];
+
+        let mut candidates = Vec::new();
+
+        // Find all marker occurrences
+        for &marker in MARKERS {
+            let mut search_start = 0;
+            while let Some(marker_index) = output[search_start..].find(marker) {
+                let absolute_index = search_start + marker_index;
+                let after_marker = &output[absolute_index + marker.len()..];
+
+                // Try to find a code fence after this marker
+                if let Some(fence_start) = after_marker.find("```") {
+                    let after_fence = &after_marker[fence_start + 3..];
+
+                    // Skip optional "json" language identifier
+                    let content_start = after_fence
+                        .strip_prefix("json")
+                        .unwrap_or(after_fence)
+                        .trim_start_matches(|c: char| c.is_whitespace());
+
+                    // Find closing fence
+                    if let Some(fence_end) = content_start.find("```") {
+                        let json_candidate = content_start[..fence_end].trim();
+                        candidates.push(json_candidate.to_string());
+                    }
+                }
+
+                // Move search position forward
+                search_start = absolute_index + marker.len();
+            }
+        }
+
+        if candidates.is_empty() {
+            anyhow::bail!(
+                "Could not find any partition JSON markers with code fences in agent output"
+            );
+        }
+
+        // Try to parse each candidate and return the first valid JSON
+        let mut last_error = None;
+        for (i, candidate) in candidates.iter().enumerate() {
+            match serde_json::from_str::<serde_json::Value>(candidate) {
+                Ok(_) => {
+                    debug!(
+                        "Successfully parsed JSON from candidate {} of {}",
+                        i + 1,
+                        candidates.len()
+                    );
+                    return Ok(candidate.clone());
+                }
+                Err(e) => {
+                    debug!(
+                        "Failed to parse candidate {} of {}: {}",
+                        i + 1,
+                        candidates.len(),
+                        e
+                    );
+                    last_error = Some(e);
+                }
+            }
+        }
+
+        // If we get here, none of the candidates were valid JSON
+        if let Some(err) = last_error {
+            anyhow::bail!(
+                "Found {} JSON candidate(s) but none were valid JSON. Last error: {}",
+                candidates.len(),
+                err
+            );
+        }
+
+        anyhow::bail!("No valid JSON found in output")
+    }
+
+    /// Create segment workspaces by copying project directory
+    async fn create_segment_workspaces(&mut self, partitions: &[String]) -> Result<()> {
+        // Ensure flock workspace exists
+        std::fs::create_dir_all(&self.config.flock_workspace)?;
+
+        for (i, partition) in partitions.iter().enumerate() {
+            let segment_id = i + 1;
+            let segment_dir = self
+                .config
+                .flock_workspace
+                .join(format!("segment-{}", segment_id));
+
+            println!("   Creating segment {} workspace...", segment_id);
+
+            // Copy project directory to segment directory
+            self.copy_git_repo(&self.config.project_dir, &segment_dir)
+                .await
+                .context(format!("Failed to copy project to segment {}", segment_id))?;
+
+            // Write segment-requirements.md
+            let requirements_path = segment_dir.join("segment-requirements.md");
+            std::fs::write(&requirements_path, partition).context(format!(
+                "Failed to write requirements for segment {}",
+                segment_id
+            ))?;
+
+            println!(
+                "   ✓ Segment {} workspace ready at {}",
+                segment_id,
+                segment_dir.display()
+            );
+        }
+
+        Ok(())
+    }
+
+    /// Copy a git repository to a new location
+    async fn copy_git_repo(&self, source: &Path, dest: &Path) -> Result<()> {
+        // Use git clone for efficient copying
+        let output = Command::new("git")
+            .arg("clone")
+            .arg(source)
+            .arg(dest)
+            .output()
+            .await
+            .context("Failed to run git clone")?;
+
+        if !output.status.success() {
+            let stderr = String::from_utf8_lossy(&output.stderr);
+            anyhow::bail!("Git clone failed: {}", stderr);
+        }
+
+        Ok(())
+    }
+
+    /// Run all segments in parallel
+    async fn run_segments_parallel(&mut self) -> Result<()> {
+        let mut handles = Vec::new();
+
+        for segment_id in 1..=self.config.num_segments {
+            let segment_dir = self
+                .config
+                .flock_workspace
+                .join(format!("segment-{}", segment_id));
+            let max_turns = self.config.max_turns;
+            let g3_binary = self.get_g3_binary()?;
+            let status_file = self.get_status_file_path();
+            let session_id = self.session_id.clone();
+
+            // Initialize segment status
+            let segment_status = SegmentStatus {
+                segment_id,
+                workspace: segment_dir.clone(),
+                state: SegmentState::Running,
+                started_at: Utc::now(),
+                completed_at: None,
+                tokens_used: 0,
+                tool_calls: 0,
+                errors: 0,
+                current_turn: 0,
+                max_turns,
+                last_message: Some("Starting...".to_string()),
+                error_message: None,
+            };
+
+            self.status.update_segment(segment_id, segment_status);
+            self.save_status()?;
+
+            // Spawn a task for this segment
+            let handle = tokio::spawn(async move {
+                run_segment(
+                    segment_id,
+                    segment_dir,
+                    max_turns,
+                    g3_binary,
+                    status_file,
+                    session_id,
+                )
+                .await
+            });
+
+            handles.push((segment_id, handle));
+        }
+
+        // Wait for all segments to complete
+        for (segment_id, handle) in handles {
+            match handle.await {
+                Ok(Ok(final_status)) => {
+                    println!("\n✅ Segment {} completed", segment_id);
+                    self.status.update_segment(segment_id, final_status);
+                    self.save_status()?;
+                }
+                Ok(Err(e)) => {
+                    error!("Segment {} failed: {}", segment_id, e);
+                    let mut segment_status = self
+                        .status
+                        .segments
+                        .get(&segment_id)
+                        .cloned()
+                        .unwrap_or_else(|| SegmentStatus {
+                            segment_id,
+                            workspace: self
+                                .config
+                                .flock_workspace
+                                .join(format!("segment-{}", segment_id)),
+                            state: SegmentState::Failed,
+                            started_at: Utc::now(),
+                            completed_at: Some(Utc::now()),
+                            tokens_used: 0,
+                            tool_calls: 0,
+                            errors: 1,
+                            current_turn: 0,
+                            max_turns: self.config.max_turns,
+                            last_message: None,
+                            error_message: Some(e.to_string()),
+                        });
+                    segment_status.state = SegmentState::Failed;
+                    segment_status.completed_at = Some(Utc::now());
+                    segment_status.error_message = Some(e.to_string());
+                    segment_status.errors += 1;
+                    self.status.update_segment(segment_id, segment_status);
+                    self.save_status()?;
+                }
+                Err(e) => {
+                    error!("Segment {} task panicked: {}", segment_id, e);
+                    let mut segment_status = self
+                        .status
+                        .segments
+                        .get(&segment_id)
+                        .cloned()
+                        .unwrap_or_else(|| SegmentStatus {
+                            segment_id,
+                            workspace: self
+                                .config
+                                .flock_workspace
+                                .join(format!("segment-{}", segment_id)),
+                            state: SegmentState::Failed,
+                            started_at: Utc::now(),
+                            completed_at: Some(Utc::now()),
+                            tokens_used: 0,
+                            tool_calls: 0,
+                            errors: 1,
+                            current_turn: 0,
+                            max_turns: self.config.max_turns,
+                            last_message: None,
+                            error_message: Some(format!("Task panicked: {}", e)),
+                        });
+                    segment_status.state = SegmentState::Failed;
+                    segment_status.completed_at = Some(Utc::now());
+                    segment_status.error_message = Some(format!("Task panicked: {}", e));
+                    segment_status.errors += 1;
+                    self.status.update_segment(segment_id, segment_status);
+                    self.save_status()?;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Get the g3 binary path
+    fn get_g3_binary(&self) -> Result<PathBuf> {
+        if let Some(ref binary) = self.config.g3_binary {
+            Ok(binary.clone())
+        } else {
+            // Use current executable
+            std::env::current_exe().context("Failed to get current executable path")
+        }
+    }
+
+    /// Get the status file path
+    fn get_status_file_path(&self) -> PathBuf {
+        self.config.flock_workspace.join("flock-status.json")
+    }
+
+    /// Save current status to file
+    fn save_status(&self) -> Result<()> {
+        let status_file = self.get_status_file_path();
+        self.status.save_to_file(&status_file)
+    }
+}
+
+/// Run a single segment worker
+async fn run_segment(
+    segment_id: usize,
+    segment_dir: PathBuf,
+    max_turns: usize,
+    g3_binary: PathBuf,
+    status_file: PathBuf,
+    session_id: String,
+) -> Result<SegmentStatus> {
+    info!(
+        "Starting segment {} in {}",
+        segment_id,
+        segment_dir.display()
+    );
+
+    let mut segment_status = SegmentStatus {
+        segment_id,
+        workspace: segment_dir.clone(),
+        state: SegmentState::Running,
+        started_at: Utc::now(),
+        completed_at: None,
+        tokens_used: 0,
+        tool_calls: 0,
+        errors: 0,
+        current_turn: 0,
+        max_turns,
+        last_message: Some("Starting autonomous mode...".to_string()),
+        error_message: None,
+    };
+
+    // Run g3 in autonomous mode with segment-requirements.md
+    let mut child = Command::new(&g3_binary)
+        .arg("--workspace")
+        .arg(&segment_dir)
+        .arg("--autonomous")
+        .arg("--max-turns")
+        .arg(max_turns.to_string())
+        .arg("--requirements")
+        .arg(std::fs::read_to_string(
+            segment_dir.join("segment-requirements.md"),
+        )?)
+        .arg("--quiet") // Disable session logging for workers
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .context("Failed to spawn g3 process")?;
+
+    // Stream output and update status
+    let stdout = child.stdout.take().context("Failed to get stdout")?;
+    let stderr = child.stderr.take().context("Failed to get stderr")?;
+
+    let stdout_reader = BufReader::new(stdout);
+    let stderr_reader = BufReader::new(stderr);
+
+    let mut stdout_lines = stdout_reader.lines();
+    let mut stderr_lines = stderr_reader.lines();
+
+    // Read output and update status
+    loop {
+        tokio::select! {
+            line = stdout_lines.next_line() => {
+                match line {
+                    Ok(Some(line)) => {
+                        println!("[Segment {}] {}", segment_id, line);
+
+                        // Parse output for status updates
+                        if line.contains("TURN") {
+                            // Extract turn number if possible
+                            if let Some(turn_str) = line.split("TURN").nth(1) {
+                                if let Ok(turn) = turn_str.trim().split('/').next().unwrap_or("0").parse::<usize>() {
+                                    segment_status.current_turn = turn;
+                                }
+                            }
+                        }
+
+                        segment_status.last_message = Some(line);
+                        update_status_file(&status_file, &session_id, segment_status.clone())?;
+                    }
+                    Ok(None) => break,
+                    Err(e) => {
+                        error!("Error reading stdout for segment {}: {}", segment_id, e);
+                        break;
+                    }
+                }
+            }
+            line = stderr_lines.next_line() => {
+                match line {
+                    Ok(Some(line)) => {
+                        eprintln!("[Segment {} ERROR] {}", segment_id, line);
+                        segment_status.errors += 1;
+                        update_status_file(&status_file, &session_id, segment_status.clone())?;
+                    }
+                    Ok(None) => break,
+                    Err(e) => {
+                        error!("Error reading stderr for segment {}: {}", segment_id, e);
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    // Wait for process to complete
+    let status = child
+        .wait()
+        .await
+        .context("Failed to wait for g3 process")?;
+
+    segment_status.completed_at = Some(Utc::now());
+
+    if status.success() {
+        segment_status.state = SegmentState::Completed;
+        segment_status.last_message = Some("Completed successfully".to_string());
+    } else {
+        segment_status.state = SegmentState::Failed;
+        segment_status.error_message = Some(format!("Process exited with status: {}", status));
+        segment_status.errors += 1;
+    }
+
+    // Try to extract metrics from session log if available
+    let log_dir = segment_dir.join("logs");
+    if log_dir.exists() {
+        if let Ok(entries) = std::fs::read_dir(&log_dir) {
+            for entry in entries.flatten() {
+                let path = entry.path();
+                if path.extension().and_then(|s| s.to_str()) == Some("json") {
+                    if let Ok(log_content) = std::fs::read_to_string(&path) {
+                        if let Ok(log_json) =
+                            serde_json::from_str::<serde_json::Value>(&log_content)
+                        {
+                            // Extract token usage
+                            if let Some(context) = log_json.get("context_window") {
+                                if let Some(cumulative) = context.get("cumulative_tokens") {
+                                    if let Some(tokens) = cumulative.as_u64() {
+                                        segment_status.tokens_used = tokens;
+                                    }
+                                }
+                            }
+
+                            // Count tool calls from conversation history
+                            if let Some(context) = log_json.get("context_window") {
+                                if let Some(history) = context.get("conversation_history") {
+                                    if let Some(messages) = history.as_array() {
+                                        let tool_call_count = messages
+                                            .iter()
+                                            .filter(|msg| {
+                                                msg.get("role").and_then(|r| r.as_str())
+                                                    == Some("tool")
+                                            })
+                                            .count();
+                                        segment_status.tool_calls = tool_call_count as u64;
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    update_status_file(&status_file, &session_id, segment_status.clone())?;
+
+    Ok(segment_status)
+}
+
+/// Update the status file with new segment status
+fn update_status_file(
+    status_file: &PathBuf,
+    session_id: &str,
+    segment_status: SegmentStatus,
+) -> Result<()> {
+    // Load existing status or create new one
+    let mut flock_status = if status_file.exists() {
+        FlockStatus::load_from_file(status_file)?
+    } else {
+        // This shouldn't happen, but handle it gracefully
+        FlockStatus::new(session_id.to_string(), PathBuf::new(), PathBuf::new(), 0)
+    };
+
+    flock_status.update_segment(segment_status.segment_id, segment_status);
+    flock_status.save_to_file(status_file)?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::FlockMode;
+
+    #[test]
+    fn extract_json_from_output_handles_partition_marker_and_fences() {
+        const NOISY_PREFIX: &str = concat!(
+            "\u{001b}[2m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m# Requirements Partitioning into 2 Architectural Modules\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m## Analysis\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m```json\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m[\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m  {\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m  }\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m]\u{001b}[0m\n",
+            "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m```\u{001b}[0m\n",
+            "\n",
+            "# Requirements Partitioning into 2 Architectural Modules\n",
+            "\n",
+            "## Analysis\n",
+            "\n",
+            "The requirements have been partitioned into two logical, largely non-overlapping modules based on architectural concerns:\n",
+            "\n",
+            "1. **Message Protocol Module** - Handles message identity, formatting, and LLM communication\n",
+            "2. **Observability Module** - Handles logging, summarization, and monitoring of message history\n",
+            "\n",
+            "## Module Partitioning\n",
+            "\n"
+        );
+
+        let expected_json = r#"[
+  {
+    "module_name": "message-protocol",
+    "requirements": "For all messages sent in the message history, unique ID that is not longer than six characters they need to be alphanumeric and can be case sensitive. Double check the message format specification for Open AI message formats. Write tests to make sure the LLM works, so make sure it's an integration test.",
+    "dependencies": []
+  },
+  {
+    "module_name": "observability",
+    "requirements": "Add functionality that will summarise the entire message history every time it is sent to LLM. Put it in the logs directory the same as the workspace logs for message history. Call it \"context_window_<suffix>\" where the suffix is the same name as will be used for logging the message history, for example \"g3_session_you_are_g3_in_coach_f79be2a46ac40c35.json\". Look at the code that generates that file name in G3 and use the same code. This file name changes every time and new agent is created, so follow the same pattern with the context window summary. Whenever the file name changes, update a symlink called \"current_context_window\" to that new file. Every time the message history is sent to the LLM, rewrite the entire file. Each message should only take up one line. The format is: date&time, estimated number of tokens of the entire message (use the token estimator code in G3, write it in a compact way for example 1K, 2M, 100b, 200K, colour code it graded from bright green to dark red where 200b is bright green and 50K is dark red), message ID, role (e.g. \"user\", \"assistant\"), the first hundred characters of \"content\".",
+    "dependencies": ["message-protocol"]
+  }
+]"#;
+
+        let mut output = String::from(NOISY_PREFIX);
+        output.push_str("{{PARTITION JSON}}\n```json\n");
+        output.push_str(expected_json);
+        output.push_str("```");
+
+        let extracted = FlockMode::extract_json_from_output(&output)
+            .expect("should extract JSON between markers");
+
+        assert_eq!(extracted, expected_json);
+    }
+
+    #[test]
+    fn extract_json_from_output_handles_multiple_markers_and_invalid_json() {
+        // This is the actual output from the LLM that was failing
+        let output = r#"[2m[0m
+[1A[2K│ [2m# Requirements Partitioning into 2 Architectural Modules[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m## Analysis[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2mThe requirements have been partitioned into two logical, largely non-overlapping modules based on architectural concerns:[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m1. **Message Protocol Module** - Handles message identity, formatting, and LLM communication[0m
+[1A[2K│ [2m2. **Observability Module** - Handles logging, summarization, and monitoring of message history[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m## Module Partitioning[0m
+[1A[2K│ [2m[0m{PARTITION JSON}
+[1A[2K│ [2m```json[0m
+[1A[2K│ [2m[[0m
+[1A[2K│ [2m  {[0m
+[1A[2K│ [2m    "module_name": "message-protocol",[0m
+[1A[2K│ [2m    "requirements": "For all messages sent in the message history, unique ID that is not longer than six characters they need to be alphanumeric and can be case sensitive. Double check the message format specification for Open AI message formats. Write tests to make sure the LLM works, so make sure it's an integration test.",[0m
+[1A[2K│ [2m    "dependencies": [][0m
+[1A[2K│ [2m  },[0m
+[1A[2K│ [2m  {[0m
+[1A[2K│ [2m    "module_name": "observability",[0m
+[1A[2K│ [2m    "requirements": "Add functionality that will summarise the entire message history every time it is sent to LLM. Put it in the logs directory the same as the workspace logs for message history. Call it \"context_window_<suffix>\" where the suffix is the same name as will be used for logging the message history, for example \"g3_session_you_are_g3_in_coach_f79be2a46ac40c35.json\". Look at the code that generates that file name in G3 and use the same code. This file name changes every time and new agent is created, so follow the same pattern with the context window summary. Whenever the file name changes, update a symlink called \"current_context_window\" to that new file. Every time the message history is sent to the LLM, rewrite the entire file. Each message should only take up one line. The format is: date&time, estimated number of tokens of the entire message (use the token estimator code in G3, write it in a compact way for example 1K, 2M, 100b, 200K, colour code it graded from bright green to dark red where 200b is bright green and 50K is dark red), message ID, role (e.g. \"user\", \"assistant\"), the first hundred characters of \"content\".",[0m
+[1A[2K│ [2m    "dependencies": ["message-protocol"][0m
+[1A[2K│ [2m  }[0m
+[1A[2K│ [2m][0m
+[1A[2K│ [2m```[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m## Rationale[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m### Module 1: message-protocol[0m
+[1A[2K│ [2m**Purpose**: Core messaging infrastructure and LLM communication layer[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m**Responsibilities**:[0m
+[1A[2K│ [2m- Generate unique 6-character alphanumeric message IDs[0m
+[1A[2K│ [2m- Ensure OpenAI message format compliance[0m
+[1A[2K│ [2m- Handle LLM request/response cycles[0m
+[1A[2K│ [2m- Integration testing of LLM functionality[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m**Why it's independent**: This module defines the fundamental message structure and communication protocol. It can be developed and tested independently as a core library.[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m**Future evolution**: Could become a separate crate (e.g., `g3-message-protocol`) or even a standalone service if message routing becomes complex.[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m### Module 2: observability[0m
+[1A[2K│ [2m**Purpose**: Monitoring, logging, and visualization of system activity[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m**Responsibilities**:[0m
+[1A[2K│ [2m- Summarize message history on each LLM interaction[0m
+[1A[2K│ [2m- Generate context window summary files with specific naming conventions[0m
+[1A[2K│ [2m- Manage symlinks to current summary files[0m
+[1A[2K│ [2m- Format one-line summaries with timestamps, token counts, message IDs, roles, and content previews[0m
+[1A[2K│ [2m- Color-code token estimates for visual monitoring[0m
+[1A[2K│ [2m- Integrate with existing G3 logging infrastructure[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m**Why it depends on message-protocol**: Needs access to message IDs, message content, and token estimation utilities. However, the core messaging system doesn't need to know about observability.[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m**Future evolution**: Could become a separate crate (e.g., `g3-observability`) or monitoring service that subscribes to message events.[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m## Benefits of This Partitioning[0m
+[1A[2K│ [2m[0m
+[1A[2K│ [2m1. **Separation of Concerns**: Core messaging logic is isolated from monitoring/logging concerns[0m
+[1A[2K│ [2m2. **Parallel Development**: Teams can work independently on message protocol vs. observability features[0m
+[1A[2K│ [2m3. **Testability**: Each module can be tested in isolation[0m
+[1A[2K│ [2m4. **Maintainability**: Changes to logging/monitoring don't affect core message handling[0m
+[1A[2K│ [2m5. **Scalability**: Observability could be extracted to a separate service for distributed systems[0m
+[1A[2K│ [2m6. **Dependency Direction**: Clean one-way dependency (observability → message-protocol) prevents circular dependencies[0m
+
+
+
+# Requirements Partitioning into 2 Architectural Modules
+
+## Analysis
+
+The requirements have been partitioned into two logical, largely non-overlapping modules based on architectural concerns:
+
+1. **Message Protocol Module** - Handles message identity, formatting, and LLM communication
+2. **Observability Module** - Handles logging, summarization, and monitoring of message history
+
+## Module Partitioning
+
+{{PARTITION JSON}}
+```json
+[
+  {
+    "module_name": "message-protocol",
+    "requirements": "For all messages sent in the message history, unique ID that is not longer than six characters they need to be alphanumeric and can be case sensitive. Double check the message format specification for Open AI message formats. Write tests to make sure the LLM works, so make sure it's an integration test.",
+    "dependencies": []
+  },
+  {
+    "module_name": "observability",
+    "requirements": "Add functionality that will summarise the entire message history every time it is sent to LLM. Put it in the logs directory the same as the workspace logs for message history. Call it \"context_window_<suffix>\" where the suffix is the same name as will be used for logging the message history, for example \"g3_session_you_are_g3_in_coach_f79be2a46ac40c35.json\". Look at the code that generates that file name in G3 and use the same code. This file name changes every time and new agent is created, so follow the same pattern with the context window summary. Whenever the file name changes, update a symlink called \"current_context_window\" to that new file. Every time the message history is sent to the LLM, rewrite the entire file. Each message should only take up one line. The format is: date&time, estimated number of tokens of the entire message (use the token estimator code in G3, write it in a compact way for example 1K, 2M, 100b, 200K, colour code it graded from bright green to dark red where 200b is bright green and 50K is dark red), message ID, role (e.g. \"user\", \"assistant\"), the first hundred characters of \"content\".",
+    "dependencies": ["message-protocol"]
+  }
+]
+```
+
+## Rationale
+
+### Module 1: message-protocol
+**Purpose**: Core messaging infrastructure and LLM communication layer
+
+**Responsibilities**:
+- Generate unique 6-character alphanumeric message IDs
+- Ensure OpenAI message format compliance
+- Handle LLM request/response cycles
+- Integration testing of LLM functionality
+
+**Why it's independent**: This module defines the fundamental message structure and communication protocol. It can be developed and tested independently as a core library.
+
+**Future evolution**: Could become a separate crate (e.g., `g3-message-protocol`) or even a standalone service if message routing becomes complex.
+
+### Module 2: observability
+**Purpose**: Monitoring, logging, and visualization of system activity
+
+**Responsibilities**:
+- Summarize message history on each LLM interaction
+- Generate context window summary files with specific naming conventions
+- Manage symlinks to current summary files
+- Format one-line summaries with timestamps, token counts, message IDs, roles, and content previews
+- Color-code token estimates for visual monitoring
+- Integrate with existing G3 logging infrastructure
+
+**Why it depends on message-protocol**: Needs access to message IDs, message content, and token estimation utilities. However, the core messaging system doesn't need to know about observability.
+
+**Future evolution**: Could become a separate crate (e.g., `g3-observability`) or monitoring service that subscribes to message events.
+
+## Benefits of This Partitioning
+
+1. **Separation of Concerns**: Core messaging logic is isolated from monitoring/logging concerns
+2. **Parallel Development**: Teams can work independently on message protocol vs. observability features
+3. **Testability**: Each module can be tested in isolation
+4. **Maintainability**: Changes to logging/monitoring don't affect core message handling
+5. **Scalability**: Observability could be extracted to a separate service for distributed systems
+6. **Dependency Direction**: Clean one-way dependency (observability → message-protocol) prevents circular dependencies"#;
+
+        let extracted = FlockMode::extract_json_from_output(output)
+            .expect("should extract valid JSON from output with multiple markers");
+
+        // Should be able to parse as JSON
+        let parsed: serde_json::Value =
+            serde_json::from_str(&extracted).expect("extracted content should be valid JSON");
+
+        // Verify it's an array with 2 elements
+        assert!(parsed.is_array());
+        let arr = parsed.as_array().unwrap();
+        assert_eq!(arr.len(), 2);
+
+        // Verify the structure
+        assert_eq!(arr[0]["module_name"], "message-protocol");
+        assert_eq!(arr[1]["module_name"], "observability");
+    }
+}
--- a/crates/g3-ensembles/src/lib.rs
+++ b/crates/g3-ensembles/src/lib.rs
@@ -0,0 +1,12 @@
+//! G3 Ensembles - Multi-agent ensemble functionality
+//!
+//! This crate provides functionality for running multiple G3 agents in coordination,
+//! enabling parallel development across different architectural modules.
+
+pub mod flock;
+pub mod status;
+mod tests;
+
+/// Re-export main types for convenience
+pub use flock::{FlockConfig, FlockMode};
+pub use status::{FlockStatus, SegmentStatus};
--- a/crates/g3-ensembles/src/status.rs
+++ b/crates/g3-ensembles/src/status.rs
@@ -0,0 +1,270 @@
+//! Status tracking for flock mode
+
+use chrono::{DateTime, Utc};
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::path::PathBuf;
+
+/// Status of an individual segment worker
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SegmentStatus {
+    /// Segment number
+    pub segment_id: usize,
+
+    /// Segment workspace directory
+    pub workspace: PathBuf,
+
+    /// Current state of the segment
+    pub state: SegmentState,
+
+    /// Start time
+    pub started_at: DateTime<Utc>,
+
+    /// Completion time (if finished)
+    pub completed_at: Option<DateTime<Utc>>,
+
+    /// Total tokens used
+    pub tokens_used: u64,
+
+    /// Number of tool calls made
+    pub tool_calls: u64,
+
+    /// Number of errors encountered
+    pub errors: u64,
+
+    /// Current turn number (for autonomous mode)
+    pub current_turn: usize,
+
+    /// Maximum turns allowed
+    pub max_turns: usize,
+
+    /// Last status message
+    pub last_message: Option<String>,
+
+    /// Error message (if failed)
+    pub error_message: Option<String>,
+}
+
+/// State of a segment worker
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub enum SegmentState {
+    /// Waiting to start
+    Pending,
+
+    /// Currently running
+    Running,
+
+    /// Completed successfully
+    Completed,
+
+    /// Failed with error
+    Failed,
+
+    /// Cancelled by user
+    Cancelled,
+}
+
+impl std::fmt::Display for SegmentState {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            SegmentState::Pending => write!(f, "⏳ Pending"),
+            SegmentState::Running => write!(f, "🔄 Running"),
+            SegmentState::Completed => write!(f, "✅ Completed"),
+            SegmentState::Failed => write!(f, "❌ Failed"),
+            SegmentState::Cancelled => write!(f, "⚠️  Cancelled"),
+        }
+    }
+}
+
+/// Overall flock status
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FlockStatus {
+    /// Flock session ID
+    pub session_id: String,
+
+    /// Project directory
+    pub project_dir: PathBuf,
+
+    /// Flock workspace directory
+    pub flock_workspace: PathBuf,
+
+    /// Number of segments
+    pub num_segments: usize,
+
+    /// Start time
+    pub started_at: DateTime<Utc>,
+
+    /// Completion time (if finished)
+    pub completed_at: Option<DateTime<Utc>>,
+
+    /// Status of each segment
+    pub segments: HashMap<usize, SegmentStatus>,
+
+    /// Total tokens used across all segments
+    pub total_tokens: u64,
+
+    /// Total tool calls across all segments
+    pub total_tool_calls: u64,
+
+    /// Total errors across all segments
+    pub total_errors: u64,
+}
+
+impl FlockStatus {
+    /// Create a new flock status
+    pub fn new(
+        session_id: String,
+        project_dir: PathBuf,
+        flock_workspace: PathBuf,
+        num_segments: usize,
+    ) -> Self {
+        Self {
+            session_id,
+            project_dir,
+            flock_workspace,
+            num_segments,
+            started_at: Utc::now(),
+            completed_at: None,
+            segments: HashMap::new(),
+            total_tokens: 0,
+            total_tool_calls: 0,
+            total_errors: 0,
+        }
+    }
+
+    /// Update segment status
+    pub fn update_segment(&mut self, segment_id: usize, status: SegmentStatus) {
+        self.segments.insert(segment_id, status);
+        self.recalculate_totals();
+    }
+
+    /// Recalculate total metrics
+    fn recalculate_totals(&mut self) {
+        self.total_tokens = self.segments.values().map(|s| s.tokens_used).sum();
+        self.total_tool_calls = self.segments.values().map(|s| s.tool_calls).sum();
+        self.total_errors = self.segments.values().map(|s| s.errors).sum();
+    }
+
+    /// Check if all segments are complete
+    pub fn is_complete(&self) -> bool {
+        self.segments.len() == self.num_segments
+            && self.segments.values().all(|s| {
+                matches!(
+                    s.state,
+                    SegmentState::Completed | SegmentState::Failed | SegmentState::Cancelled
+                )
+            })
+    }
+
+    /// Get count of segments by state
+    pub fn count_by_state(&self, state: SegmentState) -> usize {
+        self.segments.values().filter(|s| s.state == state).count()
+    }
+
+    /// Save status to file
+    pub fn save_to_file(&self, path: &PathBuf) -> anyhow::Result<()> {
+        let json = serde_json::to_string_pretty(self)?;
+        std::fs::write(path, json)?;
+        Ok(())
+    }
+
+    /// Load status from file
+    pub fn load_from_file(path: &PathBuf) -> anyhow::Result<Self> {
+        let json = std::fs::read_to_string(path)?;
+        let status = serde_json::from_str(&json)?;
+        Ok(status)
+    }
+
+    /// Generate a summary report
+    pub fn generate_report(&self) -> String {
+        let mut report = String::new();
+
+        report.push_str(&format!("\n{}", "=".repeat(80)));
+        report.push_str(&format!("\n📊 FLOCK MODE SESSION REPORT"));
+        report.push_str(&format!("\n{}", "=".repeat(80)));
+
+        report.push_str(&format!("\n\n🆔 Session ID: {}", self.session_id));
+        report.push_str(&format!("\n📁 Project: {}", self.project_dir.display()));
+        report.push_str(&format!(
+            "\n🗂️  Workspace: {}",
+            self.flock_workspace.display()
+        ));
+        report.push_str(&format!("\n🔢 Segments: {}", self.num_segments));
+
+        let duration = if let Some(completed) = self.completed_at {
+            completed.signed_duration_since(self.started_at)
+        } else {
+            Utc::now().signed_duration_since(self.started_at)
+        };
+
+        report.push_str(&format!(
+            "\n⏱️  Duration: {:.2}s",
+            duration.num_milliseconds() as f64 / 1000.0
+        ));
+
+        // Segment status summary
+        report.push_str(&format!("\n\n📈 Segment Status:"));
+        report.push_str(&format!(
+            "\n   • Completed: {}",
+            self.count_by_state(SegmentState::Completed)
+        ));
+        report.push_str(&format!(
+            "\n   • Running: {}",
+            self.count_by_state(SegmentState::Running)
+        ));
+        report.push_str(&format!(
+            "\n   • Failed: {}",
+            self.count_by_state(SegmentState::Failed)
+        ));
+        report.push_str(&format!(
+            "\n   • Pending: {}",
+            self.count_by_state(SegmentState::Pending)
+        ));
+        report.push_str(&format!(
+            "\n   • Cancelled: {}",
+            self.count_by_state(SegmentState::Cancelled)
+        ));
+
+        // Metrics
+        report.push_str(&format!("\n\n📊 Aggregate Metrics:"));
+        report.push_str(&format!("\n   • Total Tokens: {}", self.total_tokens));
+        report.push_str(&format!(
+            "\n   • Total Tool Calls: {}",
+            self.total_tool_calls
+        ));
+        report.push_str(&format!("\n   • Total Errors: {}", self.total_errors));
+
+        // Per-segment details
+        report.push_str(&format!("\n\n🔍 Segment Details:"));
+        let mut segments: Vec<_> = self.segments.iter().collect();
+        segments.sort_by_key(|(id, _)| *id);
+
+        for (id, segment) in segments {
+            report.push_str(&format!("\n\n   Segment {}:", id));
+            report.push_str(&format!("\n      Status: {}", segment.state));
+            report.push_str(&format!(
+                "\n      Workspace: {}",
+                segment.workspace.display()
+            ));
+            report.push_str(&format!("\n      Tokens: {}", segment.tokens_used));
+            report.push_str(&format!("\n      Tool Calls: {}", segment.tool_calls));
+            report.push_str(&format!("\n      Errors: {}", segment.errors));
+            report.push_str(&format!(
+                "\n      Turn: {}/{}",
+                segment.current_turn, segment.max_turns
+            ));
+
+            if let Some(ref msg) = segment.last_message {
+                report.push_str(&format!("\n      Last Message: {}", msg));
+            }
+
+            if let Some(ref err) = segment.error_message {
+                report.push_str(&format!("\n      Error: {}", err));
+            }
+        }
+
+        report.push_str(&format!("\n\n{}", "=".repeat(80)));
+
+        report
+    }
+}
--- a/crates/g3-ensembles/src/tests.rs
+++ b/crates/g3-ensembles/src/tests.rs
@@ -0,0 +1,330 @@
+//! Unit tests for g3-ensembles
+
+#[cfg(test)]
+mod tests {
+    use crate::status::{FlockStatus, SegmentState, SegmentStatus};
+    use chrono::Utc;
+    use std::path::PathBuf;
+
+    #[test]
+    fn test_segment_state_display() {
+        assert_eq!(format!("{}", SegmentState::Pending), "⏳ Pending");
+        assert_eq!(format!("{}", SegmentState::Running), "🔄 Running");
+        assert_eq!(format!("{}", SegmentState::Completed), "✅ Completed");
+        assert_eq!(format!("{}", SegmentState::Failed), "❌ Failed");
+        assert_eq!(format!("{}", SegmentState::Cancelled), "⚠️  Cancelled");
+    }
+
+    #[test]
+    fn test_flock_status_creation() {
+        let status = FlockStatus::new(
+            "test-session".to_string(),
+            PathBuf::from("/test/project"),
+            PathBuf::from("/test/workspace"),
+            3,
+        );
+
+        assert_eq!(status.session_id, "test-session");
+        assert_eq!(status.num_segments, 3);
+        assert_eq!(status.segments.len(), 0);
+        assert_eq!(status.total_tokens, 0);
+        assert_eq!(status.total_tool_calls, 0);
+        assert_eq!(status.total_errors, 0);
+        assert!(status.completed_at.is_none());
+    }
+
+    #[test]
+    fn test_segment_status_update() {
+        let mut status = FlockStatus::new(
+            "test-session".to_string(),
+            PathBuf::from("/test/project"),
+            PathBuf::from("/test/workspace"),
+            2,
+        );
+
+        let segment1 = SegmentStatus {
+            segment_id: 1,
+            workspace: PathBuf::from("/test/workspace/segment-1"),
+            state: SegmentState::Completed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 1000,
+            tool_calls: 50,
+            errors: 2,
+            current_turn: 5,
+            max_turns: 10,
+            last_message: Some("Done".to_string()),
+            error_message: None,
+        };
+
+        status.update_segment(1, segment1);
+
+        assert_eq!(status.segments.len(), 1);
+        assert_eq!(status.total_tokens, 1000);
+        assert_eq!(status.total_tool_calls, 50);
+        assert_eq!(status.total_errors, 2);
+    }
+
+    #[test]
+    fn test_multiple_segment_updates() {
+        let mut status = FlockStatus::new(
+            "test-session".to_string(),
+            PathBuf::from("/test/project"),
+            PathBuf::from("/test/workspace"),
+            2,
+        );
+
+        let segment1 = SegmentStatus {
+            segment_id: 1,
+            workspace: PathBuf::from("/test/workspace/segment-1"),
+            state: SegmentState::Completed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 1000,
+            tool_calls: 50,
+            errors: 2,
+            current_turn: 5,
+            max_turns: 10,
+            last_message: Some("Done".to_string()),
+            error_message: None,
+        };
+
+        let segment2 = SegmentStatus {
+            segment_id: 2,
+            workspace: PathBuf::from("/test/workspace/segment-2"),
+            state: SegmentState::Failed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 500,
+            tool_calls: 25,
+            errors: 5,
+            current_turn: 3,
+            max_turns: 10,
+            last_message: Some("Error".to_string()),
+            error_message: Some("Test error".to_string()),
+        };
+
+        status.update_segment(1, segment1);
+        status.update_segment(2, segment2);
+
+        assert_eq!(status.segments.len(), 2);
+        assert_eq!(status.total_tokens, 1500);
+        assert_eq!(status.total_tool_calls, 75);
+        assert_eq!(status.total_errors, 7);
+    }
+
+    #[test]
+    fn test_is_complete() {
+        let mut status = FlockStatus::new(
+            "test-session".to_string(),
+            PathBuf::from("/test/project"),
+            PathBuf::from("/test/workspace"),
+            2,
+        );
+
+        // Not complete - no segments
+        assert!(!status.is_complete());
+
+        // Add one completed segment
+        let segment1 = SegmentStatus {
+            segment_id: 1,
+            workspace: PathBuf::from("/test/workspace/segment-1"),
+            state: SegmentState::Completed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 1000,
+            tool_calls: 50,
+            errors: 0,
+            current_turn: 5,
+            max_turns: 10,
+            last_message: None,
+            error_message: None,
+        };
+        status.update_segment(1, segment1);
+
+        // Still not complete - only 1 of 2 segments
+        assert!(!status.is_complete());
+
+        // Add second segment (running)
+        let segment2 = SegmentStatus {
+            segment_id: 2,
+            workspace: PathBuf::from("/test/workspace/segment-2"),
+            state: SegmentState::Running,
+            started_at: Utc::now(),
+            completed_at: None,
+            tokens_used: 500,
+            tool_calls: 25,
+            errors: 0,
+            current_turn: 3,
+            max_turns: 10,
+            last_message: None,
+            error_message: None,
+        };
+        status.update_segment(2, segment2);
+
+        // Still not complete - segment 2 is running
+        assert!(!status.is_complete());
+
+        // Update segment 2 to completed
+        let segment2_done = SegmentStatus {
+            segment_id: 2,
+            workspace: PathBuf::from("/test/workspace/segment-2"),
+            state: SegmentState::Completed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 500,
+            tool_calls: 25,
+            errors: 0,
+            current_turn: 5,
+            max_turns: 10,
+            last_message: None,
+            error_message: None,
+        };
+        status.update_segment(2, segment2_done);
+
+        // Now complete
+        assert!(status.is_complete());
+    }
+
+    #[test]
+    fn test_count_by_state() {
+        let mut status = FlockStatus::new(
+            "test-session".to_string(),
+            PathBuf::from("/test/project"),
+            PathBuf::from("/test/workspace"),
+            3,
+        );
+
+        let segment1 = SegmentStatus {
+            segment_id: 1,
+            workspace: PathBuf::from("/test/workspace/segment-1"),
+            state: SegmentState::Completed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 1000,
+            tool_calls: 50,
+            errors: 0,
+            current_turn: 5,
+            max_turns: 10,
+            last_message: None,
+            error_message: None,
+        };
+
+        let segment2 = SegmentStatus {
+            segment_id: 2,
+            workspace: PathBuf::from("/test/workspace/segment-2"),
+            state: SegmentState::Failed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 500,
+            tool_calls: 25,
+            errors: 5,
+            current_turn: 3,
+            max_turns: 10,
+            last_message: None,
+            error_message: Some("Error".to_string()),
+        };
+
+        let segment3 = SegmentStatus {
+            segment_id: 3,
+            workspace: PathBuf::from("/test/workspace/segment-3"),
+            state: SegmentState::Completed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 800,
+            tool_calls: 40,
+            errors: 1,
+            current_turn: 4,
+            max_turns: 10,
+            last_message: None,
+            error_message: None,
+        };
+
+        status.update_segment(1, segment1);
+        status.update_segment(2, segment2);
+        status.update_segment(3, segment3);
+
+        assert_eq!(status.count_by_state(SegmentState::Completed), 2);
+        assert_eq!(status.count_by_state(SegmentState::Failed), 1);
+        assert_eq!(status.count_by_state(SegmentState::Running), 0);
+        assert_eq!(status.count_by_state(SegmentState::Pending), 0);
+    }
+
+    #[test]
+    fn test_status_serialization() {
+        let mut status = FlockStatus::new(
+            "test-session".to_string(),
+            PathBuf::from("/test/project"),
+            PathBuf::from("/test/workspace"),
+            1,
+        );
+
+        let segment1 = SegmentStatus {
+            segment_id: 1,
+            workspace: PathBuf::from("/test/workspace/segment-1"),
+            state: SegmentState::Completed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 1000,
+            tool_calls: 50,
+            errors: 2,
+            current_turn: 5,
+            max_turns: 10,
+            last_message: Some("Done".to_string()),
+            error_message: None,
+        };
+
+        status.update_segment(1, segment1);
+
+        // Serialize to JSON
+        let json = serde_json::to_string(&status).expect("Failed to serialize");
+        assert!(json.contains("test-session"));
+        assert!(json.contains("segment_id"));
+        assert!(json.contains("Completed"));
+
+        // Deserialize back
+        let deserialized: FlockStatus = serde_json::from_str(&json).expect("Failed to deserialize");
+        assert_eq!(deserialized.session_id, "test-session");
+        assert_eq!(deserialized.segments.len(), 1);
+        assert_eq!(deserialized.total_tokens, 1000);
+    }
+
+    #[test]
+    fn test_report_generation() {
+        let mut status = FlockStatus::new(
+            "test-session".to_string(),
+            PathBuf::from("/test/project"),
+            PathBuf::from("/test/workspace"),
+            2,
+        );
+
+        let segment1 = SegmentStatus {
+            segment_id: 1,
+            workspace: PathBuf::from("/test/workspace/segment-1"),
+            state: SegmentState::Completed,
+            started_at: Utc::now(),
+            completed_at: Some(Utc::now()),
+            tokens_used: 1000,
+            tool_calls: 50,
+            errors: 2,
+            current_turn: 5,
+            max_turns: 10,
+            last_message: Some("Done".to_string()),
+            error_message: None,
+        };
+
+        status.update_segment(1, segment1);
+
+        let report = status.generate_report();
+
+        // Check that report contains expected sections
+        assert!(report.contains("FLOCK MODE SESSION REPORT"));
+        assert!(report.contains("test-session"));
+        assert!(report.contains("Segment Status:"));
+        assert!(report.contains("Aggregate Metrics:"));
+        assert!(report.contains("Segment Details:"));
+        assert!(report.contains("Total Tokens: 1000"));
+        assert!(report.contains("Total Tool Calls: 50"));
+        assert!(report.contains("Total Errors: 2"));
+    }
+}
--- a/crates/g3-ensembles/tests/integration_tests.rs
+++ b/crates/g3-ensembles/tests/integration_tests.rs
@@ -0,0 +1,445 @@
+//! Integration tests for g3-ensembles flock mode
+
+use g3_ensembles::{FlockConfig, FlockMode};
+use std::fs;
+use std::path::PathBuf;
+use std::process::Command;
+use tempfile::TempDir;
+
+/// Helper to create a test git repository with flock-requirements.md
+fn create_test_project(name: &str) -> TempDir {
+    let temp_dir = TempDir::new().expect("Failed to create temp dir");
+    let project_path = temp_dir.path();
+
+    // Initialize git repo
+    let output = Command::new("git")
+        .arg("init")
+        .current_dir(project_path)
+        .output()
+        .expect("Failed to run git init");
+    assert!(output.status.success(), "git init failed");
+
+    // Configure git user (required for commits)
+    Command::new("git")
+        .args(["config", "user.email", "test@example.com"])
+        .current_dir(project_path)
+        .output()
+        .expect("Failed to configure git email");
+
+    Command::new("git")
+        .args(["config", "user.name", "Test User"])
+        .current_dir(project_path)
+        .output()
+        .expect("Failed to configure git name");
+
+    // Create flock-requirements.md
+    let requirements = format!(
+        "# {} Test Project\n\n\
+        ## Module A\n\
+        - Create a simple Rust library\n\
+        - Add a function that returns \"Hello from Module A\"\n\
+        - Write a unit test for the function\n\n\
+        ## Module B\n\
+        - Create another Rust library\n\
+        - Add a function that returns \"Hello from Module B\"\n\
+        - Write a unit test for the function\n",
+        name
+    );
+
+    fs::write(project_path.join("flock-requirements.md"), requirements)
+        .expect("Failed to write requirements");
+
+    // Create a simple README
+    fs::write(project_path.join("README.md"), format!("# {}\n", name))
+        .expect("Failed to write README");
+
+    // Create initial commit
+    Command::new("git")
+        .args(["add", "."])
+        .current_dir(project_path)
+        .output()
+        .expect("Failed to git add");
+
+    let output = Command::new("git")
+        .args(["commit", "-m", "Initial commit"])
+        .current_dir(project_path)
+        .output()
+        .expect("Failed to git commit");
+    assert!(output.status.success(), "git commit failed");
+
+    temp_dir
+}
+
+#[test]
+fn test_flock_config_validation() {
+    let temp_dir = TempDir::new().unwrap();
+    let project_path = temp_dir.path().to_path_buf();
+    let workspace_path = temp_dir.path().join("workspace");
+
+    // Should fail - not a git repo
+    let result = FlockConfig::new(project_path.clone(), workspace_path.clone(), 2);
+    assert!(result.is_err());
+    assert!(result
+        .unwrap_err()
+        .to_string()
+        .contains("must be a git repository"));
+
+    // Initialize git repo
+    Command::new("git")
+        .arg("init")
+        .current_dir(&project_path)
+        .output()
+        .expect("Failed to run git init");
+
+    // Should fail - no flock-requirements.md
+    let result = FlockConfig::new(project_path.clone(), workspace_path.clone(), 2);
+    assert!(result.is_err());
+    assert!(result
+        .unwrap_err()
+        .to_string()
+        .contains("flock-requirements.md"));
+
+    // Create flock-requirements.md
+    fs::write(project_path.join("flock-requirements.md"), "# Test\n")
+        .expect("Failed to write requirements");
+
+    // Should succeed now
+    let result = FlockConfig::new(project_path, workspace_path, 2);
+    assert!(result.is_ok());
+}
+
+#[test]
+fn test_flock_config_builder() {
+    let project_dir = create_test_project("builder-test");
+    let workspace_dir = TempDir::new().unwrap();
+
+    let config = FlockConfig::new(
+        project_dir.path().to_path_buf(),
+        workspace_dir.path().to_path_buf(),
+        2,
+    )
+    .expect("Failed to create config")
+    .with_max_turns(15)
+    .with_g3_binary(PathBuf::from("/custom/g3"));
+
+    assert_eq!(config.num_segments, 2);
+    assert_eq!(config.max_turns, 15);
+    assert_eq!(config.g3_binary, Some(PathBuf::from("/custom/g3")));
+}
+
+#[test]
+fn test_workspace_creation() {
+    let project_dir = create_test_project("workspace-test");
+    let workspace_dir = TempDir::new().unwrap();
+
+    let config = FlockConfig::new(
+        project_dir.path().to_path_buf(),
+        workspace_dir.path().to_path_buf(),
+        2,
+    )
+    .expect("Failed to create config");
+
+    // Create FlockMode instance
+    let _flock = FlockMode::new(config).expect("Failed to create FlockMode");
+
+    // Verify workspace directory structure will be created
+    // (We can't run the full flock without LLM access, but we can test the setup)
+    assert!(project_dir.path().join(".git").exists());
+    assert!(project_dir.path().join("flock-requirements.md").exists());
+}
+
+#[test]
+fn test_git_clone_functionality() {
+    let project_dir = create_test_project("clone-test");
+    let workspace_dir = TempDir::new().unwrap();
+
+    // Manually test git cloning (what flock mode does internally)
+    let segment_dir = workspace_dir.path().join("segment-1");
+
+    let output = Command::new("git")
+        .arg("clone")
+        .arg(project_dir.path())
+        .arg(&segment_dir)
+        .output()
+        .expect("Failed to run git clone");
+
+    assert!(output.status.success(), "git clone failed: {:?}", output);
+
+    // Verify the clone
+    assert!(segment_dir.exists());
+    assert!(segment_dir.join(".git").exists());
+    assert!(segment_dir.join("flock-requirements.md").exists());
+    assert!(segment_dir.join("README.md").exists());
+
+    // Verify it's a proper git repo
+    let output = Command::new("git")
+        .args(["log", "--oneline"])
+        .current_dir(&segment_dir)
+        .output()
+        .expect("Failed to run git log");
+
+    assert!(output.status.success());
+    let log = String::from_utf8_lossy(&output.stdout);
+    assert!(log.contains("Initial commit"));
+}
+
+#[test]
+fn test_multiple_segment_clones() {
+    let project_dir = create_test_project("multi-clone-test");
+    let workspace_dir = TempDir::new().unwrap();
+
+    // Clone multiple segments
+    for i in 1..=2 {
+        let segment_dir = workspace_dir.path().join(format!("segment-{}", i));
+
+        let output = Command::new("git")
+            .arg("clone")
+            .arg(project_dir.path())
+            .arg(&segment_dir)
+            .output()
+            .expect("Failed to run git clone");
+
+        assert!(output.status.success(), "git clone {} failed", i);
+        assert!(segment_dir.exists());
+        assert!(segment_dir.join(".git").exists());
+        assert!(segment_dir.join("flock-requirements.md").exists());
+    }
+
+    // Verify both segments exist and are independent
+    let segment1 = workspace_dir.path().join("segment-1");
+    let segment2 = workspace_dir.path().join("segment-2");
+
+    assert!(segment1.exists());
+    assert!(segment2.exists());
+
+    // Modify segment 1
+    fs::write(segment1.join("test.txt"), "segment 1").expect("Failed to write to segment 1");
+
+    // Verify segment 2 is unaffected
+    assert!(!segment2.join("test.txt").exists());
+}
+
+#[test]
+fn test_segment_requirements_creation() {
+    let project_dir = create_test_project("segment-req-test");
+    let workspace_dir = TempDir::new().unwrap();
+
+    // Clone a segment
+    let segment_dir = workspace_dir.path().join("segment-1");
+    Command::new("git")
+        .arg("clone")
+        .arg(project_dir.path())
+        .arg(&segment_dir)
+        .output()
+        .expect("Failed to clone");
+
+    // Create segment-requirements.md (what flock mode does)
+    let segment_requirements = "# Module A\n\nImplement module A functionality\n";
+    fs::write(
+        segment_dir.join("segment-requirements.md"),
+        segment_requirements,
+    )
+    .expect("Failed to write segment requirements");
+
+    // Verify it was created
+    assert!(segment_dir.join("segment-requirements.md").exists());
+    let content = fs::read_to_string(segment_dir.join("segment-requirements.md"))
+        .expect("Failed to read segment requirements");
+    assert!(content.contains("Module A"));
+}
+
+#[test]
+fn test_status_file_operations() {
+    use g3_ensembles::FlockStatus;
+
+    let temp_dir = TempDir::new().unwrap();
+    let status_file = temp_dir.path().join("flock-status.json");
+
+    // Create a status
+    let status = FlockStatus::new(
+        "test-session".to_string(),
+        PathBuf::from("/test/project"),
+        PathBuf::from("/test/workspace"),
+        2,
+    );
+
+    // Save to file
+    status
+        .save_to_file(&status_file)
+        .expect("Failed to save status");
+
+    // Verify file exists
+    assert!(status_file.exists());
+
+    // Load from file
+    let loaded = FlockStatus::load_from_file(&status_file).expect("Failed to load status");
+
+    assert_eq!(loaded.session_id, "test-session");
+    assert_eq!(loaded.num_segments, 2);
+}
+
+#[test]
+fn test_json_extraction() {
+    // Test the JSON extraction logic used in partition_requirements
+    let test_cases = vec![
+        (
+            "Here is the result: [{\"module_name\": \"test\"}]",
+            Some("[{\"module_name\": \"test\"}]"),
+        ),
+        (
+            "```json\n[{\"module_name\": \"test\"}]\n```",
+            Some("[{\"module_name\": \"test\"}]"),
+        ),
+        (
+            "Some text before\n[{\"a\": 1}, {\"b\": 2}]\nSome text after",
+            Some("[{\"a\": 1}, {\"b\": 2}]"),
+        ),
+        ("No JSON here", None),
+    ];
+
+    for (input, expected) in test_cases {
+        let result = extract_json_array(input);
+        match expected {
+            Some(exp) => {
+                assert!(result.is_some(), "Failed to extract from: {}", input);
+                assert_eq!(result.unwrap(), exp);
+            }
+            None => {
+                assert!(result.is_none(), "Should not extract from: {}", input);
+            }
+        }
+    }
+}
+
+// Helper function to extract JSON array (mimics the logic in flock.rs)
+fn extract_json_array(output: &str) -> Option<String> {
+    if let Some(start) = output.find('[') {
+        if let Some(end) = output.rfind(']') {
+            if end > start {
+                return Some(output[start..=end].to_string());
+            }
+        }
+    }
+    None
+}
+
+#[test]
+fn test_partition_json_parsing() {
+    // Test parsing of partition JSON
+    let json = r#"[
+        {
+            "module_name": "core-library",
+            "requirements": "Build the core library with basic functionality",
+            "dependencies": []
+        },
+        {
+            "module_name": "cli-tool",
+            "requirements": "Create a CLI tool that uses the core library",
+            "dependencies": ["core-library"]
+        }
+    ]"#;
+
+    let partitions: Vec<serde_json::Value> =
+        serde_json::from_str(json).expect("Failed to parse JSON");
+
+    assert_eq!(partitions.len(), 2);
+    assert_eq!(partitions[0]["module_name"], "core-library");
+    assert_eq!(partitions[1]["module_name"], "cli-tool");
+    assert_eq!(partitions[1]["dependencies"][0], "core-library");
+}
+
+#[test]
+fn test_requirements_file_content() {
+    let project_dir = create_test_project("content-test");
+
+    let requirements_path = project_dir.path().join("flock-requirements.md");
+    let content = fs::read_to_string(&requirements_path).expect("Failed to read requirements");
+
+    // Verify content structure
+    assert!(content.contains("# content-test Test Project"));
+    assert!(content.contains("## Module A"));
+    assert!(content.contains("## Module B"));
+    assert!(content.contains("Hello from Module A"));
+    assert!(content.contains("Hello from Module B"));
+}
+
+#[test]
+fn test_git_repo_independence() {
+    let project_dir = create_test_project("independence-test");
+    let workspace_dir = TempDir::new().unwrap();
+
+    // Clone two segments
+    let segment1 = workspace_dir.path().join("segment-1");
+    let segment2 = workspace_dir.path().join("segment-2");
+
+    Command::new("git")
+        .arg("clone")
+        .arg(project_dir.path())
+        .arg(&segment1)
+        .output()
+        .expect("Failed to clone segment 1");
+
+    Command::new("git")
+        .arg("clone")
+        .arg(project_dir.path())
+        .arg(&segment2)
+        .output()
+        .expect("Failed to clone segment 2");
+
+    // Make a commit in segment 1
+    fs::write(segment1.join("file1.txt"), "content 1").expect("Failed to write file1");
+
+    Command::new("git")
+        .args(["add", "file1.txt"])
+        .current_dir(&segment1)
+        .output()
+        .expect("Failed to git add");
+
+    Command::new("git")
+        .args(["commit", "-m", "Add file1"])
+        .current_dir(&segment1)
+        .output()
+        .expect("Failed to commit in segment 1");
+
+    // Make a different commit in segment 2
+    fs::write(segment2.join("file2.txt"), "content 2").expect("Failed to write file2");
+
+    Command::new("git")
+        .args(["add", "file2.txt"])
+        .current_dir(&segment2)
+        .output()
+        .expect("Failed to git add");
+
+    Command::new("git")
+        .args(["commit", "-m", "Add file2"])
+        .current_dir(&segment2)
+        .output()
+        .expect("Failed to commit in segment 2");
+
+    // Verify they have different commits
+    let log1 = Command::new("git")
+        .args(["log", "--oneline"])
+        .current_dir(&segment1)
+        .output()
+        .expect("Failed to get log 1");
+
+    let log2 = Command::new("git")
+        .args(["log", "--oneline"])
+        .current_dir(&segment2)
+        .output()
+        .expect("Failed to get log 2");
+
+    let log1_str = String::from_utf8_lossy(&log1.stdout);
+    let log2_str = String::from_utf8_lossy(&log2.stdout);
+
+    assert!(log1_str.contains("Add file1"));
+    assert!(!log1_str.contains("Add file2"));
+    assert!(log2_str.contains("Add file2"));
+    assert!(!log2_str.contains("Add file1"));
+
+    // Verify files exist only in their respective segments
+    assert!(segment1.join("file1.txt").exists());
+    assert!(!segment1.join("file2.txt").exists());
+    assert!(segment2.join("file2.txt").exists());
+    assert!(!segment2.join("file1.txt").exists());
+}
--- a/crates/g3-execution/examples/setup_coverage_tools.rs
+++ b/crates/g3-execution/examples/setup_coverage_tools.rs
@@ -0,0 +1,13 @@
+use g3_execution::ensure_coverage_tools_installed;
+
+fn main() -> anyhow::Result<()> {
+    // Ensure coverage tools are installed
+    let already_installed = ensure_coverage_tools_installed()?;
+
+    if already_installed {
+        println!("All coverage tools are already installed!");
+    } else {
+        println!("Coverage tools have been installed successfully!");
+    }
+    Ok(())
+}
--- a/crates/g3-execution/src/lib.rs
+++ b/crates/g3-execution/src/lib.rs
@@ -1,9 +1,20 @@
 use anyhow::Result;
 use regex::Regex;
+use std::io::Write;
 use std::process::Command;
 use tempfile::NamedTempFile;
-use std::io::Write;
-use tracing::{info, debug, error};
+use tracing::{debug, error, info};
+
+/// Expand tilde (~) in a path to the user's home directory
+fn expand_tilde(path: &str) -> String {
+    if path.starts_with("~") {
+        if let Some(home) = std::env::var_os("HOME") {
+            let home_str = home.to_string_lossy();
+            return path.replacen("~", &home_str, 1);
+        }
+    }
+    path.to_string()
+}

 pub struct CodeExecutor {
    // Future: add configuration for execution limits, sandboxing, etc.
@@ -21,40 +32,52 @@ impl CodeExecutor {
    pub fn new() -> Self {
        Self {}
    }
-    
+
    /// Extract code blocks from LLM response and execute them
    pub async fn execute_from_response(&self, response: &str) -> Result<String> {
-        self.execute_from_response_with_options(response, true).await
+        self.execute_from_response_with_options(response, true)
+            .await
    }
-    
+
    /// Extract code blocks from LLM response and execute them with UI options
-    pub async fn execute_from_response_with_options(&self, response: &str, show_code: bool) -> Result<String> {
-        debug!("CodeExecutor received response ({} chars): {}", response.len(), response);
+    pub async fn execute_from_response_with_options(
+        &self,
+        response: &str,
+        show_code: bool,
+    ) -> Result<String> {
+        debug!(
+            "CodeExecutor received response ({} chars): {}",
+            response.len(),
+            response
+        );
        let code_blocks = self.extract_code_blocks(response)?;
-        
+
        if code_blocks.is_empty() {
            if show_code {
-                return Ok(format!("⚠️  No executable code blocks found in response.\n\n{}", response));
+                return Ok(format!(
+                    "⚠️  No executable code blocks found in response.\n\n{}",
+                    response
+                ));
            } else {
                return Ok("⚠️  No executable code found.".to_string());
            }
        }
-        
+
        let mut results = Vec::new();
-        
+
        // Only show the original LLM response if show_code is true
        if show_code {
            results.push(response.to_string());
            results.push("\n🚀 Executing code...\n".to_string());
        }
-        
+
        for (language, code) in code_blocks {
            info!("Executing {} code", language);
-            
+
            if show_code {
                results.push(format!("📋 Running {} code:", language));
            }
-            
+
            match self.execute_code(&language, &code).await {
                Ok(result) => {
                    if result.success {
@@ -78,8 +101,8 @@ impl CodeExecutor {
                }
            }
        }
-        
-        // If no results were added (e.g., successful execution with no output), 
+
+        // If no results were added (e.g., successful execution with no output),
        // return a simple success message when show_code is false
        if results.is_empty() && !show_code {
            Ok("✅ Done".to_string())
@@ -87,51 +110,58 @@ impl CodeExecutor {
            Ok(results.join("\n"))
        }
    }
-    
+
    /// Extract code blocks from markdown-formatted text
    fn extract_code_blocks(&self, text: &str) -> Result<Vec<(String, String)>> {
        let mut blocks = Vec::new();
-        
+
        debug!("Extracting code blocks from text: {}", text);
-        
+
        // Pattern 1: Standard markdown format ```language\ncode```
        let markdown_re = Regex::new(r"(?s)```(\w+)?\n(.*?)```")?;
        for cap in markdown_re.captures_iter(text) {
-            let language = cap.get(1)
+            let language = cap
+                .get(1)
                .map(|m| m.as_str().to_lowercase())
                .unwrap_or_else(|| "bash".to_string()); // Default to bash
            let code = cap.get(2).map(|m| m.as_str()).unwrap_or("").trim();
-            
-            debug!("Found markdown code block - language: '{}', code: '{}'", language, code);
-            
+
+            debug!(
+                "Found markdown code block - language: '{}', code: '{}'",
+                language, code
+            );
+
            if !code.is_empty() {
                blocks.push((language, code.to_string()));
            }
        }
-        
+
        // Pattern 2: Bracket format [Language]code[/Language]
        let bracket_re = Regex::new(r"(?s)\[(\w+)\]\s*(.*?)\s*\[/(\w+)\]")?;
        for cap in bracket_re.captures_iter(text) {
            let open_lang = cap.get(1).map(|m| m.as_str()).unwrap_or("");
            let close_lang = cap.get(3).map(|m| m.as_str()).unwrap_or("");
-            
+
            // Only match if opening and closing tags are the same (case insensitive)
            if open_lang.to_lowercase() == close_lang.to_lowercase() {
                let language = open_lang.to_lowercase();
                let code = cap.get(2).map(|m| m.as_str()).unwrap_or("").trim();
-                
-                debug!("Found bracket code block - language: '{}', code: '{}'", language, code);
-                
+
+                debug!(
+                    "Found bracket code block - language: '{}', code: '{}'",
+                    language, code
+                );
+
                if !code.is_empty() {
                    blocks.push((language, code.to_string()));
                }
            }
        }
-        
+
        debug!("Total code blocks found: {}", blocks.len());
        Ok(blocks)
    }
-    
+
    /// Execute code in the specified language
    pub async fn execute_code(&self, language: &str, code: &str) -> Result<ExecutionResult> {
        match language.to_lowercase().as_str() {
@@ -145,17 +175,15 @@ impl CodeExecutor {
            }
        }
    }
-    
+
    /// Execute Python code
    async fn execute_python(&self, code: &str) -> Result<ExecutionResult> {
        let mut temp_file = NamedTempFile::new()?;
        temp_file.write_all(code.as_bytes())?;
        let temp_path = temp_file.path();
-        
-        let output = Command::new("python3")
-            .arg(temp_path)
-            .output()?;
-        
+
+        let output = Command::new("python3").arg(temp_path).output()?;
+
        Ok(ExecutionResult {
            stdout: String::from_utf8_lossy(&output.stdout).to_string(),
            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
@@ -163,15 +191,15 @@ impl CodeExecutor {
            success: output.status.success(),
        })
    }
-    
+
    /// Execute Bash code
    async fn execute_bash(&self, code: &str) -> Result<ExecutionResult> {
        // Check if this is a detached/daemon command that should run independently
-        let is_detached = code.trim_start().starts_with("setsid ") 
+        let is_detached = code.trim_start().starts_with("setsid ")
            || code.trim_start().starts_with("nohup ")
            || code.contains(" disown")
            || (code.contains(" &") && (code.contains("nohup") || code.contains("setsid")));
-        
+
        if is_detached {
            // For detached commands, just spawn and return immediately
            use std::process::Stdio;
@@ -182,7 +210,7 @@ impl CodeExecutor {
                .stdout(Stdio::null())
                .stderr(Stdio::null())
                .spawn()?;
-            
+
            return Ok(ExecutionResult {
                stdout: "✅ Command launched in background (detached process)".to_string(),
                stderr: String::new(),
@@ -190,12 +218,9 @@ impl CodeExecutor {
                success: true,
            });
        }
-        
-        let output = Command::new("bash")
-            .arg("-c")
-            .arg(code)
-            .output()?;
-        
+
+        let output = Command::new("bash").arg("-c").arg(code).output()?;
+
        Ok(ExecutionResult {
            stdout: String::from_utf8_lossy(&output.stdout).to_string(),
            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
@@ -203,17 +228,15 @@ impl CodeExecutor {
            success: output.status.success(),
        })
    }
-    
+
    /// Execute JavaScript code (requires Node.js)
    async fn execute_javascript(&self, code: &str) -> Result<ExecutionResult> {
        let mut temp_file = NamedTempFile::new()?;
        temp_file.write_all(code.as_bytes())?;
        let temp_path = temp_file.path();
-        
-        let output = Command::new("node")
-            .arg(temp_path)
-            .output()?;
-        
+
+        let output = Command::new("node").arg(temp_path).output()?;
+
        Ok(ExecutionResult {
            stdout: String::from_utf8_lossy(&output.stdout).to_string(),
            stderr: String::from_utf8_lossy(&output.stderr).to_string(),
@@ -238,28 +261,69 @@ pub trait OutputReceiver: Send + Sync {
 impl CodeExecutor {
    /// Execute bash command with streaming output
    pub async fn execute_bash_streaming<R: OutputReceiver>(
-        &self, 
-        code: &str, 
-        receiver: &R
+        &self,
+        code: &str,
+        receiver: &R,
+    ) -> Result<ExecutionResult> {
+        self.execute_bash_streaming_in_dir(code, receiver, None)
+            .await
+    }
+
+    /// Execute bash command with streaming output in a specific directory
+    pub async fn execute_bash_streaming_in_dir<R: OutputReceiver>(
+        &self,
+        code: &str,
+        receiver: &R,
+        working_dir: Option<&str>,
    ) -> Result<ExecutionResult> {
        use std::process::Stdio;
        use tokio::io::{AsyncBufReadExt, BufReader};
        use tokio::process::Command as TokioCommand;
-        
+
+        // CRITICAL DEBUG: Print to stderr so it's always visible
+        debug!("========== execute_bash_streaming_in_dir START ==========");
+        debug!("Code to execute: {}", code);
+        debug!("Working directory parameter: {:?}", working_dir);
+        debug!(
+            "FULL DIAGNOSTIC: code='{}', working_dir={:?}",
+            code, working_dir
+        );
+
+        if let Some(dir) = working_dir {
+            debug!(
+                "Working dir exists check: {}",
+                std::path::Path::new(dir).exists()
+            );
+            debug!(
+                "Working dir is_dir check: {}",
+                std::path::Path::new(dir).is_dir()
+            );
+        }
+        debug!(
+            "Current process working directory: {:?}",
+            std::env::current_dir()
+        );
+
        // Check if this is a detached/daemon command that should run independently
        // Look for patterns like: setsid, nohup with &, or explicit backgrounding with disown
-        let is_detached = code.trim_start().starts_with("setsid ") 
+        let is_detached = code.trim_start().starts_with("setsid ")
            || code.trim_start().starts_with("nohup ")
            || code.contains(" disown")
            || (code.contains(" &") && (code.contains("nohup") || code.contains("setsid")));
-        
+
        if is_detached {
            // For detached commands, just spawn and return immediately
-            TokioCommand::new("bash")
-                .arg("-c")
-                .arg(code)
-                .spawn()?;
-            
+            let mut cmd = TokioCommand::new("bash");
+            cmd.arg("-c").arg(code);
+
+            // Set working directory if provided
+            if let Some(dir) = working_dir {
+                let expanded_dir = expand_tilde(dir);
+                cmd.current_dir(&expanded_dir);
+            }
+
+            cmd.spawn()?;
+
            // Don't wait for the process - it's meant to run independently
            return Ok(ExecutionResult {
                stdout: "✅ Command launched in background (detached process)".to_string(),
@@ -268,26 +332,53 @@ impl CodeExecutor {
                success: true,
            });
        }
-        
-        let mut child = TokioCommand::new("bash")
-            .arg("-c")
+
+        let mut cmd = TokioCommand::new("bash");
+        cmd.arg("-c")
            .arg(code)
            .stdout(Stdio::piped())
-            .stderr(Stdio::piped())
-            .spawn()?;
-        
+            .stderr(Stdio::piped());
+
+        // Set working directory if provided
+        if let Some(dir) = working_dir {
+            debug!("Setting current_dir on command to: {}", dir);
+            let expanded_dir = expand_tilde(dir);
+            debug!("Expanded working dir: {}", expanded_dir);
+            debug!(
+                "Expanded dir exists: {}",
+                std::path::Path::new(&expanded_dir).exists()
+            );
+            debug!(
+                "Expanded dir is_dir: {}",
+                std::path::Path::new(&expanded_dir).is_dir()
+            );
+            cmd.current_dir(&expanded_dir);
+        }
+
+        debug!("About to spawn command...");
+        let spawn_result = cmd.spawn();
+        debug!("Spawn result: {:?}", spawn_result.is_ok());
+        let mut child = match spawn_result {
+            Ok(c) => c,
+            Err(e) => {
+                debug!("SPAWN ERROR: {:?}", e);
+                return Err(e.into());
+            }
+        };
+        debug!("Command spawned successfully");
+
        let stdout = child.stdout.take().unwrap();
        let stderr = child.stderr.take().unwrap();
-        
+
        let stdout_reader = BufReader::new(stdout);
        let stderr_reader = BufReader::new(stderr);
-        
+
        let mut stdout_lines = stdout_reader.lines();
        let mut stderr_lines = stderr_reader.lines();
-        
+
        let mut stdout_output = Vec::new();
        let mut stderr_output = Vec::new();
-        
+
        // Read output lines as they come
        loop {
            tokio::select! {
@@ -319,14 +410,107 @@ impl CodeExecutor {
                else => break
            }
        }
-        
+
        let status = child.wait().await?;
-        
-        Ok(ExecutionResult {
+
+        let result = ExecutionResult {
            stdout: stdout_output.join("\n"),
            stderr: stderr_output.join("\n"),
            exit_code: status.code().unwrap_or(-1),
            success: status.success(),
-        })
+        };
+
+        debug!("========== execute_bash_streaming_in_dir END ==========");
+        debug!("Exit code: {}", result.exit_code);
+        debug!("Success: {}", result.success);
+        debug!("Stdout length: {}", result.stdout.len());
+        debug!("Stderr length: {}", result.stderr.len());
+        if !result.stderr.is_empty() {
+            debug!("Stderr content: {}", result.stderr);
+        }
+
+        Ok(result)
    }
 }
+
+/// Check if rustup component llvm-tools-preview is installed
+pub fn is_llvm_tools_installed() -> Result<bool> {
+    let output = Command::new("rustup")
+        .args(&["component", "list", "--installed"])
+        .output()?;
+
+    let installed = String::from_utf8_lossy(&output.stdout)
+        .lines()
+        .any(|line| line.trim() == "llvm-tools-preview" || line.starts_with("llvm-tools"));
+
+    Ok(installed)
+}
+
+/// Check if cargo-llvm-cov is installed
+pub fn is_cargo_llvm_cov_installed() -> Result<bool> {
+    let output = Command::new("cargo").args(&["--list"]).output()?;
+
+    let installed = String::from_utf8_lossy(&output.stdout)
+        .lines()
+        .any(|line| line.trim().starts_with("llvm-cov"));
+
+    Ok(installed)
+}
+
+/// Install llvm-tools-preview via rustup
+pub fn install_llvm_tools() -> Result<()> {
+    info!("Installing llvm-tools-preview...");
+    let output = Command::new("rustup")
+        .args(&["component", "add", "llvm-tools-preview"])
+        .output()?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr);
+    }
+
+    info!("✅ llvm-tools-preview installed successfully");
+    Ok(())
+}
+
+/// Install cargo-llvm-cov via cargo install
+pub fn install_cargo_llvm_cov() -> Result<()> {
+    info!("Installing cargo-llvm-cov... (this may take a few minutes)");
+    let output = Command::new("cargo")
+        .args(&["install", "cargo-llvm-cov"])
+        .output()?;
+
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr);
+    }
+
+    info!("✅ cargo-llvm-cov installed successfully");
+    Ok(())
+}
+
+/// Ensure both llvm-tools-preview and cargo-llvm-cov are installed
+/// Returns Ok(true) if tools were already installed, Ok(false) if they were installed by this function
+pub fn ensure_coverage_tools_installed() -> Result<bool> {
+    let mut already_installed = true;
+
+    // Check and install llvm-tools-preview
+    if !is_llvm_tools_installed()? {
+        info!("llvm-tools-preview not found, installing...");
+        install_llvm_tools()?;
+        already_installed = false;
+    } else {
+        info!("✅ llvm-tools-preview is already installed");
+    }
+
+    // Check and install cargo-llvm-cov
+    if !is_cargo_llvm_cov_installed()? {
+        info!("cargo-llvm-cov not found, installing...");
+        install_cargo_llvm_cov()?;
+        already_installed = false;
+    } else {
+        info!("✅ cargo-llvm-cov is already installed");
+    }
+
+    Ok(already_installed)
+}
--- a/crates/g3-planner/Cargo.toml
+++ b/crates/g3-planner/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "g3-planner"
+version = "0.1.0"
+edition = "2021"
+description = "Fast-discovery planner for G3 AI coding agent"
+
+[dependencies]
+g3-providers = { path = "../g3-providers" }
+serde = { workspace = true }
+serde_json = { workspace = true }
+const_format = "0.2"
+anyhow = { workspace = true }
+tokio = { workspace = true }
+chrono = { version = "0.4", features = ["serde"] }
--- a/crates/g3-planner/src/code_explore.rs
+++ b/crates/g3-planner/src/code_explore.rs
@@ -0,0 +1,727 @@
+//! Code exploration module for analyzing codebases
+//!
+//! This module provides functions to explore and analyze codebases
+//! for various programming languages, returning structured reports
+//! about the code structure.
+
+use std::path::Path;
+use std::process::Command;
+
+/// Main entry point for exploring a codebase at the given path.
+/// Detects which languages are present and generates a comprehensive report.
+pub fn explore_codebase(path: &str) -> String {
+    let path = expand_tilde(path);
+    let mut report = String::new();
+    let mut languages_found = Vec::new();
+
+    // Check for each language and add to report if found
+    if has_rust_files(&path) {
+        languages_found.push("Rust".to_string());
+        report.push_str(&explore_rust(&path));
+    }
+    if has_java_files(&path) {
+        languages_found.push("Java".to_string());
+        report.push_str(&explore_java(&path));
+    }
+    if has_kotlin_files(&path) {
+        languages_found.push("Kotlin".to_string());
+        report.push_str(&explore_kotlin(&path));
+    }
+    if has_swift_files(&path) {
+        languages_found.push("Swift".to_string());
+        report.push_str(&explore_swift(&path));
+    }
+    if has_go_files(&path) {
+        languages_found.push("Go".to_string());
+        report.push_str(&explore_go(&path));
+    }
+    if has_python_files(&path) {
+        languages_found.push("Python".to_string());
+        report.push_str(&explore_python(&path));
+    }
+    if has_typescript_files(&path) {
+        languages_found.push("TypeScript".to_string());
+        report.push_str(&explore_typescript(&path));
+    }
+    if has_javascript_files(&path) {
+        languages_found.push("JavaScript".to_string());
+        report.push_str(&explore_javascript(&path));
+    }
+    if has_cpp_files(&path) {
+        languages_found.push("C/C++".to_string());
+        report.push_str(&explore_cpp(&path));
+    }
+    if has_markdown_files(&path) {
+        languages_found.push("Markdown".to_string());
+        report.push_str(&explore_markdown(&path));
+    }
+    if has_yaml_files(&path) {
+        languages_found.push("YAML".to_string());
+        report.push_str(&explore_yaml(&path));
+    }
+    if has_sql_files(&path) {
+        languages_found.push("SQL".to_string());
+        report.push_str(&explore_sql(&path));
+    }
+    if has_ruby_files(&path) {
+        languages_found.push("Ruby".to_string());
+        report.push_str(&explore_ruby(&path));
+    }
+
+    if languages_found.is_empty() {
+        report.push_str("No recognized programming languages found in the codebase.\n");
+    } else {
+        let header = format!(
+            "=== CODEBASE ANALYSIS ===\nLanguages detected: {}\n\n",
+            languages_found.join(", ")
+        );
+        report = header + &report;
+    }
+
+    report
+}
+
+/// Expand tilde to home directory
+fn expand_tilde(path: &str) -> String {
+    if path.starts_with("~/") {
+        if let Some(home) = std::env::var_os("HOME") {
+            return path.replacen("~", &home.to_string_lossy(), 1);
+        }
+    }
+    path.to_string()
+}
+
+/// Run a shell command and return its output
+fn run_command(cmd: &str, working_dir: &str) -> String {
+    let output = Command::new("sh")
+        .arg("-c")
+        .arg(cmd)
+        .current_dir(working_dir)
+        .output();
+
+    match output {
+        Ok(out) => {
+            let stdout = String::from_utf8_lossy(&out.stdout);
+            let stderr = String::from_utf8_lossy(&out.stderr);
+            if !stdout.is_empty() {
+                stdout.to_string()
+            } else if !stderr.is_empty() {
+                format!("(stderr): {}", stderr)
+            } else {
+                String::new()
+            }
+        }
+        Err(e) => format!("Error running command: {}", e),
+    }
+}
+
+/// Check if files with given extension exist
+fn has_files_with_extension(path: &str, extension: &str) -> bool {
+    let cmd = format!(
+        "find . -name '.git' -prune -o -type f -name '*.{}' -print | head -1",
+        extension
+    );
+    !run_command(&cmd, path).trim().is_empty()
+}
+
+// Language detection functions
+fn has_rust_files(path: &str) -> bool {
+    has_files_with_extension(path, "rs") || Path::new(path).join("Cargo.toml").exists()
+}
+
+fn has_java_files(path: &str) -> bool {
+    has_files_with_extension(path, "java")
+}
+
+fn has_kotlin_files(path: &str) -> bool {
+    has_files_with_extension(path, "kt") || has_files_with_extension(path, "kts")
+}
+
+fn has_swift_files(path: &str) -> bool {
+    has_files_with_extension(path, "swift")
+}
+
+fn has_go_files(path: &str) -> bool {
+    has_files_with_extension(path, "go")
+}
+
+fn has_python_files(path: &str) -> bool {
+    has_files_with_extension(path, "py")
+}
+
+fn has_typescript_files(path: &str) -> bool {
+    has_files_with_extension(path, "ts") || has_files_with_extension(path, "tsx")
+}
+
+fn has_javascript_files(path: &str) -> bool {
+    has_files_with_extension(path, "js") || has_files_with_extension(path, "jsx")
+}
+
+fn has_cpp_files(path: &str) -> bool {
+    has_files_with_extension(path, "cpp")
+        || has_files_with_extension(path, "cc")
+        || has_files_with_extension(path, "c")
+        || has_files_with_extension(path, "h")
+        || has_files_with_extension(path, "hpp")
+}
+
+fn has_markdown_files(path: &str) -> bool {
+    has_files_with_extension(path, "md")
+}
+
+fn has_yaml_files(path: &str) -> bool {
+    has_files_with_extension(path, "yaml") || has_files_with_extension(path, "yml")
+}
+
+fn has_sql_files(path: &str) -> bool {
+    has_files_with_extension(path, "sql")
+}
+
+fn has_ruby_files(path: &str) -> bool {
+    has_files_with_extension(path, "rb")
+}
+
+/// Explore Rust codebase
+pub fn explore_rust(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== RUST ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.rs' . 2>/dev/null | grep -v '/target/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Dependencies (Cargo.toml)
+    report.push_str("--- Dependencies (Cargo.toml) ---\n");
+    let cargo = run_command("cat Cargo.toml 2>/dev/null | head -50", path);
+    report.push_str(&cargo);
+    report.push('\n');
+
+    // Data structures
+    report.push_str("--- Data Structures (Structs, Enums, Types) ---\n");
+    let structs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?(struct|enum|type|union) ' . 2>/dev/null | grep -v '/target/' | head -100"#,
+        path,
+    );
+    report.push_str(&structs);
+    report.push('\n');
+
+    // Traits and implementations
+    report.push_str("--- Traits & Implementations ---\n");
+    let traits = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?trait |^impl ' . 2>/dev/null | grep -v '/target/' | head -100"#,
+        path,
+    );
+    report.push_str(&traits);
+    report.push('\n');
+
+    // Public functions
+    report.push_str("--- Public Functions ---\n");
+    let funcs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^pub (async )?fn ' . 2>/dev/null | grep -v '/target/' | head -100"#,
+        path,
+    );
+    report.push_str(&funcs);
+    report.push('\n');
+
+    report
+}
+
+/// Explore Java codebase
+pub fn explore_java(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== JAVA ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.java' . 2>/dev/null | grep -v '/build/' | grep -v '/target/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Build files
+    report.push_str("--- Build Configuration ---\n");
+    let build = run_command(
+        "cat pom.xml 2>/dev/null | head -50 || cat build.gradle 2>/dev/null | head -50",
+        path,
+    );
+    report.push_str(&build);
+    report.push('\n');
+
+    // Classes and interfaces
+    report.push_str("--- Classes & Interfaces ---\n");
+    let classes = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.java' '^(public |private |protected )?(abstract )?(class|interface|enum|record) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
+        path,
+    );
+    report.push_str(&classes);
+    report.push('\n');
+
+    // Public methods
+    report.push_str("--- Public Methods ---\n");
+    let methods = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.java' '^\s+public .+\(' . 2>/dev/null | grep -v '/build/' | head -100"#,
+        path,
+    );
+    report.push_str(&methods);
+    report.push('\n');
+
+    report
+}
+
+/// Explore Kotlin codebase
+pub fn explore_kotlin(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== KOTLIN ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.kt' -g '*.kts' . 2>/dev/null | grep -v '/build/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Build files
+    report.push_str("--- Build Configuration ---\n");
+    let build = run_command(
+        "cat build.gradle.kts 2>/dev/null | head -50 || cat build.gradle 2>/dev/null | head -50",
+        path,
+    );
+    report.push_str(&build);
+    report.push('\n');
+
+    // Classes, objects, interfaces
+    report.push_str("--- Classes, Objects & Interfaces ---\n");
+    let classes = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.kt' '^(data |sealed |open |abstract )?(class|interface|object|enum class) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
+        path,
+    );
+    report.push_str(&classes);
+    report.push('\n');
+
+    // Functions
+    report.push_str("--- Functions ---\n");
+    let funcs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.kt' '^(suspend |private |internal |public )?fun ' . 2>/dev/null | grep -v '/build/' | head -100"#,
+        path,
+    );
+    report.push_str(&funcs);
+    report.push('\n');
+
+    report
+}
+
+/// Explore Swift codebase
+pub fn explore_swift(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== SWIFT ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.swift' . 2>/dev/null | grep -v '/.build/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Package.swift
+    report.push_str("--- Package Configuration ---\n");
+    let pkg = run_command("cat Package.swift 2>/dev/null | head -50", path);
+    report.push_str(&pkg);
+    report.push('\n');
+
+    // Classes, structs, protocols
+    report.push_str("--- Types (Classes, Structs, Protocols, Enums) ---\n");
+    let types = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.swift' '^(public |private |internal |open |final )?(class|struct|protocol|enum|actor) ' . 2>/dev/null | grep -v '/.build/' | head -100"#,
+        path,
+    );
+    report.push_str(&types);
+    report.push('\n');
+
+    // Functions
+    report.push_str("--- Functions ---\n");
+    let funcs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.swift' '^\s*(public |private |internal |open )?func ' . 2>/dev/null | grep -v '/.build/' | head -100"#,
+        path,
+    );
+    report.push_str(&funcs);
+    report.push('\n');
+
+    report
+}
+
+/// Explore Go codebase
+pub fn explore_go(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== GO ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.go' . 2>/dev/null | grep -v '/vendor/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // go.mod
+    report.push_str("--- Module Configuration ---\n");
+    let gomod = run_command("cat go.mod 2>/dev/null | head -50", path);
+    report.push_str(&gomod);
+    report.push('\n');
+
+    // Types (structs, interfaces)
+    report.push_str("--- Types (Structs & Interfaces) ---\n");
+    let types = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.go' '^type .+ (struct|interface)' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
+        path,
+    );
+    report.push_str(&types);
+    report.push('\n');
+
+    // Functions
+    report.push_str("--- Functions ---\n");
+    let funcs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.go' '^func ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
+        path,
+    );
+    report.push_str(&funcs);
+    report.push('\n');
+
+    report
+}
+
+/// Explore Python codebase
+pub fn explore_python(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== PYTHON ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.py' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | grep -v '/.venv/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Requirements/setup
+    report.push_str("--- Dependencies ---\n");
+    let deps = run_command(
+        "cat requirements.txt 2>/dev/null | head -30 || cat pyproject.toml 2>/dev/null | head -50 || cat setup.py 2>/dev/null | head -30",
+        path,
+    );
+    report.push_str(&deps);
+    report.push('\n');
+
+    // Classes
+    report.push_str("--- Classes ---\n");
+    let classes = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.py' '^class ' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | head -100"#,
+        path,
+    );
+    report.push_str(&classes);
+    report.push('\n');
+
+    // Functions
+    report.push_str("--- Functions ---\n");
+    let funcs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.py' '^def |^async def ' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | head -100"#,
+        path,
+    );
+    report.push_str(&funcs);
+    report.push('\n');
+
+    report
+}
+
+/// Explore TypeScript codebase
+pub fn explore_typescript(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== TYPESCRIPT ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.ts' -g '*.tsx' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/dist/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // package.json
+    report.push_str("--- Package Configuration ---\n");
+    let pkg = run_command("cat package.json 2>/dev/null | head -50", path);
+    report.push_str(&pkg);
+    report.push('\n');
+
+    // Types, interfaces, classes
+    report.push_str("--- Types, Interfaces & Classes ---\n");
+    let types = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.ts' -g '*.tsx' '^export (type|interface|class|enum|abstract class) ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
+        path,
+    );
+    report.push_str(&types);
+    report.push('\n');
+
+    // Functions
+    report.push_str("--- Exported Functions ---\n");
+    let funcs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.ts' -g '*.tsx' '^export (async )?function |^export const .+ = (async )?\(' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
+        path,
+    );
+    report.push_str(&funcs);
+    report.push('\n');
+
+    report
+}
+
+/// Explore JavaScript codebase
+pub fn explore_javascript(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== JAVASCRIPT ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.js' -g '*.jsx' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/dist/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // package.json
+    report.push_str("--- Package Configuration ---\n");
+    let pkg = run_command("cat package.json 2>/dev/null | head -50", path);
+    report.push_str(&pkg);
+    report.push('\n');
+
+    // Classes
+    report.push_str("--- Classes ---\n");
+    let classes = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.js' -g '*.jsx' '^(export )?(default )?(class ) ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
+        path,
+    );
+    report.push_str(&classes);
+    report.push('\n');
+
+    // Functions
+    report.push_str("--- Exported Functions ---\n");
+    let funcs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.js' -g '*.jsx' '^(export )?(async )?function |^module\.exports' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
+        path,
+    );
+    report.push_str(&funcs);
+    report.push('\n');
+
+    report
+}
+
+/// Explore C/C++ codebase
+pub fn explore_cpp(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== C/C++ ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.c' -g '*.cpp' -g '*.cc' -g '*.h' -g '*.hpp' . 2>/dev/null | grep -v '/build/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Build files
+    report.push_str("--- Build Configuration ---\n");
+    let build = run_command(
+        "cat CMakeLists.txt 2>/dev/null | head -50 || cat Makefile 2>/dev/null | head -50",
+        path,
+    );
+    report.push_str(&build);
+    report.push('\n');
+
+    // Classes and structs
+    report.push_str("--- Classes & Structs ---\n");
+    let classes = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.cpp' -g '*.cc' -g '*.h' -g '*.hpp' '^(class|struct|enum|union|typedef) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
+        path,
+    );
+    report.push_str(&classes);
+    report.push('\n');
+
+    // Functions (simplified pattern)
+    report.push_str("--- Function Declarations ---\n");
+    let funcs = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.h' -g '*.hpp' '^[a-zA-Z_][a-zA-Z0-9_<>: ]*\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(' . 2>/dev/null | grep -v '/build/' | head -100"#,
+        path,
+    );
+    report.push_str(&funcs);
+    report.push('\n');
+
+    report
+}
+
+/// Explore Markdown documentation
+pub fn explore_markdown(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== MARKDOWN DOCUMENTATION ===\n\n");
+
+    // File structure
+    report.push_str("--- Documentation Files ---\n");
+    let files = run_command(
+        "rg --files -g '*.md' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/vendor/' | sort | head -50",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // README content
+    report.push_str("--- README Overview ---\n");
+    let readme = run_command(
+        "cat README.md 2>/dev/null | head -100 || cat readme.md 2>/dev/null | head -100",
+        path,
+    );
+    report.push_str(&readme);
+    report.push('\n');
+
+    // Headers from all markdown files
+    report.push_str("--- Document Headers ---\n");
+    let headers = run_command(
+        r#"rg --no-heading --line-number --with-filename -g '*.md' '^#{1,3} ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
+        path,
+    );
+    report.push_str(&headers);
+    report.push('\n');
+
+    report
+}
+
+/// Explore YAML configuration files
+pub fn explore_yaml(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== YAML CONFIGURATION ===\n\n");
+
+    // File structure
+    report.push_str("--- YAML Files ---\n");
+    let files = run_command(
+        "rg --files -g '*.yaml' -g '*.yml' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/vendor/' | sort | head -50",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Top-level keys from YAML files
+    report.push_str("--- Top-Level Keys ---\n");
+    let keys = run_command(
+        r#"rg --no-heading --line-number --with-filename -g '*.yaml' -g '*.yml' '^[a-zA-Z_][a-zA-Z0-9_-]*:' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
+        path,
+    );
+    report.push_str(&keys);
+    report.push('\n');
+
+    report
+}
+
+/// Explore SQL files
+pub fn explore_sql(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== SQL ===\n\n");
+
+    // File structure
+    report.push_str("--- SQL Files ---\n");
+    let files = run_command(
+        "rg --files -g '*.sql' . 2>/dev/null | sort | head -50",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Tables
+    report.push_str("--- Table Definitions ---\n");
+    let tables = run_command(
+        r#"rg --no-heading --line-number --with-filename -i -g '*.sql' 'CREATE TABLE' . 2>/dev/null | head -100"#,
+        path,
+    );
+    report.push_str(&tables);
+    report.push('\n');
+
+    // Views and procedures
+    report.push_str("--- Views & Procedures ---\n");
+    let views = run_command(
+        r#"rg --no-heading --line-number --with-filename -i -g '*.sql' 'CREATE (VIEW|PROCEDURE|FUNCTION)' . 2>/dev/null | head -100"#,
+        path,
+    );
+    report.push_str(&views);
+    report.push('\n');
+
+    report
+}
+
+/// Explore Ruby codebase
+pub fn explore_ruby(path: &str) -> String {
+    let mut report = String::new();
+    report.push_str("\n=== RUBY ===\n\n");
+
+    // File structure
+    report.push_str("--- File Structure ---\n");
+    let files = run_command(
+        "rg --files -g '*.rb' . 2>/dev/null | grep -v '/vendor/' | sort | head -100",
+        path,
+    );
+    report.push_str(&files);
+    report.push('\n');
+
+    // Gemfile
+    report.push_str("--- Dependencies (Gemfile) ---\n");
+    let gemfile = run_command("cat Gemfile 2>/dev/null | head -50", path);
+    report.push_str(&gemfile);
+    report.push('\n');
+
+    // Classes and modules
+    report.push_str("--- Classes & Modules ---\n");
+    let classes = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rb' '^(class|module) ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
+        path,
+    );
+    report.push_str(&classes);
+    report.push('\n');
+
+    // Methods
+    report.push_str("--- Methods ---\n");
+    let methods = run_command(
+        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rb' '^\s*def ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
+        path,
+    );
+    report.push_str(&methods);
+    report.push('\n');
+
+    report
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_expand_tilde() {
+        let path = expand_tilde("~/test");
+        assert!(!path.starts_with("~"));
+    }
+
+    #[test]
+    fn test_explore_codebase_returns_string() {
+        // Test with current directory
+        let result = explore_codebase(".");
+        assert!(!result.is_empty());
+    }
+}
--- a/crates/g3-planner/src/lib.rs
+++ b/crates/g3-planner/src/lib.rs
@@ -0,0 +1,329 @@
+//! g3-planner: Fast-discovery planner for G3 AI coding agent
+//!
+//! This crate provides functionality to generate initial discovery tool calls
+//! that are injected into the conversation before the first LLM turn.
+
+mod code_explore;
+pub mod prompts;
+
+pub use code_explore::explore_codebase;
+
+use anyhow::Result;
+use chrono::Local;
+use g3_providers::{CompletionRequest, LLMProvider, Message, MessageRole};
+use prompts::{DISCOVERY_REQUIREMENTS_PROMPT, DISCOVERY_SYSTEM_PROMPT};
+use std::fs::{self, OpenOptions};
+use std::io::Write;
+
+/// Type alias for a status callback function
+pub type StatusCallback = Box<dyn Fn(&str) + Send + Sync>;
+
+/// Generates initial discovery messages for fast codebase exploration.
+///
+/// This function:
+/// 1. Runs explore_codebase to get a codebase report
+/// 2. Sends the report to the LLM with DISCOVERY_SYSTEM_PROMPT
+/// 3. Extracts shell commands from the LLM response
+/// 4. Returns Assistant messages with tool calls for each command
+///
+/// # Arguments
+///
+/// * `codebase_path` - The path to the codebase to explore
+/// * `provider` - An LLM provider to query for exploration commands
+/// * `requirements_text` - Optional requirements text to include in the discovery prompt
+/// * `status_callback` - Optional callback for status updates
+///
+/// # Returns
+///
+/// A `Result<Vec<Message>>` containing Assistant messages with JSON tool call strings.
+pub async fn get_initial_discovery_messages(
+    codebase_path: &str,
+    requirements_text: Option<&str>,
+    provider: &dyn LLMProvider,
+    status_callback: Option<&StatusCallback>,
+) -> Result<Vec<Message>> {
+    // Helper to call status callback if provided
+    let status = |msg: &str| {
+        if let Some(cb) = status_callback {
+            cb(msg);
+        }
+    };
+
+    status("🔍 Starting code discovery...");
+
+    // Step 1: Run explore_codebase to get the codebase report
+    let codebase_report = explore_codebase(codebase_path);
+
+    // Write the codebase report to logs directory
+    write_code_report(&codebase_report)?;
+
+    // Step 2: Build the prompt with the codebase report appended
+    let user_prompt = if let Some(requirements) = requirements_text {
+        format!(
+            "{}\n\n
+            === REQUIREMENTS ===\n\n{}\n\n
+            === CODEBASE REPORT ===\n\n{}",
+            DISCOVERY_REQUIREMENTS_PROMPT, requirements, codebase_report
+        )
+    } else {
+        format!(
+            "{}\n\n=== CODEBASE REPORT ===\n\n{}",
+            DISCOVERY_REQUIREMENTS_PROMPT, codebase_report
+        )
+    };
+
+    // Step 3: Create messages for the LLM
+    let messages = vec![
+        Message::new(MessageRole::System, DISCOVERY_SYSTEM_PROMPT.to_string()),
+        Message::new(MessageRole::User, user_prompt),
+    ];
+
+    // Step 4: Send to LLM
+    let request = CompletionRequest {
+        messages,
+        max_tokens: Some(provider.max_tokens()),
+        temperature: Some(provider.temperature()),
+        stream: false,
+        tools: None,
+        disable_thinking: false,
+    };
+
+    status("🤖 Calling LLM for discovery commands...");
+
+    let response = provider.complete(request).await?;
+
+    // Step 5: Extract shell commands from the response
+    let shell_commands = extract_shell_commands(&response.content);
+
+    status(&format!(
+        "📋 Extracted {} discovery commands",
+        shell_commands.len()
+    ));
+
+    // Write the discovery commands to logs directory
+    write_discovery_commands(&shell_commands)?;
+
+    // Step 6: Format as tool messages
+    let tool_messages = shell_commands
+        .into_iter()
+        .map(|cmd| create_tool_message("shell", &cmd))
+        .collect();
+
+    Ok(tool_messages)
+}
+
+/// Creates an Assistant message with a tool call in g3's JSON format.
+pub fn create_tool_message(tool: &str, command: &str) -> Message {
+    let tool_call = serde_json::json!({
+        "tool": tool,
+        "args": {
+            "command": command
+        }
+    });
+
+    Message::new(MessageRole::Assistant, tool_call.to_string())
+}
+
+/// Extract shell commands from the LLM response.
+/// Looks for {{CODE EXPLORATION COMMANDS}} section and extracts commands from code blocks.
+pub fn extract_shell_commands(response: &str) -> Vec<String> {
+    let mut commands = Vec::new();
+
+    let section_marker = "{{CODE EXPLORATION COMMANDS}}";
+    let section_start = match response.find(section_marker) {
+        Some(pos) => pos + section_marker.len(),
+        None => return commands,
+    };
+
+    let section_content = &response[section_start..];
+    let mut in_code_block = false;
+    let mut current_block = String::new();
+
+    for line in section_content.lines() {
+        let trimmed = line.trim();
+
+        if trimmed.starts_with("```") {
+            if in_code_block {
+                // End of code block - extract commands
+                for cmd_line in current_block.lines() {
+                    let cmd = cmd_line.trim();
+                    if !cmd.is_empty() && !cmd.starts_with('#') {
+                        commands.push(cmd.to_string());
+                    }
+                }
+                current_block.clear();
+            }
+            in_code_block = !in_code_block;
+        } else if in_code_block {
+            current_block.push_str(line);
+            current_block.push('\n');
+        }
+    }
+
+    commands
+}
+
+/// Extract the summary section from the LLM response
+pub fn extract_summary(response: &str) -> Option<String> {
+    let section_marker = "{{SUMMARY BASED ON INITIAL INFO}}";
+    let section_start = match response.find(section_marker) {
+        Some(pos) => pos + section_marker.len(),
+        None => return None,
+    };
+
+    let section_content = &response[section_start..];
+    let section_end = section_content.find("{{").unwrap_or(section_content.len());
+
+    let summary = section_content[..section_end].trim().to_string();
+    if summary.is_empty() {
+        None
+    } else {
+        Some(summary)
+    }
+}
+
+/// Write the codebase report to logs directory
+fn write_code_report(report: &str) -> Result<()> {
+    // Ensure logs directory exists
+    fs::create_dir_all("logs")?;
+
+    // Generate timestamp in same format as tool_calls log
+    let timestamp = Local::now().format("%Y%m%d_%H%M%S").to_string();
+    let filename = format!("logs/code_report_{}.log", timestamp);
+
+    // Write the report to file
+    let mut file = OpenOptions::new()
+        .create(true)
+        .write(true)
+        .truncate(true)
+        .open(&filename)?;
+
+    file.write_all(report.as_bytes())?;
+    file.flush()?;
+
+    Ok(())
+}
+
+/// Write the discovery commands to logs directory
+fn write_discovery_commands(commands: &[String]) -> Result<()> {
+    // Ensure logs directory exists
+    fs::create_dir_all("logs")?;
+
+    // Generate timestamp in same format as tool_calls log
+    let timestamp = Local::now().format("%Y%m%d_%H%M%S").to_string();
+    let filename = format!("logs/discovery_commands_{}.log", timestamp);
+
+    // Write the commands to file
+    let mut file = OpenOptions::new()
+        .create(true)
+        .write(true)
+        .truncate(true)
+        .open(&filename)?;
+
+    // Write header
+    file.write_all(b"# Discovery Commands\n")?;
+    file.write_all(b"# Generated by g3-planner\n\n")?;
+
+    // Write each command on a separate line
+    for cmd in commands {
+        file.write_all(cmd.as_bytes())?;
+        file.write_all(b"\n")?;
+    }
+    file.flush()?;
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_create_tool_message_format() {
+        let msg = create_tool_message("shell", "ls -la");
+
+        assert!(matches!(msg.role, MessageRole::Assistant));
+
+        let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap();
+        assert_eq!(parsed["tool"], "shell");
+        assert_eq!(parsed["args"]["command"], "ls -la");
+    }
+
+    #[test]
+    fn test_extract_shell_commands_basic() {
+        let response = r#"
+Some text here.
+
+{{CODE EXPLORATION COMMANDS}}
+
+```bash
+ls -la
+cat README.md
+rg --files -g '*.rs'
+```
+
+More text.
+"#;
+
+        let commands = extract_shell_commands(response);
+        assert_eq!(commands.len(), 3);
+        assert_eq!(commands[0], "ls -la");
+        assert_eq!(commands[1], "cat README.md");
+        assert_eq!(commands[2], "rg --files -g '*.rs'");
+    }
+
+    #[test]
+    fn test_extract_shell_commands_with_comments() {
+        let response = r#"
+{{CODE EXPLORATION COMMANDS}}
+
+```
+# This is a comment
+ls -la
+# Another comment
+cat file.txt
+```
+"#;
+
+        let commands = extract_shell_commands(response);
+        assert_eq!(commands.len(), 2);
+        assert_eq!(commands[0], "ls -la");
+        assert_eq!(commands[1], "cat file.txt");
+    }
+
+    #[test]
+    fn test_extract_shell_commands_no_section() {
+        let response = "Some response without the expected section.";
+        let commands = extract_shell_commands(response);
+        assert!(commands.is_empty());
+    }
+
+    #[test]
+    fn test_extract_summary() {
+        let response = r#"
+{{SUMMARY BASED ON INITIAL INFO}}
+
+This is a summary of the codebase.
+It has multiple lines.
+
+{{CODE EXPLORATION COMMANDS}}
+
+```
+ls -la
+```
+"#;
+
+        let summary = extract_summary(response);
+        assert!(summary.is_some());
+        let summary_text = summary.unwrap();
+        assert!(summary_text.contains("This is a summary"));
+        assert!(summary_text.contains("multiple lines"));
+    }
+
+    #[test]
+    fn test_extract_summary_no_section() {
+        let response = "Response without summary section.";
+        let summary = extract_summary(response);
+        assert!(summary.is_none());
+    }
+}
--- a/crates/g3-planner/src/prompts.rs
+++ b/crates/g3-planner/src/prompts.rs
@@ -0,0 +1,37 @@
+//! Prompts used for discovery phase
+
+/// System prompt for discovery mode - instructs the LLM to analyze codebase and generate exploration commands
+pub const DISCOVERY_SYSTEM_PROMPT: &str = r#"You are an expert code analyst. Your task is to analyze a codebase structure and generate shell commands to explore it further.
+
+You will receive:
+1. User requirements describing what needs to be implemented
+2. A codebase report showing the structure and key elements of the codebase
+
+Your job is to:
+1. Understand the requirements and identify what parts of the codebase are relevant
+2. Generate shell commands to explore those parts in more detail
+
+IMPORTANT: Do NOT attempt to implement anything. Only generate exploration commands."#;
+
+/// Discovery prompt template - used when we have a codebase report.
+/// The codebase report should be appended after this prompt.
+pub const DISCOVERY_REQUIREMENTS_PROMPT: &str = r#"**CRITICAL**: DO ABSOLUTELY NOT ATTEMPT TO IMPLEMENT THESE REQUIREMENTS AT THIS POINT. ONLY USE THEM TO
+UNDERSTAND WHICH PARTS OF THE CODE YOU MIGHT BE INTERESTED IN, AND WHAT SEARCH/GREP EXPRESSIONS YOU MIGHT WANT TO USE
+TO GET A BETTER UNDERSTANDING OF THE CODEBASE.
+
+Your task is to analyze the codebase overview provided below and generate shell commands to explore it further - in particular, those
+you deem most relevant to the requirements given below.
+
+Your output MUST include:
+1. A summary report.  Use the heading {{SUMMARY BASED ON INITIAL INFO}}.
+   - retain as much information of that as you consider relevant to the requirements, and for making an implementation plan.
+   - Ideally that should not be more than 10000 tokens.
+2. A list of shell commands to explore the code. Use the heading {{CODE EXPLORATION COMMANDS}}.
+   - Try plan ahead for what you need for a deep dive into the code. Make sure the information is sparing.
+   - Carefully consider which commands give you the most relevant information, pick the top 25 commands.
+   - Use tools like `ls`, `rg` (ripgrep), `grep`, `sed`, `cat`, `head`, `tail` etc.
+   - Focus on commands that will help understand the code STRUCTURE without dumping large sections of file.
+   - e.g. for Rust you might try `rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?(struct|enum|type|union)`
+   - Mark the beginning and end of the commands with "```".
+
+DO NOT ADD ANY COMMENTS OR OTHER EXPLANATION IN THE COMMANDS SECTION, JUST INCLUDE THE SHELL COMMANDS."#;
--- a/crates/g3-planner/tests/logging_test.rs
+++ b/crates/g3-planner/tests/logging_test.rs
@@ -0,0 +1,62 @@
+//! Integration tests for logging functionality
+
+use std::fs;
+use std::path::Path;
+
+#[test]
+fn test_log_files_created() {
+    // This test verifies that the logging functions work correctly
+    // by checking that files can be created in the logs directory
+
+    // Clean up any existing test logs
+    let _ = fs::remove_dir_all("logs");
+
+    // Create logs directory
+    fs::create_dir_all("logs").expect("Failed to create logs directory");
+
+    // Verify directory exists
+    assert!(Path::new("logs").exists());
+    assert!(Path::new("logs").is_dir());
+
+    // Test writing a code report
+    let test_report = "Test codebase report\nLine 2\nLine 3";
+    let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();
+    let report_filename = format!("logs/code_report_{}.log", timestamp);
+
+    fs::write(&report_filename, test_report).expect("Failed to write code report");
+    assert!(Path::new(&report_filename).exists());
+
+    let content = fs::read_to_string(&report_filename).expect("Failed to read code report");
+    assert_eq!(content, test_report);
+
+    // Test writing discovery commands
+    let commands_filename = format!("logs/discovery_commands_{}.log", timestamp);
+    let test_commands =
+        "# Discovery Commands\n# Generated by g3-planner\n\nls -la\ncat README.md\n";
+
+    fs::write(&commands_filename, test_commands).expect("Failed to write discovery commands");
+    assert!(Path::new(&commands_filename).exists());
+
+    let content =
+        fs::read_to_string(&commands_filename).expect("Failed to read discovery commands");
+    assert_eq!(content, test_commands);
+
+    // Clean up
+    let _ = fs::remove_file(&report_filename);
+    let _ = fs::remove_file(&commands_filename);
+}
+
+#[test]
+fn test_filename_format() {
+    // Verify the filename format matches the tool_calls log format
+    let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();
+
+    // Check format: YYYYMMDD_HHMMSS
+    assert_eq!(timestamp.len(), 15); // 8 digits + underscore + 6 digits
+    assert!(timestamp.contains('_'));
+
+    let parts: Vec<&str> = timestamp.split('_').collect();
+    assert_eq!(parts.len(), 2);
+    assert_eq!(parts[0].len(), 8); // YYYYMMDD
+    assert_eq!(parts[1].len(), 6); // HHMMSS
+}
--- a/crates/g3-planner/tests/planner_test.rs
+++ b/crates/g3-planner/tests/planner_test.rs
@@ -0,0 +1,103 @@
+//! Integration tests for g3-planner
+
+use g3_planner::{create_tool_message, explore_codebase, extract_shell_commands};
+use g3_providers::MessageRole;
+
+#[test]
+fn test_create_tool_message_format() {
+    let msg = create_tool_message("shell", "ls -la");
+
+    assert!(matches!(msg.role, MessageRole::Assistant));
+
+    let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap();
+    assert_eq!(parsed["tool"], "shell");
+    assert_eq!(parsed["args"]["command"], "ls -la");
+}
+
+#[test]
+fn test_explore_codebase_returns_report() {
+    // Test with current directory (should find Rust files in g3 project)
+    let report = explore_codebase(".");
+
+    // Should return a non-empty report
+    assert!(!report.is_empty(), "Report should not be empty");
+
+    // Should contain the codebase analysis header
+    assert!(
+        report.contains("CODEBASE ANALYSIS") || report.contains("No recognized"),
+        "Report should have analysis header or indicate no languages found"
+    );
+}
+
+#[test]
+fn test_extract_shell_commands_basic() {
+    let response = r#"
+Some text here.
+
+{{CODE EXPLORATION COMMANDS}}
+
+```bash
+ls -la
+cat README.md
+rg --files -g '*.rs'
+```
+
+More text.
+"#;
+
+    let commands = extract_shell_commands(response);
+    assert_eq!(commands.len(), 3);
+    assert_eq!(commands[0], "ls -la");
+    assert_eq!(commands[1], "cat README.md");
+    assert_eq!(commands[2], "rg --files -g '*.rs'");
+}
+
+#[test]
+fn test_extract_shell_commands_with_comments() {
+    let response = r#"
+{{CODE EXPLORATION COMMANDS}}
+
+```
+# This is a comment
+ls -la
+# Another comment
+cat file.txt
+```
+"#;
+
+    let commands = extract_shell_commands(response);
+    assert_eq!(commands.len(), 2);
+    assert_eq!(commands[0], "ls -la");
+    assert_eq!(commands[1], "cat file.txt");
+}
+
+#[test]
+fn test_extract_shell_commands_no_section() {
+    let response = "Some response without the expected section.";
+    let commands = extract_shell_commands(response);
+    assert!(commands.is_empty());
+}
+
+#[test]
+fn test_extract_shell_commands_multiple_code_blocks() {
+    let response = r#"
+{{CODE EXPLORATION COMMANDS}}
+
+```bash
+ls -la
+```
+
+Some explanation text.
+
+```
+cat README.md
+head -50 src/main.rs
+```
+"#;
+
+    let commands = extract_shell_commands(response);
+    assert_eq!(commands.len(), 3);
+    assert_eq!(commands[0], "ls -la");
+    assert_eq!(commands[1], "cat README.md");
+    assert_eq!(commands[2], "head -50 src/main.rs");
+}
--- a/crates/g3-providers/Cargo.toml
+++ b/crates/g3-providers/Cargo.toml
@@ -29,3 +29,4 @@ tokio-util = "0.7"
 dirs = "5.0"
 llama_cpp = { version = "0.3.2", features = ["metal"] }
 shellexpand = "3.1"
+rand = "0.8"
--- a/crates/g3-providers/src/anthropic.rs
+++ b/crates/g3-providers/src/anthropic.rs
@@ -21,27 +21,25 @@
 //!     // Create the provider with your API key
 //!     let provider = AnthropicProvider::new(
 //!         "your-api-key".to_string(),
-//!         Some("claude-3-5-sonnet-20241022".to_string()), // Optional: defaults to claude-3-5-sonnet-20241022
-//!         Some(4096),  // Optional: max tokens
-//!         Some(0.1),   // Optional: temperature
+//!         Some("claude-3-5-sonnet-20241022".to_string()),
+//!         Some(4096),
+//!         Some(0.1),
+//!         None, // cache_config
+//!         None, // enable_1m_context
+//!         None, // thinking_budget_tokens
 //!     )?;
 //!
 //!     // Create a completion request
 //!     let request = CompletionRequest {
 //!         messages: vec![
-//!             Message {
-//!                 role: MessageRole::System,
-//!                 content: "You are a helpful assistant.".to_string(),
-//!             },
-//!             Message {
-//!                 role: MessageRole::User,
-//!                 content: "Hello! How are you?".to_string(),
-//!             },
+//!             Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
+//!             Message::new(MessageRole::User, "Hello! How are you?".to_string()),
 //!         ],
 //!         max_tokens: Some(1000),
 //!         temperature: Some(0.7),
 //!         stream: false,
 //!         tools: None,
+//!         disable_thinking: false,
 //!     };
 //!
 //!     // Get a completion
@@ -62,20 +60,23 @@
 //! async fn main() -> anyhow::Result<()> {
 //!     let provider = AnthropicProvider::new(
 //!         "your-api-key".to_string(),
-//!         None, None, None,
+//!         None,
+//!         None,
+//!         None,
+//!         None, // cache_config
+//!         None, // enable_1m_context
+//!         None, // thinking_budget_tokens
 //!     )?;
 //!
 //!     let request = CompletionRequest {
 //!         messages: vec![
-//!             Message {
-//!                 role: MessageRole::User,
-//!                 content: "Write a short story about a robot.".to_string(),
-//!             },
+//!             Message::new(MessageRole::User, "Write a short story about a robot.".to_string()),
 //!         ],
 //!         max_tokens: Some(1000),
 //!         temperature: Some(0.7),
 //!         stream: true,
 //!         tools: None,
+//!         disable_thinking: false,
 //!     };
 //!
 //!     let mut stream = provider.stream(request).await?;
@@ -106,7 +107,7 @@ use serde::{Deserialize, Serialize};
 use std::time::Duration;
 use tokio::sync::mpsc;
 use tokio_stream::wrappers::ReceiverStream;
-use tracing::{debug, error, warn};
+use tracing::{debug, error};

 use crate::{
    CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
@@ -123,6 +124,9 @@ pub struct AnthropicProvider {
    model: String,
    max_tokens: u32,
    temperature: f32,
+    cache_config: Option<String>,
+    enable_1m_context: bool,
+    thinking_budget_tokens: Option<u32>,
 }

 impl AnthropicProvider {
@@ -131,6 +135,9 @@ impl AnthropicProvider {
        model: Option<String>,
        max_tokens: Option<u32>,
        temperature: Option<f32>,
+        cache_config: Option<String>,
+        enable_1m_context: Option<bool>,
+        thinking_budget_tokens: Option<u32>,
    ) -> Result<Self> {
        let client = Client::builder()
            .timeout(Duration::from_secs(300))
@@ -138,7 +145,7 @@ impl AnthropicProvider {
            .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;

        let model = model.unwrap_or_else(|| "claude-3-5-sonnet-20241022".to_string());
-        
+
        debug!("Initialized Anthropic provider with model: {}", model);

        Ok(Self {
@@ -147,6 +154,9 @@ impl AnthropicProvider {
            model,
            max_tokens: max_tokens.unwrap_or(4096),
            temperature: temperature.unwrap_or(0.1),
+            cache_config,
+            enable_1m_context: enable_1m_context.unwrap_or(false),
+            thinking_budget_tokens,
        })
    }

@@ -156,9 +166,12 @@ impl AnthropicProvider {
            .post(ANTHROPIC_API_URL)
            .header("x-api-key", &self.api_key)
            .header("anthropic-version", ANTHROPIC_VERSION)
-            // Anthropic beta 1m context window. Enable if needed. It costs extra, so check first.
-            // .header("anthropic-beta", "context-1m-2025-08-07")
            .header("content-type", "application/json");
+
+        if self.enable_1m_context {
+            builder = builder.header("anthropic-beta", "context-1m-2025-08-07");
+        }
+
        if streaming {
            builder = builder.header("accept", "text/event-stream");
        }
@@ -166,6 +179,11 @@ impl AnthropicProvider {
        builder
    }

+    fn convert_cache_control(cache_control: &crate::CacheControl) -> crate::CacheControl {
+        // Anthropic uses the same format, so just clone it
+        cache_control.clone()
+    }
+
    fn convert_tools(&self, tools: &[Tool]) -> Vec<AnthropicTool> {
        tools
            .iter()
@@ -177,12 +195,17 @@ impl AnthropicProvider {
                };

                // Extract properties and required fields from the input schema
-                if let Ok(schema_obj) = serde_json::from_value::<serde_json::Map<String, serde_json::Value>>(tool.input_schema.clone()) {
+                if let Ok(schema_obj) = serde_json::from_value::<
+                    serde_json::Map<String, serde_json::Value>,
+                >(tool.input_schema.clone())
+                {
                    if let Some(properties) = schema_obj.get("properties") {
                        schema.properties = properties.clone();
                    }
                    if let Some(required) = schema_obj.get("required") {
-                        if let Ok(required_vec) = serde_json::from_value::<Vec<String>>(required.clone()) {
+                        if let Ok(required_vec) =
+                            serde_json::from_value::<Vec<String>>(required.clone())
+                        {
                            schema.required = Some(required_vec);
                        }
                    }
@@ -197,23 +220,32 @@ impl AnthropicProvider {
            .collect()
    }

-    fn convert_messages(&self, messages: &[Message]) -> Result<(Option<String>, Vec<AnthropicMessage>)> {
+    fn convert_messages(
+        &self,
+        messages: &[Message],
+    ) -> Result<(Option<String>, Vec<AnthropicMessage>)> {
        let mut system_message = None;
        let mut anthropic_messages = Vec::new();

        for message in messages {
            match message.role {
                MessageRole::System => {
-                    if system_message.is_some() {
-                        warn!("Multiple system messages found, using the last one");
+                    if let Some(existing) = system_message {
+                        // Concatenate system messages instead of replacing
+                        system_message = Some(format!("{}\n\n{}", existing, message.content));
+                    } else {
+                        system_message = Some(message.content.clone());
                    }
-                    system_message = Some(message.content.clone());
                }
                MessageRole::User => {
                    anthropic_messages.push(AnthropicMessage {
                        role: "user".to_string(),
                        content: vec![AnthropicContent::Text {
                            text: message.content.clone(),
+                            cache_control: message
+                                .cache_control
+                                .as_ref()
+                                .map(Self::convert_cache_control),
                        }],
                    });
                }
@@ -222,6 +254,10 @@ impl AnthropicProvider {
                        role: "assistant".to_string(),
                        content: vec![AnthropicContent::Text {
                            text: message.content.clone(),
+                            cache_control: message
+                                .cache_control
+                                .as_ref()
+                                .map(Self::convert_cache_control),
                        }],
                    });
                }
@@ -238,16 +274,46 @@ impl AnthropicProvider {
        streaming: bool,
        max_tokens: u32,
        temperature: f32,
+        disable_thinking: bool,
    ) -> Result<AnthropicRequest> {
        let (system, anthropic_messages) = self.convert_messages(messages)?;

        if anthropic_messages.is_empty() {
-            return Err(anyhow!("At least one user or assistant message is required"));
+            return Err(anyhow!(
+                "At least one user or assistant message is required"
+            ));
        }

        // Convert tools if provided
        let anthropic_tools = tools.map(|t| self.convert_tools(t));

+        // Add thinking configuration if budget_tokens is set AND max_tokens is sufficient AND not explicitly disabled
+        // Anthropic requires: max_tokens > thinking.budget_tokens
+        // We add 1024 as minimum buffer for actual response content
+        tracing::debug!("create_request_body called: max_tokens={}, disable_thinking={}, thinking_budget_tokens={:?}", max_tokens, disable_thinking, self.thinking_budget_tokens);
+
+        let thinking = if disable_thinking {
+            tracing::info!(
+                "Thinking mode explicitly disabled for this request (max_tokens={})",
+                max_tokens
+            );
+            None
+        } else {
+            self.thinking_budget_tokens.and_then(|budget| {
+            let min_required = budget + 1024;
+            if max_tokens > min_required {
+                Some(ThinkingConfig::enabled(budget))
+            } else {
+                tracing::warn!(
+                    "Disabling thinking mode: max_tokens ({}) is not greater than thinking.budget_tokens ({}) + 1024 buffer. \
+                     Required: max_tokens > {}",
+                    max_tokens, budget, min_required
+                );
+                None
+            }
+            })
+        };
+
        let request = AnthropicRequest {
            model: self.model.clone(),
            max_tokens,
@@ -256,6 +322,7 @@ impl AnthropicProvider {
            system,
            tools: anthropic_tools,
            stream: streaming,
+            thinking,
        };

        // Ensure the conversation starts with a user message
@@ -277,13 +344,13 @@ impl AnthropicProvider {
        let mut accumulated_usage: Option<Usage> = None;
        let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
        let mut message_stopped = false; // Track if we've received message_stop
-        
+
        while let Some(chunk_result) = stream.next().await {
            match chunk_result {
                Ok(chunk) => {
                    // Append new bytes to our buffer
                    byte_buffer.extend_from_slice(&chunk);
-                    
+
                    // Try to convert the entire buffer to UTF-8
                    let chunk_str = match std::str::from_utf8(&byte_buffer) {
                        Ok(s) => {
@@ -297,7 +364,8 @@ impl AnthropicProvider {
                            let valid_up_to = e.valid_up_to();
                            if valid_up_to > 0 {
                                // We have some valid UTF-8, extract it and keep the rest for next iteration
-                                let valid_bytes = byte_buffer.drain(..valid_up_to).collect::<Vec<_>>();
+                                let valid_bytes =
+                                    byte_buffer.drain(..valid_up_to).collect::<Vec<_>>();
                                std::str::from_utf8(&valid_bytes).unwrap().to_string()
                            } else {
                                // No valid UTF-8 at all, skip this chunk and continue
@@ -331,7 +399,11 @@ impl AnthropicProvider {
                                    content: String::new(),
                                    finished: true,
                                    usage: accumulated_usage.clone(),
-                                    tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
+                                    tool_calls: if current_tool_calls.is_empty() {
+                                        None
+                                    } else {
+                                        Some(current_tool_calls.clone())
+                                    },
                                };
                                if tx.send(Ok(final_chunk)).await.is_err() {
                                    debug!("Receiver dropped, stopping stream");
@@ -343,7 +415,10 @@ impl AnthropicProvider {

                            match serde_json::from_str::<AnthropicStreamEvent>(data) {
                                Ok(event) => {
-                                    debug!("Parsed event type: {}, event: {:?}", event.event_type, event);
+                                    debug!(
+                                        "Parsed event type: {}, event: {:?}",
+                                        event.event_type, event
+                                    );
                                    match event.event_type.as_str() {
                                        "message_start" => {
                                            // Extract usage data from message_start event
@@ -352,19 +427,30 @@ impl AnthropicProvider {
                                                    accumulated_usage = Some(Usage {
                                                        prompt_tokens: usage.input_tokens,
                                                        completion_tokens: usage.output_tokens,
-                                                        total_tokens: usage.input_tokens + usage.output_tokens,
+                                                        total_tokens: usage.input_tokens
+                                                            + usage.output_tokens,
                                                    });
-                                                    debug!("Captured usage from message_start: {:?}", accumulated_usage);
+                                                    debug!(
+                                                        "Captured usage from message_start: {:?}",
+                                                        accumulated_usage
+                                                    );
                                                }
                                            }
                                        }
                                        "content_block_start" => {
-                                            debug!("Received content_block_start event: {:?}", event);
+                                            debug!(
+                                                "Received content_block_start event: {:?}",
+                                                event
+                                            );
                                            if let Some(content_block) = event.content_block {
                                                match content_block {
-                                                    AnthropicContent::ToolUse { id, name, input } => {
+                                                    AnthropicContent::ToolUse {
+                                                        id,
+                                                        name,
+                                                        input,
+                                                    } => {
                                                        debug!("Found tool use in content_block_start: id={}, name={}, input={:?}", id, name, input);
-                                                        
+
                                                        // For native tool calls, create the tool call immediately if we have complete args
                                                        // If args are empty, we'll wait for partial_json to accumulate them
                                                        let tool_call = ToolCall {
@@ -372,9 +458,14 @@ impl AnthropicProvider {
                                                            tool: name.clone(),
                                                            args: input.clone(),
                                                        };
-                                                        
+
                                                        // Check if we already have complete arguments
-                                                        if !input.is_null() && input != serde_json::Value::Object(serde_json::Map::new()) {
+                                                        if !input.is_null()
+                                                            && input
+                                                                != serde_json::Value::Object(
+                                                                    serde_json::Map::new(),
+                                                                )
+                                                        {
                                                            // We have complete arguments, send the tool call immediately
                                                            debug!("Tool call has complete args, sending immediately: {:?}", tool_call);
                                                            let chunk = CompletionChunk {
@@ -395,7 +486,10 @@ impl AnthropicProvider {
                                                        }
                                                    }
                                                    _ => {
-                                                        debug!("Non-tool content block: {:?}", content_block);
+                                                        debug!(
+                                                            "Non-tool content block: {:?}",
+                                                            content_block
+                                                        );
                                                    }
                                                }
                                            }
@@ -403,7 +497,11 @@ impl AnthropicProvider {
                                        "content_block_delta" => {
                                            if let Some(delta) = event.delta {
                                                if let Some(text) = delta.text {
-                                                    debug!("Sending text chunk of length {}: '{}'", text.len(), text);
+                                                    debug!(
+                                                        "Sending text chunk of length {}: '{}'",
+                                                        text.len(),
+                                                        text
+                                                    );
                                                    let chunk = CompletionChunk {
                                                        content: text,
                                                        finished: false,
@@ -417,31 +515,51 @@ impl AnthropicProvider {
                                                }
                                                // Handle partial JSON for tool calls
                                                if let Some(partial_json) = delta.partial_json {
-                                                    debug!("Received partial JSON: {}", partial_json);
+                                                    debug!(
+                                                        "Received partial JSON: {}",
+                                                        partial_json
+                                                    );
                                                    partial_tool_json.push_str(&partial_json);
-                                                    debug!("Accumulated tool JSON: {}", partial_tool_json);
+                                                    debug!(
+                                                        "Accumulated tool JSON: {}",
+                                                        partial_tool_json
+                                                    );
                                                }
                                            }
                                        }
                                        "content_block_stop" => {
                                            // Tool call block is complete - now parse the accumulated JSON
-                                            if !current_tool_calls.is_empty() && !partial_tool_json.is_empty() {
-                                                debug!("Parsing complete tool JSON: {}", partial_tool_json);
-                                                
+                                            if !current_tool_calls.is_empty()
+                                                && !partial_tool_json.is_empty()
+                                            {
+                                                debug!(
+                                                    "Parsing complete tool JSON: {}",
+                                                    partial_tool_json
+                                                );
+
                                                // Parse the accumulated JSON and update the last tool call
-                                                if let Ok(parsed_args) = serde_json::from_str::<serde_json::Value>(&partial_tool_json) {
-                                                    if let Some(last_tool) = current_tool_calls.last_mut() {
+                                                if let Ok(parsed_args) =
+                                                    serde_json::from_str::<serde_json::Value>(
+                                                        &partial_tool_json,
+                                                    )
+                                                {
+                                                    if let Some(last_tool) =
+                                                        current_tool_calls.last_mut()
+                                                    {
                                                        last_tool.args = parsed_args;
                                                        debug!("Updated tool call with complete args: {:?}", last_tool);
                                                    }
                                                } else {
-                                                    debug!("Failed to parse accumulated JSON: {}", partial_tool_json);
+                                                    debug!(
+                                                        "Failed to parse accumulated JSON: {}",
+                                                        partial_tool_json
+                                                    );
                                                }
-                                                
+
                                                // Clear the accumulator
                                                partial_tool_json.clear();
                                            }
-                                            
+
                                            // Send the complete tool call
                                            if !current_tool_calls.is_empty() {
                                                let chunk = CompletionChunk {
@@ -463,7 +581,11 @@ impl AnthropicProvider {
                                                content: String::new(),
                                                finished: true,
                                                usage: accumulated_usage.clone(),
-                                                tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
+                                                tool_calls: if current_tool_calls.is_empty() {
+                                                    None
+                                                } else {
+                                                    Some(current_tool_calls.clone())
+                                                },
                                            };
                                            if tx.send(Ok(final_chunk)).await.is_err() {
                                                debug!("Receiver dropped, stopping stream");
@@ -475,7 +597,10 @@ impl AnthropicProvider {
                                            if let Some(error) = event.error {
                                                error!("Anthropic API error: {:?}", error);
                                                let _ = tx
-                                                    .send(Err(anyhow!("Anthropic API error: {:?}", error)))
+                                                    .send(Err(anyhow!(
+                                                        "Anthropic API error: {:?}",
+                                                        error
+                                                    )))
                                                    .await;
                                                break; // Break to let stream exhaust naturally
                                            }
@@ -509,7 +634,11 @@ impl AnthropicProvider {
            content: String::new(),
            finished: true,
            usage: accumulated_usage.clone(),
-            tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls) },
+            tool_calls: if current_tool_calls.is_empty() {
+                None
+            } else {
+                Some(current_tool_calls)
+            },
        };
        let _ = tx.send(Ok(final_chunk)).await;
        accumulated_usage
@@ -528,15 +657,18 @@ impl LLMProvider for AnthropicProvider {
        let temperature = request.temperature.unwrap_or(self.temperature);

        let request_body = self.create_request_body(
-            &request.messages, 
-            request.tools.as_deref(), 
-            false, 
-            max_tokens, 
-            temperature
+            &request.messages,
+            request.tools.as_deref(),
+            false,
+            max_tokens,
+            temperature,
+            request.disable_thinking,
        )?;

-        debug!("Sending request to Anthropic API: model={}, max_tokens={}, temperature={}", 
-               request_body.model, request_body.max_tokens, request_body.temperature);
+        debug!(
+            "Sending request to Anthropic API: model={}, max_tokens={}, temperature={}",
+            request_body.model, request_body.max_tokens, request_body.temperature
+        );

        let response = self
            .create_request_builder(false)
@@ -564,7 +696,7 @@ impl LLMProvider for AnthropicProvider {
            .content
            .iter()
            .filter_map(|c| match c {
-                AnthropicContent::Text { text } => Some(text.as_str()),
+                AnthropicContent::Text { text, .. } => Some(text.as_str()),
                _ => None,
            })
            .collect::<Vec<_>>()
@@ -573,7 +705,8 @@ impl LLMProvider for AnthropicProvider {
        let usage = Usage {
            prompt_tokens: anthropic_response.usage.input_tokens,
            completion_tokens: anthropic_response.usage.output_tokens,
-            total_tokens: anthropic_response.usage.input_tokens + anthropic_response.usage.output_tokens,
+            total_tokens: anthropic_response.usage.input_tokens
+                + anthropic_response.usage.output_tokens,
        };

        debug!(
@@ -598,18 +731,25 @@ impl LLMProvider for AnthropicProvider {
        let temperature = request.temperature.unwrap_or(self.temperature);

        let request_body = self.create_request_body(
-            &request.messages, 
-            request.tools.as_deref(), 
-            true, 
-            max_tokens, 
-            temperature
+            &request.messages,
+            request.tools.as_deref(),
+            true,
+            max_tokens,
+            temperature,
+            request.disable_thinking,
        )?;

-        debug!("Sending streaming request to Anthropic API: model={}, max_tokens={}, temperature={}", 
-               request_body.model, request_body.max_tokens, request_body.temperature);
-        
+        debug!(
+            "Sending streaming request to Anthropic API: model={}, max_tokens={}, temperature={}",
+            request_body.model, request_body.max_tokens, request_body.temperature
+        );
+
        // Debug: Log the full request body
-        debug!("Full request body: {}", serde_json::to_string_pretty(&request_body).unwrap_or_else(|_| "Failed to serialize".to_string()));
+        debug!(
+            "Full request body: {}",
+            serde_json::to_string_pretty(&request_body)
+                .unwrap_or_else(|_| "Failed to serialize".to_string())
+        );

        let response = self
            .create_request_builder(true)
@@ -658,10 +798,36 @@ impl LLMProvider for AnthropicProvider {
        // Claude models support native tool calling
        true
    }
+
+    fn supports_cache_control(&self) -> bool {
+        // Anthropic supports cache control
+        true
+    }
+
+    fn max_tokens(&self) -> u32 {
+        self.max_tokens
+    }
+
+    fn temperature(&self) -> f32 {
+        self.temperature
+    }
 }

 // Anthropic API request/response structures

+#[derive(Debug, Serialize)]
+struct ThinkingConfig {
+    #[serde(rename = "type")]
+    thinking_type: String,
+    budget_tokens: u32,
+}
+
+impl ThinkingConfig {
+    fn enabled(budget_tokens: u32) -> Self {
+        Self { thinking_type: "enabled".to_string(), budget_tokens }
+    }
+}
+
 #[derive(Debug, Serialize)]
 struct AnthropicRequest {
    model: String,
@@ -673,6 +839,8 @@ struct AnthropicRequest {
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<AnthropicTool>>,
    stream: bool,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    thinking: Option<ThinkingConfig>,
 }

 #[derive(Debug, Serialize)]
@@ -701,7 +869,17 @@ struct AnthropicMessage {
 #[serde(tag = "type")]
 enum AnthropicContent {
    #[serde(rename = "text")]
-    Text { text: String },
+    Text {
+        text: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        cache_control: Option<crate::CacheControl>,
+    },
+    #[serde(rename = "thinking")]
+    Thinking {
+        thinking: String,
+        #[serde(default)]
+        signature: Option<String>,
+    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
@@ -766,26 +944,16 @@ mod tests {

    #[test]
    fn test_message_conversion() {
-        let provider = AnthropicProvider::new(
-            "test-key".to_string(),
-            None,
-            None,
-            None,
-        ).unwrap();
+        let provider =
+            AnthropicProvider::new("test-key".to_string(), None, None, None, None, None, None).unwrap();

        let messages = vec![
-            Message {
-                role: MessageRole::System,
-                content: "You are a helpful assistant.".to_string(),
-            },
-            Message {
-                role: MessageRole::User,
-                content: "Hello!".to_string(),
-            },
-            Message {
-                role: MessageRole::Assistant,
-                content: "Hi there!".to_string(),
-            },
+            Message::new(
+                MessageRole::System,
+                "You are a helpful assistant.".to_string(),
+            ),
+            Message::new(MessageRole::User, "Hello!".to_string()),
+            Message::new(MessageRole::Assistant, "Hi there!".to_string()),
        ];

        let (system, anthropic_messages) = provider.convert_messages(&messages).unwrap();
@@ -803,17 +971,16 @@ mod tests {
            Some("claude-3-haiku-20240307".to_string()),
            Some(1000),
            Some(0.5),
-        ).unwrap();
+            None,
+            None,
+            None,
+        )
+        .unwrap();

-        let messages = vec![
-            Message {
-                role: MessageRole::User,
-                content: "Test message".to_string(),
-            },
-        ];
+        let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];

        let request_body = provider
-            .create_request_body(&messages, None, false, 1000, 0.5)
+            .create_request_body(&messages, None, false, 1000, 0.5, false)
            .unwrap();

        assert_eq!(request_body.model, "claude-3-haiku-20240307");
@@ -826,29 +993,23 @@ mod tests {

    #[test]
    fn test_tool_conversion() {
-        let provider = AnthropicProvider::new(
-            "test-key".to_string(),
-            None,
-            None,
-            None,
-        ).unwrap();
+        let provider =
+            AnthropicProvider::new("test-key".to_string(), None, None, None, None, None, None).unwrap();

-        let tools = vec![
-            Tool {
-                name: "get_weather".to_string(),
-                description: "Get the current weather".to_string(),
-                input_schema: serde_json::json!({
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "The city and state"
-                        }
-                    },
-                    "required": ["location"]
-                }),
-            },
-        ];
+        let tools = vec![Tool {
+            name: "get_weather".to_string(),
+            description: "Get the current weather".to_string(),
+            input_schema: serde_json::json!({
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state"
+                    }
+                },
+                "required": ["location"]
+            }),
+        }];

        let anthropic_tools = provider.convert_tools(&tools);

@@ -857,6 +1018,165 @@ mod tests {
        assert_eq!(anthropic_tools[0].description, "Get the current weather");
        assert_eq!(anthropic_tools[0].input_schema.schema_type, "object");
        assert!(anthropic_tools[0].input_schema.required.is_some());
-        assert_eq!(anthropic_tools[0].input_schema.required.as_ref().unwrap()[0], "location");
+        assert_eq!(
+            anthropic_tools[0].input_schema.required.as_ref().unwrap()[0],
+            "location"
+        );
+    }
+
+    #[test]
+    fn test_cache_control_serialization() {
+        let provider =
+            AnthropicProvider::new("test-key".to_string(), None, None, None, None, None, None).unwrap();
+
+        // Test message WITHOUT cache_control
+        let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
+        let (_, anthropic_messages_without) = provider.convert_messages(&messages_without).unwrap();
+        let json_without = serde_json::to_string(&anthropic_messages_without).unwrap();
+
+        println!("Anthropic JSON without cache_control: {}", json_without);
+        // Check if cache_control appears in the JSON
+        if json_without.contains("cache_control") {
+            println!("WARNING: JSON contains 'cache_control' field when not configured!");
+            assert!(
+                !json_without.contains("\"cache_control\":null"),
+                "JSON should not contain 'cache_control: null'"
+            );
+        }
+
+        // Test message WITH cache_control
+        let messages_with = vec![Message::with_cache_control(
+            MessageRole::User,
+            "Hello".to_string(),
+            crate::CacheControl::ephemeral(),
+        )];
+        let (_, anthropic_messages_with) = provider.convert_messages(&messages_with).unwrap();
+        let json_with = serde_json::to_string(&anthropic_messages_with).unwrap();
+
+        println!("Anthropic JSON with cache_control: {}", json_with);
+        assert!(
+            json_with.contains("cache_control"),
+            "JSON should contain 'cache_control' field when configured"
+        );
+        assert!(
+            json_with.contains("ephemeral"),
+            "JSON should contain 'ephemeral' type"
+        );
+
+        // The key assertion: when cache_control is None, it should not appear in JSON
+        assert!(
+            !json_without.contains("cache_control") || !json_without.contains("null"),
+            "JSON should not contain 'cache_control' field or null values when not configured"
+        );
+    }
+
+    #[test]
+    fn test_thinking_parameter_serialization() {
+        // Test WITHOUT thinking parameter
+        let provider_without = AnthropicProvider::new(
+            "test-key".to_string(),
+            Some("claude-sonnet-4-5".to_string()),
+            Some(1000),
+            Some(0.5),
+            None,
+            None,
+            None, // No thinking budget
+        )
+        .unwrap();
+
+        let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
+        let request_without = provider_without
+            .create_request_body(&messages, None, false, 1000, 0.5, false)
+            .unwrap();
+        let json_without = serde_json::to_string(&request_without).unwrap();
+        assert!(!json_without.contains("thinking"), "JSON should not contain 'thinking' field when not configured");
+
+        // Test WITH thinking parameter - max_tokens must be > budget_tokens + 1024
+        // Using budget=10000 requires max_tokens > 11024
+        let provider_with = AnthropicProvider::new(
+            "test-key".to_string(),
+            Some("claude-sonnet-4-5".to_string()),
+            Some(20000),  // Sufficient for thinking budget
+            Some(0.5),
+            None,
+            None,
+            Some(10000), // With thinking budget
+        )
+        .unwrap();
+
+        let request_with = provider_with
+            .create_request_body(&messages, None, false, 20000, 0.5, false)
+            .unwrap();
+        let json_with = serde_json::to_string(&request_with).unwrap();
+        assert!(json_with.contains("thinking"), "JSON should contain 'thinking' field when configured");
+        assert!(json_with.contains("\"type\":\"enabled\""), "JSON should contain type: enabled");
+        assert!(json_with.contains("\"budget_tokens\":10000"), "JSON should contain budget_tokens: 10000");
+
+        // Test WITH thinking parameter but INSUFFICIENT max_tokens - thinking should be disabled
+        let request_insufficient = provider_with
+            .create_request_body(&messages, None, false, 5000, 0.5, false)  // Less than budget + 1024
+            .unwrap();
+        let json_insufficient = serde_json::to_string(&request_insufficient).unwrap();
+        assert!(!json_insufficient.contains("thinking"), "JSON should NOT contain 'thinking' field when max_tokens is insufficient");
+    }
+
+    #[test]
+    fn test_disable_thinking_flag() {
+        // Test that disable_thinking=true prevents thinking even with sufficient max_tokens
+        let provider = AnthropicProvider::new(
+            "test-key".to_string(),
+            Some("claude-sonnet-4-5".to_string()),
+            Some(20000),
+            Some(0.5),
+            None,
+            None,
+            Some(10000), // With thinking budget
+        )
+        .unwrap();
+
+        let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
+        
+        // With disable_thinking=false, thinking should be enabled (max_tokens is sufficient)
+        let request_with_thinking = provider
+            .create_request_body(&messages, None, false, 20000, 0.5, false)
+            .unwrap();
+        let json_with = serde_json::to_string(&request_with_thinking).unwrap();
+        assert!(json_with.contains("thinking"), "JSON should contain 'thinking' field when not disabled");
+
+        // With disable_thinking=true, thinking should be disabled even with sufficient max_tokens
+        let request_without_thinking = provider
+            .create_request_body(&messages, None, false, 20000, 0.5, true)
+            .unwrap();
+        let json_without = serde_json::to_string(&request_without_thinking).unwrap();
+        assert!(!json_without.contains("thinking"), "JSON should NOT contain 'thinking' field when explicitly disabled");
+    }
+
+    #[test]
+    fn test_thinking_content_block_deserialization() {
+        // Test that we can deserialize a response containing a "thinking" content block
+        // This is what Anthropic returns when extended thinking is enabled
+        let json_response = r#"{
+            "content": [
+                {"type": "thinking", "thinking": "Let me analyze this...", "signature": "abc123"},
+                {"type": "text", "text": "Here is my response."}
+            ],
+            "model": "claude-sonnet-4-5",
+            "usage": {"input_tokens": 100, "output_tokens": 50}
+        }"#;
+
+        let response: AnthropicResponse = serde_json::from_str(json_response)
+            .expect("Should be able to deserialize response with thinking block");
+        
+        assert_eq!(response.content.len(), 2);
+        assert_eq!(response.model, "claude-sonnet-4-5");
+        
+        // Extract only text content (thinking should be filtered out)
+        let text_content: Vec<_> = response.content.iter().filter_map(|c| match c {
+            AnthropicContent::Text { text, .. } => Some(text.as_str()),
+            _ => None,
+        }).collect();
+        
+        assert_eq!(text_content.len(), 1);
+        assert_eq!(text_content[0], "Here is my response.");
    }
 }
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jochen	4aa84e2144	disable thinking if there is no token budget	2025-12-09 16:45:28 +11:00
Jochen	2283d9ddbf	small fix to provider name check	2025-12-09 14:43:35 +11:00
Jochen	fb2cf6f898	fix for thinking budget and hardcoded max token on summary	2025-12-09 12:41:52 +11:00
Jochen	696c441a47	validate max_tokens for call, also fallbacks for summary When the CW is full, max_tokens is often passed at 0 or tiny. The LLM will fail. For Anthropic with thining, there is also the thinking budget. This can happen during summary attempts, in that case first try thinnify, skinnify etc..	2025-12-09 10:15:32 +11:00
Dhanji R. Prasanna	48e6d594bc	tweak todo tool output	2025-12-08 11:05:01 +11:00
Dhanji R. Prasanna	678403da35	add a force thinnify cmd	2025-12-05 15:32:13 +11:00
Jochen	0970e4f356	Merge pull request #40 from dhanji/jochen-fix-coach-feedback now coach feedback works again	2025-12-03 10:55:15 +11:00
Jochen	758a313de0	Merge pull request #39 from dhanji/jochen-sonnet-thinking Fix temperature param + add thinking for anthropic	2025-12-03 10:54:34 +11:00
Jochen	0327a6dfdf	make sure coach feedback is extracted.	2025-12-02 22:00:58 +11:00
Jochen	928f2bfa9d	actually record coach feedback and use it	2025-12-02 21:23:50 +11:00
Jochen	21af6ba574	fix temperature for summary request too.	2025-12-02 21:20:16 +11:00
Jochen	ae16243f49	Fix temperature param + add thinking for anthropic The temperature param was not passed to the llm. Now support anthropic models in 'thinking' mode.	2025-12-02 17:24:55 +11:00
Dhanji R. Prasanna	9ee0468b87	test for system message	2025-12-02 14:45:12 +11:00
Dhanji R. Prasanna	d9ad244197	add markdown format only to final_output and fix todo duplication	2025-12-02 14:26:22 +11:00
Dhanji R. Prasanna	a6537e4dba	todo_write outputs entire list	2025-12-02 13:48:05 +11:00
Dhanji R. Prasanna	df3f25f2f0	test for resume unfinished todos	2025-12-02 11:07:13 +11:00
Dhanji R. Prasanna	f8f989d4c6	resume unfinished TODOs	2025-12-02 11:06:58 +11:00
Dhanji R. Prasanna	0e4c935a70	clean up TODO output	2025-12-02 06:48:58 +11:00
Dhanji R. Prasanna	1b4ea93ba4	token counting bugfix	2025-12-01 14:52:10 +11:00
Dhanji R. Prasanna	4496eee046	fix compaction to restore system message	2025-12-01 14:38:21 +11:00
Dhanji R. Prasanna	8928fb92be	append instead of replace system msg	2025-11-29 16:13:00 +11:00
Dhanji R. Prasanna	81fd2ab92f	unused var	2025-11-29 15:44:30 +11:00
Jochen	af7fb8f7f1	Merge pull request #38 from dhanji/jochen-debug-with-ids dumps context window for monitoring sizes, also add message id for internal debugging	2025-11-28 16:43:26 +11:00
Jochen	bad906b8b1	Merge branch 'main' into jochen-debug-with-ids	2025-11-28 16:43:15 +11:00
Jochen	dcfd681b05	add summary context window	2025-11-28 16:33:31 +11:00
Jochen	6dcae1e3f4	fix use import	2025-11-28 10:21:06 +11:00
Jochen	0d504d6422	temporarily disable codebase_fast_start it seems the llm gets "lazy" and assumes all the tool calls meant it's done most of the work. I need to revise this approach.	2025-11-27 21:02:01 +11:00
Jochen	52f78653b4	add context window monitor Writes the current context window to logs/current_context_window (uses a symlink to a session ID). This PR was unfortunately generated by a different LLM and did a ton of superficial reformating, it's actually a fairly small and benign change, but I don't want to roll back everything. Hope that's ok.	2025-11-27 21:00:02 +11:00
Jochen	93dc4acf86	generate internal id (debugging only) NOT set to provider... Anthropic will reject a message with id	2025-11-27 18:30:42 +11:00
Jochen	40e8b3aee2	Merge pull request #37 from dhanji/jochen-fast-start-check temporarily disable codebase_fast_start	2025-11-27 16:37:06 +11:00
Jochen	bbeaaea2e3	temporarily disable codebase_fast_start it seems the llm gets "lazy" and assumes all the tool calls meant it's done most of the work. I need to revise this approach.	2025-11-27 16:36:40 +11:00
Jochen	7e1ce36a4b	Merge pull request #35 from dhanji/jochen_write_existing_file remove check for whether a file exists in the workspace	2025-11-27 13:44:45 +11:00
Jochen	9f6592efc2	remove redundant 'if'	2025-11-27 13:34:54 +11:00
Jochen	99125fc39e	completely remove the skipping first player logic	2025-11-27 13:21:40 +11:00
Jochen	a2a82a2526	Merge pull request #36 from dhanji/jochen_fix_cache_control_if add cache_control to user messages	2025-11-27 13:13:54 +11:00
Jochen	5170744099	add cache_control to user messages	2025-11-27 13:12:42 +11:00
Jochen	fb0aabb5c4	Merge pull request #34 from dhanji/jochen-g3-ensemble-fork a fixed fork of dhanji/g3-ensembles	2025-11-27 11:41:23 +11:00
Jochen	4655516c15	Merge pull request #33 from dhanji/jochen_fix_multi_cache never add more than 4 cache controls	2025-11-27 11:41:05 +11:00
Jochen	c58aa80932	explain what file was found in workspace	2025-11-26 21:43:59 +11:00
Jochen	fdb3080fc2	fix partitions parser	2025-11-26 21:07:45 +11:00
Jochen	c837308148	never add more than 4 cache controls Anthropic API throws errors otherwise.	2025-11-26 18:38:30 +11:00
Jochen	9bbedd869a	Fixed JSON encoding in partition	2025-11-26 18:08:12 +11:00
Dhanji Prasanna	4cfa0147ca	first cut of horizontal partitioning # Conflicts: # Cargo.lock # Conflicts: # Cargo.lock # crates/g3-cli/src/lib.rs	2025-11-26 17:12:07 +11:00
Jochen	c6c35bf2ca	Merge pull request #31 from dhanji/jochen_fast_start add code exploration fast start	2025-11-26 17:10:42 +11:00
Jochen	c9fde4ecef	Merge pull request #32 from dhanji/jochen_reorder_system_prompt minor change: reorder system prompt	2025-11-26 11:07:08 +11:00
Jochen	1e1702001c	Add logging for discovery	2025-11-26 10:41:35 +11:00
Jochen	c419833ddf	updated the prompt	2025-11-26 10:26:52 +11:00
Jochen	c19127f809	make sure user requirements are included	2025-11-26 10:26:52 +11:00
Jochen	bd29addefa	reorder system prompt	2025-11-26 10:26:52 +11:00
Jochen	467e300ec2	reorder system prompt	2025-11-26 09:30:26 +11:00
Jochen	2e252cd298	added timer	2025-11-25 22:51:33 +11:00
Jochen	ad198a8501	add code exploration fast start This tries to short-circuit multiple round-trips to llm for reading code. It's a precursor to trying to context engineer tailored to specific tasks. In initial experiments, it's only marginally faster than regular mode, and burns more tokens.	2025-11-25 22:51:32 +11:00
Jochen	f501751bdf	Merge pull request #30 from dhanji/fix_tests Fix tests & add code coverage tool	2025-11-25 10:18:18 +11:00
Jochen	a96a15d1fc	add code coverage command	2025-11-21 14:38:58 +11:00
Jochen	24dc7ad642	fix build target	2025-11-21 14:07:31 +11:00
Jochen	a097c3abef	first cut	2025-11-21 13:56:36 +11:00
Jochen	34e55050b3	Merge pull request #28 from dhanji/jochen_force_todo_check_at_start check for stale TODO at startup of autonomous	2025-11-21 12:41:45 +11:00
Jochen	551a577ee1	changed user choice for TODO stale check user can ignore, mark stale or quit.	2025-11-21 12:35:14 +11:00
Jochen	84718223bc	remove minor comment	2025-11-21 12:26:41 +11:00
Jochen	28a83d2dcf	check for stale TODOs on by default, can be disabled	2025-11-21 12:09:01 +11:00
Jochen	0ce905dc74	Merge pull request #26 from dhanji/jochen_log_tool_calls__with_tool_logs log tool calls, allow multiple calls (optional)	2025-11-21 11:07:23 +11:00
Jochen	9f0d5add1e	remove redundant SYSTEM_NATIVE_TOOL_CALLS_MULTIPLE	2025-11-21 11:04:14 +11:00
Jochen	be6c6bfca4	fix ref to system prompt	2025-11-21 10:49:39 +11:00
Jochen	94a41c5c34	don't write warning to console	2025-11-21 10:49:27 +11:00
Jochen	09dbad2d68	allow multiple tool calls, log warnings if there are duplicate calls. controlled via a flag to the agent config: allow_multiple_tool_calls = true	2025-11-21 10:49:15 +11:00
Jochen	ffbf410b17	log tool calls	2025-11-21 10:49:02 +11:00
Jochen	c6f3f12b71	Merge pull request #27 from dhanji/jochen_tool_tail useful shell command for tailing tool logs	2025-11-20 13:31:09 +11:00
Dhanji Prasanna	14c8d066c9	ensure system prompt is always added first	2025-11-20 08:45:03 +11:00
Jochen	e556f06b15	useful command for tailing tool logs	2025-11-19 21:02:42 +11:00
Jochen	b6e226df67	Merge pull request #23 from dhanji/jochen-add-code-instructions system prompt now includes code style guide	2025-11-19 16:25:20 +11:00
Dhanji R. Prasanna	5b46922047	Merge pull request #25 from dhanji/fix_max_tokens fix bad max_tokens and context_window logic	2025-11-19 15:55:34 +11:00
Jochen	1069664e16	fix bad max_tokens and context_window logic for non-databricks code	2025-11-19 13:51:16 +11:00
Dhanji R. Prasanna	725f54b99b	Merge pull request #24 from dhanji/jochen_cache_control Add cache control for Anthropic (won't work via Databricks)	2025-11-19 13:39:09 +11:00
Dhanji R. Prasanna	325aab6b0e	Merge pull request #22 from dhanji/micn/console-detection patching console for detecting g3	2025-11-19 13:37:22 +11:00
Jochen	3f21bdc7b2	fix tests	2025-11-19 12:42:37 +11:00
Jochen	9bffd8b1bf	cache_control removed from databricks	2025-11-19 12:15:49 +11:00
Jochen	bfee8040e9	regression tests added	2025-11-19 11:32:14 +11:00
Jochen	a150ba6a55	adds ttl to cache control	2025-11-18 23:23:49 +11:00
Jochen	296bf5a449	adds cache_control	2025-11-18 22:38:52 +11:00
Jochen	7f73b664a3	system prompt now includes code style guide	2025-11-18 18:21:16 +11:00