Compare commits
35 Commits
jochen-add ... jochen_fas
| SHA1 |
|---|
| 1e1702001c |
| c419833ddf |
| c19127f809 |
| bd29addefa |
| 2e252cd298 |
| ad198a8501 |
| f501751bdf |
| a96a15d1fc |
| 24dc7ad642 |
| a097c3abef |
| 34e55050b3 |
| 551a577ee1 |
| 84718223bc |
| 28a83d2dcf |
| 0ce905dc74 |
| 9f0d5add1e |
| be6c6bfca4 |
| 94a41c5c34 |
| 09dbad2d68 |
| ffbf410b17 |
| c6f3f12b71 |
| 14c8d066c9 |
| e556f06b15 |
| b6e226df67 |
| 5b46922047 |
| 1069664e16 |
| 725f54b99b |
| 325aab6b0e |
| 3f21bdc7b2 |
| 9bffd8b1bf |
| bfee8040e9 |
| a150ba6a55 |
| 296bf5a449 |
| 8d8ddbe4b9 |
| 0466405d87 |
Cargo.lock (generated, 24 changed lines)
@@ -1365,11 +1365,15 @@ dependencies = [
 "dirs 5.0.1",
 "g3-config",
 "g3-core",
 "g3-planner",
 "g3-providers",
 "hex",
 "indicatif",
 "ratatui",
 "rustyline",
 "serde",
 "serde_json",
 "sha2",
 "termimad",
 "tokio",
 "tokio-util",
@@ -1409,6 +1413,7 @@ dependencies = [
 "config",
 "dirs 5.0.1",
 "serde",
 "serde_json",
 "shellexpand",
 "tempfile",
 "thiserror 1.0.69",
@@ -1496,6 +1501,19 @@ dependencies = [
 "tracing",
]

[[package]]
name = "g3-planner"
version = "0.1.0"
dependencies = [
 "anyhow",
 "chrono",
 "const_format",
 "g3-providers",
 "serde",
 "serde_json",
 "tokio",
]

[[package]]
name = "g3-providers"
version = "0.1.0"
@@ -1652,6 +1670,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"

[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"

[[package]]
name = "home"
version = "0.5.9"
@@ -2,6 +2,7 @@
members = [
    "crates/g3-cli",
    "crates/g3-core",
    "crates/g3-planner",
    "crates/g3-providers",
    "crates/g3-config",
    "crates/g3-execution",
@@ -11,14 +11,27 @@ model = "databricks-claude-sonnet-4"
max_tokens = 4096
temperature = 0.1
use_oauth = true
# cache_config = "ephemeral" # Optional: Enable prompt caching for Claude models
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# The cache control will be automatically applied to:
# - The system prompt at the start of each session
# - Assistant responses after every 10 tool calls
# - 5minute costs $3/mtok, more details below
# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing

[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-3-haiku-20240307" # Using a faster model for player
model = "claude-sonnet-4-5"
max_tokens = 4096
temperature = 0.3 # Slightly higher temperature for more creative implementations
# cache_config = "ephemeral" # Optional: Enable prompt caching
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# enable_1m_context = true # optional, more expensive

[agent]
fallback_default_max_tokens = 8192
enable_streaming = true
timeout_seconds = 60
timeout_seconds = 60
allow_multiple_tool_calls = true # Enable multiple tool calls, will usually only work with Anthropic
@@ -15,6 +15,17 @@ max_tokens = 4096 # Per-request output limit (how many tokens the model can gen
temperature = 0.1
use_oauth = true

[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-sonnet-4-5"
max_tokens = 4096
temperature = 0.3 # Slightly higher temperature for more creative implementations
# cache_config = "ephemeral" # Optional: Enable prompt caching
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# enable_1m_context = true # optional, more expensive

# Multiple OpenAI-compatible providers can be configured with custom names
# Each provider gets its own section under [providers.openai_compatible.<name>]
# [providers.openai_compatible.openrouter]
@@ -46,6 +57,7 @@ timeout_seconds = 60
# Retry configuration for recoverable errors (timeouts, rate limits, etc.)
max_retry_attempts = 3 # Default mode retry attempts
autonomous_max_retry_attempts = 6 # Autonomous mode retry attempts (higher for long-running tasks)
allow_multiple_tool_calls = true # Enable multiple tool calls

[computer_control]
enabled = false # Set to true to enable computer control (requires OS permissions)
@@ -7,6 +7,8 @@ description = "CLI interface for G3 AI coding agent"
[dependencies]
g3-core = { path = "../g3-core" }
g3-config = { path = "../g3-config" }
g3-planner = { path = "../g3-planner" }
g3-providers = { path = "../g3-providers" }
clap = { workspace = true }
tokio = { workspace = true }
anyhow = { workspace = true }
@@ -17,6 +19,8 @@ serde_json = { workspace = true }
rustyline = "17.0.1"
dirs = "5.0"
tokio-util = "0.7"
sha2 = "0.10"
hex = "0.4"
indicatif = "0.17"
chrono = { version = "0.4", features = ["serde"] }
crossterm = "0.29.0"
@@ -98,6 +98,25 @@ fn generate_turn_histogram(turn_metrics: &[TurnMetrics]) -> String {
    histogram
}

/// Format a Duration as human-readable elapsed time (e.g., "1h 23m 45s", "5m 30s", "45s")
fn format_elapsed_time(duration: Duration) -> String {
    let total_secs = duration.as_secs();
    let hours = total_secs / 3600;
    let minutes = (total_secs % 3600) / 60;
    let seconds = total_secs % 60;

    if hours > 0 {
        format!("{}h {}m {}s", hours, minutes, seconds)
    } else if minutes > 0 {
        format!("{}m {}s", minutes, seconds)
    } else if seconds > 0 {
        format!("{}s", seconds)
    } else {
        // For very short durations, show milliseconds
        format!("{}ms", duration.as_millis())
    }
}
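A minimal standalone check of the formatting behavior added above; the function body is copied from the hunk, the `main` harness is only illustrative:

```rust
use std::time::Duration;

// Copied from the diff above: human-readable elapsed time.
fn format_elapsed_time(duration: Duration) -> String {
    let total_secs = duration.as_secs();
    let hours = total_secs / 3600;
    let minutes = (total_secs % 3600) / 60;
    let seconds = total_secs % 60;

    if hours > 0 {
        format!("{}h {}m {}s", hours, minutes, seconds)
    } else if minutes > 0 {
        format!("{}m {}s", minutes, seconds)
    } else if seconds > 0 {
        format!("{}s", seconds)
    } else {
        format!("{}ms", duration.as_millis())
    }
}

fn main() {
    // 5025s = 1h 23m 45s, matching the doc comment's example.
    assert_eq!(format_elapsed_time(Duration::from_secs(5025)), "1h 23m 45s");
    assert_eq!(format_elapsed_time(Duration::from_secs(330)), "5m 30s");
    assert_eq!(format_elapsed_time(Duration::from_secs(45)), "45s");
    assert_eq!(format_elapsed_time(Duration::from_millis(250)), "250ms");
}
```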
/// Extract coach feedback by reading from the coach agent's specific log file
/// Uses the coach agent's session ID to find the exact log file
fn extract_coach_feedback_from_logs(
@@ -159,11 +178,12 @@ fn extract_coach_feedback_from_logs(

use clap::Parser;
use g3_config::Config;
use g3_core::{project::Project, ui_writer::UiWriter, Agent};
use g3_core::{project::Project, ui_writer::UiWriter, Agent, DiscoveryOptions};
use rustyline::error::ReadlineError;
use rustyline::DefaultEditor;
use std::path::Path;
use std::path::PathBuf;
use sha2::{Digest, Sha256};
use tokio_util::sync::CancellationToken;
use tracing::{error, info};

@@ -246,6 +266,10 @@ pub struct Cli {
    /// Enable WebDriver browser automation tools
    #[arg(long)]
    pub webdriver: bool,

    /// Enable fast codebase discovery before first LLM turn
    #[arg(long, value_name = "PATH")]
    pub codebase_fast_start: Option<PathBuf>,
}

pub async fn run() -> Result<()> {
@@ -675,6 +699,7 @@ async fn run_accumulative_mode(
    cli.show_code,
    cli.max_turns,
    cli.quiet,
    cli.codebase_fast_start.clone(),
) => result,
_ = tokio::signal::ctrl_c() => {
    output.print("\n⚠️ Autonomous run cancelled by user (Ctrl+C)");
@@ -726,6 +751,7 @@ async fn run_autonomous_machine(
    show_code: bool,
    max_turns: usize,
    _quiet: bool,
    _codebase_fast_start: Option<PathBuf>,
) -> Result<()> {
    println!("AUTONOMOUS_MODE_STARTED");
    println!("WORKSPACE: {}", project.workspace().display());
@@ -756,7 +782,7 @@ async fn run_autonomous_machine(
    );

    println!("TASK_START");
    let result = agent.execute_task_with_timing(&task, None, false, show_prompt, show_code, true).await?;
    let result = agent.execute_task_with_timing(&task, None, false, show_prompt, show_code, true, None).await?;
    println!("AGENT_RESPONSE:");
    println!("{}", result.response);
    println!("END_AGENT_RESPONSE");
@@ -783,13 +809,14 @@ async fn run_with_console_mode(
    cli.show_code,
    cli.max_turns,
    cli.quiet,
    cli.codebase_fast_start.clone(),
)
.await?;
} else if let Some(task) = cli.task {
    // Single-shot mode
    let output = SimpleOutput::new();
    let result = agent
        .execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
        .execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true, None)
        .await?;
    output.print_smart(&result.response);
} else {
@@ -814,12 +841,13 @@ async fn run_with_machine_mode(
    cli.show_code,
    cli.max_turns,
    cli.quiet,
    cli.codebase_fast_start.clone(),
)
.await?;
} else if let Some(task) = cli.task {
    // Single-shot mode
    let result = agent
        .execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
        .execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true, None)
        .await?;
    println!("AGENT_RESPONSE:");
    println!("{}", result.response);
@@ -1211,7 +1239,7 @@ async fn execute_task<W: UiWriter>(
    // Execute task with cancellation support
    let execution_result = tokio::select! {
        result = agent.execute_task_with_timing_cancellable(
            input, None, false, show_prompt, show_code, true, cancellation_token.clone()
            input, None, false, show_prompt, show_code, true, cancellation_token.clone(), None
        ) => {
            result
        }
@@ -1402,7 +1430,7 @@ async fn execute_task_machine(
    // Execute task with cancellation support
    let execution_result = tokio::select! {
        result = agent.execute_task_with_timing_cancellable(
            input, None, false, show_prompt, show_code, true, cancellation_token.clone()
            input, None, false, show_prompt, show_code, true, cancellation_token.clone(), None
        ) => {
            result
        }
@@ -1551,6 +1579,7 @@ async fn run_autonomous(
    show_code: bool,
    max_turns: usize,
    quiet: bool,
    codebase_fast_start: Option<PathBuf>,
) -> Result<()> {
    let start_time = std::time::Instant::now();
    let output = SimpleOutput::new();
@@ -1660,6 +1689,18 @@ async fn run_autonomous(
    } else {
        output.print("📋 Requirements loaded from requirements.md");
    }

    // Calculate SHA256 of requirements
    let mut hasher = Sha256::new();
    hasher.update(requirements.as_bytes());
    let requirements_sha = hex::encode(hasher.finalize());

    output.print(&format!("🔒 Requirements SHA256: {}", requirements_sha));

    // Pass SHA to agent for staleness checking
    agent.set_requirements_sha(requirements_sha.clone());

    let loop_start = Instant::now();
    output.print("🔄 Starting coach-player feedback loop...");

    // Check if implementation files already exist
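The requirements hashing above relies on the `sha2` and `hex` crates that this diff adds to g3-cli; a minimal standalone sketch of the same computation:

```rust
// Minimal sketch of the requirements hashing shown above,
// assuming the `sha2 = "0.10"` and `hex = "0.4"` crates from this diff.
use sha2::{Digest, Sha256};

fn requirements_sha256(requirements: &str) -> String {
    let mut hasher = Sha256::new();
    hasher.update(requirements.as_bytes());
    hex::encode(hasher.finalize())
}

fn main() {
    // Any change to requirements.md produces a different SHA,
    // which is what the staleness check keys on.
    let sha = requirements_sha256("Build a CLI tool that prints elapsed time.");
    println!("🔒 Requirements SHA256: {}", sha);
}
```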
@@ -1672,6 +1713,39 @@ async fn run_autonomous(
        output.print("🎯 Starting with player implementation");
    }

    // Load fast-discovery messages before the loop starts (if enabled)
    let (discovery_messages, discovery_working_dir): (Vec<g3_providers::Message>, Option<String>) =
        if let Some(ref codebase_path) = codebase_fast_start {
            // Canonicalize the path to ensure it's absolute
            let canonical_path = codebase_path.canonicalize().unwrap_or_else(|_| codebase_path.clone());
            let path_str = canonical_path.to_string_lossy();
            output.print(&format!("🔍 Fast-discovery mode: will explore codebase at {}", path_str));
            // Get the provider from the agent and use async LLM-based discovery
            match agent.get_provider() {
                Ok(provider) => {
                    // Create a status callback that prints to output
                    let output_clone = output.clone();
                    let status_callback: g3_planner::StatusCallback = Box::new(move |msg: &str| {
                        output_clone.print(msg);
                    });
                    match g3_planner::get_initial_discovery_messages(&path_str, Some(&requirements), provider, Some(&status_callback)).await {
                        Ok(messages) => (messages, Some(path_str.to_string())),
                        Err(e) => {
                            output.print(&format!("⚠️ LLM discovery failed: {}, skipping fast-start", e));
                            (Vec::new(), None)
                        }
                    }
                }
                Err(e) => {
                    output.print(&format!("⚠️ Could not get provider: {}, skipping fast-start", e));
                    (Vec::new(), None)
                }
            }
        } else {
            (Vec::new(), None)
        };
    let has_discovery = !discovery_messages.is_empty();

    let mut turn = 1;
    let mut coach_feedback = String::new();
    let mut implementation_approved = false;
@@ -1686,11 +1760,14 @@ async fn run_autonomous(
        turn, max_turns
    ));

    // Surface provider info for player agent
    agent.print_provider_banner("Player");

    // Player mode: implement requirements (with coach feedback if available)
    let player_prompt = if coach_feedback.is_empty() {
        format!(
            "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.",
            requirements
            "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nRequirements SHA256: {}\n\nImplement this step by step, creating all necessary files and code.",
            requirements, requirements_sha
        )
    } else {
        format!(
@@ -1699,7 +1776,7 @@ async fn run_autonomous(
        )
    };

    output.print("🎯 Starting player implementation...");
    output.print(&format!("🎯 Starting player implementation... (elapsed: {})", format_elapsed_time(loop_start.elapsed())));

    // Display what feedback the player is receiving
    // If there's no coach feedback on subsequent turns, this is an error
@@ -1734,6 +1811,12 @@ async fn run_autonomous(
        show_prompt,
        show_code,
        true,
        if has_discovery {
            Some(DiscoveryOptions {
                messages: &discovery_messages,
                fast_start_path: discovery_working_dir.as_deref(),
            })
        } else { None },
    )
    .await
    {
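The definition of `DiscoveryOptions` lives in g3-core and is not part of this diff; inferred from the call sites above, it presumably looks roughly like the following sketch (an assumption, not the actual declaration; `Message` here stands in for `g3_providers::Message`):

```rust
// Hypothetical shape inferred from how DiscoveryOptions is constructed above;
// the real g3-core type may differ in field types and lifetimes.
pub struct Message {
    pub role: String,
    pub content: String,
}

pub struct DiscoveryOptions<'a> {
    // Pre-computed discovery messages injected before the first LLM turn.
    pub messages: &'a [Message],
    // Canonicalized codebase path used as the fast-start working directory.
    pub fast_start_path: Option<&'a str>,
}

fn main() {
    let discovery_messages: Vec<Message> = Vec::new();
    let opts = DiscoveryOptions {
        messages: &discovery_messages,
        fast_start_path: Some("/path/to/codebase"),
    };
    println!("{} messages, path {:?}", opts.messages.len(), opts.fast_start_path);
}
```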
@@ -1879,6 +1962,9 @@ async fn run_autonomous(
    let mut coach_agent =
        Agent::new_autonomous_with_readme_and_quiet(coach_config, ui_writer, None, quiet).await?;

    // Surface provider info for coach agent
    coach_agent.print_provider_banner("Coach");

    // Ensure coach agent is also in the workspace directory
    project.enter_workspace()?;

@@ -1918,7 +2004,7 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
        requirements
    );

    output.print("🎓 Starting coach review...");
    output.print(&format!("🎓 Starting coach review... (elapsed: {})", format_elapsed_time(loop_start.elapsed())));

    // Execute coach task with retry on error
    let mut coach_retry_count = 0;
@@ -1928,7 +2014,13 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the

    loop {
        match coach_agent
            .execute_task_with_timing(&coach_prompt, None, false, show_prompt, show_code, true)
            .execute_task_with_timing(&coach_prompt, None, false, show_prompt, show_code, true,
                if has_discovery {
                    Some(DiscoveryOptions {
                        messages: &discovery_messages,
                        fast_start_path: discovery_working_dir.as_deref(),
                    })
                } else { None })
            .await
        {
            Ok(result) => {
@@ -2158,9 +2250,9 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
    output.print(&"=".repeat(60));

    if implementation_approved {
        output.print("\n🎉 Autonomous mode completed successfully");
        output.print(&format!("\n🎉 Autonomous mode completed successfully (total loop time: {})", format_elapsed_time(loop_start.elapsed())));
    } else {
        output.print("\n🔄 Autonomous mode terminated (max iterations)");
        output.print(&format!("\n🔄 Autonomous mode terminated (max iterations) (total loop time: {})", format_elapsed_time(loop_start.elapsed())));
    }

    Ok(())
@@ -91,4 +91,18 @@ impl UiWriter for MachineUiWriter {
    fn wants_full_output(&self) -> bool {
        true // Machine mode wants complete, untruncated output
    }

    fn prompt_user_yes_no(&self, message: &str) -> bool {
        // In machine mode, we can't interactively prompt, so we log the request and return true
        // to allow automation to proceed.
        println!("PROMPT_USER_YES_NO: {}", message);
        true
    }

    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
        println!("PROMPT_USER_CHOICE: {}", message);
        println!("OPTIONS: {:?}", options);
        // Default to first option (index 0) for automation
        0
    }
}

@@ -1,4 +1,5 @@
/// Simple output helper for printing messages
#[derive(Clone)]
pub struct SimpleOutput {
    machine_mode: bool,
}

@@ -343,5 +343,40 @@ impl UiWriter for ConsoleUiWriter {
    fn flush(&self) {
        let _ = io::stdout().flush();
    }

    fn prompt_user_yes_no(&self, message: &str) -> bool {
        print!("{} [y/N] ", message);
        let _ = io::stdout().flush();

        let mut input = String::new();
        if io::stdin().read_line(&mut input).is_ok() {
            let trimmed = input.trim().to_lowercase();
            trimmed == "y" || trimmed == "yes"
        } else {
            false
        }
    }

    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
        println!("{} ", message);
        for (i, option) in options.iter().enumerate() {
            println!(" [{}] {}", i + 1, option);
        }
        print!("Select an option (1-{}): ", options.len());
        let _ = io::stdout().flush();

        loop {
            let mut input = String::new();
            if io::stdin().read_line(&mut input).is_ok() {
                if let Ok(choice) = input.trim().parse::<usize>() {
                    if choice > 0 && choice <= options.len() {
                        return choice - 1;
                    }
                }
            }
            print!("Invalid choice. Please select (1-{}): ", options.len());
            let _ = io::stdout().flush();
        }
    }
}

@@ -36,11 +36,20 @@ fn main() {
    // Copy the dylib to the output directory so it can be found at runtime
    let target_dir = manifest_dir.parent().unwrap().parent().unwrap().join("target");
    let profile = env::var("PROFILE").unwrap_or_else(|_| "debug".to_string());
    let output_dir = target_dir.join(&profile);

    // Determine the actual target directory (could be llvm-cov-target or regular target)
    let target_dir_name = env::var("CARGO_TARGET_DIR")
        .unwrap_or_else(|_| target_dir.to_string_lossy().to_string());
    let actual_target_dir = PathBuf::from(&target_dir_name);
    let output_dir = actual_target_dir.join(&profile);

    let dylib_src = lib_path.join("libVisionBridge.dylib");
    let dylib_dst = output_dir.join("libVisionBridge.dylib");

    // Create output directory if it doesn't exist
    std::fs::create_dir_all(&output_dir)
        .expect(&format!("Failed to create output directory {}", output_dir.display()));

    std::fs::copy(&dylib_src, &dylib_dst)
        .expect(&format!("Failed to copy dylib from {} to {}", dylib_src.display(), dylib_dst.display()));

@@ -15,3 +15,4 @@ dirs = "5.0"

[dev-dependencies]
tempfile = "3.8"
serde_json = { workspace = true }

@@ -40,6 +40,8 @@ pub struct AnthropicConfig {
    pub model: String,
    pub max_tokens: Option<u32>,
    pub temperature: Option<f32>,
    pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable
    pub enable_1m_context: Option<bool>, // Enable 1m context window (costs extra)
}

#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -68,10 +70,17 @@ pub struct AgentConfig {
    pub max_context_length: Option<u32>,
    pub fallback_default_max_tokens: usize,
    pub enable_streaming: bool,
    pub allow_multiple_tool_calls: bool,
    pub timeout_seconds: u64,
    pub auto_compact: bool,
    pub max_retry_attempts: u32,
    pub autonomous_max_retry_attempts: u32,
    #[serde(default = "default_check_todo_staleness")]
    pub check_todo_staleness: bool,
}

fn default_check_todo_staleness() -> bool {
    true
}

#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -143,10 +152,12 @@ impl Default for Config {
    max_context_length: None,
    fallback_default_max_tokens: 8192,
    enable_streaming: true,
    allow_multiple_tool_calls: false,
    timeout_seconds: 60,
    auto_compact: true,
    max_retry_attempts: 3,
    autonomous_max_retry_attempts: 6,
    check_todo_staleness: true,
},
computer_control: ComputerControlConfig::default(),
webdriver: WebDriverConfig::default(),
@@ -263,10 +274,12 @@ impl Config {
    max_context_length: None,
    fallback_default_max_tokens: 8192,
    enable_streaming: true,
    allow_multiple_tool_calls: false,
    timeout_seconds: 60,
    auto_compact: true,
    max_retry_attempts: 3,
    autonomous_max_retry_attempts: 6,
    check_todo_staleness: true,
},
computer_control: ComputerControlConfig::default(),
webdriver: WebDriverConfig::default(),
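The new `check_todo_staleness` field above is the only `AgentConfig` field with a serde default, so existing config files that omit it still deserialize to `true`. A standalone sketch of that mechanism (mirroring the attribute, not the actual g3-config types):

```rust
// Standalone illustration of the `#[serde(default = "...")]` pattern used for
// `check_todo_staleness` above; `MiniAgentConfig` is a stand-in type.
use serde::Deserialize;

fn default_check_todo_staleness() -> bool {
    true
}

#[derive(Debug, Deserialize)]
struct MiniAgentConfig {
    timeout_seconds: u64,
    #[serde(default = "default_check_todo_staleness")]
    check_todo_staleness: bool,
}

fn main() {
    // Field missing from the input: the default function supplies `true`.
    let cfg: MiniAgentConfig =
        serde_json::from_str(r#"{ "timeout_seconds": 60 }"#).unwrap();
    assert!(cfg.check_todo_staleness);
}
```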
crates/g3-config/tests/test_multiple_tool_calls.rs (new file, 40 lines)
@@ -0,0 +1,40 @@
#[cfg(test)]
mod test_multiple_tool_calls {
    use g3_config::{Config, AgentConfig};

    #[test]
    fn test_config_has_multiple_tool_calls_field() {
        let config = Config::default();

        // Test that the field exists and defaults to false
        assert_eq!(config.agent.allow_multiple_tool_calls, false);

        // Test that we can create a config with the field set to true
        let mut custom_config = Config::default();
        custom_config.agent.allow_multiple_tool_calls = true;
        assert_eq!(custom_config.agent.allow_multiple_tool_calls, true);
    }

    #[test]
    fn test_agent_config_serialization() {
        let agent_config = AgentConfig {
            max_context_length: Some(100000),
            fallback_default_max_tokens: 8192,
            enable_streaming: true,
            allow_multiple_tool_calls: true,
            timeout_seconds: 60,
            auto_compact: true,
            max_retry_attempts: 3,
            autonomous_max_retry_attempts: 6,
            check_todo_staleness: true,
        };

        // Test serialization
        let json = serde_json::to_string(&agent_config).unwrap();
        assert!(json.contains("\"allow_multiple_tool_calls\":true"));

        // Test deserialization
        let deserialized: AgentConfig = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.allow_multiple_tool_calls, true);
    }
}

@@ -6,6 +6,9 @@ authors = ["G3 Team"]
description = "Web console for monitoring and managing g3 instances"
license = "MIT"

[lib]
path = "src/lib.rs"

[[bin]]
name = "g3-console"
path = "src/main.rs"
crates/g3-console/src/lib.rs (new file, 5 lines)
@@ -0,0 +1,5 @@
pub mod api;
pub mod logs;
pub mod models;
pub mod process;
pub mod launch;
crates/g3-console/src/logs.rs (new file, 256 lines)
@@ -0,0 +1,256 @@
use crate::models::{InstanceStats, TurnInfo};
use anyhow::{Context, Result};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::fs;
use std::path::Path;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LogEntry {
    pub timestamp: Option<DateTime<Utc>>,
    pub role: Option<String>,
    pub content: Option<String>,
    pub tool_calls: Option<Vec<Value>>,
    pub raw: Value,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChatMessage {
    pub role: String,
    pub content: String,
    pub timestamp: Option<DateTime<Utc>>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCall {
    pub name: String,
    pub parameters: Value,
    pub result: Option<String>,
    pub timestamp: Option<DateTime<Utc>>,
}

pub struct LogParser;

impl LogParser {
    /// Parse logs from a workspace directory
    pub fn parse_logs(workspace: &Path) -> Result<Vec<LogEntry>> {
        let logs_dir = workspace.join("logs");

        if !logs_dir.exists() {
            return Ok(Vec::new());
        }

        let mut entries = Vec::new();

        // Read all JSON log files
        for entry in fs::read_dir(&logs_dir).context("Failed to read logs directory")? {
            let entry = entry?;
            let path = entry.path();

            if path.extension().and_then(|s| s.to_str()) == Some("json") {
                if let Ok(content) = fs::read_to_string(&path) {
                    if let Ok(json) = serde_json::from_str::<Value>(&content) {
                        // Try to parse as a log session
                        if let Some(messages) = json.get("messages").and_then(|m| m.as_array()) {
                            for msg in messages {
                                entries.push(LogEntry {
                                    timestamp: msg.get("timestamp")
                                        .and_then(|t| t.as_str())
                                        .and_then(|s| DateTime::parse_from_rfc3339(s).ok())
                                        .map(|dt| dt.with_timezone(&Utc)),
                                    role: msg.get("role")
                                        .and_then(|r| r.as_str())
                                        .map(String::from),
                                    content: msg.get("content")
                                        .and_then(|c| c.as_str())
                                        .map(String::from),
                                    tool_calls: msg.get("tool_calls")
                                        .and_then(|tc| tc.as_array())
                                        .map(|arr| arr.clone()),
                                    raw: msg.clone(),
                                });
                            }
                        }
                    }
                }
            }
        }

        // Sort by timestamp
        entries.sort_by(|a, b| {
            match (&a.timestamp, &b.timestamp) {
                (Some(t1), Some(t2)) => t1.cmp(t2),
                (Some(_), None) => std::cmp::Ordering::Less,
                (None, Some(_)) => std::cmp::Ordering::Greater,
                (None, None) => std::cmp::Ordering::Equal,
            }
        });

        Ok(entries)
    }

    /// Extract chat messages from log entries
    pub fn extract_chat_messages(entries: &[LogEntry]) -> Vec<ChatMessage> {
        entries
            .iter()
            .filter_map(|entry| {
                let role = entry.role.clone()?;
                let content = entry.content.clone()?;

                Some(ChatMessage {
                    role,
                    content,
                    timestamp: entry.timestamp,
                })
            })
            .collect()
    }

    /// Extract tool calls from log entries
    pub fn extract_tool_calls(entries: &[LogEntry]) -> Vec<ToolCall> {
        let mut tool_calls = Vec::new();

        for entry in entries {
            if let Some(calls) = &entry.tool_calls {
                for call in calls {
                    if let Some(name) = call.get("name").and_then(|n| n.as_str()) {
                        tool_calls.push(ToolCall {
                            name: name.to_string(),
                            parameters: call.get("parameters")
                                .cloned()
                                .unwrap_or(Value::Object(serde_json::Map::new())),
                            result: call.get("result")
                                .and_then(|r| r.as_str())
                                .map(String::from),
                            timestamp: entry.timestamp,
                        });
                    }
                }
            }
        }

        tool_calls
    }
}

pub struct StatsAggregator;

impl StatsAggregator {
    /// Aggregate statistics from log entries
    pub fn aggregate_stats(
        entries: &[LogEntry],
        start_time: DateTime<Utc>,
        is_ensemble: bool,
    ) -> InstanceStats {
        let total_tokens = Self::count_tokens(entries);
        let tool_calls = Self::count_tool_calls(entries);
        let errors = Self::count_errors(entries);

        let duration_secs = if let Some(last_entry) = entries.last() {
            if let Some(last_time) = last_entry.timestamp {
                (last_time - start_time).num_seconds().max(0) as u64
            } else {
                (Utc::now() - start_time).num_seconds().max(0) as u64
            }
        } else {
            (Utc::now() - start_time).num_seconds().max(0) as u64
        };

        let turns = if is_ensemble {
            Some(Self::extract_turns(entries))
        } else {
            None
        };

        InstanceStats {
            total_tokens,
            tool_calls,
            errors,
            duration_secs,
            turns,
        }
    }

    /// Get the latest message content from log entries
    pub fn get_latest_message(entries: &[LogEntry]) -> Option<String> {
        entries
            .iter()
            .rev()
            .find(|entry| entry.role.as_deref() == Some("assistant"))
            .and_then(|entry| entry.content.clone())
            .or_else(|| {
                entries
                    .iter()
                    .rev()
                    .find(|entry| entry.content.is_some())
                    .and_then(|entry| entry.content.clone())
            })
    }

    fn count_tokens(entries: &[LogEntry]) -> u64 {
        // Try to extract token counts from metadata
        entries
            .iter()
            .filter_map(|entry| {
                entry.raw.get("usage")
                    .and_then(|u| u.get("total_tokens"))
                    .and_then(|t| t.as_u64())
            })
            .sum()
    }

    fn count_tool_calls(entries: &[LogEntry]) -> u64 {
        entries
            .iter()
            .filter_map(|entry| entry.tool_calls.as_ref())
            .map(|calls| calls.len() as u64)
            .sum()
    }

    fn count_errors(entries: &[LogEntry]) -> u64 {
        entries
            .iter()
            .filter(|entry| {
                entry.raw.get("error").is_some()
                    || entry.content.as_ref().map(|c| c.to_lowercase().contains("error")).unwrap_or(false)
            })
            .count() as u64
    }

    fn extract_turns(entries: &[LogEntry]) -> Vec<TurnInfo> {
        // Simple implementation: group consecutive assistant messages as turns
        let mut turns = Vec::new();
        let mut current_turn_start: Option<DateTime<Utc>> = None;
        let mut turn_count = 0;

        for entry in entries {
            if entry.role.as_deref() == Some("assistant") {
                if current_turn_start.is_none() {
                    current_turn_start = entry.timestamp;
                    turn_count += 1;
                }
            } else if entry.role.as_deref() == Some("user") {
                if let Some(start) = current_turn_start {
                    if let Some(end) = entry.timestamp {
                        let duration = (end - start).num_seconds().max(0) as u64;
                        turns.push(TurnInfo {
                            agent: format!("agent-{}", turn_count),
                            duration_secs: duration,
                            status: "completed".to_string(),
                            color: Self::get_turn_color(turn_count),
                        });
                    }
                    current_turn_start = None;
                }
            }
        }

        turns
    }

    fn get_turn_color(turn_number: usize) -> String {
        let colors = vec!["blue", "green", "purple", "orange", "pink", "teal"];
        colors[turn_number % colors.len()].to_string()
    }
}
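A sketch of how the new logs module might be driven from elsewhere in g3-console, assuming the crate layout above (`InstanceStats` comes from the `models` module, which is not shown in this diff, and the start time passed here is only a placeholder):

```rust
// Usage sketch only; assumes the g3-console crate from this diff is available.
use std::path::Path;

use chrono::Utc;
use g3_console::logs::{LogParser, StatsAggregator};

fn summarize_workspace(workspace: &Path) -> anyhow::Result<()> {
    // Parse all JSON session logs under <workspace>/logs.
    let entries = LogParser::parse_logs(workspace)?;
    let chat = LogParser::extract_chat_messages(&entries);
    let tools = LogParser::extract_tool_calls(&entries);

    // `false` means a non-ensemble instance, so no per-turn breakdown.
    let stats = StatsAggregator::aggregate_stats(&entries, Utc::now(), false);

    println!("{} chat messages, {} tool calls", chat.len(), tools.len());
    println!(
        "tokens: {}, errors: {}, duration: {}s",
        stats.total_tokens, stats.errors, stats.duration_secs
    );
    Ok(())
}
```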
@@ -1,8 +1,6 @@
mod api;
mod logs;
mod models;
mod process;
mod launch;
use g3_console::api;
use g3_console::process;
use g3_console::launch;

use api::control::{kill_instance, launch_instance, restart_instance};
use api::instances::{get_instance, get_file_content, list_instances};

@@ -3,7 +3,7 @@ use anyhow::Result;
use chrono::{DateTime, Utc};
use std::path::PathBuf;
use sysinfo::{System, Pid, Process};
use tracing::{debug, warn};
use tracing::{debug, info, warn};

pub struct ProcessDetector {
    system: System,
@@ -17,7 +17,11 @@ impl ProcessDetector {
    }

    pub fn detect_instances(&mut self) -> Result<Vec<Instance>> {
        self.system.refresh_processes();
        info!("Scanning for g3 processes...");
        // Refresh all processes to ensure we catch newly started ones
        // Using refresh_all() instead of just refresh_processes() to ensure
        // we get complete information about new processes
        self.system.refresh_all();
        let mut instances = Vec::new();

        // Find all g3 processes
@@ -33,7 +37,7 @@ impl ProcessDetector {
            }
        }

        debug!("Detected {} g3 instances", instances.len());
        info!("Detected {} g3 instances", instances.len());
        Ok(instances)
    }

@@ -45,24 +49,27 @@ impl ProcessDetector {
    ) -> Option<Instance> {
        let cmd_str = cmd.join(" ");

        // Exclude g3-console itself
        if cmd_str.contains("g3-console") {
            return None;
        }

        // Check if this is a g3 binary (more comprehensive check)
        let is_g3_binary = cmd.get(0).map(|s| {
            s.ends_with("g3") || s.ends_with("/g3") || s.contains("/target/release/g3") || s.contains("/target/debug/g3")
            (s.ends_with("g3") || s.ends_with("/g3") || s.contains("/target/release/g3") || s.contains("/target/debug/g3"))
                && !s.contains("g3-") // Exclude other g3-* binaries
        }).unwrap_or(false);

        // Check if this is cargo run with g3
        let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false) && cmd.iter().any(|s| s == "run");
        // Check if this is cargo run with g3 (not g3-console or other variants)
        let is_cargo_run = cmd.get(0).map(|s| s.contains("cargo")).unwrap_or(false)
            && cmd.iter().any(|s| s == "run")
            && !cmd_str.contains("g3-console");

        // Also check if any part of the command line contains g3-related patterns
        let has_g3_pattern = cmd_str.contains("g3 ")
            || cmd_str.contains("/g3 ")
            || cmd_str.contains("g3-")
            || cmd_str.ends_with("g3")
            || cmd_str.contains("--workspace") // g3-specific flag
            || cmd_str.contains("--autonomous"); // g3-specific flag
        // Also check if command line has g3-specific flags
        let has_g3_flags = cmd_str.contains("--workspace") || cmd_str.contains("--autonomous");

        // Accept if it's a g3 binary, cargo run with g3 patterns, or has g3-specific flags
        let is_g3_process = is_g3_binary || (is_cargo_run && has_g3_pattern) || has_g3_pattern;
        // Accept if it's a g3 binary or cargo run with g3, and has typical g3 patterns
        let is_g3_process = is_g3_binary || (is_cargo_run && has_g3_flags);

        if !is_g3_process {
            return None;
@@ -165,7 +172,7 @@ impl ProcessDetector {
    }

    pub fn get_process_status(&mut self, pid: u32) -> Option<InstanceStatus> {
        self.system.refresh_processes();
        self.system.refresh_all();

        let sysinfo_pid = Pid::from_u32(pid);
        if self.system.process(sysinfo_pid).is_some() {

@@ -15,7 +15,7 @@
<div id="app">
    <header class="header">
        <div class="header-content">
            <h1 class="header-title">G3 Console</h1>
            <h1 class="header-title">G3 Console <span id="live-indicator" class="live-indicator" title="Scanning for processes every 3 seconds">● LIVE</span></h1>
            <div class="header-actions">
                <button id="new-run-btn" class="btn btn-primary">+ New Run</button>
                <button id="theme-toggle" class="btn btn-secondary">🌙</button>

@@ -6,6 +6,7 @@ const router = {
    currentInstanceId: null,
    initialized: false,
    renderInProgress: false,
    REFRESH_INTERVAL_MS: 3000, // Refresh every 3 seconds for live updates

    init() {
        console.log('[Router] init() called');
@@ -84,6 +85,9 @@ const router = {
        this.renderInProgress = true;

        try {
            // Flash live indicator
            this.flashLiveIndicator();

            // Check if we already have a container for instances
            let instancesList = container.querySelector('.instances-list');
            const isInitialLoad = !instancesList;
@@ -167,11 +171,11 @@ const router = {

            // Schedule next refresh only if still on home route
            if (this.currentRoute === '/' || this.currentRoute === '') {
                console.log('[Router] Scheduling auto-refresh in 5 seconds');
                console.log(`[Router] Scheduling auto-refresh in ${this.REFRESH_INTERVAL_MS}ms`);
                this.refreshTimeout = setTimeout(() => {
                    console.log('[Router] Auto-refresh triggered');
                    this.renderHome(container);
                }, 5000);
                }, this.REFRESH_INTERVAL_MS);
            }
        } catch (error) {
            console.error('[Router] Error in renderHome:', error);
@@ -187,12 +191,26 @@ const router = {
        }
    },

    flashLiveIndicator() {
        const indicator = document.getElementById('live-indicator');
        if (indicator) {
            indicator.style.animation = 'none';
            // Force reflow
            void indicator.offsetWidth;
            indicator.style.animation = null;
            indicator.style.opacity = '1';
        }
    },

    async renderDetail(container, id) {
        console.log('[Router] renderDetail called for', id);

        this.currentInstanceId = id;

        try {
            // Flash live indicator
            this.flashLiveIndicator();

            // Check if we already have a detail view for this instance
            let detailView = container.querySelector('.detail-view');
            const isInitialLoad = !detailView || detailView.getAttribute('data-instance-id') !== id;

@@ -64,6 +64,22 @@ body {
    color: var(--text-primary);
}

.live-indicator {
    font-size: 0.625rem; /* 75% of 0.833rem */
    font-weight: 600;
    color: var(--success);
    margin-left: 0.75rem;
    display: inline-flex;
    align-items: center;
    gap: 0.25rem;
    animation: pulse 2s ease-in-out infinite;
}

@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.5; }
}

.header-actions {
    display: flex;
    gap: 1rem;

@@ -48,7 +48,7 @@ pub async fn another_async(x: i32) -> Result<(), ()> {
    println!("{}\n", "=".repeat(80));

    let mut parser = Parser::new();
    let language: Language = tree_sitter_rust::language().into();
    let language: Language = tree_sitter_rust::LANGUAGE.into();
    parser.set_language(&language)?;

    let tree = parser.parse(source_code, None).unwrap();

@@ -46,7 +46,7 @@ class MyClass:
    println!("{}\n", "=".repeat(80));

    let mut parser = Parser::new();
    let language: Language = tree_sitter_python::language().into();
    let language: Language = tree_sitter_python::LANGUAGE.into();
    parser.set_language(&language)?;

    let tree = parser.parse(source_code, None).unwrap();

@@ -1,6 +1,7 @@
//! Test Python async query

use tree_sitter::{Parser, Query, QueryCursor, Language};
use streaming_iterator::StreamingIterator;

fn main() -> anyhow::Result<()> {
    let source_code = r#"
@@ -12,7 +13,7 @@ async def async_function():
    "#;

    let mut parser = Parser::new();
    let language: Language = tree_sitter_python::language().into();
    let language: Language = tree_sitter_python::LANGUAGE.into();
    parser.set_language(&language)?;

    let tree = parser.parse(source_code, None).unwrap();

(File diff suppressed because it is too large.)

@@ -71,9 +71,13 @@ Every multi-step task follows this pattern:
1. **Start**: Call todo_read, then todo_write to create your plan
2. **During**: Execute steps, then todo_read and todo_write to mark progress
3. **End**: Call todo_read to verify all items complete

Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists persist across g3 sessions in the workspace directory.

IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
`{{Based on the requirements file with SHA256: <SHA>}}`
This ensures the TODO list is tracked against the specific version of requirements it was generated from.

## Examples

**Example 1: Feature Implementation**
@@ -185,7 +189,25 @@ Do not explain what you're going to do - just do it by calling the tools.
";

pub const SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE: &'static str =
    concatcp!(CODING_STYLE, SYSTEM_NATIVE_TOOL_CALLS);
    concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);

/// Generate system prompt based on whether multiple tool calls are allowed
pub fn get_system_prompt_for_native(allow_multiple: bool) -> String {
    if allow_multiple {
        // Replace the "ONE tool" instruction with multiple tools instruction
        let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string();
        base.replace(
            "2. Call the appropriate tool with the required parameters",
            "2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate.
<use_parallel_tool_calls>
For maximum efficiency, whenever you perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like `ls` or `list_dir`, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially.
</use_parallel_tool_calls>
"
        )
    } else {
        SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
    }
}

const SYSTEM_NON_NATIVE_TOOL_USE: &'static str =
    "You are G3, a general-purpose AI agent. Your goal is to analyze and solve problems by writing code.
@@ -285,6 +307,10 @@ Every multi-step task follows this pattern:

Note: todo_write replaces the entire list, so always read first to preserve content.

IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
`{{Based on the requirements file with SHA256: <SHA>}}`
This ensures the TODO list is tracked against the specific version of requirements it was generated from.

## Examples

**Example 1: Feature Implementation**
@@ -345,4 +371,4 @@ If you can complete it with 1-2 tool calls, skip TODO.
";

pub const SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE: &'static str =
    concatcp!(CODING_STYLE, SYSTEM_NON_NATIVE_TOOL_USE);
    concatcp!(SYSTEM_NON_NATIVE_TOOL_USE, CODING_STYLE);
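The prompt additions above require the first line of todo.g3.md to embed the requirements SHA256; combined with `set_requirements_sha` and the `check_todo_staleness` flag, that line enables a simple staleness check. A hedged sketch of such a check (illustrative only; the actual g3-core logic is not shown in this diff, and the exact marker delimiters may differ):

```rust
// Illustrative staleness check built on the first-line marker that embeds
// "SHA256: <SHA>"; not the real g3-core implementation.
fn todo_is_stale(todo_contents: &str, current_requirements_sha: &str) -> bool {
    let first_line = todo_contents.lines().next().unwrap_or("");
    match first_line.split("SHA256:").nth(1) {
        Some(rest) => {
            // Strip trailing brace(s) and whitespace around the recorded SHA.
            let recorded_sha = rest.trim().trim_end_matches('}');
            // A recorded SHA that differs from the current one means the TODO
            // list was generated from an older requirements.md.
            recorded_sha != current_requirements_sha
        }
        // No recorded SHA on the first line: treat the TODO list as stale.
        None => true,
    }
}

fn main() {
    let todo = "{Based on the requirements file with SHA256: abc123}\n- [ ] Task 1\n";
    assert!(!todo_is_stale(todo, "abc123"));
    assert!(todo_is_stale(todo, "def456"));
}
```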
@@ -6,14 +6,10 @@ use std::sync::Arc;
|
||||
fn test_task_result_basic_functionality() {
|
||||
// Create a context window with some messages
|
||||
let mut context = ContextWindow::new(10000);
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Test message 1".to_string(),
|
||||
});
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: "Response 1".to_string(),
|
||||
});
|
||||
context.add_message(Message::new(MessageRole::User, "Test message 1".to_string())
|
||||
);
|
||||
context.add_message(Message::new(MessageRole::Assistant, "Response 1".to_string())
|
||||
);
|
||||
|
||||
// Create a TaskResult
|
||||
let response = "This is the response\n\nFinal output block".to_string();
|
||||
@@ -100,10 +96,7 @@ fn test_context_window_preservation() {
|
||||
|
||||
// Add some messages
|
||||
for i in 0..5 {
|
||||
context.add_message(Message {
|
||||
role: if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant },
|
||||
content: format!("Message {}", i),
|
||||
});
|
||||
context.add_message(Message::new(if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant }, format!("Message {}", i)));
|
||||
}
|
||||
|
||||
// Create TaskResult
|
||||
|
||||
@@ -56,6 +56,13 @@ pub trait UiWriter: Send + Sync {
|
||||
/// Returns true if this UI writer wants full, untruncated output
|
||||
/// Default is false (truncate for human readability)
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
|
||||
/// Prompt the user for a yes/no confirmation
|
||||
fn prompt_user_yes_no(&self, message: &str) -> bool;
|
||||
|
||||
/// Prompt the user to choose from a list of options
|
||||
/// Returns the index of the selected option
|
||||
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize;
|
||||
}
|
||||
|
||||
/// A no-op implementation for when UI output is not needed
|
||||
@@ -80,4 +87,6 @@ impl UiWriter for NullUiWriter {
|
||||
fn notify_sse_received(&self) {}
|
||||
fn flush(&self) {}
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
fn prompt_user_yes_no(&self, _message: &str) -> bool { true }
|
||||
fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize { 0 }
|
||||
}
|
||||
@@ -551,6 +551,7 @@ async fn test_cpp_search() {
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[ignore]
|
||||
async fn test_kotlin_search() {
|
||||
let request = CodeSearchRequest {
|
||||
searches: vec![SearchSpec {
|
||||
|
||||
@@ -46,10 +46,10 @@ fn test_thin_context_basic() {
|
||||
// Add some messages to the first third
|
||||
for i in 0..9 {
|
||||
if i % 2 == 0 {
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("Assistant message {}", i),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("Assistant message {}", i),
|
||||
));
|
||||
} else {
|
||||
// Add tool results with varying sizes
|
||||
let content = if i == 1 {
|
||||
@@ -63,10 +63,10 @@ fn test_thin_context_basic() {
|
||||
format!("Tool result: small result {}", i)
|
||||
};
|
||||
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
content,
|
||||
});
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,10 +98,10 @@ fn test_thin_write_file_tool_calls() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
// Add some messages including a write_file tool call with large content
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Please create a large file".to_string(),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Please create a large file".to_string(),
|
||||
));
|
||||
|
||||
// Add an assistant message with a write_file tool call containing large content
|
||||
let large_content = "x".repeat(1500);
|
||||
@@ -109,22 +109,22 @@ fn test_thin_write_file_tool_calls() {
|
||||
r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
|
||||
large_content
|
||||
);
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("I'll create that file.\n\n{}", tool_call_json),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("I'll create that file.\n\n{}", tool_call_json),
|
||||
));
|
||||
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Tool result: ✅ Successfully wrote 1500 lines".to_string(),
|
||||
));
|
||||
|
||||
// Add more messages to ensure we have enough for "first third" logic
|
||||
for i in 0..6 {
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("Response {}", i),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("Response {}", i),
|
||||
));
|
||||
}
|
||||
|
||||
// Trigger thinning at 50%
|
||||
@@ -154,10 +154,10 @@ fn test_thin_str_replace_tool_calls() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
// Add some messages including a str_replace tool call with large diff
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Please update the file".to_string(),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Please update the file".to_string(),
|
||||
));
|
||||
|
||||
// Add an assistant message with a str_replace tool call containing large diff
|
||||
let large_diff = format!("--- old\n{}\n+++ new\n{}", "-old line\n".repeat(100), "+new line\n".repeat(100));
|
||||
@@ -165,22 +165,22 @@ fn test_thin_str_replace_tool_calls() {
|
||||
r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
|
||||
large_diff.replace('\n', "\\n")
|
||||
);
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("I'll update that file.\n\n{}", tool_call_json),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("I'll update that file.\n\n{}", tool_call_json),
|
||||
));
|
||||
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Tool result: ✅ applied unified diff".to_string(),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Tool result: ✅ applied unified diff".to_string(),
|
||||
));
|
||||
|
||||
// Add more messages to ensure we have enough for "first third" logic
|
||||
for i in 0..6 {
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("Response {}", i),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!("Response {}", i),
|
||||
));
|
||||
}
|
||||
|
||||
// Trigger thinning at 50%
|
||||
@@ -212,10 +212,10 @@ fn test_thin_context_no_large_results() {
|
||||
|
||||
// Add only small messages
|
||||
for i in 0..9 {
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: format!("Tool result: small {}", i),
|
||||
});
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
format!("Tool result: small {}", i),
|
||||
));
|
||||
}
|
||||
|
||||
context.used_tokens = 5000;
|
||||
@@ -244,7 +244,7 @@ fn test_thin_context_only_affects_first_third() {
|
||||
MessageRole::Assistant
|
||||
};
|
||||
|
||||
context.add_message(Message { role, content });
|
||||
context.add_message(Message::new(role, content));
|
||||
}
|
||||
|
||||
context.used_tokens = 5000;
|
||||
|
||||
@@ -8,27 +8,18 @@ fn test_todo_read_results_not_thinned() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
// Add a todo_read tool call
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
|
||||
});
|
||||
context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));
|
||||
|
||||
// Add a large TODO result (> 500 chars)
|
||||
let large_todo_result = format!(
|
||||
"Tool result: 📝 TODO list:\n{}",
|
||||
"- [ ] Task with long description\n".repeat(50)
|
||||
);
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: large_todo_result.clone(),
|
||||
});
|
||||
context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
|
||||
|
||||
    // Add more messages to ensure we have enough for "first third" logic
    for i in 0..6 {
        context.add_message(Message {
            role: MessageRole::Assistant,
            content: format!("Response {}", i),
        });
        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
    }

    // Trigger thinning at 50%
@@ -65,27 +56,18 @@ fn test_todo_write_results_not_thinned() {

    // Add a todo_write tool call
    let large_content = "- [ ] Task\n".repeat(100);
    context.add_message(Message {
        role: MessageRole::Assistant,
        content: format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content),
    });
    context.add_message(Message::new(MessageRole::Assistant, format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content)));

    // Add a large TODO write result
    let large_todo_result = format!(
        "Tool result: ✅ TODO list updated ({} chars) and saved to todo.g3.md",
        large_content.len()
    );
    context.add_message(Message {
        role: MessageRole::User,
        content: large_todo_result.clone(),
    });
    context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));

    // Add more messages
    for i in 0..6 {
        context.add_message(Message {
            role: MessageRole::Assistant,
            content: format!("Response {}", i),
        });
        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
    }

    // Trigger thinning at 50%
@@ -119,24 +101,15 @@ fn test_non_todo_results_still_thinned() {
    let mut context = ContextWindow::new(10000);

    // Add a non-TODO tool call (e.g., read_file)
    context.add_message(Message {
        role: MessageRole::Assistant,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
    });
    context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string()));

    // Add a large read_file result (> 500 chars)
    let large_result = format!("Tool result: {}", "x".repeat(1500));
    context.add_message(Message {
        role: MessageRole::User,
        content: large_result,
    });
    context.add_message(Message::new(MessageRole::User, large_result));

    // Add more messages
    for i in 0..6 {
        context.add_message(Message {
            role: MessageRole::Assistant,
            content: format!("Response {}", i),
        });
        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
    }

    // Trigger thinning at 50%
@@ -172,27 +145,18 @@ fn test_todo_read_with_spaces_in_tool_name() {
    let mut context = ContextWindow::new(10000);

    // Add a todo_read tool call with spaces (JSON formatting variation)
    context.add_message(Message {
        role: MessageRole::Assistant,
        content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
    });
    context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));

    // Add a large TODO result
    let large_todo_result = format!(
        "Tool result: 📝 TODO list:\n{}",
        "- [ ] Task\n".repeat(50)
    );
    context.add_message(Message {
        role: MessageRole::User,
        content: large_todo_result.clone(),
    });
    context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));

    // Add more messages
    for i in 0..6 {
        context.add_message(Message {
            role: MessageRole::Assistant,
            content: format!("Response {}", i),
        });
        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
    }

    // Trigger thinning

@@ -27,7 +27,7 @@ fn get_todo_path(temp_dir: &TempDir) -> PathBuf {
#[serial]
async fn test_todo_write_creates_file() {
    let temp_dir = TempDir::new().unwrap();
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);

    // Initially, todo.g3.md should not exist
@@ -67,7 +67,7 @@ async fn test_todo_read_from_file() {
    fs::write(&todo_path, test_content).unwrap();

    // Create agent (should load from file)
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;

    // Create a tool call to read TODO
    let tool_call = g3_core::ToolCall {
@@ -88,7 +88,7 @@ async fn test_todo_read_from_file() {
#[serial]
async fn test_todo_read_empty_file() {
    let temp_dir = TempDir::new().unwrap();
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;

    // Create a tool call to read TODO (file doesn't exist)
    let tool_call = g3_core::ToolCall {
@@ -111,7 +111,7 @@ async fn test_todo_persistence_across_agents() {

    // Agent 1: Write TODO
    {
        let agent = create_test_agent_in_dir(&temp_dir).await;
        let mut agent = create_test_agent_in_dir(&temp_dir).await;
        let tool_call = g3_core::ToolCall {
            tool: "todo_write".to_string(),
            args: serde_json::json!({
@@ -126,7 +126,7 @@ async fn test_todo_persistence_across_agents() {

    // Agent 2: Read TODO (new agent instance)
    {
        let agent = create_test_agent_in_dir(&temp_dir).await;
        let mut agent = create_test_agent_in_dir(&temp_dir).await;
        let tool_call = g3_core::ToolCall {
            tool: "todo_read".to_string(),
            args: serde_json::json!({}),
@@ -143,7 +143,7 @@ async fn test_todo_persistence_across_agents() {
#[serial]
async fn test_todo_update_preserves_file() {
    let temp_dir = TempDir::new().unwrap();
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);

    // Write initial TODO
@@ -173,7 +173,7 @@ async fn test_todo_update_preserves_file() {
#[serial]
async fn test_todo_handles_large_content() {
    let temp_dir = TempDir::new().unwrap();
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);

    // Create a large TODO (but under the 50k limit)
@@ -202,7 +202,7 @@ async fn test_todo_handles_large_content() {
#[serial]
async fn test_todo_respects_size_limit() {
    let temp_dir = TempDir::new().unwrap();
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;

    // Create content that exceeds the default 50k limit
    let huge_content = "x".repeat(60_000);
@@ -232,7 +232,7 @@ async fn test_todo_agent_initialization_loads_file() {
    fs::write(&todo_path, initial_content).unwrap();

    // Create agent - should load the file during initialization
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;

    // Read TODO - should return the pre-existing content
    let tool_call = g3_core::ToolCall {
@@ -248,7 +248,7 @@ async fn test_todo_agent_initialization_loads_file() {
#[serial]
async fn test_todo_handles_unicode_content() {
    let temp_dir = TempDir::new().unwrap();
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);

    // Create TODO with unicode characters
@@ -283,7 +283,7 @@ async fn test_todo_handles_unicode_content() {
#[serial]
async fn test_todo_empty_content_creates_empty_file() {
    let temp_dir = TempDir::new().unwrap();
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);

    // Write empty TODO
@@ -306,7 +306,7 @@ async fn test_todo_empty_content_creates_empty_file() {
#[serial]
async fn test_todo_whitespace_only_content() {
    let temp_dir = TempDir::new().unwrap();
    let agent = create_test_agent_in_dir(&temp_dir).await;
    let mut agent = create_test_agent_in_dir(&temp_dir).await;

    // Write whitespace-only TODO
    let tool_call = g3_core::ToolCall {

193 crates/g3-core/tests/todo_staleness_test.rs Normal file
@@ -0,0 +1,193 @@
use g3_core::{Agent, ToolCall};
use g3_core::ui_writer::UiWriter;
use g3_config::Config;
use std::sync::{Arc, Mutex};
use tempfile::TempDir;
use serial_test::serial;

// Mock UI Writer for testing
#[derive(Clone)]
struct MockUiWriter {
    output: Arc<Mutex<Vec<String>>>,
    prompt_responses: Arc<Mutex<Vec<bool>>>,
    choice_responses: Arc<Mutex<Vec<usize>>>,
}

impl MockUiWriter {
    fn new() -> Self {
        Self {
            output: Arc::new(Mutex::new(Vec::new())),
            prompt_responses: Arc::new(Mutex::new(Vec::new())),
            choice_responses: Arc::new(Mutex::new(Vec::new())),
        }
    }

    fn set_prompt_response(&self, response: bool) {
        self.prompt_responses.lock().unwrap().push(response);
    }

    fn set_choice_response(&self, response: usize) {
        self.choice_responses.lock().unwrap().push(response);
    }

    fn get_output(&self) -> Vec<String> {
        self.output.lock().unwrap().clone()
    }
}

impl UiWriter for MockUiWriter {
    fn print(&self, message: &str) {
        self.output.lock().unwrap().push(message.to_string());
    }
    fn println(&self, message: &str) {
        self.output.lock().unwrap().push(message.to_string());
    }
    fn print_inline(&self, message: &str) {
        self.output.lock().unwrap().push(message.to_string());
    }
    fn print_system_prompt(&self, _prompt: &str) {}
    fn print_context_status(&self, message: &str) {
        self.output.lock().unwrap().push(format!("STATUS: {}", message));
    }
    fn print_context_thinning(&self, _message: &str) {}
    fn print_tool_header(&self, _tool_name: &str) {}
    fn print_tool_arg(&self, _key: &str, _value: &str) {}
    fn print_tool_output_header(&self) {}
    fn update_tool_output_line(&self, _line: &str) {}
    fn print_tool_output_line(&self, _line: &str) {}
    fn print_tool_output_summary(&self, _hidden_count: usize) {}
    fn print_tool_timing(&self, _duration_str: &str) {}
    fn print_agent_prompt(&self) {}
    fn print_agent_response(&self, _content: &str) {}
    fn notify_sse_received(&self) {}
    fn flush(&self) {}
    fn wants_full_output(&self) -> bool { false }
    fn prompt_user_yes_no(&self, message: &str) -> bool {
        self.output.lock().unwrap().push(format!("PROMPT: {}", message));
        self.prompt_responses.lock().unwrap().pop().unwrap_or(true)
    }
    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
        self.output.lock().unwrap().push(format!("CHOICE: {} Options: {:?}", message, options));
        self.choice_responses.lock().unwrap().pop().unwrap_or(0)
    }
}

#[tokio::test]
#[serial]
async fn test_todo_staleness_check_matching_sha() {
    let temp_dir = TempDir::new().unwrap();
    let todo_path = temp_dir.path().join("todo.g3.md");
    std::env::set_current_dir(&temp_dir).unwrap();

    let sha = "abc123hash";
    let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha);
    std::fs::write(&todo_path, content).unwrap();

    let mut config = Config::default();
    config.agent.check_todo_staleness = true;

    let ui_writer = MockUiWriter::new();
    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
    agent.set_requirements_sha(sha.to_string());

    let tool_call = ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
    let result = agent.execute_tool(&tool_call).await.unwrap();

    assert!(result.contains("📝 TODO list:"));
    assert!(!result.contains("⚠️ TODO list is stale"));
}

#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_ignore() {
    let temp_dir = TempDir::new().unwrap();
    let todo_path = temp_dir.path().join("todo.g3.md");
    std::env::set_current_dir(&temp_dir).unwrap();

    let sha_file = "old_sha";
    let sha_req = "new_sha";
    let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
    std::fs::write(&todo_path, content).unwrap();

    let mut config = Config::default();
    config.agent.check_todo_staleness = true;

    let ui_writer = MockUiWriter::new();
    ui_writer.set_choice_response(0); // Ignore

    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
    agent.set_requirements_sha(sha_req.to_string());

    let tool_call = ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
    let result = agent.execute_tool(&tool_call).await.unwrap();

    assert!(result.contains("📝 TODO list:"));
}

#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_mark_stale() {
    let temp_dir = TempDir::new().unwrap();
    let todo_path = temp_dir.path().join("todo.g3.md");
    std::env::set_current_dir(&temp_dir).unwrap();

    let sha_file = "old_sha";
    let sha_req = "new_sha";
    let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
    std::fs::write(&todo_path, content).unwrap();

    let mut config = Config::default();
    config.agent.check_todo_staleness = true;

    let ui_writer = MockUiWriter::new();
    ui_writer.set_choice_response(1); // Mark as Stale

    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
    agent.set_requirements_sha(sha_req.to_string());

    let tool_call = ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
    let result = agent.execute_tool(&tool_call).await.unwrap();

    assert!(result.contains("⚠️ TODO list is stale"));
    assert!(result.contains("Please regenerate"));
}

// Note: We cannot easily test "Quit" (index 2) because it calls std::process::exit(0)
// which would kill the test runner. We skip that test case here.

#[tokio::test]
#[serial]
async fn test_todo_staleness_check_disabled() {
    let temp_dir = TempDir::new().unwrap();
    let todo_path = temp_dir.path().join("todo.g3.md");
    std::env::set_current_dir(&temp_dir).unwrap();

    let sha_file = "old_sha";
    let sha_req = "new_sha";
    let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
    std::fs::write(&todo_path, content).unwrap();

    let mut config = Config::default();
    config.agent.check_todo_staleness = false;

    let ui_writer = MockUiWriter::new();
    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
    agent.set_requirements_sha(sha_req.to_string());

    let tool_call = ToolCall {
        tool: "todo_read".to_string(),
        args: serde_json::json!({}),
    };
    let result = agent.execute_tool(&tool_call).await.unwrap();

    assert!(result.contains("📝 TODO list:"));
}
13 crates/g3-execution/examples/setup_coverage_tools.rs Normal file
@@ -0,0 +1,13 @@
use g3_execution::ensure_coverage_tools_installed;

fn main() -> anyhow::Result<()> {
    // Ensure coverage tools are installed
    let already_installed = ensure_coverage_tools_installed()?;

    if already_installed {
        println!("All coverage tools are already installed!");
    } else {
        println!("Coverage tools have been installed successfully!");
    }
    Ok(())
}
@@ -5,6 +5,17 @@ use tempfile::NamedTempFile;
|
||||
use std::io::Write;
|
||||
use tracing::{info, debug, error};
|
||||
|
||||
/// Expand tilde (~) in a path to the user's home directory
|
||||
fn expand_tilde(path: &str) -> String {
|
||||
if path.starts_with("~") {
|
||||
if let Some(home) = std::env::var_os("HOME") {
|
||||
let home_str = home.to_string_lossy();
|
||||
return path.replacen("~", &home_str, 1);
|
||||
}
|
||||
}
|
||||
path.to_string()
|
||||
}
|
||||
|
||||
pub struct CodeExecutor {
|
||||
// Future: add configuration for execution limits, sandboxing, etc.
|
||||
}
|
||||
@@ -241,11 +252,33 @@ impl CodeExecutor {
|
||||
&self,
|
||||
code: &str,
|
||||
receiver: &R
|
||||
) -> Result<ExecutionResult> {
|
||||
self.execute_bash_streaming_in_dir(code, receiver, None).await
|
||||
}
|
||||
|
||||
/// Execute bash command with streaming output in a specific directory
|
||||
pub async fn execute_bash_streaming_in_dir<R: OutputReceiver>(
|
||||
&self,
|
||||
code: &str,
|
||||
receiver: &R,
|
||||
working_dir: Option<&str>,
|
||||
) -> Result<ExecutionResult> {
|
||||
use std::process::Stdio;
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command as TokioCommand;
|
||||
|
||||
// CRITICAL DEBUG: Print to stderr so it's always visible
|
||||
debug!("========== execute_bash_streaming_in_dir START ==========");
|
||||
debug!("Code to execute: {}", code);
|
||||
debug!("Working directory parameter: {:?}", working_dir);
|
||||
debug!("FULL DIAGNOSTIC: code='{}', working_dir={:?}", code, working_dir);
|
||||
|
||||
if let Some(dir) = working_dir {
|
||||
debug!("Working dir exists check: {}", std::path::Path::new(dir).exists());
|
||||
debug!("Working dir is_dir check: {}", std::path::Path::new(dir).is_dir());
|
||||
}
|
||||
debug!("Current process working directory: {:?}", std::env::current_dir());
|
||||
|
||||
// Check if this is a detached/daemon command that should run independently
|
||||
// Look for patterns like: setsid, nohup with &, or explicit backgrounding with disown
|
||||
let is_detached = code.trim_start().starts_with("setsid ")
|
||||
@@ -255,10 +288,17 @@ impl CodeExecutor {
|
||||
|
||||
if is_detached {
|
||||
// For detached commands, just spawn and return immediately
|
||||
TokioCommand::new("bash")
|
||||
.arg("-c")
|
||||
.arg(code)
|
||||
.spawn()?;
|
||||
let mut cmd = TokioCommand::new("bash");
|
||||
cmd.arg("-c")
|
||||
.arg(code);
|
||||
|
||||
// Set working directory if provided
|
||||
if let Some(dir) = working_dir {
|
||||
let expanded_dir = expand_tilde(dir);
|
||||
cmd.current_dir(&expanded_dir);
|
||||
}
|
||||
|
||||
cmd.spawn()?;
|
||||
|
||||
// Don't wait for the process - it's meant to run independently
|
||||
return Ok(ExecutionResult {
|
||||
@@ -269,12 +309,33 @@ impl CodeExecutor {
|
||||
});
|
||||
}
|
||||
|
||||
let mut child = TokioCommand::new("bash")
|
||||
.arg("-c")
|
||||
let mut cmd = TokioCommand::new("bash");
|
||||
cmd.arg("-c")
|
||||
.arg(code)
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn()?;
|
||||
.stderr(Stdio::piped());
|
||||
|
||||
// Set working directory if provided
|
||||
if let Some(dir) = working_dir {
|
||||
debug!("Setting current_dir on command to: {}", dir);
|
||||
let expanded_dir = expand_tilde(dir);
|
||||
debug!("Expanded working dir: {}", expanded_dir);
|
||||
debug!("Expanded dir exists: {}", std::path::Path::new(&expanded_dir).exists());
|
||||
debug!("Expanded dir is_dir: {}", std::path::Path::new(&expanded_dir).is_dir());
|
||||
cmd.current_dir(&expanded_dir);
|
||||
}
|
||||
|
||||
debug!("About to spawn command...");
|
||||
let spawn_result = cmd.spawn();
|
||||
debug!("Spawn result: {:?}", spawn_result.is_ok());
|
||||
let mut child = match spawn_result {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
debug!("SPAWN ERROR: {:?}", e);
|
||||
return Err(e.into());
|
||||
}
|
||||
};
|
||||
debug!("Command spawned successfully");
|
||||
|
||||
let stdout = child.stdout.take().unwrap();
|
||||
let stderr = child.stderr.take().unwrap();
|
||||
@@ -322,11 +383,106 @@ impl CodeExecutor {
|
||||
|
||||
let status = child.wait().await?;
|
||||
|
||||
Ok(ExecutionResult {
|
||||
let result = ExecutionResult {
|
||||
stdout: stdout_output.join("\n"),
|
||||
stderr: stderr_output.join("\n"),
|
||||
exit_code: status.code().unwrap_or(-1),
|
||||
success: status.success(),
|
||||
})
|
||||
};
|
||||
|
||||
debug!("========== execute_bash_streaming_in_dir END ==========");
|
||||
debug!("Exit code: {}", result.exit_code);
|
||||
debug!("Success: {}", result.success);
|
||||
debug!("Stdout length: {}", result.stdout.len());
|
||||
debug!("Stderr length: {}", result.stderr.len());
|
||||
if !result.stderr.is_empty() {
|
||||
debug!("Stderr content: {}", result.stderr);
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if rustup component llvm-tools-preview is installed
|
||||
pub fn is_llvm_tools_installed() -> Result<bool> {
|
||||
let output = Command::new("rustup")
|
||||
.args(&["component", "list", "--installed"])
|
||||
.output()?;
|
||||
|
||||
let installed = String::from_utf8_lossy(&output.stdout)
|
||||
.lines()
|
||||
.any(|line| line.trim() == "llvm-tools-preview" || line.starts_with("llvm-tools"));
|
||||
|
||||
Ok(installed)
|
||||
}
|
||||
|
||||
/// Check if cargo-llvm-cov is installed
|
||||
pub fn is_cargo_llvm_cov_installed() -> Result<bool> {
|
||||
let output = Command::new("cargo")
|
||||
.args(&["--list"])
|
||||
.output()?;
|
||||
|
||||
let installed = String::from_utf8_lossy(&output.stdout)
|
||||
.lines()
|
||||
.any(|line| line.trim().starts_with("llvm-cov"));
|
||||
|
||||
Ok(installed)
|
||||
}
|
||||
|
||||
/// Install llvm-tools-preview via rustup
|
||||
pub fn install_llvm_tools() -> Result<()> {
|
||||
info!("Installing llvm-tools-preview...");
|
||||
let output = Command::new("rustup")
|
||||
.args(&["component", "add", "llvm-tools-preview"])
|
||||
.output()?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr);
|
||||
}
|
||||
|
||||
info!("✅ llvm-tools-preview installed successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Install cargo-llvm-cov via cargo install
|
||||
pub fn install_cargo_llvm_cov() -> Result<()> {
|
||||
info!("Installing cargo-llvm-cov... (this may take a few minutes)");
|
||||
let output = Command::new("cargo")
|
||||
.args(&["install", "cargo-llvm-cov"])
|
||||
.output()?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr);
|
||||
}
|
||||
|
||||
info!("✅ cargo-llvm-cov installed successfully");
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Ensure both llvm-tools-preview and cargo-llvm-cov are installed
|
||||
/// Returns Ok(true) if tools were already installed, Ok(false) if they were installed by this function
|
||||
pub fn ensure_coverage_tools_installed() -> Result<bool> {
|
||||
let mut already_installed = true;
|
||||
|
||||
// Check and install llvm-tools-preview
|
||||
if !is_llvm_tools_installed()? {
|
||||
info!("llvm-tools-preview not found, installing...");
|
||||
install_llvm_tools()?;
|
||||
already_installed = false;
|
||||
} else {
|
||||
info!("✅ llvm-tools-preview is already installed");
|
||||
}
|
||||
|
||||
// Check and install cargo-llvm-cov
|
||||
if !is_cargo_llvm_cov_installed()? {
|
||||
info!("cargo-llvm-cov not found, installing...");
|
||||
install_cargo_llvm_cov()?;
|
||||
already_installed = false;
|
||||
} else {
|
||||
info!("✅ cargo-llvm-cov is already installed");
|
||||
}
|
||||
|
||||
Ok(already_installed)
|
||||
}
|
||||
|
||||
14 crates/g3-planner/Cargo.toml Normal file
@@ -0,0 +1,14 @@
[package]
name = "g3-planner"
version = "0.1.0"
edition = "2021"
description = "Fast-discovery planner for G3 AI coding agent"

[dependencies]
g3-providers = { path = "../g3-providers" }
serde = { workspace = true }
serde_json = { workspace = true }
const_format = "0.2"
anyhow = { workspace = true }
tokio = { workspace = true }
chrono = { version = "0.4", features = ["serde"] }
724 crates/g3-planner/src/code_explore.rs Normal file
@@ -0,0 +1,724 @@
//! Code exploration module for analyzing codebases
|
||||
//!
|
||||
//! This module provides functions to explore and analyze codebases
|
||||
//! for various programming languages, returning structured reports
|
||||
//! about the code structure.
|
||||
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
/// Main entry point for exploring a codebase at the given path.
|
||||
/// Detects which languages are present and generates a comprehensive report.
|
||||
pub fn explore_codebase(path: &str) -> String {
|
||||
let path = expand_tilde(path);
|
||||
let mut report = String::new();
|
||||
let mut languages_found = Vec::new();
|
||||
|
||||
// Check for each language and add to report if found
|
||||
if has_rust_files(&path) {
|
||||
languages_found.push("Rust".to_string());
|
||||
report.push_str(&explore_rust(&path));
|
||||
}
|
||||
if has_java_files(&path) {
|
||||
languages_found.push("Java".to_string());
|
||||
report.push_str(&explore_java(&path));
|
||||
}
|
||||
if has_kotlin_files(&path) {
|
||||
languages_found.push("Kotlin".to_string());
|
||||
report.push_str(&explore_kotlin(&path));
|
||||
}
|
||||
if has_swift_files(&path) {
|
||||
languages_found.push("Swift".to_string());
|
||||
report.push_str(&explore_swift(&path));
|
||||
}
|
||||
if has_go_files(&path) {
|
||||
languages_found.push("Go".to_string());
|
||||
report.push_str(&explore_go(&path));
|
||||
}
|
||||
if has_python_files(&path) {
|
||||
languages_found.push("Python".to_string());
|
||||
report.push_str(&explore_python(&path));
|
||||
}
|
||||
if has_typescript_files(&path) {
|
||||
languages_found.push("TypeScript".to_string());
|
||||
report.push_str(&explore_typescript(&path));
|
||||
}
|
||||
if has_javascript_files(&path) {
|
||||
languages_found.push("JavaScript".to_string());
|
||||
report.push_str(&explore_javascript(&path));
|
||||
}
|
||||
if has_cpp_files(&path) {
|
||||
languages_found.push("C/C++".to_string());
|
||||
report.push_str(&explore_cpp(&path));
|
||||
}
|
||||
if has_markdown_files(&path) {
|
||||
languages_found.push("Markdown".to_string());
|
||||
report.push_str(&explore_markdown(&path));
|
||||
}
|
||||
if has_yaml_files(&path) {
|
||||
languages_found.push("YAML".to_string());
|
||||
report.push_str(&explore_yaml(&path));
|
||||
}
|
||||
if has_sql_files(&path) {
|
||||
languages_found.push("SQL".to_string());
|
||||
report.push_str(&explore_sql(&path));
|
||||
}
|
||||
if has_ruby_files(&path) {
|
||||
languages_found.push("Ruby".to_string());
|
||||
report.push_str(&explore_ruby(&path));
|
||||
}
|
||||
|
||||
if languages_found.is_empty() {
|
||||
report.push_str("No recognized programming languages found in the codebase.\n");
|
||||
} else {
|
||||
let header = format!(
|
||||
"=== CODEBASE ANALYSIS ===\nLanguages detected: {}\n\n",
|
||||
languages_found.join(", ")
|
||||
);
|
||||
report = header + &report;
|
||||
}
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Expand tilde to home directory
|
||||
fn expand_tilde(path: &str) -> String {
|
||||
if path.starts_with("~/") {
|
||||
if let Some(home) = std::env::var_os("HOME") {
|
||||
return path.replacen("~", &home.to_string_lossy(), 1);
|
||||
}
|
||||
}
|
||||
path.to_string()
|
||||
}
|
||||
|
||||
/// Run a shell command and return its output
|
||||
fn run_command(cmd: &str, working_dir: &str) -> String {
|
||||
let output = Command::new("sh")
|
||||
.arg("-c")
|
||||
.arg(cmd)
|
||||
.current_dir(working_dir)
|
||||
.output();
|
||||
|
||||
match output {
|
||||
Ok(out) => {
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
let stderr = String::from_utf8_lossy(&out.stderr);
|
||||
if !stdout.is_empty() {
|
||||
stdout.to_string()
|
||||
} else if !stderr.is_empty() {
|
||||
format!("(stderr): {}", stderr)
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
Err(e) => format!("Error running command: {}", e),
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if files with given extension exist
|
||||
fn has_files_with_extension(path: &str, extension: &str) -> bool {
|
||||
let cmd = format!(
|
||||
"find . -name '.git' -prune -o -type f -name '*.{}' -print | head -1",
|
||||
extension
|
||||
);
|
||||
!run_command(&cmd, path).trim().is_empty()
|
||||
}
|
||||
|
||||
// Language detection functions
|
||||
fn has_rust_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "rs") || Path::new(path).join("Cargo.toml").exists()
|
||||
}
|
||||
|
||||
fn has_java_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "java")
|
||||
}
|
||||
|
||||
fn has_kotlin_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "kt") || has_files_with_extension(path, "kts")
|
||||
}
|
||||
|
||||
fn has_swift_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "swift")
|
||||
}
|
||||
|
||||
fn has_go_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "go")
|
||||
}
|
||||
|
||||
fn has_python_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "py")
|
||||
}
|
||||
|
||||
fn has_typescript_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "ts") || has_files_with_extension(path, "tsx")
|
||||
}
|
||||
|
||||
fn has_javascript_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "js") || has_files_with_extension(path, "jsx")
|
||||
}
|
||||
|
||||
fn has_cpp_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "cpp")
|
||||
|| has_files_with_extension(path, "cc")
|
||||
|| has_files_with_extension(path, "c")
|
||||
|| has_files_with_extension(path, "h")
|
||||
|| has_files_with_extension(path, "hpp")
|
||||
}
|
||||
|
||||
fn has_markdown_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "md")
|
||||
}
|
||||
|
||||
fn has_yaml_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "yaml") || has_files_with_extension(path, "yml")
|
||||
}
|
||||
|
||||
fn has_sql_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "sql")
|
||||
}
|
||||
|
||||
fn has_ruby_files(path: &str) -> bool {
|
||||
has_files_with_extension(path, "rb")
|
||||
}
|
||||
|
||||
/// Explore Rust codebase
|
||||
pub fn explore_rust(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== RUST ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.rs' . 2>/dev/null | grep -v '/target/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Dependencies (Cargo.toml)
|
||||
report.push_str("--- Dependencies (Cargo.toml) ---\n");
|
||||
let cargo = run_command("cat Cargo.toml 2>/dev/null | head -50", path);
|
||||
report.push_str(&cargo);
|
||||
report.push('\n');
|
||||
|
||||
// Data structures
|
||||
report.push_str("--- Data Structures (Structs, Enums, Types) ---\n");
|
||||
let structs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?(struct|enum|type|union) ' . 2>/dev/null | grep -v '/target/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&structs);
|
||||
report.push('\n');
|
||||
|
||||
// Traits and implementations
|
||||
report.push_str("--- Traits & Implementations ---\n");
|
||||
let traits = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?trait |^impl ' . 2>/dev/null | grep -v '/target/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&traits);
|
||||
report.push('\n');
|
||||
|
||||
// Public functions
|
||||
report.push_str("--- Public Functions ---\n");
|
||||
let funcs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^pub (async )?fn ' . 2>/dev/null | grep -v '/target/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&funcs);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore Java codebase
|
||||
pub fn explore_java(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== JAVA ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.java' . 2>/dev/null | grep -v '/build/' | grep -v '/target/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Build files
|
||||
report.push_str("--- Build Configuration ---\n");
|
||||
let build = run_command(
|
||||
"cat pom.xml 2>/dev/null | head -50 || cat build.gradle 2>/dev/null | head -50",
|
||||
path,
|
||||
);
|
||||
report.push_str(&build);
|
||||
report.push('\n');
|
||||
|
||||
// Classes and interfaces
|
||||
report.push_str("--- Classes & Interfaces ---\n");
|
||||
let classes = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.java' '^(public |private |protected )?(abstract )?(class|interface|enum|record) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&classes);
|
||||
report.push('\n');
|
||||
|
||||
// Public methods
|
||||
report.push_str("--- Public Methods ---\n");
|
||||
let methods = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.java' '^\s+public .+\(' . 2>/dev/null | grep -v '/build/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&methods);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore Kotlin codebase
|
||||
pub fn explore_kotlin(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== KOTLIN ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.kt' -g '*.kts' . 2>/dev/null | grep -v '/build/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Build files
|
||||
report.push_str("--- Build Configuration ---\n");
|
||||
let build = run_command("cat build.gradle.kts 2>/dev/null | head -50 || cat build.gradle 2>/dev/null | head -50", path);
|
||||
report.push_str(&build);
|
||||
report.push('\n');
|
||||
|
||||
// Classes, objects, interfaces
|
||||
report.push_str("--- Classes, Objects & Interfaces ---\n");
|
||||
let classes = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.kt' '^(data |sealed |open |abstract )?(class|interface|object|enum class) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&classes);
|
||||
report.push('\n');
|
||||
|
||||
// Functions
|
||||
report.push_str("--- Functions ---\n");
|
||||
let funcs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.kt' '^(suspend |private |internal |public )?fun ' . 2>/dev/null | grep -v '/build/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&funcs);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore Swift codebase
|
||||
pub fn explore_swift(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== SWIFT ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.swift' . 2>/dev/null | grep -v '/.build/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Package.swift
|
||||
report.push_str("--- Package Configuration ---\n");
|
||||
let pkg = run_command("cat Package.swift 2>/dev/null | head -50", path);
|
||||
report.push_str(&pkg);
|
||||
report.push('\n');
|
||||
|
||||
// Classes, structs, protocols
|
||||
report.push_str("--- Types (Classes, Structs, Protocols, Enums) ---\n");
|
||||
let types = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.swift' '^(public |private |internal |open |final )?(class|struct|protocol|enum|actor) ' . 2>/dev/null | grep -v '/.build/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&types);
|
||||
report.push('\n');
|
||||
|
||||
// Functions
|
||||
report.push_str("--- Functions ---\n");
|
||||
let funcs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.swift' '^\s*(public |private |internal |open )?func ' . 2>/dev/null | grep -v '/.build/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&funcs);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore Go codebase
|
||||
pub fn explore_go(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== GO ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.go' . 2>/dev/null | grep -v '/vendor/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// go.mod
|
||||
report.push_str("--- Module Configuration ---\n");
|
||||
let gomod = run_command("cat go.mod 2>/dev/null | head -50", path);
|
||||
report.push_str(&gomod);
|
||||
report.push('\n');
|
||||
|
||||
// Types (structs, interfaces)
|
||||
report.push_str("--- Types (Structs & Interfaces) ---\n");
|
||||
let types = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.go' '^type .+ (struct|interface)' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&types);
|
||||
report.push('\n');
|
||||
|
||||
// Functions
|
||||
report.push_str("--- Functions ---\n");
|
||||
let funcs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.go' '^func ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&funcs);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore Python codebase
|
||||
pub fn explore_python(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== PYTHON ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.py' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | grep -v '/.venv/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Requirements/setup
|
||||
report.push_str("--- Dependencies ---\n");
|
||||
let deps = run_command(
|
||||
"cat requirements.txt 2>/dev/null | head -30 || cat pyproject.toml 2>/dev/null | head -50 || cat setup.py 2>/dev/null | head -30",
|
||||
path,
|
||||
);
|
||||
report.push_str(&deps);
|
||||
report.push('\n');
|
||||
|
||||
// Classes
|
||||
report.push_str("--- Classes ---\n");
|
||||
let classes = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.py' '^class ' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&classes);
|
||||
report.push('\n');
|
||||
|
||||
// Functions
|
||||
report.push_str("--- Functions ---\n");
|
||||
let funcs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.py' '^def |^async def ' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&funcs);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore TypeScript codebase
|
||||
pub fn explore_typescript(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== TYPESCRIPT ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.ts' -g '*.tsx' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/dist/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// package.json
|
||||
report.push_str("--- Package Configuration ---\n");
|
||||
let pkg = run_command("cat package.json 2>/dev/null | head -50", path);
|
||||
report.push_str(&pkg);
|
||||
report.push('\n');
|
||||
|
||||
// Types, interfaces, classes
|
||||
report.push_str("--- Types, Interfaces & Classes ---\n");
|
||||
let types = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.ts' -g '*.tsx' '^export (type|interface|class|enum|abstract class) ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&types);
|
||||
report.push('\n');
|
||||
|
||||
// Functions
|
||||
report.push_str("--- Exported Functions ---\n");
|
||||
let funcs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.ts' -g '*.tsx' '^export (async )?function |^export const .+ = (async )?\(' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&funcs);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore JavaScript codebase
|
||||
pub fn explore_javascript(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== JAVASCRIPT ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.js' -g '*.jsx' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/dist/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// package.json
|
||||
report.push_str("--- Package Configuration ---\n");
|
||||
let pkg = run_command("cat package.json 2>/dev/null | head -50", path);
|
||||
report.push_str(&pkg);
|
||||
report.push('\n');
|
||||
|
||||
// Classes
|
||||
report.push_str("--- Classes ---\n");
|
||||
let classes = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.js' -g '*.jsx' '^(export )?(default )?(class ) ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&classes);
|
||||
report.push('\n');
|
||||
|
||||
// Functions
|
||||
report.push_str("--- Exported Functions ---\n");
|
||||
let funcs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.js' -g '*.jsx' '^(export )?(async )?function |^module\.exports' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&funcs);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore C/C++ codebase
|
||||
pub fn explore_cpp(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== C/C++ ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.c' -g '*.cpp' -g '*.cc' -g '*.h' -g '*.hpp' . 2>/dev/null | grep -v '/build/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Build files
|
||||
report.push_str("--- Build Configuration ---\n");
|
||||
let build = run_command(
|
||||
"cat CMakeLists.txt 2>/dev/null | head -50 || cat Makefile 2>/dev/null | head -50",
|
||||
path,
|
||||
);
|
||||
report.push_str(&build);
|
||||
report.push('\n');
|
||||
|
||||
// Classes and structs
|
||||
report.push_str("--- Classes & Structs ---\n");
|
||||
let classes = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.cpp' -g '*.cc' -g '*.h' -g '*.hpp' '^(class|struct|enum|union|typedef) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&classes);
|
||||
report.push('\n');
|
||||
|
||||
// Functions (simplified pattern)
|
||||
report.push_str("--- Function Declarations ---\n");
|
||||
let funcs = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.h' -g '*.hpp' '^[a-zA-Z_][a-zA-Z0-9_<>: ]*\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(' . 2>/dev/null | grep -v '/build/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&funcs);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore Markdown documentation
|
||||
pub fn explore_markdown(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== MARKDOWN DOCUMENTATION ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- Documentation Files ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.md' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/vendor/' | sort | head -50",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// README content
|
||||
report.push_str("--- README Overview ---\n");
|
||||
let readme = run_command(
|
||||
"cat README.md 2>/dev/null | head -100 || cat readme.md 2>/dev/null | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&readme);
|
||||
report.push('\n');
|
||||
|
||||
// Headers from all markdown files
|
||||
report.push_str("--- Document Headers ---\n");
|
||||
let headers = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename -g '*.md' '^#{1,3} ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&headers);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore YAML configuration files
|
||||
pub fn explore_yaml(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== YAML CONFIGURATION ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- YAML Files ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.yaml' -g '*.yml' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/vendor/' | sort | head -50",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Top-level keys from YAML files
|
||||
report.push_str("--- Top-Level Keys ---\n");
|
||||
let keys = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename -g '*.yaml' -g '*.yml' '^[a-zA-Z_][a-zA-Z0-9_-]*:' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&keys);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore SQL files
|
||||
pub fn explore_sql(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== SQL ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- SQL Files ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.sql' . 2>/dev/null | sort | head -50",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Tables
|
||||
report.push_str("--- Table Definitions ---\n");
|
||||
let tables = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename -i -g '*.sql' 'CREATE TABLE' . 2>/dev/null | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&tables);
|
||||
report.push('\n');
|
||||
|
||||
// Views and procedures
|
||||
report.push_str("--- Views & Procedures ---\n");
|
||||
let views = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename -i -g '*.sql' 'CREATE (VIEW|PROCEDURE|FUNCTION)' . 2>/dev/null | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&views);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
/// Explore Ruby codebase
|
||||
pub fn explore_ruby(path: &str) -> String {
|
||||
let mut report = String::new();
|
||||
report.push_str("\n=== RUBY ===\n\n");
|
||||
|
||||
// File structure
|
||||
report.push_str("--- File Structure ---\n");
|
||||
let files = run_command(
|
||||
"rg --files -g '*.rb' . 2>/dev/null | grep -v '/vendor/' | sort | head -100",
|
||||
path,
|
||||
);
|
||||
report.push_str(&files);
|
||||
report.push('\n');
|
||||
|
||||
// Gemfile
|
||||
report.push_str("--- Dependencies (Gemfile) ---\n");
|
||||
let gemfile = run_command("cat Gemfile 2>/dev/null | head -50", path);
|
||||
report.push_str(&gemfile);
|
||||
report.push('\n');
|
||||
|
||||
// Classes and modules
|
||||
report.push_str("--- Classes & Modules ---\n");
|
||||
let classes = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rb' '^(class|module) ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&classes);
|
||||
report.push('\n');
|
||||
|
||||
// Methods
|
||||
report.push_str("--- Methods ---\n");
|
||||
let methods = run_command(
|
||||
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rb' '^\s*def ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
|
||||
path,
|
||||
);
|
||||
report.push_str(&methods);
|
||||
report.push('\n');
|
||||
|
||||
report
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_expand_tilde() {
|
||||
let path = expand_tilde("~/test");
|
||||
assert!(!path.starts_with("~"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_explore_codebase_returns_string() {
|
||||
// Test with current directory
|
||||
let result = explore_codebase(".");
|
||||
assert!(!result.is_empty());
|
||||
}
|
||||
}
|
||||
325 crates/g3-planner/src/lib.rs Normal file
@@ -0,0 +1,325 @@
//! g3-planner: Fast-discovery planner for G3 AI coding agent
|
||||
//!
|
||||
//! This crate provides functionality to generate initial discovery tool calls
|
||||
//! that are injected into the conversation before the first LLM turn.
|
||||
|
||||
mod code_explore;
|
||||
pub mod prompts;
|
||||
|
||||
pub use code_explore::explore_codebase;
|
||||
|
||||
use anyhow::Result;
|
||||
use g3_providers::{CompletionRequest, LLMProvider, Message, MessageRole};
|
||||
use chrono::Local;
|
||||
use std::fs::{self, OpenOptions};
|
||||
use std::io::Write;
|
||||
use prompts::{DISCOVERY_REQUIREMENTS_PROMPT, DISCOVERY_SYSTEM_PROMPT};
|
||||
|
||||
/// Type alias for a status callback function
|
||||
pub type StatusCallback = Box<dyn Fn(&str) + Send + Sync>;
|
||||
|
||||
/// Generates initial discovery messages for fast codebase exploration.
|
||||
///
|
||||
/// This function:
|
||||
/// 1. Runs explore_codebase to get a codebase report
|
||||
/// 2. Sends the report to the LLM with DISCOVERY_SYSTEM_PROMPT
|
||||
/// 3. Extracts shell commands from the LLM response
|
||||
/// 4. Returns Assistant messages with tool calls for each command
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `codebase_path` - The path to the codebase to explore
|
||||
/// * `provider` - An LLM provider to query for exploration commands
|
||||
/// * `requirements_text` - Optional requirements text to include in the discovery prompt
|
||||
/// * `status_callback` - Optional callback for status updates
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// A `Result<Vec<Message>>` containing Assistant messages with JSON tool call strings.
|
||||
pub async fn get_initial_discovery_messages(
|
||||
codebase_path: &str,
|
||||
requirements_text: Option<&str>,
|
||||
provider: &dyn LLMProvider,
|
||||
status_callback: Option<&StatusCallback>,
|
||||
) -> Result<Vec<Message>> {
|
||||
// Helper to call status callback if provided
|
||||
let status = |msg: &str| {
|
||||
if let Some(cb) = status_callback {
|
||||
cb(msg);
|
||||
}
|
||||
};
|
||||
|
||||
status("🔍 Starting code discovery...");
|
||||
|
||||
// Step 1: Run explore_codebase to get the codebase report
|
||||
let codebase_report = explore_codebase(codebase_path);
|
||||
|
||||
// Write the codebase report to logs directory
|
||||
write_code_report(&codebase_report)?;
|
||||
|
||||
// Step 2: Build the prompt with the codebase report appended
|
||||
let user_prompt = if let Some(requirements) = requirements_text {
|
||||
format!(
|
||||
"{}\n\n
|
||||
=== REQUIREMENTS ===\n\n{}\n\n
|
||||
=== CODEBASE REPORT ===\n\n{}",
|
||||
DISCOVERY_REQUIREMENTS_PROMPT, requirements, codebase_report
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{}\n\n=== CODEBASE REPORT ===\n\n{}",
|
||||
DISCOVERY_REQUIREMENTS_PROMPT, codebase_report
|
||||
)
|
||||
};
|
||||
|
||||
// Step 3: Create messages for the LLM
|
||||
let messages = vec![
|
||||
Message::new(MessageRole::System, DISCOVERY_SYSTEM_PROMPT.to_string()),
|
||||
Message::new(MessageRole::User, user_prompt),
|
||||
];
|
||||
|
||||
// Step 4: Send to LLM
|
||||
let request = CompletionRequest {
|
||||
messages,
|
||||
max_tokens: Some(provider.max_tokens()),
|
||||
temperature: Some(provider.temperature()),
|
||||
stream: false,
|
||||
tools: None,
|
||||
};
|
||||
|
||||
status("🤖 Calling LLM for discovery commands...");
|
||||
|
||||
let response = provider.complete(request).await?;
|
||||
|
||||
// Step 5: Extract shell commands from the response
|
||||
let shell_commands = extract_shell_commands(&response.content);
|
||||
|
||||
status(&format!("📋 Extracted {} discovery commands", shell_commands.len()));
|
||||
|
||||
// Write the discovery commands to logs directory
|
||||
write_discovery_commands(&shell_commands)?;
|
||||
|
||||
// Step 6: Format as tool messages
|
||||
let tool_messages = shell_commands
|
||||
.into_iter()
|
||||
.map(|cmd| create_tool_message("shell", &cmd))
|
||||
.collect();
|
||||
|
||||
Ok(tool_messages)
|
||||
}
|
||||
|
||||
/// Creates an Assistant message with a tool call in g3's JSON format.
|
||||
pub fn create_tool_message(tool: &str, command: &str) -> Message {
|
||||
let tool_call = serde_json::json!({
|
||||
"tool": tool,
|
||||
"args": {
|
||||
"command": command
|
||||
}
|
||||
});
|
||||
|
||||
Message::new(MessageRole::Assistant, tool_call.to_string())
|
||||
}
|
||||
|
||||
/// Extract shell commands from the LLM response.
|
||||
/// Looks for {{CODE EXPLORATION COMMANDS}} section and extracts commands from code blocks.
|
||||
pub fn extract_shell_commands(response: &str) -> Vec<String> {
|
||||
let mut commands = Vec::new();
|
||||
|
||||
let section_marker = "{{CODE EXPLORATION COMMANDS}}";
|
||||
let section_start = match response.find(section_marker) {
|
||||
Some(pos) => pos + section_marker.len(),
|
||||
None => return commands,
|
||||
};
|
||||
|
||||
let section_content = &response[section_start..];
|
||||
let mut in_code_block = false;
|
||||
let mut current_block = String::new();
|
||||
|
||||
for line in section_content.lines() {
|
||||
let trimmed = line.trim();
|
||||
|
||||
if trimmed.starts_with("```") {
|
||||
if in_code_block {
|
||||
// End of code block - extract commands
|
||||
for cmd_line in current_block.lines() {
|
||||
let cmd = cmd_line.trim();
|
||||
if !cmd.is_empty() && !cmd.starts_with('#') {
|
||||
commands.push(cmd.to_string());
|
||||
}
|
||||
}
|
||||
current_block.clear();
|
||||
}
|
||||
in_code_block = !in_code_block;
|
||||
} else if in_code_block {
|
||||
current_block.push_str(line);
|
||||
current_block.push('\n');
|
||||
}
|
||||
}
|
||||
|
||||
commands
|
||||
}
|
||||
|
||||
/// Extract the summary section from the LLM response
|
||||
pub fn extract_summary(response: &str) -> Option<String> {
|
||||
let section_marker = "{{SUMMARY BASED ON INITIAL INFO}}";
|
||||
let section_start = match response.find(section_marker) {
|
||||
Some(pos) => pos + section_marker.len(),
|
||||
None => return None,
|
||||
};
|
||||
|
||||
let section_content = &response[section_start..];
|
||||
let section_end = section_content.find("{{").unwrap_or(section_content.len());
|
||||
|
||||
let summary = section_content[..section_end].trim().to_string();
|
||||
if summary.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(summary)
|
||||
}
|
||||
}
|
||||
|
||||
/// Write the codebase report to logs directory
|
||||
fn write_code_report(report: &str) -> Result<()> {
|
||||
// Ensure logs directory exists
|
||||
fs::create_dir_all("logs")?;
|
||||
|
||||
// Generate timestamp in same format as tool_calls log
|
||||
let timestamp = Local::now().format("%Y%m%d_%H%M%S").to_string();
|
||||
let filename = format!("logs/code_report_{}.log", timestamp);
|
||||
|
||||
// Write the report to file
|
||||
let mut file = OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(&filename)?;
|
||||
|
||||
file.write_all(report.as_bytes())?;
|
||||
file.flush()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Write the discovery commands to logs directory
|
||||
fn write_discovery_commands(commands: &[String]) -> Result<()> {
|
||||
// Ensure logs directory exists
|
||||
fs::create_dir_all("logs")?;
|
||||
|
||||
// Generate timestamp in same format as tool_calls log
|
||||
let timestamp = Local::now().format("%Y%m%d_%H%M%S").to_string();
|
||||
let filename = format!("logs/discovery_commands_{}.log", timestamp);
|
||||
|
||||
// Write the commands to file
|
||||
let mut file = OpenOptions::new()
|
||||
.create(true)
|
||||
.write(true)
|
||||
.truncate(true)
|
||||
.open(&filename)?;
|
||||
|
||||
// Write header
|
||||
file.write_all(b"# Discovery Commands\n")?;
|
||||
file.write_all(b"# Generated by g3-planner\n\n")?;
|
||||
|
||||
// Write each command on a separate line
|
||||
for cmd in commands {
|
||||
file.write_all(cmd.as_bytes())?;
|
||||
file.write_all(b"\n")?;
|
||||
}
|
||||
file.flush()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_create_tool_message_format() {
|
||||
let msg = create_tool_message("shell", "ls -la");
|
||||
|
||||
assert!(matches!(msg.role, MessageRole::Assistant));
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap();
|
||||
assert_eq!(parsed["tool"], "shell");
|
||||
assert_eq!(parsed["args"]["command"], "ls -la");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_shell_commands_basic() {
|
||||
let response = r#"
|
||||
Some text here.
|
||||
|
||||
{{CODE EXPLORATION COMMANDS}}
|
||||
|
||||
```bash
|
||||
ls -la
|
||||
cat README.md
|
||||
rg --files -g '*.rs'
|
||||
```
|
||||
|
||||
More text.
|
||||
"#;
|
||||
|
||||
let commands = extract_shell_commands(response);
|
||||
assert_eq!(commands.len(), 3);
|
||||
assert_eq!(commands[0], "ls -la");
|
||||
assert_eq!(commands[1], "cat README.md");
|
||||
assert_eq!(commands[2], "rg --files -g '*.rs'");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_shell_commands_with_comments() {
|
||||
let response = r#"
|
||||
{{CODE EXPLORATION COMMANDS}}
|
||||
|
||||
```
|
||||
# This is a comment
|
||||
ls -la
|
||||
# Another comment
|
||||
cat file.txt
|
||||
```
|
||||
"#;
|
||||
|
||||
let commands = extract_shell_commands(response);
|
||||
assert_eq!(commands.len(), 2);
|
||||
assert_eq!(commands[0], "ls -la");
|
||||
assert_eq!(commands[1], "cat file.txt");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_shell_commands_no_section() {
|
||||
let response = "Some response without the expected section.";
|
||||
let commands = extract_shell_commands(response);
|
||||
assert!(commands.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_summary() {
|
||||
let response = r#"
|
||||
{{SUMMARY BASED ON INITIAL INFO}}
|
||||
|
||||
This is a summary of the codebase.
|
||||
It has multiple lines.
|
||||
|
||||
{{CODE EXPLORATION COMMANDS}}
|
||||
|
||||
```
|
||||
ls -la
|
||||
```
|
||||
"#;
|
||||
|
||||
let summary = extract_summary(response);
|
||||
assert!(summary.is_some());
|
||||
let summary_text = summary.unwrap();
|
||||
assert!(summary_text.contains("This is a summary"));
|
||||
assert!(summary_text.contains("multiple lines"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_summary_no_section() {
|
||||
let response = "Response without summary section.";
|
||||
let summary = extract_summary(response);
|
||||
assert!(summary.is_none());
|
||||
}
|
||||
}
|
||||
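A minimal sketch (not part of the diff) of how these helpers might be wired together after the discovery LLM call; the process_discovery_response wrapper is hypothetical, while extract_summary, extract_shell_commands, and write_discovery_commands are the functions defined above:

// Hypothetical glue code, for illustration only.
fn process_discovery_response(response: &str) -> Result<Option<String>> {
    // Commands from the {{CODE EXPLORATION COMMANDS}} section, logged for later replay.
    let commands = extract_shell_commands(response);
    write_discovery_commands(&commands)?;

    // Summary from the {{SUMMARY BASED ON INITIAL INFO}} section, if the model produced one.
    Ok(extract_summary(response))
}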
37
crates/g3-planner/src/prompts.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
//! Prompts used for discovery phase
|
||||
|
||||
/// System prompt for discovery mode - instructs the LLM to analyze a codebase and generate exploration commands
|
||||
pub const DISCOVERY_SYSTEM_PROMPT: &str = r#"You are an expert code analyst. Your task is to analyze a codebase structure and generate shell commands to explore it further.
|
||||
|
||||
You will receive:
|
||||
1. User requirements describing what needs to be implemented
|
||||
2. A codebase report showing the structure and key elements of the codebase
|
||||
|
||||
Your job is to:
|
||||
1. Understand the requirements and identify what parts of the codebase are relevant
|
||||
2. Generate shell commands to explore those parts in more detail
|
||||
|
||||
IMPORTANT: Do NOT attempt to implement anything. Only generate exploration commands."#;
|
||||
|
||||
/// Discovery prompt template - used when we have a codebase report.
|
||||
/// The codebase report should be appended after this prompt.
|
||||
pub const DISCOVERY_REQUIREMENTS_PROMPT: &str = r#"**CRITICAL**: ABSOLUTELY DO NOT ATTEMPT TO IMPLEMENT THESE REQUIREMENTS AT THIS POINT. ONLY USE THEM TO
|
||||
UNDERSTAND WHICH PARTS OF THE CODE YOU MIGHT BE INTERESTED IN, AND WHAT SEARCH/GREP EXPRESSIONS YOU MIGHT WANT TO USE
|
||||
TO GET A BETTER UNDERSTANDING OF THE CODEBASE.
|
||||
|
||||
Your task is to analyze the codebase overview provided below and generate shell commands to explore it further - in particular, those
|
||||
you deem most relevant to the requirements given below.
|
||||
|
||||
Your output MUST include:
|
||||
1. A summary report. Use the heading {{SUMMARY BASED ON INITIAL INFO}}.
|
||||
- Retain as much of the provided information as you consider relevant to the requirements and to making an implementation plan.
|
||||
- Ideally it should not exceed 10000 tokens.
|
||||
2. A list of shell commands to explore the code. Use the heading {{CODE EXPLORATION COMMANDS}}.
|
||||
- Try to plan ahead for what you will need for a deep dive into the code. Keep the gathered information concise.
|
||||
- Carefully consider which commands give you the most relevant information, and pick the top 25.
|
||||
- Use tools like `ls`, `rg` (ripgrep), `grep`, `sed`, `cat`, `head`, `tail` etc.
|
||||
- Focus on commands that will help understand the code STRUCTURE without dumping large sections of files.
|
||||
- e.g. for Rust you might try `rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?(struct|enum|type|union)'`
|
||||
- Mark the beginning and end of the commands with "```".
|
||||
|
||||
DO NOT ADD ANY COMMENTS OR OTHER EXPLANATION IN THE COMMANDS SECTION, JUST INCLUDE THE SHELL COMMANDS."#;
|
||||
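For orientation, a response in the shape this prompt requests might look like the sample below (purely illustrative, modeled on the test fixtures elsewhere in this change); extract_summary and extract_shell_commands key off the two {{...}} headings and the fenced command block:

// Illustrative sample only; not part of the diff.
const SAMPLE_DISCOVERY_RESPONSE: &str = r#"
{{SUMMARY BASED ON INITIAL INFO}}

The planner crate lives in crates/g3-planner; provider abstractions live in crates/g3-providers.

{{CODE EXPLORATION COMMANDS}}

```
ls -la
rg --files -g '*.rs'
head -50 crates/g3-planner/src/lib.rs
```
"#;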
60
crates/g3-planner/tests/logging_test.rs
Normal file
@@ -0,0 +1,60 @@
|
||||
//! Integration tests for logging functionality
|
||||
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
#[test]
|
||||
fn test_log_files_created() {
|
||||
// This test verifies that the logging functions work correctly
|
||||
// by checking that files can be created in the logs directory
|
||||
|
||||
// Clean up any existing test logs
|
||||
let _ = fs::remove_dir_all("logs");
|
||||
|
||||
// Create logs directory
|
||||
fs::create_dir_all("logs").expect("Failed to create logs directory");
|
||||
|
||||
// Verify directory exists
|
||||
assert!(Path::new("logs").exists());
|
||||
assert!(Path::new("logs").is_dir());
|
||||
|
||||
// Test writing a code report
|
||||
let test_report = "Test codebase report\nLine 2\nLine 3";
|
||||
let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();
|
||||
let report_filename = format!("logs/code_report_{}.log", timestamp);
|
||||
|
||||
fs::write(&report_filename, test_report).expect("Failed to write code report");
|
||||
assert!(Path::new(&report_filename).exists());
|
||||
|
||||
let content = fs::read_to_string(&report_filename).expect("Failed to read code report");
|
||||
assert_eq!(content, test_report);
|
||||
|
||||
// Test writing discovery commands
|
||||
let commands_filename = format!("logs/discovery_commands_{}.log", timestamp);
|
||||
let test_commands = "# Discovery Commands\n# Generated by g3-planner\n\nls -la\ncat README.md\n";
|
||||
|
||||
fs::write(&commands_filename, test_commands).expect("Failed to write discovery commands");
|
||||
assert!(Path::new(&commands_filename).exists());
|
||||
|
||||
let content = fs::read_to_string(&commands_filename).expect("Failed to read discovery commands");
|
||||
assert_eq!(content, test_commands);
|
||||
|
||||
// Clean up
|
||||
let _ = fs::remove_file(&report_filename);
|
||||
let _ = fs::remove_file(&commands_filename);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_filename_format() {
|
||||
// Verify the filename format matches the tool_calls log format
|
||||
let timestamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();
|
||||
|
||||
// Check format: YYYYMMDD_HHMMSS
|
||||
assert_eq!(timestamp.len(), 15); // 8 digits + underscore + 6 digits
|
||||
assert!(timestamp.contains('_'));
|
||||
|
||||
let parts: Vec<&str> = timestamp.split('_').collect();
|
||||
assert_eq!(parts.len(), 2);
|
||||
assert_eq!(parts[0].len(), 8); // YYYYMMDD
|
||||
assert_eq!(parts[1].len(), 6); // HHMMSS
|
||||
}
|
||||
103
crates/g3-planner/tests/planner_test.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
//! Integration tests for g3-planner
|
||||
|
||||
use g3_planner::{create_tool_message, explore_codebase, extract_shell_commands};
|
||||
use g3_providers::MessageRole;
|
||||
|
||||
#[test]
|
||||
fn test_create_tool_message_format() {
|
||||
let msg = create_tool_message("shell", "ls -la");
|
||||
|
||||
assert!(matches!(msg.role, MessageRole::Assistant));
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap();
|
||||
assert_eq!(parsed["tool"], "shell");
|
||||
assert_eq!(parsed["args"]["command"], "ls -la");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_explore_codebase_returns_report() {
|
||||
// Test with current directory (should find Rust files in g3 project)
|
||||
let report = explore_codebase(".");
|
||||
|
||||
// Should return a non-empty report
|
||||
assert!(!report.is_empty(), "Report should not be empty");
|
||||
|
||||
// Should contain the codebase analysis header
|
||||
assert!(
|
||||
report.contains("CODEBASE ANALYSIS") || report.contains("No recognized"),
|
||||
"Report should have analysis header or indicate no languages found"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_shell_commands_basic() {
|
||||
let response = r#"
|
||||
Some text here.
|
||||
|
||||
{{CODE EXPLORATION COMMANDS}}
|
||||
|
||||
```bash
|
||||
ls -la
|
||||
cat README.md
|
||||
rg --files -g '*.rs'
|
||||
```
|
||||
|
||||
More text.
|
||||
"#;
|
||||
|
||||
let commands = extract_shell_commands(response);
|
||||
assert_eq!(commands.len(), 3);
|
||||
assert_eq!(commands[0], "ls -la");
|
||||
assert_eq!(commands[1], "cat README.md");
|
||||
assert_eq!(commands[2], "rg --files -g '*.rs'");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_shell_commands_with_comments() {
|
||||
let response = r#"
|
||||
{{CODE EXPLORATION COMMANDS}}
|
||||
|
||||
```
|
||||
# This is a comment
|
||||
ls -la
|
||||
# Another comment
|
||||
cat file.txt
|
||||
```
|
||||
"#;
|
||||
|
||||
let commands = extract_shell_commands(response);
|
||||
assert_eq!(commands.len(), 2);
|
||||
assert_eq!(commands[0], "ls -la");
|
||||
assert_eq!(commands[1], "cat file.txt");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_shell_commands_no_section() {
|
||||
let response = "Some response without the expected section.";
|
||||
let commands = extract_shell_commands(response);
|
||||
assert!(commands.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_shell_commands_multiple_code_blocks() {
|
||||
let response = r#"
|
||||
{{CODE EXPLORATION COMMANDS}}
|
||||
|
||||
```bash
|
||||
ls -la
|
||||
```
|
||||
|
||||
Some explanation text.
|
||||
|
||||
```
|
||||
cat README.md
|
||||
head -50 src/main.rs
|
||||
```
|
||||
"#;
|
||||
|
||||
let commands = extract_shell_commands(response);
|
||||
assert_eq!(commands.len(), 3);
|
||||
assert_eq!(commands[0], "ls -la");
|
||||
assert_eq!(commands[1], "cat README.md");
|
||||
assert_eq!(commands[2], "head -50 src/main.rs");
|
||||
}
|
||||
@@ -21,22 +21,18 @@
|
||||
//! // Create the provider with your API key
|
||||
//! let provider = AnthropicProvider::new(
|
||||
//! "your-api-key".to_string(),
|
||||
//! Some("claude-3-5-sonnet-20241022".to_string()), // Optional: defaults to claude-3-5-sonnet-20241022
|
||||
//! Some(4096), // Optional: max tokens
|
||||
//! Some(0.1), // Optional: temperature
|
||||
//! Some("claude-3-5-sonnet-20241022".to_string()),
|
||||
//! Some(4096),
|
||||
//! Some(0.1),
|
||||
//! None, // cache_config
|
||||
//! None, // enable_1m_context
|
||||
//! )?;
|
||||
//!
|
||||
//! // Create a completion request
|
||||
//! let request = CompletionRequest {
|
||||
//! messages: vec![
|
||||
//! Message {
|
||||
//! role: MessageRole::System,
|
||||
//! content: "You are a helpful assistant.".to_string(),
|
||||
//! },
|
||||
//! Message {
|
||||
//! role: MessageRole::User,
|
||||
//! content: "Hello! How are you?".to_string(),
|
||||
//! },
|
||||
//! Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
|
||||
//! Message::new(MessageRole::User, "Hello! How are you?".to_string()),
|
||||
//! ],
|
||||
//! max_tokens: Some(1000),
|
||||
//! temperature: Some(0.7),
|
||||
@@ -62,15 +58,16 @@
|
||||
//! async fn main() -> anyhow::Result<()> {
|
||||
//! let provider = AnthropicProvider::new(
|
||||
//! "your-api-key".to_string(),
|
||||
//! None, None, None,
|
||||
//! None,
|
||||
//! None,
|
||||
//! None,
|
||||
//! None, // cache_config
|
||||
//! None, // enable_1m_context
|
||||
//! )?;
|
||||
//!
|
||||
//! let request = CompletionRequest {
|
||||
//! messages: vec![
|
||||
//! Message {
|
||||
//! role: MessageRole::User,
|
||||
//! content: "Write a short story about a robot.".to_string(),
|
||||
//! },
|
||||
//! Message::new(MessageRole::User, "Write a short story about a robot.".to_string()),
|
||||
//! ],
|
||||
//! max_tokens: Some(1000),
|
||||
//! temperature: Some(0.7),
|
||||
@@ -123,6 +120,8 @@ pub struct AnthropicProvider {
|
||||
model: String,
|
||||
max_tokens: u32,
|
||||
temperature: f32,
|
||||
cache_config: Option<String>,
|
||||
enable_1m_context: bool,
|
||||
}
|
||||
|
||||
impl AnthropicProvider {
|
||||
@@ -131,6 +130,8 @@ impl AnthropicProvider {
|
||||
model: Option<String>,
|
||||
max_tokens: Option<u32>,
|
||||
temperature: Option<f32>,
|
||||
cache_config: Option<String>,
|
||||
enable_1m_context: Option<bool>,
|
||||
) -> Result<Self> {
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(300))
|
||||
@@ -147,6 +148,8 @@ impl AnthropicProvider {
|
||||
model,
|
||||
max_tokens: max_tokens.unwrap_or(4096),
|
||||
temperature: temperature.unwrap_or(0.1),
|
||||
cache_config,
|
||||
enable_1m_context: enable_1m_context.unwrap_or(false),
|
||||
})
|
||||
}
|
||||
|
||||
@@ -156,9 +159,12 @@ impl AnthropicProvider {
|
||||
.post(ANTHROPIC_API_URL)
|
||||
.header("x-api-key", &self.api_key)
|
||||
.header("anthropic-version", ANTHROPIC_VERSION)
|
||||
// Anthropic beta 1m context window. Enable if needed. It costs extra, so check first.
|
||||
// .header("anthropic-beta", "context-1m-2025-08-07")
|
||||
.header("content-type", "application/json");
|
||||
|
||||
if self.enable_1m_context {
|
||||
builder = builder.header("anthropic-beta", "context-1m-2025-08-07");
|
||||
}
|
||||
|
||||
if streaming {
|
||||
builder = builder.header("accept", "text/event-stream");
|
||||
}
|
||||
@@ -166,6 +172,11 @@ impl AnthropicProvider {
|
||||
builder
|
||||
}
|
||||
|
||||
fn convert_cache_control(cache_control: &crate::CacheControl) -> crate::CacheControl {
|
||||
// Anthropic uses the same format, so just clone it
|
||||
cache_control.clone()
|
||||
}
|
||||
|
||||
fn convert_tools(&self, tools: &[Tool]) -> Vec<AnthropicTool> {
|
||||
tools
|
||||
.iter()
|
||||
@@ -214,6 +225,8 @@ impl AnthropicProvider {
|
||||
role: "user".to_string(),
|
||||
content: vec![AnthropicContent::Text {
|
||||
text: message.content.clone(),
|
||||
cache_control: message.cache_control.as_ref()
|
||||
.map(Self::convert_cache_control),
|
||||
}],
|
||||
});
|
||||
}
|
||||
@@ -222,6 +235,8 @@ impl AnthropicProvider {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![AnthropicContent::Text {
|
||||
text: message.content.clone(),
|
||||
cache_control: message.cache_control.as_ref()
|
||||
.map(Self::convert_cache_control),
|
||||
}],
|
||||
});
|
||||
}
|
||||
@@ -564,7 +579,7 @@ impl LLMProvider for AnthropicProvider {
|
||||
.content
|
||||
.iter()
|
||||
.filter_map(|c| match c {
|
||||
AnthropicContent::Text { text } => Some(text.as_str()),
|
||||
AnthropicContent::Text { text, .. } => Some(text.as_str()),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
@@ -658,6 +673,19 @@ impl LLMProvider for AnthropicProvider {
|
||||
// Claude models support native tool calling
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_cache_control(&self) -> bool {
|
||||
// Anthropic supports cache control
|
||||
true
|
||||
}
|
||||
|
||||
fn max_tokens(&self) -> u32 {
|
||||
self.max_tokens
|
||||
}
|
||||
|
||||
fn temperature(&self) -> f32 {
|
||||
self.temperature
|
||||
}
|
||||
}
|
||||
|
||||
// Anthropic API request/response structures
|
||||
@@ -701,7 +729,11 @@ struct AnthropicMessage {
|
||||
#[serde(tag = "type")]
|
||||
enum AnthropicContent {
|
||||
#[serde(rename = "text")]
|
||||
Text { text: String },
|
||||
Text {
|
||||
text: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
cache_control: Option<crate::CacheControl>,
|
||||
},
|
||||
#[serde(rename = "tool_use")]
|
||||
ToolUse {
|
||||
id: String,
|
||||
@@ -771,21 +803,14 @@ mod tests {
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
).unwrap();
|
||||
|
||||
let messages = vec![
|
||||
Message {
|
||||
role: MessageRole::System,
|
||||
content: "You are a helpful assistant.".to_string(),
|
||||
},
|
||||
Message {
|
||||
role: MessageRole::User,
|
||||
content: "Hello!".to_string(),
|
||||
},
|
||||
Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: "Hi there!".to_string(),
|
||||
},
|
||||
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
|
||||
Message::new(MessageRole::User, "Hello!".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hi there!".to_string()),
|
||||
];
|
||||
|
||||
let (system, anthropic_messages) = provider.convert_messages(&messages).unwrap();
|
||||
@@ -803,14 +828,11 @@ mod tests {
|
||||
Some("claude-3-haiku-20240307".to_string()),
|
||||
Some(1000),
|
||||
Some(0.5),
|
||||
None,
|
||||
None,
|
||||
).unwrap();
|
||||
|
||||
let messages = vec![
|
||||
Message {
|
||||
role: MessageRole::User,
|
||||
content: "Test message".to_string(),
|
||||
},
|
||||
];
|
||||
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
|
||||
|
||||
let request_body = provider
|
||||
.create_request_body(&messages, None, false, 1000, 0.5)
|
||||
@@ -831,6 +853,8 @@ mod tests {
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
).unwrap();
|
||||
|
||||
let tools = vec![
|
||||
@@ -859,4 +883,48 @@ mod tests {
|
||||
assert!(anthropic_tools[0].input_schema.required.is_some());
|
||||
assert_eq!(anthropic_tools[0].input_schema.required.as_ref().unwrap()[0], "location");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_control_serialization() {
|
||||
let provider = AnthropicProvider::new(
|
||||
"test-key".to_string(),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
).unwrap();
|
||||
|
||||
// Test message WITHOUT cache_control
|
||||
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
|
||||
let (_, anthropic_messages_without) = provider.convert_messages(&messages_without).unwrap();
|
||||
let json_without = serde_json::to_string(&anthropic_messages_without).unwrap();
|
||||
|
||||
println!("Anthropic JSON without cache_control: {}", json_without);
|
||||
// Check if cache_control appears in the JSON
|
||||
if json_without.contains("cache_control") {
|
||||
println!("WARNING: JSON contains 'cache_control' field when not configured!");
|
||||
assert!(!json_without.contains("\"cache_control\":null"),
|
||||
"JSON should not contain 'cache_control: null'");
|
||||
}
|
||||
|
||||
// Test message WITH cache_control
|
||||
let messages_with = vec![Message::with_cache_control(
|
||||
MessageRole::User,
|
||||
"Hello".to_string(),
|
||||
crate::CacheControl::ephemeral(),
|
||||
)];
|
||||
let (_, anthropic_messages_with) = provider.convert_messages(&messages_with).unwrap();
|
||||
let json_with = serde_json::to_string(&anthropic_messages_with).unwrap();
|
||||
|
||||
println!("Anthropic JSON with cache_control: {}", json_with);
|
||||
assert!(json_with.contains("cache_control"),
|
||||
"JSON should contain 'cache_control' field when configured");
|
||||
assert!(json_with.contains("ephemeral"),
|
||||
"JSON should contain 'ephemeral' type");
|
||||
|
||||
// The key assertion: when cache_control is None, it should not appear in JSON
|
||||
assert!(!json_without.contains("cache_control") || !json_without.contains("null"),
|
||||
"JSON should not contain 'cache_control' field or null values when not configured");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -39,10 +39,7 @@
|
||||
//! // Create a completion request
|
||||
//! let request = CompletionRequest {
|
||||
//! messages: vec![
|
||||
//! Message {
|
||||
//! role: MessageRole::User,
|
||||
//! content: "Hello! How are you?".to_string(),
|
||||
//! },
|
||||
//! Message::new(MessageRole::User, "Hello! How are you?".to_string()),
|
||||
//! ],
|
||||
//! max_tokens: Some(1000),
|
||||
//! temperature: Some(0.7),
|
||||
@@ -251,9 +248,12 @@ impl DatabricksProvider {
|
||||
MessageRole::Assistant => "assistant",
|
||||
};
|
||||
|
||||
// Always use simple string format (Databricks doesn't support cache_control)
|
||||
let content = serde_json::Value::String(message.content.clone());
|
||||
|
||||
databricks_messages.push(DatabricksMessage {
|
||||
role: role.to_string(),
|
||||
content: Some(message.content.clone()),
|
||||
content: Some(content),
|
||||
tool_calls: None, // Only used in responses, not requests
|
||||
});
|
||||
}
|
||||
@@ -864,8 +864,22 @@ impl LLMProvider for DatabricksProvider {
|
||||
let content = databricks_response
|
||||
.choices
|
||||
.first()
|
||||
.and_then(|choice| choice.message.content.as_ref())
|
||||
.cloned()
|
||||
.and_then(|choice| {
|
||||
choice.message.content.as_ref().map(|c| {
|
||||
// Handle both string and array formats
|
||||
if let Some(s) = c.as_str() {
|
||||
s.to_string()
|
||||
} else if let Some(arr) = c.as_array() {
|
||||
// Extract text from content blocks
|
||||
arr.iter()
|
||||
.filter_map(|block| block.get("text").and_then(|t| t.as_str()))
|
||||
.collect::<Vec<_>>()
|
||||
.join("")
|
||||
} else {
|
||||
String::new()
|
||||
}
|
||||
})
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// Check if there are tool calls in the response
|
||||
@@ -1037,6 +1051,18 @@ impl LLMProvider for DatabricksProvider {
|
||||
// This includes Claude, Llama, DBRX, and most other models on the platform
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_cache_control(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn max_tokens(&self) -> u32 {
|
||||
self.max_tokens
|
||||
}
|
||||
|
||||
fn temperature(&self) -> f32 {
|
||||
self.temperature
|
||||
}
|
||||
}
|
||||
|
||||
// Databricks API request/response structures
|
||||
@@ -1067,7 +1093,8 @@ struct DatabricksFunction {
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
struct DatabricksMessage {
|
||||
role: String,
|
||||
content: Option<String>, // Make content optional since tool calls might not have content
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
content: Option<serde_json::Value>, // Can be string or array of content blocks
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
tool_calls: Option<Vec<DatabricksToolCall>>, // Add tool_calls field for responses
|
||||
}
|
||||
@@ -1154,18 +1181,9 @@ mod tests {
|
||||
.unwrap();
|
||||
|
||||
let messages = vec![
|
||||
Message {
|
||||
role: MessageRole::System,
|
||||
content: "You are a helpful assistant.".to_string(),
|
||||
},
|
||||
Message {
|
||||
role: MessageRole::User,
|
||||
content: "Hello!".to_string(),
|
||||
},
|
||||
Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: "Hi there!".to_string(),
|
||||
},
|
||||
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
|
||||
Message::new(MessageRole::User, "Hello!".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hi there!".to_string()),
|
||||
];
|
||||
|
||||
let databricks_messages = provider.convert_messages(&messages).unwrap();
|
||||
@@ -1187,10 +1205,7 @@ mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let messages = vec![Message {
|
||||
role: MessageRole::User,
|
||||
content: "Test message".to_string(),
|
||||
}];
|
||||
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
|
||||
|
||||
let request_body = provider
|
||||
.create_request_body(&messages, None, false, 1000, 0.5)
|
||||
@@ -1273,4 +1288,62 @@ mod tests {
|
||||
assert!(llama_provider.has_native_tool_calling());
|
||||
assert!(dbrx_provider.has_native_tool_calling());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_control_serialization() {
|
||||
let provider = DatabricksProvider::from_token(
|
||||
"https://test.databricks.com".to_string(),
|
||||
"test-token".to_string(),
|
||||
"databricks-claude-sonnet-4".to_string(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Test message WITHOUT cache_control
|
||||
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
|
||||
let databricks_messages_without = provider.convert_messages(&messages_without).unwrap();
|
||||
let json_without = serde_json::to_string(&databricks_messages_without).unwrap();
|
||||
|
||||
println!("JSON without cache_control: {}", json_without);
|
||||
assert!(!json_without.contains("cache_control"),
|
||||
"JSON should not contain 'cache_control' field when not configured");
|
||||
|
||||
// Test message WITH cache_control - should still NOT include it (Databricks doesn't support it)
|
||||
let messages_with = vec![Message::with_cache_control(
|
||||
MessageRole::User,
|
||||
"Hello".to_string(),
|
||||
crate::CacheControl::ephemeral(),
|
||||
)];
|
||||
let databricks_messages_with = provider.convert_messages(&messages_with).unwrap();
|
||||
let json_with = serde_json::to_string(&databricks_messages_with).unwrap();
|
||||
|
||||
println!("JSON with cache_control: {}", json_with);
|
||||
assert!(!json_with.contains("cache_control"),
|
||||
"JSON should NOT contain 'cache_control' field - Databricks doesn't support it");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_databricks_does_not_support_cache_control() {
|
||||
let claude_provider = DatabricksProvider::from_token(
|
||||
"https://test.databricks.com".to_string(),
|
||||
"test-token".to_string(),
|
||||
"databricks-claude-sonnet-4".to_string(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let llama_provider = DatabricksProvider::from_token(
|
||||
"https://test.databricks.com".to_string(),
|
||||
"test-token".to_string(),
|
||||
"databricks-meta-llama-3-3-70b-instruct".to_string(),
|
||||
None,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert!(!claude_provider.supports_cache_control(), "Databricks should not support cache_control even for Claude models");
|
||||
assert!(!llama_provider.supports_cache_control(), "Databricks should not support cache_control for Llama models");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -771,4 +771,12 @@ impl LLMProvider for EmbeddedProvider {
|
||||
fn model(&self) -> &str {
|
||||
&self.model_name
|
||||
}
|
||||
|
||||
fn max_tokens(&self) -> u32 {
|
||||
self.max_tokens
|
||||
}
|
||||
|
||||
fn temperature(&self) -> f32 {
|
||||
self.temperature
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,6 +21,17 @@ pub trait LLMProvider: Send + Sync {
|
||||
fn has_native_tool_calling(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Check if the provider supports cache control
|
||||
fn supports_cache_control(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Get the configured max_tokens for this provider
|
||||
fn max_tokens(&self) -> u32;
|
||||
|
||||
/// Get the configured temperature for this provider
|
||||
fn temperature(&self) -> f32;
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -32,10 +43,40 @@ pub struct CompletionRequest {
|
||||
pub tools: Option<Vec<Tool>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CacheControl {
|
||||
#[serde(rename = "type")]
|
||||
pub cache_type: CacheType,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub ttl: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
pub enum CacheType {
|
||||
Ephemeral,
|
||||
}
|
||||
|
||||
impl CacheControl {
|
||||
pub fn ephemeral() -> Self {
|
||||
Self { cache_type: CacheType::Ephemeral, ttl: None }
|
||||
}
|
||||
|
||||
pub fn five_minute() -> Self {
|
||||
Self { cache_type: CacheType::Ephemeral, ttl: Some("5m".to_string()) }
|
||||
}
|
||||
|
||||
pub fn one_hour() -> Self {
|
||||
Self { cache_type: CacheType::Ephemeral, ttl: Some("1h".to_string()) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Message {
|
||||
pub role: MessageRole,
|
||||
pub content: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub cache_control: Option<CacheControl>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -95,6 +136,45 @@ pub use databricks::DatabricksProvider;
|
||||
pub use embedded::EmbeddedProvider;
|
||||
pub use openai::OpenAIProvider;
|
||||
|
||||
impl Message {
|
||||
/// Create a new message with optional cache control
|
||||
pub fn new(role: MessageRole, content: String) -> Self {
|
||||
Self {
|
||||
role,
|
||||
content,
|
||||
cache_control: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new message with cache control
|
||||
pub fn with_cache_control(role: MessageRole, content: String, cache_control: CacheControl) -> Self {
|
||||
Self {
|
||||
role,
|
||||
content,
|
||||
cache_control: Some(cache_control),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a message with cache control, with provider validation
|
||||
pub fn with_cache_control_validated(
|
||||
role: MessageRole,
|
||||
content: String,
|
||||
cache_control: CacheControl,
|
||||
provider: &dyn LLMProvider
|
||||
) -> Self {
|
||||
if !provider.supports_cache_control() {
|
||||
tracing::warn!(
|
||||
"Cache control requested for provider '{}' which does not support it. \
|
||||
Cache control is currently only supported by the direct Anthropic provider.",
|
||||
provider.name()
|
||||
);
|
||||
return Self::new(role, content);
|
||||
}
|
||||
|
||||
Self::with_cache_control(role, content, cache_control)
|
||||
}
|
||||
}
|
||||
|
||||
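A small caller-side sketch (not part of the diff; build_system_message is a hypothetical helper) showing how these constructors combine so that cache control is only attached when the active provider supports it:

// Hypothetical helper, for illustration only.
fn build_system_message(content: String, provider: &dyn LLMProvider) -> Message {
    // with_cache_control_validated falls back to a plain message (and logs a warning)
    // when the provider reports supports_cache_control() == false.
    Message::with_cache_control_validated(
        MessageRole::System,
        content,
        CacheControl::ephemeral(),
        provider,
    )
}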
/// Provider registry for managing multiple LLM providers
|
||||
pub struct ProviderRegistry {
|
||||
providers: HashMap<String, Box<dyn LLMProvider>>,
|
||||
@@ -144,3 +224,68 @@ impl Default for ProviderRegistry {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_message_serialization_without_cache_control() {
|
||||
let msg = Message::new(MessageRole::User, "Hello".to_string());
|
||||
let json = serde_json::to_string(&msg).unwrap();
|
||||
|
||||
println!("Message JSON without cache_control: {}", json);
|
||||
assert!(!json.contains("cache_control"),
|
||||
"JSON should not contain 'cache_control' field when not configured");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_message_serialization_with_cache_control() {
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::User,
|
||||
"Hello".to_string(),
|
||||
CacheControl::ephemeral(),
|
||||
);
|
||||
let json = serde_json::to_string(&msg).unwrap();
|
||||
|
||||
println!("Message JSON with cache_control: {}", json);
|
||||
assert!(json.contains("cache_control"),
|
||||
"JSON should contain 'cache_control' field when configured");
|
||||
assert!(json.contains("ephemeral"),
|
||||
"JSON should contain 'ephemeral' value");
|
||||
assert!(json.contains("\"type\":"),
|
||||
"JSON should contain 'type' field in cache_control");
|
||||
assert!(!json.contains("null"),
|
||||
"JSON should not contain null values");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_control_five_minute_serialization() {
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::User,
|
||||
"Hello".to_string(),
|
||||
CacheControl::five_minute(),
|
||||
);
|
||||
let json = serde_json::to_string(&msg).unwrap();
|
||||
|
||||
println!("Message JSON with 5-minute cache_control: {}", json);
|
||||
assert!(json.contains("cache_control"), "JSON should contain 'cache_control' field");
|
||||
assert!(json.contains("ephemeral"), "JSON should contain 'ephemeral' type");
|
||||
assert!(json.contains("\"ttl\":\"5m\""), "JSON should contain ttl field with 5m value");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_control_one_hour_serialization() {
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::User,
|
||||
"Hello".to_string(),
|
||||
CacheControl::one_hour(),
|
||||
);
|
||||
let json = serde_json::to_string(&msg).unwrap();
|
||||
|
||||
println!("Message JSON with 1-hour cache_control: {}", json);
|
||||
assert!(json.contains("cache_control"), "JSON should contain 'cache_control' field");
|
||||
assert!(json.contains("ephemeral"), "JSON should contain 'ephemeral' type");
|
||||
assert!(json.contains("\"ttl\":\"1h\""), "JSON should contain ttl field with 1h value");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -384,6 +384,14 @@ impl LLMProvider for OpenAIProvider {
|
||||
// OpenAI models support native tool calling
|
||||
true
|
||||
}
|
||||
|
||||
fn max_tokens(&self) -> u32 {
|
||||
self.max_tokens.unwrap_or(16000)
|
||||
}
|
||||
|
||||
fn temperature(&self) -> f32 {
|
||||
self._temperature.unwrap_or(0.1)
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_messages(messages: &[Message]) -> Vec<serde_json::Value> {
|
||||
|
||||
131
crates/g3-providers/tests/cache_control_error_regression_test.rs
Normal file
@@ -0,0 +1,131 @@
|
||||
//! Regression test for cache_control serialization bug
|
||||
//!
|
||||
//! This test verifies that cache_control is NOT serialized in the wrong format.
|
||||
//! The bug was that it serialized as:
|
||||
//! - `system.0.cache_control.ephemeral.ttl` (WRONG)
|
||||
//!
|
||||
//! It should serialize as:
|
||||
//! - `"cache_control": {"type": "ephemeral"}` for ephemeral
|
||||
//! - `"cache_control": {"type": "ephemeral", "ttl": "5m"}` for 5minute
|
||||
//! - `"cache_control": {"type": "ephemeral", "ttl": "1h"}` for 1hour
|
||||
|
||||
use g3_providers::{CacheControl, Message, MessageRole};
|
||||
|
||||
#[test]
|
||||
fn test_no_wrong_serialization_format() {
|
||||
// Test ephemeral
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::System,
|
||||
"Test".to_string(),
|
||||
CacheControl::ephemeral(),
|
||||
);
|
||||
let json = serde_json::to_string(&msg).unwrap();
|
||||
|
||||
println!("Ephemeral message JSON: {}", json);
|
||||
|
||||
// Should NOT contain the wrong format
|
||||
assert!(!json.contains("system.0.cache_control"),
|
||||
"JSON should not contain 'system.0.cache_control' path");
|
||||
assert!(!json.contains("cache_control.ephemeral"),
|
||||
"JSON should not contain 'cache_control.ephemeral' path");
|
||||
|
||||
// Should contain the correct format
|
||||
assert!(json.contains(r#""cache_control":{"type":"ephemeral"}"#),
|
||||
"JSON should contain correct cache_control format");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_five_minute_no_wrong_format() {
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::System,
|
||||
"Test".to_string(),
|
||||
CacheControl::five_minute(),
|
||||
);
|
||||
let json = serde_json::to_string(&msg).unwrap();
|
||||
|
||||
println!("5-minute message JSON: {}", json);
|
||||
|
||||
// Should NOT contain the wrong format
|
||||
assert!(!json.contains("system.0.cache_control"),
|
||||
"JSON should not contain 'system.0.cache_control' path");
|
||||
assert!(!json.contains("cache_control.ephemeral.ttl"),
|
||||
"JSON should not contain 'cache_control.ephemeral.ttl' path");
|
||||
|
||||
// Should contain the correct format with ttl as a direct field
|
||||
assert!(json.contains(r#""type":"ephemeral""#),
|
||||
"JSON should contain type field");
|
||||
assert!(json.contains(r#""ttl":"5m""#),
|
||||
"JSON should contain ttl field with value 5m");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_one_hour_no_wrong_format() {
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::System,
|
||||
"Test".to_string(),
|
||||
CacheControl::one_hour(),
|
||||
);
|
||||
let json = serde_json::to_string(&msg).unwrap();
|
||||
|
||||
println!("1-hour message JSON: {}", json);
|
||||
|
||||
// Should NOT contain the wrong format
|
||||
assert!(!json.contains("system.0.cache_control"),
|
||||
"JSON should not contain 'system.0.cache_control' path");
|
||||
assert!(!json.contains("cache_control.ephemeral.ttl"),
|
||||
"JSON should not contain 'cache_control.ephemeral.ttl' path");
|
||||
|
||||
// Should contain the correct format with ttl as a direct field
|
||||
assert!(json.contains(r#""type":"ephemeral""#),
|
||||
"JSON should contain type field");
|
||||
assert!(json.contains(r#""ttl":"1h""#),
|
||||
"JSON should contain ttl field with value 1h");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_control_structure_is_flat() {
|
||||
// Verify that the cache_control object has a flat structure
|
||||
// with 'type' and optional 'ttl' at the same level
|
||||
|
||||
let cache_control = CacheControl::five_minute();
|
||||
let json_value = serde_json::to_value(&cache_control).unwrap();
|
||||
|
||||
println!("Cache control as JSON value: {}", serde_json::to_string_pretty(&json_value).unwrap());
|
||||
|
||||
let obj = json_value.as_object().expect("Should be an object");
|
||||
|
||||
// Should have exactly 2 keys at the top level
|
||||
assert_eq!(obj.len(), 2, "Cache control should have exactly 2 top-level fields");
|
||||
|
||||
// Both 'type' and 'ttl' should be at the same level
|
||||
assert!(obj.contains_key("type"), "Should have 'type' field");
|
||||
assert!(obj.contains_key("ttl"), "Should have 'ttl' field");
|
||||
|
||||
// 'type' should be a string, not an object
|
||||
assert!(obj["type"].is_string(), "'type' should be a string value");
|
||||
|
||||
// 'ttl' should be a string, not nested
|
||||
assert!(obj["ttl"].is_string(), "'ttl' should be a string value");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ephemeral_cache_control_structure() {
|
||||
let cache_control = CacheControl::ephemeral();
|
||||
let json_value = serde_json::to_value(&cache_control).unwrap();
|
||||
|
||||
println!("Ephemeral cache control as JSON value: {}", serde_json::to_string_pretty(&json_value).unwrap());
|
||||
|
||||
let obj = json_value.as_object().expect("Should be an object");
|
||||
|
||||
// Should have exactly 1 key (only 'type', no 'ttl')
|
||||
assert_eq!(obj.len(), 1, "Ephemeral cache control should have exactly 1 top-level field");
|
||||
|
||||
// Should have 'type' field
|
||||
assert!(obj.contains_key("type"), "Should have 'type' field");
|
||||
|
||||
// Should NOT have 'ttl' field
|
||||
assert!(!obj.contains_key("ttl"), "Ephemeral should not have 'ttl' field");
|
||||
|
||||
// 'type' should be a string with value "ephemeral"
|
||||
assert_eq!(obj["type"].as_str().unwrap(), "ephemeral");
|
||||
}
|
||||
164
crates/g3-providers/tests/cache_control_integration_test.rs
Normal file
@@ -0,0 +1,164 @@
|
||||
//! Integration tests for cache_control feature
|
||||
//!
|
||||
//! These tests verify that cache_control is correctly serialized in messages
|
||||
//! for both Anthropic and Databricks providers.
|
||||
|
||||
use g3_providers::{CacheControl, Message, MessageRole};
|
||||
use serde_json::json;
|
||||
|
||||
#[test]
|
||||
fn test_ephemeral_cache_control_serialization() {
|
||||
let cache_control = CacheControl::ephemeral();
|
||||
let json = serde_json::to_value(&cache_control).unwrap();
|
||||
|
||||
println!("Ephemeral cache_control JSON: {}", serde_json::to_string(&json).unwrap());
|
||||
|
||||
assert_eq!(json, json!({
|
||||
"type": "ephemeral"
|
||||
}));
|
||||
|
||||
// Verify no ttl field is present
|
||||
assert!(!json.as_object().unwrap().contains_key("ttl"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_five_minute_cache_control_serialization() {
|
||||
let cache_control = CacheControl::five_minute();
|
||||
let json = serde_json::to_value(&cache_control).unwrap();
|
||||
|
||||
println!("5-minute cache_control JSON: {}", serde_json::to_string(&json).unwrap());
|
||||
|
||||
assert_eq!(json, json!({
|
||||
"type": "ephemeral",
|
||||
"ttl": "5m"
|
||||
}));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_one_hour_cache_control_serialization() {
|
||||
let cache_control = CacheControl::one_hour();
|
||||
let json = serde_json::to_value(&cache_control).unwrap();
|
||||
|
||||
println!("1-hour cache_control JSON: {}", serde_json::to_string(&json).unwrap());
|
||||
|
||||
assert_eq!(json, json!({
|
||||
"type": "ephemeral",
|
||||
"ttl": "1h"
|
||||
}));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_message_with_ephemeral_cache_control() {
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::System,
|
||||
"System prompt".to_string(),
|
||||
CacheControl::ephemeral(),
|
||||
);
|
||||
|
||||
let json = serde_json::to_value(&msg).unwrap();
|
||||
println!("Message with ephemeral cache_control: {}", serde_json::to_string(&json).unwrap());
|
||||
|
||||
let cache_control = json.get("cache_control").expect("cache_control field should exist");
|
||||
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
|
||||
assert!(!cache_control.as_object().unwrap().contains_key("ttl"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_message_with_five_minute_cache_control() {
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::System,
|
||||
"System prompt".to_string(),
|
||||
CacheControl::five_minute(),
|
||||
);
|
||||
|
||||
let json = serde_json::to_value(&msg).unwrap();
|
||||
println!("Message with 5-minute cache_control: {}", serde_json::to_string(&json).unwrap());
|
||||
|
||||
let cache_control = json.get("cache_control").expect("cache_control field should exist");
|
||||
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
|
||||
assert_eq!(cache_control.get("ttl").unwrap(), "5m");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_message_with_one_hour_cache_control() {
|
||||
let msg = Message::with_cache_control(
|
||||
MessageRole::System,
|
||||
"System prompt".to_string(),
|
||||
CacheControl::one_hour(),
|
||||
);
|
||||
|
||||
let json = serde_json::to_value(&msg).unwrap();
|
||||
println!("Message with 1-hour cache_control: {}", serde_json::to_string(&json).unwrap());
|
||||
|
||||
let cache_control = json.get("cache_control").expect("cache_control field should exist");
|
||||
assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
|
||||
assert_eq!(cache_control.get("ttl").unwrap(), "1h");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_message_without_cache_control() {
|
||||
let msg = Message::new(MessageRole::User, "Hello".to_string());
|
||||
|
||||
let json = serde_json::to_value(&msg).unwrap();
|
||||
println!("Message without cache_control: {}", serde_json::to_string(&json).unwrap());
|
||||
|
||||
// cache_control field should not be present when not set
|
||||
assert!(!json.as_object().unwrap().contains_key("cache_control"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_control_json_format_ephemeral() {
|
||||
let cache_control = CacheControl::ephemeral();
|
||||
let json_str = serde_json::to_string(&cache_control).unwrap();
|
||||
|
||||
println!("Ephemeral JSON string: {}", json_str);
|
||||
|
||||
// Verify exact JSON format
|
||||
assert_eq!(json_str, r#"{"type":"ephemeral"}"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_control_json_format_five_minute() {
|
||||
let cache_control = CacheControl::five_minute();
|
||||
let json_str = serde_json::to_string(&cache_control).unwrap();
|
||||
|
||||
println!("5-minute JSON string: {}", json_str);
|
||||
|
||||
// Verify exact JSON format
|
||||
assert_eq!(json_str, r#"{"type":"ephemeral","ttl":"5m"}"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_cache_control_json_format_one_hour() {
|
||||
let cache_control = CacheControl::one_hour();
|
||||
let json_str = serde_json::to_string(&cache_control).unwrap();
|
||||
|
||||
println!("1-hour JSON string: {}", json_str);
|
||||
|
||||
// Verify exact JSON format
|
||||
assert_eq!(json_str, r#"{"type":"ephemeral","ttl":"1h"}"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialization_ephemeral() {
|
||||
let json_str = r#"{"type":"ephemeral"}"#;
|
||||
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
|
||||
|
||||
assert_eq!(cache_control.ttl, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialization_five_minute() {
|
||||
let json_str = r#"{"type":"ephemeral","ttl":"5m"}"#;
|
||||
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
|
||||
|
||||
assert_eq!(cache_control.ttl, Some("5m".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_deserialization_one_hour() {
|
||||
let json_str = r#"{"type":"ephemeral","ttl":"1h"}"#;
|
||||
let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
|
||||
|
||||
assert_eq!(cache_control.ttl, Some("1h".to_string()));
|
||||
}
|
||||
70
tail_tool_logs.sh
Executable file
@@ -0,0 +1,70 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Utility for tailing tool_calls log files: it picks up whichever file is newest and runs tail -f on it, switching whenever a newer one appears.
|
||||
|
||||
if [[ -n "$G3_WORKSPACE" ]]; then
|
||||
TARGET_DIR="$G3_WORKSPACE/logs"
|
||||
else
|
||||
TARGET_DIR="$HOME/tmp/workspace/logs"
|
||||
fi
|
||||
|
||||
if [[ ! -d "$TARGET_DIR" ]]; then
|
||||
echo "Error: Directory '$TARGET_DIR' does not exist."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
cd "$TARGET_DIR" || exit 1
|
||||
|
||||
echo "Monitoring directory '$TARGET_DIR' for newest 'tool_calls*' file..."
|
||||
|
||||
|
||||
# Variables to keep track of the current state
|
||||
CURRENT_PID=""
|
||||
CURRENT_FILE=""
|
||||
|
||||
# Cleanup function: Kill the background tail process when this script is stopped (Ctrl+C)
|
||||
cleanup() {
|
||||
echo ""
|
||||
echo "Stopping monitor..."
|
||||
if [[ -n "$CURRENT_PID" ]]; then
|
||||
kill "$CURRENT_PID" 2>/dev/null
|
||||
fi
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Register the cleanup function for SIGINT (Ctrl+C) and SIGTERM
|
||||
trap cleanup SIGINT SIGTERM
|
||||
|
||||
while true; do
|
||||
# Find the newest file matching the pattern using ls -t (sort by time)
|
||||
# 2>/dev/null suppresses errors if no files are found
|
||||
NEWEST_FILE=$(ls -t tool_calls* 2>/dev/null | head -n 1)
|
||||
|
||||
# If a file was found AND it is different from the one we are currently watching
|
||||
if [[ -n "$NEWEST_FILE" && "$NEWEST_FILE" != "$CURRENT_FILE" ]]; then
|
||||
|
||||
# If we were already watching a file, kill the old tail process
|
||||
if [[ -n "$CURRENT_PID" ]]; then
|
||||
kill "$CURRENT_PID" 2>/dev/null
|
||||
fi
|
||||
|
||||
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
|
||||
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
|
||||
echo ">>> Switched to new file: $NEWEST_FILE"
|
||||
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
|
||||
echo ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
|
||||
|
||||
# Start tail in the background (&)
|
||||
tail -f "$NEWEST_FILE" &
|
||||
|
||||
# Capture the Process ID ($!) of the tail command we just launched
|
||||
CURRENT_PID=$!
|
||||
|
||||
# Update the tracker variable
|
||||
CURRENT_FILE="$NEWEST_FILE"
|
||||
fi
|
||||
|
||||
# Wait 1 second before checking again
|
||||
sleep 1
|
||||
done
|
||||
|
||||