Merge branch 'dhanji/fix-auto-continue': Fix auto-continue and duplicate detection bugs

This commit is contained in:
Dhanji R. Prasanna
2025-12-22 17:12:24 +11:00
25 changed files with 1656 additions and 383 deletions

View File

@@ -267,7 +267,7 @@ use std::path::Path;
use std::path::PathBuf; use std::path::PathBuf;
use std::process::exit; use std::process::exit;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{error, info}; use tracing::{debug, error};
use g3_core::error_handling::{classify_error, ErrorType, RecoverableError}; use g3_core::error_handling::{classify_error, ErrorType, RecoverableError};
mod simple_output; mod simple_output;
@@ -2693,7 +2693,7 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?; extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?;
// Log the size of the feedback for debugging // Log the size of the feedback for debugging
info!( debug!(
"Coach feedback extracted: {} characters (from {} total)", "Coach feedback extracted: {} characters (from {} total)",
coach_feedback_text.len(), coach_feedback_text.len(),
coach_result.response.len() coach_result.response.len()

View File

@@ -68,6 +68,18 @@ fn main() {
dylib_dst.display() dylib_dst.display()
); );
// Re-sign the dylib with ad-hoc signature to fix code signing issues on Apple Silicon
// This is necessary because incremental compilation can invalidate signatures
let codesign_status = Command::new("codesign")
.args(&["-f", "-s", "-", dylib_dst.to_str().unwrap()])
.status();
if let Ok(status) = codesign_status {
if !status.success() {
println!("cargo:warning=Failed to codesign libVisionBridge.dylib (non-fatal)");
}
}
// Add rpath so the dylib can be found at runtime // Add rpath so the dylib can be found at runtime
println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path"); println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path");
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path"); println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");

View File

@@ -24,7 +24,7 @@ impl MacOSController {
pub fn new() -> Result<Self> { pub fn new() -> Result<Self> {
let ocr = Box::new(DefaultOCR::new()?); let ocr = Box::new(DefaultOCR::new()?);
let ocr_name = ocr.name().to_string(); let ocr_name = ocr.name().to_string();
tracing::info!("Initialized macOS controller with OCR engine: {}", ocr_name); tracing::debug!("Initialized macOS controller with OCR engine: {}", ocr_name);
Ok(Self { Ok(Self {
ocr_engine: ocr, ocr_engine: ocr,
ocr_name, ocr_name,
@@ -155,7 +155,7 @@ impl ComputerController for MacOSController {
// 1. At layer 0 (normal windows, not menu bar) // 1. At layer 0 (normal windows, not menu bar)
// 2. Have real bounds (width and height >= 100) // 2. Have real bounds (width and height >= 100)
if layer == 0 && has_real_bounds { if layer == 0 && has_real_bounds {
tracing::info!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer); tracing::debug!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer);
found_window_id = Some((id as u32, owner.clone())); found_window_id = Some((id as u32, owner.clone()));
break; break;
} else { } else {
@@ -178,7 +178,7 @@ impl ComputerController for MacOSController {
let (cg_window_id, matched_owner) = cg_window_id.ok_or_else(|| { let (cg_window_id, matched_owner) = cg_window_id.ok_or_else(|| {
anyhow::anyhow!("Could not find window for application '{}'. Use list_windows to see available windows.", app_name) anyhow::anyhow!("Could not find window for application '{}'. Use list_windows to see available windows.", app_name)
})?; })?;
tracing::info!( tracing::debug!(
"Taking screenshot of window ID {} for app '{}'", "Taking screenshot of window ID {} for app '{}'",
cg_window_id, cg_window_id,
matched_owner matched_owner
@@ -468,7 +468,7 @@ impl MacOSController {
// Only accept windows with real bounds (>= 100x100 pixels) // Only accept windows with real bounds (>= 100x100 pixels)
if w >= 100 && h >= 100 { if w >= 100 && h >= 100 {
tracing::info!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer); tracing::debug!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer);
return Ok((x, y, w, h)); return Ok((x, y, w, h));
} else { } else {
tracing::debug!( tracing::debug!(

View File

@@ -3,7 +3,7 @@ use crate::process::ProcessController;
use axum::{extract::State, http::StatusCode, Json}; use axum::{extract::State, http::StatusCode, Json};
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tracing::{error, info}; use tracing::{debug, error};
pub type ControllerState = Arc<Mutex<ProcessController>>; pub type ControllerState = Arc<Mutex<ProcessController>>;
@@ -22,7 +22,7 @@ pub async fn kill_instance(
match controller.kill_process(pid) { match controller.kill_process(pid) {
Ok(_) => { Ok(_) => {
info!("Successfully killed process {}", pid); debug!("Successfully killed process {}", pid);
Ok(Json(serde_json::json!({ Ok(Json(serde_json::json!({
"status": "terminating" "status": "terminating"
}))) })))
@@ -38,7 +38,7 @@ pub async fn restart_instance(
State(controller): State<ControllerState>, State(controller): State<ControllerState>,
axum::extract::Path(id): axum::extract::Path<String>, axum::extract::Path(id): axum::extract::Path<String>,
) -> Result<Json<LaunchResponse>, StatusCode> { ) -> Result<Json<LaunchResponse>, StatusCode> {
info!("Restarting instance: {}", id); debug!("Restarting instance: {}", id);
// Extract PID from instance ID (format: pid_timestamp) // Extract PID from instance ID (format: pid_timestamp)
let pid: u32 = id let pid: u32 = id
@@ -81,7 +81,7 @@ pub async fn launch_instance(
State(controller): State<ControllerState>, State(controller): State<ControllerState>,
Json(request): Json<LaunchRequest>, Json(request): Json<LaunchRequest>,
) -> Result<Json<LaunchResponse>, (StatusCode, Json<serde_json::Value>)> { ) -> Result<Json<LaunchResponse>, (StatusCode, Json<serde_json::Value>)> {
info!("Launching new g3 instance: {:?}", request); debug!("Launching new g3 instance: {:?}", request);
// Validate binary path if provided // Validate binary path if provided
if let Some(ref binary_path) = request.g3_binary_path { if let Some(ref binary_path) = request.g3_binary_path {
@@ -149,7 +149,7 @@ pub async fn launch_instance(
) { ) {
Ok(pid) => { Ok(pid) => {
let id = format!("{}_{}", pid, chrono::Utc::now().timestamp()); let id = format!("{}_{}", pid, chrono::Utc::now().timestamp());
info!("Successfully launched g3 instance with PID {}", pid); debug!("Successfully launched g3 instance with PID {}", pid);
Ok(Json(LaunchResponse { Ok(Json(LaunchResponse {
id, id,
status: "starting".to_string(), status: "starting".to_string(),

View File

@@ -3,7 +3,7 @@ use axum::{http::StatusCode, Json};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
use std::path::PathBuf; use std::path::PathBuf;
use tracing::{error, info}; use tracing::{debug, error};
pub async fn get_state() -> Result<Json<ConsoleState>, StatusCode> { pub async fn get_state() -> Result<Json<ConsoleState>, StatusCode> {
let state = ConsoleState::load(); let state = ConsoleState::load();
@@ -15,7 +15,7 @@ pub async fn save_state(
) -> Result<Json<serde_json::Value>, StatusCode> { ) -> Result<Json<serde_json::Value>, StatusCode> {
match state.save() { match state.save() {
Ok(_) => { Ok(_) => {
info!("Console state saved successfully"); debug!("Console state saved successfully");
Ok(Json(serde_json::json!({ Ok(Json(serde_json::json!({
"status": "saved" "status": "saved"
}))) })))

View File

@@ -1,7 +1,7 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::PathBuf;
use tracing::info; use tracing::debug;
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsoleState { pub struct ConsoleState {
@@ -42,7 +42,7 @@ impl ConsoleState {
pub fn save(&self) -> anyhow::Result<()> { pub fn save(&self) -> anyhow::Result<()> {
let config_path = Self::config_path(); let config_path = Self::config_path();
info!("Saving console state to: {:?}", config_path); debug!("Saving console state to: {:?}", config_path);
// Create parent directory if it doesn't exist // Create parent directory if it doesn't exist
if let Some(parent) = config_path.parent() { if let Some(parent) = config_path.parent() {
@@ -51,7 +51,7 @@ impl ConsoleState {
let content = serde_json::to_string_pretty(self)?; let content = serde_json::to_string_pretty(self)?;
fs::write(&config_path, content)?; fs::write(&config_path, content)?;
info!("Console state saved successfully to: {:?}", config_path); debug!("Console state saved successfully to: {:?}", config_path);
Ok(()) Ok(())
} }

View File

@@ -16,7 +16,7 @@ use std::sync::Arc;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tower_http::cors::CorsLayer; use tower_http::cors::CorsLayer;
use tower_http::services::ServeDir; use tower_http::services::ServeDir;
use tracing::{info, Level}; use tracing::{debug, Level};
use tracing_subscriber; use tracing_subscriber;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
@@ -84,12 +84,12 @@ async fn main() -> anyhow::Result<()> {
.layer(CorsLayer::permissive()); .layer(CorsLayer::permissive());
let addr = format!("{}:{}", args.host, args.port); let addr = format!("{}:{}", args.host, args.port);
info!("Starting g3-console on http://{}", addr); debug!("Starting g3-console on http://{}", addr);
// Auto-open browser if requested // Auto-open browser if requested
if args.open { if args.open {
let url = format!("http://{}", addr); let url = format!("http://{}", addr);
info!("Opening browser to {}", url); debug!("Opening browser to {}", url);
let _ = open::that(&url); let _ = open::that(&url);
} }

View File

@@ -6,7 +6,7 @@ use std::path::PathBuf;
use std::process::{Command, Stdio}; use std::process::{Command, Stdio};
use std::sync::Mutex; use std::sync::Mutex;
use sysinfo::{Pid, Process, Signal, System}; use sysinfo::{Pid, Process, Signal, System};
use tracing::{debug, info}; use tracing::debug;
pub struct ProcessController { pub struct ProcessController {
system: System, system: System,
@@ -26,7 +26,7 @@ impl ProcessController {
self.system.refresh_processes(); self.system.refresh_processes();
if let Some(process) = self.system.process(sysinfo_pid) { if let Some(process) = self.system.process(sysinfo_pid) {
info!("Killing process {} ({})", pid, process.name()); debug!("Killing process {} ({})", pid, process.name());
// Try SIGTERM first // Try SIGTERM first
if process.kill_with(Signal::Term).is_some() { if process.kill_with(Signal::Term).is_some() {
@@ -107,7 +107,7 @@ impl ProcessController {
}); });
} }
info!("Launching g3: {:?}", cmd); debug!("Launching g3: {:?}", cmd);
// Spawn and wait for the intermediate process to exit // Spawn and wait for the intermediate process to exit
let mut child = cmd.spawn().context("Failed to spawn g3 process")?; let mut child = cmd.spawn().context("Failed to spawn g3 process")?;
@@ -120,7 +120,7 @@ impl ProcessController {
// The actual g3 process is now running as orphan // The actual g3 process is now running as orphan
// We need to scan for it by matching workspace and recent start time // We need to scan for it by matching workspace and recent start time
info!( debug!(
"Scanning for newly launched g3 process in workspace: {}", "Scanning for newly launched g3 process in workspace: {}",
workspace workspace
); );
@@ -171,7 +171,7 @@ impl ProcessController {
found found
} else { } else {
// If we couldn't find it, try one more refresh after a longer delay // If we couldn't find it, try one more refresh after a longer delay
info!("Process not found on first scan, trying again..."); debug!("Process not found on first scan, trying again...");
std::thread::sleep(std::time::Duration::from_millis(2000)); std::thread::sleep(std::time::Duration::from_millis(2000));
self.system.refresh_processes(); self.system.refresh_processes();
@@ -204,7 +204,7 @@ impl ProcessController {
retry_found.unwrap_or(intermediate_pid) retry_found.unwrap_or(intermediate_pid)
}; };
info!("Launched g3 process with PID {}", pid); debug!("Launched g3 process with PID {}", pid);
// Store launch params for restart // Store launch params for restart
let params = LaunchParams { let params = LaunchParams {

View File

@@ -3,7 +3,7 @@ use anyhow::Result;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use std::path::PathBuf; use std::path::PathBuf;
use sysinfo::{Pid, Process, System}; use sysinfo::{Pid, Process, System};
use tracing::{debug, info, warn}; use tracing::{debug, warn};
pub struct ProcessDetector { pub struct ProcessDetector {
system: System, system: System,
@@ -17,7 +17,7 @@ impl ProcessDetector {
} }
pub fn detect_instances(&mut self) -> Result<Vec<Instance>> { pub fn detect_instances(&mut self) -> Result<Vec<Instance>> {
info!("Scanning for g3 processes..."); debug!("Scanning for g3 processes...");
// Refresh all processes to ensure we catch newly started ones // Refresh all processes to ensure we catch newly started ones
// Using refresh_all() instead of just refresh_processes() to ensure // Using refresh_all() instead of just refresh_processes() to ensure
// we get complete information about new processes // we get complete information about new processes
@@ -37,7 +37,7 @@ impl ProcessDetector {
} }
} }
info!("Detected {} g3 instances", instances.len()); debug!("Detected {} g3 instances", instances.len());
Ok(instances) Ok(instances)
} }

View File

@@ -9,7 +9,7 @@
use anyhow::Result; use anyhow::Result;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::time::Duration; use std::time::Duration;
use tracing::{error, info, warn}; use tracing::{debug, error, warn};
/// Base delay for exponential backoff (in milliseconds) /// Base delay for exponential backoff (in milliseconds)
const BASE_RETRY_DELAY_MS: u64 = 1000; const BASE_RETRY_DELAY_MS: u64 = 1000;
@@ -149,7 +149,7 @@ impl ErrorContext {
if let Err(e) = std::fs::write(&filename, json_content) { if let Err(e) = std::fs::write(&filename, json_content) {
error!("Failed to save error context to {:?}: {}", &filename, e); error!("Failed to save error context to {:?}: {}", &filename, e);
} else { } else {
info!("Error details saved to: {:?}", &filename); debug!("Error details saved to: {:?}", &filename);
} }
} }
Err(e) => { Err(e) => {
@@ -328,7 +328,7 @@ where
match operation().await { match operation().await {
Ok(result) => { Ok(result) => {
if attempt > 1 { if attempt > 1 {
info!( debug!(
"Operation '{}' succeeded after {} attempts", "Operation '{}' succeeded after {} attempts",
operation_name, attempt operation_name, attempt
); );
@@ -357,7 +357,7 @@ where
// Special handling for token limit errors // Special handling for token limit errors
if matches!(recoverable_type, RecoverableError::TokenLimit) { if matches!(recoverable_type, RecoverableError::TokenLimit) {
info!("Token limit error detected. Consider triggering summarization."); debug!("Token limit error detected. Consider triggering summarization.");
} }
tokio::time::sleep(delay).await; tokio::time::sleep(delay).await;

View File

@@ -12,7 +12,7 @@ use crate::{logs_dir, Agent, TaskResult};
use crate::ui_writer::UiWriter; use crate::ui_writer::UiWriter;
use serde_json::Value; use serde_json::Value;
use std::path::PathBuf; use std::path::PathBuf;
use tracing::{debug, info, warn}; use tracing::{debug, warn};
/// Result of feedback extraction with source information /// Result of feedback extraction with source information
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -103,21 +103,21 @@ where
// Try session log first (most reliable) // Try session log first (most reliable)
if let Some(session_id) = agent.get_session_id() { if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_from_session_log(&session_id, config) { if let Some(feedback) = try_extract_from_session_log(&session_id, config) {
info!("Extracted coach feedback from session log: {} chars", feedback.len()); debug!("Extracted coach feedback from session log: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog); return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog);
} }
} }
// Try native tool call JSON parsing // Try native tool call JSON parsing
if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) { if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) {
info!("Extracted coach feedback from native tool call: {} chars", feedback.len()); debug!("Extracted coach feedback from native tool call: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall); return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall);
} }
// Try conversation history // Try conversation history
if let Some(session_id) = agent.get_session_id() { if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) { if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) {
info!("Extracted coach feedback from conversation history: {} chars", feedback.len()); debug!("Extracted coach feedback from conversation history: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory); return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory);
} }
} }
@@ -125,7 +125,7 @@ where
// Try TaskResult parsing // Try TaskResult parsing
let extracted = coach_result.extract_final_output(); let extracted = coach_result.extract_final_output();
if !extracted.is_empty() { if !extracted.is_empty() {
info!("Extracted coach feedback from task result: {} chars", extracted.len()); debug!("Extracted coach feedback from task result: {} chars", extracted.len());
return ExtractedFeedback::new(extracted, FeedbackSource::TaskResultResponse); return ExtractedFeedback::new(extracted, FeedbackSource::TaskResultResponse);
} }

View File

@@ -39,7 +39,7 @@ use serde_json::json;
use std::io::Write; use std::io::Write;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, warn};
/// Get the path to the todo.g3.md file. /// Get the path to the todo.g3.md file.
/// ///
@@ -246,13 +246,23 @@ pub enum StreamState {
Resuming, Resuming,
} }
/// Patterns used to detect JSON tool calls embedded in model text output.
/// These cover common whitespace variations around the `"tool"` key in JSON
/// formatting. Matching is a plain substring search over the text buffer
/// (see `find_last_tool_call_start` / `find_first_tool_call_start`); parsing
/// and validation of the candidate JSON happens separately.
const TOOL_CALL_PATTERNS: [&str; 4] = [
r#"{"tool":"#,
r#"{ "tool":"#,
r#"{"tool" :"#,
r#"{ "tool" :"#,
];
/// Modern streaming tool parser that properly handles native tool calls and SSE chunks /// Modern streaming tool parser that properly handles native tool calls and SSE chunks
#[derive(Debug)] #[derive(Debug)]
pub struct StreamingToolParser { pub struct StreamingToolParser {
/// Buffer for accumulating text content /// Buffer for accumulating text content
text_buffer: String, text_buffer: String,
/// Buffer for accumulating native tool calls /// Position in text_buffer up to which tool calls have been consumed/executed
native_tool_calls: Vec<g3_providers::ToolCall>, /// This prevents has_unexecuted_tool_call() from returning true for already-executed tools
last_consumed_position: usize,
/// Whether we've received a message_stop event /// Whether we've received a message_stop event
message_stopped: bool, message_stopped: bool,
/// Whether we're currently in a JSON tool call (for fallback parsing) /// Whether we're currently in a JSON tool call (for fallback parsing)
@@ -271,13 +281,58 @@ impl StreamingToolParser {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
text_buffer: String::new(), text_buffer: String::new(),
native_tool_calls: Vec::new(), last_consumed_position: 0,
message_stopped: false, message_stopped: false,
in_json_tool_call: false, in_json_tool_call: false,
json_tool_start: None, json_tool_start: None,
} }
} }
/// Find the starting byte position of the LAST tool call pattern in `text`.
///
/// Every pattern in `TOOL_CALL_PATTERNS` is searched from the end of the
/// text; the greatest match position wins. Returns `None` when no pattern
/// occurs at all.
fn find_last_tool_call_start(text: &str) -> Option<usize> {
    TOOL_CALL_PATTERNS
        .iter()
        .filter_map(|pattern| text.rfind(pattern))
        .max()
}
/// Find the starting byte position of the FIRST tool call pattern in `text`.
///
/// Every pattern in `TOOL_CALL_PATTERNS` is searched from the start of the
/// text; the smallest match position wins. Returns `None` when no pattern
/// occurs at all.
fn find_first_tool_call_start(text: &str) -> Option<usize> {
    TOOL_CALL_PATTERNS
        .iter()
        .filter_map(|pattern| text.find(pattern))
        .min()
}
/// Heuristic check for malformed tool calls where agent prose leaked into
/// the `args` object.
///
/// A key is considered "message-like" when it is implausibly long, spans
/// multiple lines, or contains a phrase that typically opens an assistant
/// message. Returns `true` if ANY key looks like message content.
fn has_message_like_keys(args: &serde_json::Map<String, serde_json::Value>) -> bool {
    // Phrases that usually start agent messages rather than argument names.
    const MESSAGE_PHRASES: [&str; 8] = [
        "I'll", "Let me", "Here's", "I can", "I need", "First", "Now", "The ",
    ];
    args.keys().any(|key| {
        key.len() > 100
            || key.contains('\n')
            || MESSAGE_PHRASES.iter().any(|phrase| key.contains(phrase))
    })
}
/// Process a streaming chunk and return completed tool calls if any /// Process a streaming chunk and return completed tool calls if any
pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec<ToolCall> { pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec<ToolCall> {
let mut completed_tools = Vec::new(); let mut completed_tools = Vec::new();
@@ -308,10 +363,12 @@ impl StreamingToolParser {
self.message_stopped = true; self.message_stopped = true;
debug!("Message finished, processing accumulated tool calls"); debug!("Message finished, processing accumulated tool calls");
// When stream finishes, do a final check for JSON tool calls in the accumulated buffer // When stream finishes, find ALL JSON tool calls in the accumulated buffer
if completed_tools.is_empty() && !self.text_buffer.is_empty() { if completed_tools.is_empty() && !self.text_buffer.is_empty() {
if let Some(json_tool) = self.try_parse_json_tool_call_from_buffer() { let all_tools = self.try_parse_all_json_tool_calls_from_buffer();
completed_tools.push(json_tool); if !all_tools.is_empty() {
debug!("Found {} JSON tool calls in buffer at stream end", all_tools.len());
completed_tools.extend(all_tools);
} }
} }
} }
@@ -328,26 +385,12 @@ impl StreamingToolParser {
/// Fallback method to parse JSON tool calls from text content /// Fallback method to parse JSON tool calls from text content
fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> { fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
// Look for JSON tool call patterns
let patterns = [
r#"{"tool":"#,
r#"{ "tool":"#,
r#"{"tool" :"#,
r#"{ "tool" :"#,
];
// If we're not currently in a JSON tool call, look for the start // If we're not currently in a JSON tool call, look for the start
if !self.in_json_tool_call { if !self.in_json_tool_call {
for pattern in &patterns { if let Some(pos) = Self::find_last_tool_call_start(&self.text_buffer) {
if let Some(pos) = self.text_buffer.rfind(pattern) { debug!("Found JSON tool call pattern at position {}", pos);
debug!( self.in_json_tool_call = true;
"Found JSON tool call pattern '{}' at position {}", self.json_tool_start = Some(pos);
pattern, pos
);
self.in_json_tool_call = true;
self.json_tool_start = Some(pos);
break;
}
} }
} }
@@ -356,83 +399,34 @@ impl StreamingToolParser {
if let Some(start_pos) = self.json_tool_start { if let Some(start_pos) = self.json_tool_start {
let json_text = &self.text_buffer[start_pos..]; let json_text = &self.text_buffer[start_pos..];
// Try to find a complete JSON object // Try to find a complete JSON object using the shared helper
let mut brace_count = 0; if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
let mut in_string = false; let json_str = &json_text[..=end_pos];
let mut escape_next = false; debug!("Attempting to parse JSON tool call: {}", json_str);
for (i, ch) in json_text.char_indices() { // Try to parse as a ToolCall
if escape_next { if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
escape_next = false; // Validate that args is an object with reasonable keys
continue; if let Some(args_obj) = tool_call.args.as_object() {
} if Self::has_message_like_keys(args_obj) {
debug!("Detected malformed tool call with message-like keys, skipping");
match ch { self.in_json_tool_call = false;
'\\' => escape_next = true, self.json_tool_start = None;
'"' if !escape_next => in_string = !in_string, return None;
'{' if !in_string => brace_count += 1,
'}' if !in_string => {
brace_count -= 1;
if brace_count == 0 {
// Found complete JSON object
let json_str = &json_text[..=i];
debug!("Attempting to parse JSON tool call: {}", json_str);
// First try to parse as a ToolCall
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
// Validate that this is actually a proper tool call
// The args should be a JSON object with reasonable keys
if let Some(args_obj) = tool_call.args.as_object() {
// Check if any key looks like it contains agent message content
// This would indicate a malformed tool call where the message
// got mixed into the args
let has_message_like_key = args_obj.keys().any(|key| {
key.len() > 100
|| key.contains('\n')
|| key.contains("I'll")
|| key.contains("Let me")
|| key.contains("Here's")
|| key.contains("I can")
|| key.contains("I need")
|| key.contains("First")
|| key.contains("Now")
|| key.contains("The ")
});
if has_message_like_key {
debug!("Detected malformed tool call with message-like keys, skipping");
// This looks like a malformed tool call, skip it
self.in_json_tool_call = false;
self.json_tool_start = None;
break;
}
// Also check if the values look reasonable
// Tool arguments should typically be file paths, commands, or content
// Not entire agent messages
debug!(
"Successfully parsed valid JSON tool call: {:?}",
tool_call
);
// Reset JSON parsing state
self.in_json_tool_call = false;
self.json_tool_start = None;
return Some(tool_call);
}
// If args is not an object, skip this as invalid
debug!("Tool call args is not an object, skipping");
} else {
debug!("Failed to parse JSON tool call: {}", json_str);
// Reset and continue looking
self.in_json_tool_call = false;
self.json_tool_start = None;
}
break;
} }
debug!("Successfully parsed valid JSON tool call: {:?}", tool_call);
self.in_json_tool_call = false;
self.json_tool_start = None;
return Some(tool_call);
} }
_ => {} debug!("Tool call args is not an object, skipping");
} else {
debug!("Failed to parse JSON tool call: {}", json_str);
} }
// Reset and continue looking
self.in_json_tool_call = false;
self.json_tool_start = None;
} }
} }
} }
@@ -440,76 +434,45 @@ impl StreamingToolParser {
None None
} }
/// Parse JSON tool call from the accumulated text buffer (called when stream finishes) /// Parse ALL JSON tool calls from the accumulated text buffer
/// This is similar to try_parse_json_tool_call but operates on the full buffer /// This finds all complete tool calls, not just the last one
fn try_parse_json_tool_call_from_buffer(&mut self) -> Option<ToolCall> { fn try_parse_all_json_tool_calls_from_buffer(&self) -> Vec<ToolCall> {
// Look for JSON tool call patterns in the accumulated buffer let mut tool_calls = Vec::new();
let patterns = [ let mut search_start = 0;
r#"{"tool":"#,
r#"{ "tool":"#, while search_start < self.text_buffer.len() {
r#"{"tool" :"#, let search_text = &self.text_buffer[search_start..];
r#"{ "tool" :"#,
]; // Find the next tool call pattern
if let Some(relative_pos) = Self::find_first_tool_call_start(search_text) {
// Find the last occurrence of a tool call pattern (most likely to be complete) let abs_start = search_start + relative_pos;
let mut best_start: Option<usize> = None; let json_text = &self.text_buffer[abs_start..];
for pattern in &patterns {
if let Some(pos) = self.text_buffer.rfind(pattern) { // Try to find a complete JSON object
if best_start.map_or(true, |best| pos > best) { if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
best_start = Some(pos); let json_str = &json_text[..=end_pos];
}
} if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
} if let Some(args_obj) = tool_call.args.as_object() {
if !Self::has_message_like_keys(args_obj) {
if let Some(start_pos) = best_start { debug!("Found tool call at position {}: {:?}", abs_start, tool_call.tool);
let json_text = &self.text_buffer[start_pos..]; tool_calls.push(tool_call);
debug!("Found potential JSON tool call at position {}: {:?}", start_pos,
if json_text.len() > 200 { &json_text[..200] } else { json_text });
// Try to find a complete JSON object
let mut brace_count = 0;
let mut in_string = false;
let mut escape_next = false;
for (i, ch) in json_text.char_indices() {
if escape_next {
escape_next = false;
continue;
}
match ch {
'\\' => escape_next = true,
'"' if !escape_next => in_string = !in_string,
'{' if !in_string => brace_count += 1,
'}' if !in_string => {
brace_count -= 1;
if brace_count == 0 {
// Found complete JSON object
let json_str = &json_text[..=i];
debug!("Attempting to parse JSON tool call from buffer: {}", json_str);
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
if let Some(args_obj) = tool_call.args.as_object() {
// Validate - check for message-like keys
let has_message_like_key = args_obj.keys().any(|key| {
key.len() > 100 || key.contains('\n')
});
if !has_message_like_key {
debug!("Successfully parsed JSON tool call from buffer: {:?}", tool_call);
return Some(tool_call);
}
}
} }
break;
} }
} }
_ => {} // Move past this tool call
search_start = abs_start + end_pos + 1;
} else {
// Incomplete JSON, stop searching
break;
} }
} else {
// No more tool call patterns found
break;
} }
} }
None tool_calls
} }
/// Get the accumulated text content (excluding tool calls) /// Get the accumulated text content (excluding tool calls)
@@ -531,10 +494,83 @@ impl StreamingToolParser {
self.message_stopped self.message_stopped
} }
/// Check whether the unconsumed tail of the text buffer contains a tool call
/// that was started but never completed.
///
/// This detects truncated output: the LLM began emitting a tool-call JSON
/// object but the stream ended before its closing brace arrived. Only text
/// after `last_consumed_position` is inspected, so already-handled tool
/// calls never re-trigger this check.
pub fn has_incomplete_tool_call(&self) -> bool {
    let pending = &self.text_buffer[self.last_consumed_position..];
    Self::find_last_tool_call_start(pending).map_or(false, |start| {
        // A pattern with no matching complete JSON object means truncation.
        Self::find_complete_json_object_end(&pending[start..]).is_none()
    })
}
/// Check whether the unconsumed tail of the text buffer contains a COMPLETE
/// tool-call JSON object that was never parsed/executed (e.g. due to a
/// parsing hiccup mid-stream).
///
/// Only text after `last_consumed_position` is inspected, so tool calls
/// already marked consumed via `mark_tool_calls_consumed()` are ignored.
pub fn has_unexecuted_tool_call(&self) -> bool {
    let pending = &self.text_buffer[self.last_consumed_position..];
    let start = match Self::find_last_tool_call_start(pending) {
        Some(pos) => pos,
        None => return false,
    };
    let candidate = &pending[start..];
    match Self::find_complete_json_object_end(candidate) {
        // Complete braces alone aren't enough — it must also be valid JSON.
        Some(end) => serde_json::from_str::<serde_json::Value>(&candidate[..=end]).is_ok(),
        None => false,
    }
}
/// Mark all tool calls up to the current buffer position as consumed/executed
/// This prevents has_unexecuted_tool_call() from returning true for already-executed tools
pub fn mark_tool_calls_consumed(&mut self) {
    // Advance the consumed watermark to the end of the buffer: everything
    // currently buffered is treated as handled, so has_incomplete_tool_call()
    // and has_unexecuted_tool_call() only look at content streamed after this point.
    self.last_consumed_position = self.text_buffer.len();
}
/// Find the end position (byte index) of a complete JSON object in the text.
///
/// Scans for a balanced `{ ... }` pair while ignoring braces that appear
/// inside JSON string literals (including after escaped quotes like `\"`).
/// Returns the byte index of the closing brace of the first complete
/// top-level object, or `None` if no complete object is found.
///
/// NOTE: callers are expected to pass text starting at (or before) the
/// object's opening brace; a stray `}` before the first `{` makes the
/// depth counter unbalance and the object will not be reported.
pub fn find_complete_json_object_end(text: &str) -> Option<usize> {
    let mut depth: i32 = 0;
    let mut in_string = false;
    let mut escape_next = false;
    let mut found_start = false;

    for (i, ch) in text.char_indices() {
        if escape_next {
            // Previous char was a backslash inside a string: this char is escaped.
            escape_next = false;
            continue;
        }
        match ch {
            // A backslash only acts as an escape inside a string literal.
            // (Previously a backslash outside a string also entered escape
            // mode, which could silently skip a following brace.)
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            '{' if !in_string => {
                depth += 1;
                found_start = true;
            }
            '}' if !in_string => {
                depth -= 1;
                if depth == 0 && found_start {
                    return Some(i); // byte index of the matching closing brace
                }
            }
            _ => {}
        }
    }
    None // no complete JSON object found
}
/// Reset the parser state for a new message /// Reset the parser state for a new message
pub fn reset(&mut self) { pub fn reset(&mut self) {
self.text_buffer.clear(); self.text_buffer.clear();
self.native_tool_calls.clear(); self.last_consumed_position = 0;
self.message_stopped = false; self.message_stopped = false;
self.in_json_tool_call = false; self.in_json_tool_call = false;
self.json_tool_start = None; self.json_tool_start = None;
@@ -2743,7 +2779,7 @@ impl<W: UiWriter> Agent<W> {
/// Manually trigger context summarization regardless of context window size /// Manually trigger context summarization regardless of context window size
/// Returns Ok(true) if summarization was successful, Ok(false) if it failed /// Returns Ok(true) if summarization was successful, Ok(false) if it failed
pub async fn force_summarize(&mut self) -> Result<bool> { pub async fn force_summarize(&mut self) -> Result<bool> {
info!("Manual summarization triggered"); debug!("Manual summarization triggered");
self.ui_writer.print_context_status(&format!( self.ui_writer.print_context_status(&format!(
"\n🗜️ Manual summarization requested (current usage: {}%)...", "\n🗜️ Manual summarization requested (current usage: {}%)...",
@@ -2861,7 +2897,7 @@ impl<W: UiWriter> Agent<W> {
/// Manually trigger context thinning regardless of thresholds /// Manually trigger context thinning regardless of thresholds
pub fn force_thin(&mut self) -> String { pub fn force_thin(&mut self) -> String {
info!("Manual context thinning triggered"); debug!("Manual context thinning triggered");
let (message, chars_saved) = self.context_window.thin_context(self.session_id.as_deref()); let (message, chars_saved) = self.context_window.thin_context(self.session_id.as_deref());
self.thinning_events.push(chars_saved); self.thinning_events.push(chars_saved);
message message
@@ -2870,7 +2906,7 @@ impl<W: UiWriter> Agent<W> {
/// Manually trigger context thinning for the ENTIRE context window /// Manually trigger context thinning for the ENTIRE context window
/// Unlike force_thin which only processes the first third, this processes all messages /// Unlike force_thin which only processes the first third, this processes all messages
pub fn force_thin_all(&mut self) -> String { pub fn force_thin_all(&mut self) -> String {
info!("Manual full context skinnifying triggered"); debug!("Manual full context skinnifying triggered");
let (message, chars_saved) = self.context_window.thin_context_all(self.session_id.as_deref()); let (message, chars_saved) = self.context_window.thin_context_all(self.session_id.as_deref());
self.thinning_events.push(chars_saved); self.thinning_events.push(chars_saved);
message message
@@ -2879,7 +2915,7 @@ impl<W: UiWriter> Agent<W> {
/// Reload README.md and AGENTS.md and replace the first system message /// Reload README.md and AGENTS.md and replace the first system message
/// Returns Ok(true) if README was found and reloaded, Ok(false) if no README was present initially /// Returns Ok(true) if README was found and reloaded, Ok(false) if no README was present initially
pub fn reload_readme(&mut self) -> Result<bool> { pub fn reload_readme(&mut self) -> Result<bool> {
info!("Manual README reload triggered"); debug!("Manual README reload triggered");
// Check if the second message in conversation history is a system message with README content // Check if the second message in conversation history is a system message with README content
// (The first message should always be the system prompt) // (The first message should always be the system prompt)
@@ -2922,7 +2958,7 @@ impl<W: UiWriter> Agent<W> {
// Replace the second message (README) with the new content // Replace the second message (README) with the new content
if let Some(first_msg) = self.context_window.conversation_history.get_mut(1) { if let Some(first_msg) = self.context_window.conversation_history.get_mut(1) {
first_msg.content = combined_content; first_msg.content = combined_content;
info!("README content reloaded successfully"); debug!("README content reloaded successfully");
Ok(true) Ok(true)
} else { } else {
Ok(false) Ok(false)
@@ -3156,7 +3192,7 @@ impl<W: UiWriter> Agent<W> {
error!("Failed to clear continuation artifacts: {}", e); error!("Failed to clear continuation artifacts: {}", e);
} }
info!("Session cleared"); debug!("Session cleared");
} }
/// Restore session from a continuation artifact /// Restore session from a continuation artifact
@@ -3201,7 +3237,7 @@ impl<W: UiWriter> Agent<W> {
}); });
} }
info!("Restored full context from session log"); debug!("Restored full context from session log");
return Ok(true); return Ok(true);
} }
} }
@@ -3226,7 +3262,7 @@ impl<W: UiWriter> Agent<W> {
}); });
} }
info!("Restored session from summary"); debug!("Restored session from summary");
Ok(false) Ok(false)
} }
@@ -3836,7 +3872,7 @@ impl<W: UiWriter> Agent<W> {
match provider.stream(request.clone()).await { match provider.stream(request.clone()).await {
Ok(stream) => { Ok(stream) => {
if attempt > 1 { if attempt > 1 {
info!("Stream started successfully after {} attempts", attempt); debug!("Stream started successfully after {} attempts", attempt);
} }
debug!("Stream started successfully"); debug!("Stream started successfully");
debug!( debug!(
@@ -3886,9 +3922,9 @@ impl<W: UiWriter> Agent<W> {
let mut response_started = false; let mut response_started = false;
let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts
const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 2; // Limit auto-summary retries const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 5; // Limit auto-summary retries (increased from 2 for better recovery)
let mut final_output_called = false; // Track if final_output was called let mut final_output_called = false; // Track if final_output was called
let mut executed_tools_in_session: std::collections::HashSet<String> = std::collections::HashSet::new(); // Track executed tools to prevent duplicates // Note: Session-level duplicate tracking was removed - we only prevent sequential duplicates (DUP IN CHUNK, DUP IN MSG)
// Check if we need to summarize before starting // Check if we need to summarize before starting
if self.context_window.should_summarize() { if self.context_window.should_summarize() {
@@ -4189,77 +4225,51 @@ impl<W: UiWriter> Agent<W> {
}; };
// De-duplicate tool calls and track duplicates // De-duplicate tool calls and track duplicates
let mut seen_in_chunk: Vec<ToolCall> = Vec::new(); let mut last_tool_in_chunk: Option<ToolCall> = None;
let mut deduplicated_tools: Vec<(ToolCall, Option<String>)> = Vec::new(); let mut deduplicated_tools: Vec<(ToolCall, Option<String>)> = Vec::new();
for tool_call in tools_to_process { for tool_call in tools_to_process {
let mut duplicate_type = None; let mut duplicate_type = None;
// Check for duplicates in current chunk // Check for IMMEDIATELY SEQUENTIAL duplicate in current chunk
if seen_in_chunk // Only the immediately previous tool call counts as a duplicate
.iter() if let Some(ref last_tool) = last_tool_in_chunk {
.any(|tc| are_duplicates(tc, &tool_call)) if are_duplicates(last_tool, &tool_call) {
{
duplicate_type = Some("DUP IN CHUNK".to_string()); duplicate_type = Some("DUP IN CHUNK".to_string());
}
} else { } else {
// Check for duplicate against previous message in history // Check for IMMEDIATELY SEQUENTIAL duplicate against previous message
// Look at the last assistant message that contains tool calls // Only mark as duplicate if the LAST tool call in the previous message
// matches AND there's no significant text after it
let mut found_in_prev = false; let mut found_in_prev = false;
for msg in self.context_window.conversation_history.iter().rev() { for msg in self.context_window.conversation_history.iter().rev() {
if matches!(msg.role, MessageRole::Assistant) { if matches!(msg.role, MessageRole::Assistant) {
// Try to parse tool calls from the message content // Find the LAST tool call in the message
if msg.content.contains(r#"\"tool\""#) { let content = &msg.content;
// Simple JSON extraction for tool calls
let content = &msg.content; // Look for the last occurrence of a tool call pattern
let mut start_idx = 0; if let Some(last_tool_start) = content.rfind(r#"{"tool""#)
while let Some(tool_start) = .or_else(|| content.rfind(r#"{ "tool""#))
content[start_idx..].find(r#"{\"tool\""#) {
{ // Find the end of this JSON object
let tool_start = start_idx + tool_start; if let Some(end_offset) = StreamingToolParser::find_complete_json_object_end(&content[last_tool_start..]) {
// Find the end of this JSON object let end_idx = last_tool_start + end_offset + 1;
let mut brace_count = 0; let tool_json = &content[last_tool_start..end_idx];
let mut in_string = false;
let mut escape_next = false; // Check if there's any non-whitespace text after this tool call
let mut end_idx = tool_start; let text_after = content[end_idx..].trim();
let has_text_after = !text_after.is_empty();
for (i, ch) in content[tool_start..].char_indices()
{ // Only consider it a duplicate if:
if escape_next { // 1. The tool call matches
escape_next = false; // 2. There's no text after it (it was the last thing in the message)
continue; if !has_text_after {
} if let Ok(prev_tool) = serde_json::from_str::<ToolCall>(tool_json) {
if ch == '\\' && in_string {
escape_next = true;
continue;
}
if ch == '"' && !escape_next {
in_string = !in_string;
}
if !in_string {
if ch == '{' {
brace_count += 1;
} else if ch == '}' {
brace_count -= 1;
if brace_count == 0 {
end_idx = tool_start + i + 1;
break;
}
}
}
}
if end_idx > tool_start {
let tool_json = &content[tool_start..end_idx];
if let Ok(prev_tool) =
serde_json::from_str::<ToolCall>(tool_json)
{
if are_duplicates(&prev_tool, &tool_call) { if are_duplicates(&prev_tool, &tool_call) {
found_in_prev = true; found_in_prev = true;
break;
} }
} }
} }
start_idx = end_idx;
} }
} }
// Only check the most recent assistant message // Only check the most recent assistant message
@@ -4272,13 +4282,8 @@ impl<W: UiWriter> Agent<W> {
} }
} }
// Add to seen list if not a duplicate in chunk // Track the last tool call for sequential duplicate detection
if duplicate_type last_tool_in_chunk = Some(tool_call.clone());
.as_ref()
.map_or(true, |s| s != "DUP IN CHUNK")
{
seen_in_chunk.push(tool_call.clone());
}
deduplicated_tools.push((tool_call, duplicate_type)); deduplicated_tools.push((tool_call, duplicate_type));
} }
@@ -4286,22 +4291,11 @@ impl<W: UiWriter> Agent<W> {
// Process each tool call // Process each tool call
for (tool_call, duplicate_type) in deduplicated_tools { for (tool_call, duplicate_type) in deduplicated_tools {
debug!("Processing completed tool call: {:?}", tool_call); debug!("Processing completed tool call: {:?}", tool_call);
// Mark that we detected a tool call - this prevents content from being printed
// even if the tool is skipped as a duplicate
tool_executed = true;
// Check if this tool was already executed in this session
let tool_key = format!("{}:{}", tool_call.tool, serde_json::to_string(&tool_call.args).unwrap_or_default());
if executed_tools_in_session.contains(&tool_key) {
// Log the duplicate with red prefix
let prefixed_tool_name = format!("🟥 {} DUP IN SESSION", tool_call.tool);
let warning_msg = format!(
"⚠️ Duplicate tool call detected (already executed in session): Skipping {} with args {}",
tool_call.tool,
serde_json::to_string(&tool_call.args).unwrap_or_else(|_| "<unserializable>".to_string())
);
let mut modified_tool_call = tool_call.clone();
modified_tool_call.tool = prefixed_tool_name;
debug!("{}", warning_msg);
continue; // Skip execution of duplicate
}
// If it's a duplicate, log it and return a warning // If it's a duplicate, log it and return a warning
if let Some(dup_type) = &duplicate_type { if let Some(dup_type) = &duplicate_type {
@@ -4639,15 +4633,25 @@ impl<W: UiWriter> Agent<W> {
tool_executed = true; tool_executed = true;
any_tool_executed = true; // Track across all iterations any_tool_executed = true; // Track across all iterations
// Add to executed tools set to prevent re-execution in this session // Reset auto-continue attempts after successful tool execution
executed_tools_in_session.insert(tool_key.clone()); // This gives the LLM fresh attempts since it's making progress
auto_summary_attempts = 0;
// Reset the JSON tool call filter state after each tool execution // Reset the JSON tool call filter state after each tool execution
// This ensures the filter doesn't stay in suppression mode for subsequent streaming content // This ensures the filter doesn't stay in suppression mode for subsequent streaming content
self.ui_writer.reset_json_filter(); self.ui_writer.reset_json_filter();
// Reset parser for next iteration - this clears the text buffer // Only reset parser if there are no more unexecuted tool calls in the buffer
parser.reset(); // This handles the case where the LLM emits multiple tool calls in one response
if parser.has_unexecuted_tool_call() {
debug!("Parser still has unexecuted tool calls, not resetting buffer");
// Mark current tool as consumed so we don't re-detect it
parser.mark_tool_calls_consumed();
} else {
// Reset parser for next iteration - this clears the text buffer
parser.reset();
}
// Clear current_response for next iteration to prevent buffered text // Clear current_response for next iteration to prevent buffered text
// from being incorrectly displayed after tool execution // from being incorrectly displayed after tool execution
@@ -4662,8 +4666,14 @@ impl<W: UiWriter> Agent<W> {
} // End of for loop processing each tool call } // End of for loop processing each tool call
// If we processed any tools in multiple mode, break out to start new stream // If we processed any tools in multiple mode, break out to start new stream
// BUT only if there are no more unexecuted tool calls in the buffer
if tool_executed && self.config.agent.allow_multiple_tool_calls { if tool_executed && self.config.agent.allow_multiple_tool_calls {
break; if parser.has_unexecuted_tool_call() {
debug!("Tool executed but parser still has unexecuted tool calls, continuing to process");
// Don't break - continue processing to pick up remaining tool calls
} else {
break;
}
} }
// If no tool calls were completed, continue streaming normally // If no tool calls were completed, continue streaming normally
@@ -4753,7 +4763,7 @@ impl<W: UiWriter> Agent<W> {
" - Text buffer content: {:?}", " - Text buffer content: {:?}",
parser.get_text_content() parser.get_text_content()
); );
error!(" - Native tool calls: {:?}", parser.native_tool_calls); error!(" - Has incomplete tool call: {}", parser.has_incomplete_tool_call());
error!(" - Message stopped: {}", parser.is_message_stopped()); error!(" - Message stopped: {}", parser.is_message_stopped());
error!(" - In JSON tool call: {}", parser.in_json_tool_call); error!(" - In JSON tool call: {}", parser.in_json_tool_call);
error!(" - JSON tool start: {:?}", parser.json_tool_start); error!(" - JSON tool start: {:?}", parser.json_tool_start);
@@ -4831,6 +4841,17 @@ impl<W: UiWriter> Agent<W> {
)); ));
} }
// If tools were executed in previous iterations but final_output wasn't called,
// break to let the outer loop's auto-continue logic handle it
if any_tool_executed && !final_output_called {
debug!("Tools were executed but final_output not called - breaking to auto-continue");
// Add the text response to context before breaking
if has_text_response && !current_response.trim().is_empty() {
full_response = current_response.clone();
}
break;
}
// Set full_response to current_response (don't append) // Set full_response to current_response (don't append)
// current_response already contains everything that was displayed // current_response already contains everything that was displayed
// Don't set full_response here - it would duplicate the output // Don't set full_response here - it would duplicate the output
@@ -4873,8 +4894,8 @@ impl<W: UiWriter> Agent<W> {
); );
error!("Error type: {}", std::any::type_name_of_val(&e)); error!("Error type: {}", std::any::type_name_of_val(&e));
error!("Parser state at error: text_buffer_len={}, native_tool_calls={}, message_stopped={}", error!("Parser state at error: text_buffer_len={}, has_incomplete={}, message_stopped={}",
parser.text_buffer_len(), parser.native_tool_calls.len(), parser.is_message_stopped()); parser.text_buffer_len(), parser.has_incomplete_tool_call(), parser.is_message_stopped());
// Store the error for potential logging later // Store the error for potential logging later
_last_error = Some(error_details.clone()); _last_error = Some(error_details.clone());
@@ -4893,7 +4914,7 @@ impl<W: UiWriter> Agent<W> {
// If we have any content or tool calls, treat this as a graceful end // If we have any content or tool calls, treat this as a graceful end
if chunks_received > 0 if chunks_received > 0
&& (!parser.get_text_content().is_empty() && (!parser.get_text_content().is_empty()
|| parser.native_tool_calls.len() > 0) || parser.has_unexecuted_tool_call())
{ {
warn!("Stream terminated unexpectedly but we have content, continuing"); warn!("Stream terminated unexpectedly but we have content, continuing");
break; // Break to process what we have break; // Break to process what we have
@@ -4941,18 +4962,77 @@ impl<W: UiWriter> Agent<W> {
let has_response = !current_response.is_empty() || !full_response.is_empty(); let has_response = !current_response.is_empty() || !full_response.is_empty();
// Check if the response is essentially empty (just whitespace or timing lines)
// This detects cases where the LLM outputs nothing substantive
let response_text = if !current_response.is_empty() {
&current_response
} else {
&full_response
};
let is_empty_response = response_text.trim().is_empty()
|| response_text.lines().all(|line| line.trim().is_empty() || line.trim().starts_with("⏱️"));
// Check if there's an incomplete tool call in the buffer
let has_incomplete_tool_call = parser.has_incomplete_tool_call();
// Check if there's a complete but unexecuted tool call in the buffer
let has_unexecuted_tool_call = parser.has_unexecuted_tool_call();
// Log when we detect unexecuted or incomplete tool calls for debugging
if has_incomplete_tool_call {
debug!("Detected incomplete tool call in buffer (buffer_len={}, consumed_up_to={})",
parser.text_buffer_len(), parser.text_buffer_len());
}
if has_unexecuted_tool_call {
debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue");
warn!("Unexecuted tool call detected in buffer after stream ended");
}
// Auto-continue if tools were executed but final_output was never called // Auto-continue if tools were executed but final_output was never called
// This is the simple rule: LLM must call final_output before returning control // OR if the LLM emitted an incomplete tool call (truncated JSON)
if any_tool_executed && !final_output_called { // OR if the LLM emitted a complete tool call that wasn't executed
// This ensures we don't return control when the LLM clearly intended to call a tool
// Note: We removed the redundant condition (any_tool_executed && is_empty_response)
// because it's already covered by (any_tool_executed && !final_output_called)
let should_auto_continue = (any_tool_executed && !final_output_called)
|| has_incomplete_tool_call
|| has_unexecuted_tool_call;
if should_auto_continue {
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS { if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
auto_summary_attempts += 1; auto_summary_attempts += 1;
warn!( if has_incomplete_tool_call {
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})", warn!(
iteration_count, auto_summary_attempts "LLM emitted incomplete tool call ({} iterations, auto-continue attempt {}/{})",
); iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
self.ui_writer.print_context_status( );
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n" self.ui_writer.print_context_status(
); "\n🔄 Model emitted incomplete tool call. Auto-continuing...\n"
);
} else if has_unexecuted_tool_call {
warn!(
"LLM emitted unexecuted tool call ({} iterations, auto-continue attempt {}/{})",
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_context_status(
"\n🔄 Model emitted tool call that wasn't executed. Auto-continuing...\n"
);
} else if is_empty_response {
warn!(
"LLM emitted empty/trivial response ({} iterations, auto-continue attempt {}/{})",
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_context_status(
"\n🔄 Model emitted empty response. Auto-continuing...\n"
);
} else {
warn!(
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {}/{})",
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_context_status(
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
);
}
// Add any text response to context before prompting for continuation // Add any text response to context before prompting for continuation
if has_response { if has_response {
@@ -4971,10 +5051,17 @@ impl<W: UiWriter> Agent<W> {
} }
// Add a follow-up message asking for continuation // Add a follow-up message asking for continuation
let continue_prompt = Message::new( let continue_prompt = if has_incomplete_tool_call {
MessageRole::User, Message::new(
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(), MessageRole::User,
); "Your previous response was cut off mid-tool-call. Please complete the tool call and continue.".to_string(),
)
} else {
Message::new(
MessageRole::User,
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
)
};
self.context_window.add_message(continue_prompt); self.context_window.add_message(continue_prompt);
request.messages = self.context_window.conversation_history.clone(); request.messages = self.context_window.conversation_history.clone();
@@ -4983,11 +5070,17 @@ impl<W: UiWriter> Agent<W> {
} else { } else {
// Max attempts reached, give up gracefully // Max attempts reached, give up gracefully
warn!( warn!(
"Max auto-continue attempts ({}) reached, returning without final_output", "Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, final_output_called={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}",
MAX_AUTO_SUMMARY_ATTEMPTS MAX_AUTO_SUMMARY_ATTEMPTS,
iteration_count,
any_tool_executed,
final_output_called,
has_incomplete_tool_call,
has_unexecuted_tool_call,
is_empty_response
); );
self.ui_writer.print_agent_response( self.ui_writer.print_agent_response(
"\n⚠️ The model stopped without calling final_output after multiple attempts.\n" &format!("\n⚠️ The model stopped without calling final_output after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS)
); );
} }
} else if has_response { } else if has_response {
@@ -6434,7 +6527,7 @@ impl<W: UiWriter> Agent<W> {
let driver = mutex.into_inner(); let driver = mutex.into_inner();
match driver.quit().await { match driver.quit().await {
Ok(_) => { Ok(_) => {
info!("WebDriver session closed successfully"); debug!("WebDriver session closed successfully");
// Kill the safaridriver process // Kill the safaridriver process
if let Some(mut process) = if let Some(mut process) =
@@ -6443,7 +6536,7 @@ impl<W: UiWriter> Agent<W> {
if let Err(e) = process.kill().await { if let Err(e) = process.kill().await {
warn!("Failed to kill safaridriver process: {}", e); warn!("Failed to kill safaridriver process: {}", e);
} else { } else {
info!("Safaridriver process terminated"); debug!("Safaridriver process terminated");
} }
} }

View File

@@ -10,7 +10,7 @@ use crate::ui_writer::UiWriter;
use crate::{Agent, DiscoveryOptions, TaskResult}; use crate::{Agent, DiscoveryOptions, TaskResult};
use anyhow::Result; use anyhow::Result;
use std::time::Instant; use std::time::Instant;
use tracing::{info, warn}; use tracing::{debug, warn};
/// Configuration for retry behavior /// Configuration for retry behavior
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -142,7 +142,7 @@ where
match result { match result {
Ok(task_result) => { Ok(task_result) => {
if retry_count > 0 { if retry_count > 0 {
info!( debug!(
"{} task succeeded after {} retries (elapsed: {:?})", "{} task succeeded after {} retries (elapsed: {:?})",
config.role_name, config.role_name,
retry_count, retry_count,
@@ -259,7 +259,7 @@ where
match operation().await { match operation().await {
Ok(result) => { Ok(result) => {
if retry_count > 0 { if retry_count > 0 {
info!( debug!(
"Operation '{}' succeeded after {} retries", "Operation '{}' succeeded after {} retries",
operation_name, retry_count operation_name, retry_count
); );

View File

@@ -6,7 +6,7 @@
use anyhow::Result; use anyhow::Result;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tracing::{debug, error, info, warn}; use tracing::{debug, error, warn};
/// Version of the session continuation format /// Version of the session continuation format
const CONTINUATION_VERSION: &str = "1.0"; const CONTINUATION_VERSION: &str = "1.0";
@@ -89,7 +89,7 @@ pub fn save_continuation(continuation: &SessionContinuation) -> Result<PathBuf>
let json = serde_json::to_string_pretty(continuation)?; let json = serde_json::to_string_pretty(continuation)?;
std::fs::write(&latest_path, &json)?; std::fs::write(&latest_path, &json)?;
info!("Saved session continuation to {:?}", latest_path); debug!("Saved session continuation to {:?}", latest_path);
Ok(latest_path) Ok(latest_path)
} }
@@ -113,7 +113,7 @@ pub fn load_continuation() -> Result<Option<SessionContinuation>> {
); );
} }
info!("Loaded session continuation from {:?}", latest_path); debug!("Loaded session continuation from {:?}", latest_path);
Ok(Some(continuation)) Ok(Some(continuation))
} }
@@ -131,7 +131,7 @@ pub fn clear_continuation() -> Result<()> {
debug!("Removed session file: {:?}", path); debug!("Removed session file: {:?}", path);
} }
} }
info!("Cleared session continuation artifacts"); debug!("Cleared session continuation artifacts");
} }
Ok(()) Ok(())

View File

@@ -0,0 +1,234 @@
//! Tests for the auto-continue detection features
//!
//! These tests verify the logic used to detect when the LLM should auto-continue:
//! 1. Empty/trivial responses (just timing lines)
//! 2. Incomplete tool calls
//! 3. Unexecuted tool calls
//! 4. Missing final_output after tool execution
/// Helper function to check if a response is considered "empty" or trivial.
/// Mirrors the detection logic in lib.rs: a response counts as empty when it
/// is all whitespace, or when every line is blank or a "⏱️" timing line.
fn is_empty_response(response_text: &str) -> bool {
    if response_text.trim().is_empty() {
        return true;
    }
    response_text
        .lines()
        .map(str::trim)
        .all(|line| line.is_empty() || line.starts_with("⏱️"))
}
#[test]
fn test_empty_response_detection_empty_string() {
    assert!(is_empty_response(""));
}

#[test]
fn test_empty_response_detection_whitespace_only() {
    // Pure whitespace in any mix must register as empty.
    for input in &["   ", "\n\n\n", "  \n  \t  \n  "] {
        assert!(is_empty_response(input), "{:?} should be empty", input);
    }
}

#[test]
fn test_empty_response_detection_timing_line_only() {
    // A lone timing line, with or without surrounding padding, is trivial output.
    for input in &[
        "⏱️ 43.0s | 💭 3.6s",
        "  ⏱️ 43.0s | 💭 3.6s  ",
        "\n⏱️ 43.0s | 💭 3.6s\n",
    ] {
        assert!(is_empty_response(input), "{:?} should be empty", input);
    }
}

#[test]
fn test_empty_response_detection_multiple_timing_lines() {
    assert!(is_empty_response("\n⏱️ 10.0s | 💭 1.0s\n\n⏱️ 20.0s | 💭 2.0s\n"));
}

#[test]
fn test_empty_response_detection_timing_with_empty_lines() {
    assert!(is_empty_response("\n\n⏱️ 43.0s | 💭 3.6s\n\n"));
}
#[test]
fn test_empty_response_detection_substantive_content() {
    // Any real prose means the response is NOT empty.
    for input in &[
        "Hello, I will help you.",
        "Let me read that file.",
        "I've completed the task.",
    ] {
        assert!(!is_empty_response(input), "{:?} should NOT be empty", input);
    }
}

#[test]
fn test_empty_response_detection_timing_with_text() {
    // Substantive text after a timing line makes the response non-empty.
    assert!(!is_empty_response("⏱️ 43.0s | 💭 3.6s\nHere is the result."));
}

#[test]
fn test_empty_response_detection_text_before_timing() {
    // Substantive text before a timing line also counts.
    assert!(!is_empty_response("Done!\n⏱️ 43.0s | 💭 3.6s"));
}

#[test]
fn test_empty_response_detection_json_tool_call() {
    // A complete JSON tool call is definitely content.
    assert!(!is_empty_response(
        r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#
    ));
}

#[test]
fn test_empty_response_detection_partial_json() {
    // Even a truncated JSON tool call is content.
    assert!(!is_empty_response(r#"{"tool": "read_file", "args": {"#));
}

#[test]
fn test_empty_response_detection_markdown() {
    assert!(!is_empty_response("# Summary\n\nI completed the task."));
}

#[test]
fn test_empty_response_detection_code_block() {
    assert!(!is_empty_response("```rust\nfn main() {}\n```"));
}
// Guard test documenting the expected range of MAX_AUTO_SUMMARY_ATTEMPTS.
// The constant itself is private to lib.rs, so its current value is mirrored here.
#[test]
fn test_max_auto_summary_attempts_is_reasonable() {
    // At least 3 attempts give the LLM a fair chance to recover; more than 10
    // risks long auto-continue loops. Current value in lib.rs: 5 (was 2).
    const EXPECTED_MIN_ATTEMPTS: usize = 3;
    const EXPECTED_MAX_ATTEMPTS: usize = 10;
    const CURRENT_VALUE: usize = 5; // keep in sync with lib.rs
    assert!(
        (EXPECTED_MIN_ATTEMPTS..=EXPECTED_MAX_ATTEMPTS).contains(&CURRENT_VALUE),
        "MAX_AUTO_SUMMARY_ATTEMPTS should be within {}..={} for reliable recovery without infinite loops",
        EXPECTED_MIN_ATTEMPTS,
        EXPECTED_MAX_ATTEMPTS
    );
}
// =============================================================================
// Test: Auto-continue condition logic
// =============================================================================

/// Simulates the should_auto_continue logic from lib.rs.
///
/// Mirrors the production rule exactly: continue when tools ran but
/// `final_output` was never called, or when the buffer holds an incomplete
/// or unexecuted tool call. The `(any_tool_executed && is_empty_response)`
/// clause was removed in lib.rs as redundant (it is subsumed by
/// `any_tool_executed && !final_output_called`), so it is not part of the
/// decision here either — keeping it would wrongly continue when
/// final_output *was* called with an empty display. The `is_empty_response`
/// parameter is retained so existing call sites remain valid.
fn should_auto_continue(
    any_tool_executed: bool,
    final_output_called: bool,
    has_incomplete_tool_call: bool,
    has_unexecuted_tool_call: bool,
    is_empty_response: bool,
) -> bool {
    // Intentionally unused in the decision; see doc comment above.
    let _ = is_empty_response;
    (any_tool_executed && !final_output_called)
        || has_incomplete_tool_call
        || has_unexecuted_tool_call
}
// Argument order for should_auto_continue in all tests below:
// (any_tool_executed, final_output_called, has_incomplete_tool_call,
//  has_unexecuted_tool_call, is_empty_response)

#[test]
fn test_auto_continue_after_tool_no_final_output() {
    // Tool executed but final_output never called -> must continue.
    assert!(should_auto_continue(true, false, false, false, false));
}

#[test]
fn test_auto_continue_with_final_output() {
    // Tool executed AND final_output called -> the turn is complete.
    assert!(!should_auto_continue(true, true, false, false, false));
}

#[test]
fn test_auto_continue_incomplete_tool_call() {
    // A truncated tool call forces continuation regardless of other flags.
    assert!(should_auto_continue(false, false, true, false, false));
}

#[test]
fn test_auto_continue_unexecuted_tool_call() {
    // A complete-but-unexecuted tool call forces continuation on its own.
    assert!(should_auto_continue(false, false, false, true, false));
}

#[test]
fn test_auto_continue_empty_response_after_tool() {
    // Empty response after tool execution -> continue.
    assert!(should_auto_continue(true, false, false, false, true));
}

#[test]
fn test_auto_continue_empty_response_no_tool() {
    // Empty response with no tool activity is a normal end of turn
    // (the LLM simply didn't respond) -> do NOT continue.
    assert!(!should_auto_continue(false, false, false, false, true));
}
#[test]
fn test_auto_continue_no_conditions_met() {
// No tools, no incomplete calls, substantive response - should NOT continue
assert!(!should_auto_continue(
false, // any_tool_executed
false, // final_output_called
false, // has_incomplete_tool_call
false, // has_unexecuted_tool_call
false, // is_empty_response
));
}
// =============================================================================
// Test: Redundant condition detection
// =============================================================================
#[test]
fn test_redundant_empty_response_condition() {
    // Documents that the (any_tool_executed && is_empty_response) clause adds
    // nothing once (any_tool_executed && !final_output_called) already holds:
    // flipping is_empty_response cannot change the outcome in that state.
    let with_empty = should_auto_continue(true, false, false, false, true);
    let without_empty = should_auto_continue(true, false, false, false, false);
    assert_eq!(with_empty, without_empty,
        "The is_empty_response condition is redundant when any_tool_executed && !final_output_called");
}

View File

@@ -0,0 +1,231 @@
//! Tests for tool call duplicate detection
//!
//! These tests ensure that duplicate detection only catches IMMEDIATELY SEQUENTIAL
//! duplicates, not legitimate re-use of tools with text between them.
use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;
// Build a CompletionChunk carrying `content`, with no tool calls or usage data.
fn chunk(content: &str, finished: bool) -> CompletionChunk {
    CompletionChunk {
        finished,
        content: String::from(content),
        tool_calls: None,
        usage: None,
    }
}
// =============================================================================
// Test: find_complete_json_object_end helper function
// =============================================================================
#[test]
fn test_find_complete_json_object_end_simple() {
    // Flat object: the end index is the final character of the string.
    let input = r#"{"tool": "test", "args": {}}"#;
    let pos = StreamingToolParser::find_complete_json_object_end(input);
    assert!(pos.is_some(), "Should find end of complete JSON");
    assert_eq!(pos.unwrap(), input.len() - 1, "End should be at last character");
}
#[test]
fn test_find_complete_json_object_end_nested() {
    // Deep nesting must not confuse the brace matching.
    let input = r#"{"tool": "test", "args": {"nested": {"deep": true}}}"#;
    let pos = StreamingToolParser::find_complete_json_object_end(input);
    assert!(pos.is_some(), "Should find end of nested JSON");
    assert_eq!(pos.unwrap(), input.len() - 1);
}
#[test]
fn test_find_complete_json_object_end_with_trailing_text() {
    let input = r#"{"tool": "test", "args": {}} some text after"#;
    let pos = StreamingToolParser::find_complete_json_object_end(input);
    assert!(pos.is_some(), "Should find end of JSON even with trailing text");
    // The reported index must land on the object's closing brace, not at EOF.
    let idx = pos.unwrap();
    assert_eq!(&input[idx..idx + 1], "}", "End should be at closing brace");
}
#[test]
fn test_find_complete_json_object_end_incomplete() {
    // An object that never closes has no end position.
    let input = r#"{"tool": "test", "args": {"#;
    assert!(
        StreamingToolParser::find_complete_json_object_end(input).is_none(),
        "Should return None for incomplete JSON"
    );
}
// =============================================================================
// Test: Tool calls separated by text should NOT be duplicates
// =============================================================================
#[test]
fn test_same_tool_with_text_between_not_duplicate() {
    // The LLM may legitimately invoke the same tool twice when it writes
    // explanatory text in between — that must not be flagged as a duplicate.
    let mut p = StreamingToolParser::new();
    // First invocation.
    let first = r#"{"tool": "todo_read", "args": {}}"#;
    let found1 = p.process_chunk(&chunk(first, true));
    assert_eq!(found1.len(), 1, "First tool call should be detected");
    assert_eq!(found1[0].tool, "todo_read");
    // The agent resets the parser after executing a tool.
    p.reset();
    // Prose followed by the identical call again.
    let second = r#"Now let me check the TODO again to verify my changes.
{"tool": "todo_read", "args": {}}"#;
    let found2 = p.process_chunk(&chunk(second, true));
    // Text precedes the call, so this is a fresh invocation, not a stutter.
    assert_eq!(found2.len(), 1, "Second tool call should be detected (not a duplicate)");
    assert_eq!(found2[0].tool, "todo_read");
}
#[test]
fn test_different_tools_back_to_back_not_duplicate() {
    let mut p = StreamingToolParser::new();
    // Two distinct tools, immediately adjacent.
    let payload = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "shell", "args": {"command": "ls"}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    // Different tools can never be duplicates of each other.
    assert!(!found.is_empty(), "Should detect tool calls");
    // The first call, at minimum, must come through.
    assert_eq!(found[0].tool, "read_file");
}
#[test]
fn test_same_tool_different_args_not_duplicate() {
    let mut p = StreamingToolParser::new();
    // Identical tool name but different arguments — a legitimate re-use.
    let payload = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "read_file", "args": {"file_path": "b.txt"}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect tool calls");
}
// =============================================================================
// Test: Immediately sequential identical tool calls ARE duplicates
// =============================================================================
#[test]
fn test_identical_tool_calls_back_to_back_are_duplicates() {
    // When the LLM stutters and emits the exact same call twice with nothing
    // between them, that IS a duplicate. The dedup decision itself lives at a
    // higher level in the agent; the parser only surfaces the calls, so this
    // test just confirms parsing succeeds.
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "todo_read", "args": {}}
{"tool": "todo_read", "args": {}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect at least one tool call");
}
// =============================================================================
// Test: Text content detection for duplicate logic
// =============================================================================
#[test]
fn test_has_text_after_tool_call() {
    // Verifies we can classify what follows a complete tool-call JSON object:
    // real text, nothing, or whitespace only. This mirrors the duplicate-
    // detection logic, which treats whitespace-only gaps as "no text between".
    let content_with_text = r#"{"tool": "test", "args": {}} Some text after"#;
    let content_without_text = r#"{"tool": "test", "args": {}}"#;
    let content_with_whitespace_only = r#"{"tool": "test", "args": {}}
"#;
    // Find the end of the JSON in each case
    let end1 = StreamingToolParser::find_complete_json_object_end(content_with_text).unwrap();
    let end2 = StreamingToolParser::find_complete_json_object_end(content_without_text).unwrap();
    let end3 = StreamingToolParser::find_complete_json_object_end(content_with_whitespace_only).unwrap();
    // Inspect the remainder after the closing brace. Use checked slicing
    // (`get`) uniformly: when the object ends at the last byte, `end + 1`
    // equals the string length, and mixing checked and unchecked indexing
    // here (as before) invited an out-of-bounds panic if the helper's
    // contract ever changes.
    let after1 = content_with_text.get(end1 + 1..).unwrap_or("").trim();
    let after2 = content_without_text.get(end2 + 1..).unwrap_or("").trim();
    let after3 = content_with_whitespace_only.get(end3 + 1..).unwrap_or("").trim();
    assert!(!after1.is_empty(), "Should have text after tool call");
    assert!(after2.is_empty(), "Should have no text after tool call");
    assert!(after3.is_empty(), "Whitespace-only should count as no text");
}
// =============================================================================
// Test: Edge cases
// =============================================================================
#[test]
fn test_tool_call_with_newlines_between() {
    // Only newlines separate the two identical calls — no meaningful text —
    // so downstream logic should treat the second one as a duplicate.
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "todo_read", "args": {}}
{"tool": "todo_read", "args": {}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect at least one tool call");
}
#[test]
fn test_tool_call_with_whitespace_text_between() {
    // Filler text sits between the calls; what matters is whether the text is
    // "meaningful", and here it is, so both invocations are legitimate.
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "todo_read", "args": {}}
OK, now again:
{"tool": "todo_read", "args": {}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect tool calls");
}
#[test]
fn test_tool_call_in_middle_of_text() {
    // Prose on both sides of a single call.
    let mut p = StreamingToolParser::new();
    let payload = r#"Let me read the file first.
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Now I'll analyze the contents."#;
    let found = p.process_chunk(&chunk(payload, true));
    assert_eq!(found.len(), 1, "Should detect the tool call");
    assert_eq!(found[0].tool, "read_file");
}
#[test]
fn test_multiple_different_tool_calls_with_text() {
    // Three distinct calls, each introduced by narration.
    let mut p = StreamingToolParser::new();
    let payload = r#"First, let me read the file:
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Now let me check the TODO:
{"tool": "todo_read", "args": {}}
Finally, let me run a command:
{"tool": "shell", "args": {"command": "ls"}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect tool calls");
}

View File

@@ -0,0 +1,182 @@
//! Tests for the incomplete tool call detection feature
use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;
#[test]
fn test_has_incomplete_tool_call_empty_buffer() {
    // A freshly constructed parser has nothing buffered at all.
    let p = StreamingToolParser::new();
    assert!(!p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_no_tool_pattern() {
    // Plain prose never matches the tool-call pattern.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: "Hello, I will help you with that.".to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(!p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_complete_tool_call() {
    // A fully closed JSON object is complete, hence not "incomplete".
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(!p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_truncated_tool_call() {
    // The stream was cut off before the closing braces arrived.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_truncated_mid_value() {
    // The stream was cut off in the middle of a long string value.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "shell", "args": {"command": "cargo test --package g3-cli --test filter_json_test test_streaming -- --test-threads=1 2>&1 | tail"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_with_text_before() {
    // Leading prose must not mask the dangling tool call behind it.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"Let me read that file for you.
{"tool": "read_file", "args": {"file_path":"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_malformed_like_trace() {
    // Reproduces a real trace where the stream ended mid-JSON (truncated
    // output, not garbage characters): the closing braces never arrived.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path":"src/engine.rkt""#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_empty_buffer() {
    // Nothing buffered means nothing can be pending execution.
    let p = StreamingToolParser::new();
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_no_tool_pattern() {
    // Plain prose contains no tool call at all.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: "Hello, I will help you with that.".to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_complete_tool_call() {
    // A complete JSON tool call that nobody ran yet must be reported.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_incomplete_json() {
    // Truncated JSON is the "incomplete" case, not the "unexecuted" one.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_with_trailing_text() {
    // Trailing prose after a complete call must not hide it.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}
Some trailing text after the JSON"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_with_text_before_and_after() {
    // Prose on both sides of a complete call must not hide it either.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"Let me read that file.
{"tool": "shell", "args": {"command": "ls -la"}}
I'll execute this command now."#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_unexecuted_tool_call());
}

View File

@@ -0,0 +1,545 @@
//! Comprehensive tests for StreamingToolParser
//!
//! Tests cover:
//! - Multiple tool calls in one response
//! - Tool call followed by text
//! - Incomplete tool calls at various truncation points
//! - Parser reset behavior
//! - Buffer management
use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;
// Build a CompletionChunk carrying `content`, with no tool calls or usage data.
fn chunk(content: &str, finished: bool) -> CompletionChunk {
    CompletionChunk {
        finished,
        content: String::from(content),
        tool_calls: None,
        usage: None,
    }
}
// =============================================================================
// Test: Multiple tool calls in one response
// =============================================================================
#[test]
fn test_multiple_tool_calls_in_single_chunk() {
    // Two complete calls arrive in a single chunk. The streaming parser may
    // hand back only the first one it sees; at least one must be reported.
    let mut p = StreamingToolParser::new();
    let payload = r#"Let me do two things:
{"tool": "read_file", "args": {"file_path": "a.txt"}}
Now the second:
{"tool": "shell", "args": {"command": "ls"}}"#;
    let found = p.process_chunk(&chunk(payload, false));
    assert!(!found.is_empty(), "Should detect at least one tool call");
}
#[test]
fn test_multiple_tool_calls_across_chunks() {
    let mut p = StreamingToolParser::new();
    // First call arrives and is detected.
    let found1 = p.process_chunk(&chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    ));
    assert_eq!(found1.len(), 1, "First tool call should be detected");
    assert_eq!(found1[0].tool, "read_file");
    // The agent resets the parser after executing a tool.
    p.reset();
    // Second call arrives and is detected in turn.
    let found2 = p.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "ls"}}"#,
        false,
    ));
    assert_eq!(found2.len(), 1, "Second tool call should be detected");
    assert_eq!(found2[0].tool, "shell");
}
#[test]
fn test_first_complete_second_incomplete() {
    // A complete call followed by one cut off mid-stream: the truncated one
    // must surface through has_incomplete_tool_call.
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "shell", "args": {"command": "ls"#;
    let _found = p.process_chunk(&chunk(payload, false));
    assert!(p.has_incomplete_tool_call(), "Should detect incomplete tool call");
}
// =============================================================================
// Test: Tool call followed by text
// =============================================================================
#[test]
fn test_tool_call_with_trailing_text() {
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}
Here is the content of the file..."#;
    let found = p.process_chunk(&chunk(payload, false));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "read_file");
    // The prose after the call must survive in the text buffer.
    assert!(
        p.get_text_content().contains("Here is the content"),
        "Trailing text should be preserved"
    );
}
#[test]
fn test_text_before_tool_call() {
    let mut p = StreamingToolParser::new();
    let payload = r#"Let me read that file for you.
{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
    let found = p.process_chunk(&chunk(payload, false));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "read_file");
    // The prose before the call must survive in the text buffer.
    assert!(
        p.get_text_content().contains("Let me read"),
        "Leading text should be preserved"
    );
}
#[test]
fn test_text_before_and_after_tool_call() {
    let mut p = StreamingToolParser::new();
    let payload = r#"I'll check the file.
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Done checking."#;
    let found = p.process_chunk(&chunk(payload, false));
    assert_eq!(found.len(), 1);
    let text = p.get_text_content();
    assert!(text.contains("I'll check"), "Leading text should be preserved");
    assert!(text.contains("Done checking"), "Trailing text should be preserved");
}
// =============================================================================
// Test: Incomplete tool calls at various truncation points
// =============================================================================
#[test]
fn test_incomplete_after_tool_key() {
    // Cut right after the "tool" key.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool":"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_after_tool_name() {
    // Cut right after the tool name.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file""#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_after_args_key() {
    // Cut right after the "args" key.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args":"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_mid_args_object() {
    // Cut inside the args object.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path":"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_mid_string_value() {
    // Cut in the middle of a string value.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "shell", "args": {"command": "ls -la /very/long/path"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_missing_final_brace() {
    // Everything present except the outermost closing brace.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_complete_tool_call_not_incomplete() {
    // Control case: a fully closed call is never flagged.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, false));
    assert!(!p.has_incomplete_tool_call(), "Complete tool call should not be marked incomplete");
}
// =============================================================================
// Test: Parser reset behavior
// =============================================================================
#[test]
fn test_reset_clears_buffer() {
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk("Some content here", false));
    assert!(!p.get_text_content().is_empty());
    p.reset();
    assert!(p.get_text_content().is_empty(), "Buffer should be empty after reset");
}
#[test]
fn test_reset_clears_incomplete_state() {
    let mut p = StreamingToolParser::new();
    // Leave a dangling tool call in the buffer, then reset.
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"#, false));
    assert!(p.has_incomplete_tool_call());
    p.reset();
    assert!(!p.has_incomplete_tool_call(), "Incomplete state should be cleared after reset");
}
#[test]
fn test_reset_clears_unexecuted_state() {
    let mut p = StreamingToolParser::new();
    // Leave a complete-but-unexecuted call in the buffer, then reset.
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, false));
    assert!(p.has_unexecuted_tool_call());
    p.reset();
    assert!(!p.has_unexecuted_tool_call(), "Unexecuted state should be cleared after reset");
}
#[test]
fn test_reset_allows_new_tool_calls() {
    let mut p = StreamingToolParser::new();
    // Detect one call...
    let found1 = p.process_chunk(&chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    ));
    assert_eq!(found1.len(), 1);
    p.reset();
    // ...and a different one after the reset.
    let found2 = p.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "ls"}}"#,
        false,
    ));
    assert_eq!(found2.len(), 1);
    assert_eq!(found2[0].tool, "shell");
}
// =============================================================================
// Test: Buffer management and edge cases
// =============================================================================
#[test]
fn test_streaming_chunks_accumulate() {
    // A tool call split across four chunks must assemble in the buffer.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "#, false));
    p.process_chunk(&chunk(r#""read_file", "#, false));
    p.process_chunk(&chunk(r#""args": {"file_path": "#, false));
    p.process_chunk(&chunk(r#""test.txt"}}"#, false));
    let text = p.get_text_content();
    assert!(text.contains(r#""tool""#));
    assert!(text.contains(r#""read_file""#));
}
#[test]
fn test_finished_chunk_triggers_final_parse() {
    let mut p = StreamingToolParser::new();
    // Two partial chunks that complete a call between them.
    p.process_chunk(&chunk(r#"{"tool": "read_file", "#, false));
    let found = p.process_chunk(&chunk(r#""args": {"file_path": "test.txt"}}"#, false));
    // Either the call is returned while streaming, or it is no longer pending.
    assert!(!found.is_empty() || !p.has_unexecuted_tool_call(),
        "Tool should be detected during streaming or marked as unexecuted");
}
#[test]
fn test_empty_chunks_ignored() {
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk("", false));
    p.process_chunk(&chunk("", false));
    assert!(p.get_text_content().is_empty());
    assert!(!p.has_incomplete_tool_call());
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_whitespace_only_chunks() {
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(" \n\t ", false));
    assert!(!p.has_incomplete_tool_call());
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_json_with_escaped_quotes() {
    // Escaped quotes inside a value must not break brace matching.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "echo \"hello\""}}"#,
        false,
    ));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "shell");
}
#[test]
fn test_json_with_escaped_backslashes() {
    // Windows-style paths with escaped backslashes must parse.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{"tool": "write_file", "args": {"file_path": "C:\\Users\\test.txt", "content": "data"}}"#,
        false,
    ));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "write_file");
}
#[test]
fn test_json_with_nested_braces_in_string() {
    // Braces inside an escaped JSON string value must not confuse the parser.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{"tool": "write_file", "args": {"content": "{\"nested\": {\"json\": true}}"}}"#,
        false,
    ));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "write_file");
}
#[test]
fn test_text_buffer_length_tracking() {
    // text_buffer_len tracks accumulation and is zeroed by reset.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk("Hello", false));
    assert_eq!(p.text_buffer_len(), 5);
    p.process_chunk(&chunk(" World", false));
    assert_eq!(p.text_buffer_len(), 11);
    p.reset();
    assert_eq!(p.text_buffer_len(), 0);
}
#[test]
fn test_message_stopped_flag() {
    // The stopped flag flips on the finished chunk and clears on reset.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk("Hello", false));
    assert!(!p.is_message_stopped());
    p.process_chunk(&chunk(" World", true));
    assert!(p.is_message_stopped());
    p.reset();
    assert!(!p.is_message_stopped());
}
// =============================================================================
// Test: Tool call pattern variations
// =============================================================================
#[test]
fn test_tool_pattern_no_spaces() {
    // Fully compact JSON (no spaces after colons or commas) must still match.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{"tool":"read_file","args":{"file_path":"test.txt"}}"#,
        false,
    ));
    assert_eq!(found.len(), 1);
}
// =============================================================================
// Test: mark_tool_calls_consumed functionality
// =============================================================================
#[test]
fn test_mark_consumed_clears_unexecuted_state() {
    let mut p = StreamingToolParser::new();
    // Buffer a complete call; it is now pending execution.
    p.process_chunk(&chunk(
        r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#,
        false,
    ));
    assert!(p.has_unexecuted_tool_call());
    // Consuming it must clear the pending flag.
    p.mark_tool_calls_consumed();
    assert!(!p.has_unexecuted_tool_call(),
        "After marking consumed, has_unexecuted_tool_call should return false");
}
#[test]
fn test_mark_consumed_allows_new_tool_detection() {
    let mut p = StreamingToolParser::new();
    // First call is buffered and then consumed.
    p.process_chunk(&chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    ));
    p.mark_tool_calls_consumed();
    // A second call arrives without any reset in between.
    p.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "ls"}}"#,
        false,
    ));
    assert!(p.has_unexecuted_tool_call(),
        "New tool call after consumed position should be detected");
}
#[test]
fn test_bare_brace_not_incomplete() {
    // A lone opening brace does not match any tool-call pattern.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{""#, false));
    assert!(!p.has_incomplete_tool_call(),
        "Bare {{ should not be detected as incomplete tool call");
}
#[test]
fn test_duplicate_tool_call_pattern() {
    // Problematic real-world shape: call, garbage, then the same call again.
    // The streaming parser returns the first call it finds; the duplicate
    // stays in the buffer and must surface as unexecuted.
    let mut p = StreamingToolParser::new();
    let payload = concat!(
        r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"#,
        "\n\n{\"\n\n",
        r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"#
    );
    let found = p.process_chunk(&chunk(payload, false));
    assert!(!found.is_empty(), "Should detect at least one tool call");
    assert!(p.has_unexecuted_tool_call(),
        "Should detect the duplicate as unexecuted");
}
#[test]
fn test_multiple_tool_calls_returned_on_finish() {
    // When the stream finishes, every buffered call must come back at once.
    let mut p = StreamingToolParser::new();
    let payload = concat!(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        "\nSome text\n",
        r#"{"tool": "shell", "args": {"command": "ls"}}"#
    );
    // Feed the content first, without ending the stream.
    p.process_chunk(&chunk(payload, false));
    // Then close the stream: both calls are expected back.
    let found = p.process_chunk(&chunk("", true));
    assert_eq!(found.len(), 2, "Should return both tool calls when stream finishes");
    assert_eq!(found[0].tool, "read_file");
    assert_eq!(found[1].tool, "shell");
}
#[test]
fn test_tool_pattern_extra_spaces() {
    // Generous whitespace around keys and values must still match.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{ "tool" : "read_file" , "args" : { "file_path" : "test.txt" } }"#,
        false,
    ));
    assert_eq!(found.len(), 1);
}
#[test]
fn test_tool_pattern_with_newlines() {
    // Known limitation: the pattern matching is deliberately strict, so JSON
    // with a newline between { and "tool" is not recognized as a tool call —
    // neither here nor by has_unexecuted_tool_call.
    let mut p = StreamingToolParser::new();
    let _found = p.process_chunk(&chunk(
        r#"{
"tool": "read_file",
"args": {
"file_path": "test.txt"
}
}"#,
        false,
    ));
}
// =============================================================================
// Test: Edge cases for has_message_like_keys validation
// =============================================================================
#[test]
fn test_normal_args_accepted() {
    // An ordinary tool call with plain argument keys must pass validation
    // and be detected as exactly one tool call.
    let mut parser = StreamingToolParser::new();
    let payload =
        r#"{"tool": "read_file", "args": {"file_path": "test.txt", "start": 0, "end": 100}}"#;
    let detected = parser.process_chunk(&chunk(payload, false));
    assert_eq!(detected.len(), 1);
}
#[test]
fn test_content_with_phrases_in_value_accepted() {
    // Conversational phrases such as "I'll" appearing inside argument VALUES
    // must not trip the message-like-key validation, which inspects keys only.
    let mut parser = StreamingToolParser::new();
    let payload = r#"{"tool": "write_file", "args": {"file_path": "test.txt", "content": "I'll help you with that. Let me explain."}}"#;
    let detected = parser.process_chunk(&chunk(payload, false));
    assert_eq!(detected.len(), 1);
}

View File

@@ -7,7 +7,7 @@ use std::path::{Path, PathBuf};
use std::process::Stdio; use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command; use tokio::process::Command;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, warn};
use uuid::Uuid; use uuid::Uuid;
use crate::status::{FlockStatus, SegmentState, SegmentStatus}; use crate::status::{FlockStatus, SegmentState, SegmentStatus};
@@ -174,7 +174,7 @@ impl FlockMode {
/// Run flock mode /// Run flock mode
pub async fn run(&mut self) -> Result<()> { pub async fn run(&mut self) -> Result<()> {
info!( debug!(
"Starting flock mode with {} segments", "Starting flock mode with {} segments",
self.config.num_segments self.config.num_segments
); );
@@ -625,7 +625,7 @@ async fn run_segment(
status_file: PathBuf, status_file: PathBuf,
session_id: String, session_id: String,
) -> Result<SegmentStatus> { ) -> Result<SegmentStatus> {
info!( debug!(
"Starting segment {} in {}", "Starting segment {} in {}",
segment_id, segment_id,
segment_dir.display() segment_dir.display()

View File

@@ -3,7 +3,7 @@ use regex::Regex;
use std::io::Write; use std::io::Write;
use std::process::Command; use std::process::Command;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
use tracing::{debug, error, info}; use tracing::{debug, error};
/// Expand tilde (~) in a path to the user's home directory /// Expand tilde (~) in a path to the user's home directory
fn expand_tilde(path: &str) -> String { fn expand_tilde(path: &str) -> String {
@@ -72,7 +72,7 @@ impl CodeExecutor {
} }
for (language, code) in code_blocks { for (language, code) in code_blocks {
info!("Executing {} code", language); debug!("Executing {} code", language);
if show_code { if show_code {
results.push(format!("📋 Running {} code:", language)); results.push(format!("📋 Running {} code:", language));
@@ -459,7 +459,7 @@ pub fn is_cargo_llvm_cov_installed() -> Result<bool> {
/// Install llvm-tools-preview via rustup /// Install llvm-tools-preview via rustup
pub fn install_llvm_tools() -> Result<()> { pub fn install_llvm_tools() -> Result<()> {
info!("Installing llvm-tools-preview..."); debug!("Installing llvm-tools-preview...");
let output = Command::new("rustup") let output = Command::new("rustup")
.args(&["component", "add", "llvm-tools-preview"]) .args(&["component", "add", "llvm-tools-preview"])
.output()?; .output()?;
@@ -469,13 +469,13 @@ pub fn install_llvm_tools() -> Result<()> {
anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr); anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr);
} }
info!("✅ llvm-tools-preview installed successfully"); debug!("✅ llvm-tools-preview installed successfully");
Ok(()) Ok(())
} }
/// Install cargo-llvm-cov via cargo install /// Install cargo-llvm-cov via cargo install
pub fn install_cargo_llvm_cov() -> Result<()> { pub fn install_cargo_llvm_cov() -> Result<()> {
info!("Installing cargo-llvm-cov... (this may take a few minutes)"); debug!("Installing cargo-llvm-cov... (this may take a few minutes)");
let output = Command::new("cargo") let output = Command::new("cargo")
.args(&["install", "cargo-llvm-cov"]) .args(&["install", "cargo-llvm-cov"])
.output()?; .output()?;
@@ -485,7 +485,7 @@ pub fn install_cargo_llvm_cov() -> Result<()> {
anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr); anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr);
} }
info!("✅ cargo-llvm-cov installed successfully"); debug!("✅ cargo-llvm-cov installed successfully");
Ok(()) Ok(())
} }
@@ -496,20 +496,20 @@ pub fn ensure_coverage_tools_installed() -> Result<bool> {
// Check and install llvm-tools-preview // Check and install llvm-tools-preview
if !is_llvm_tools_installed()? { if !is_llvm_tools_installed()? {
info!("llvm-tools-preview not found, installing..."); debug!("llvm-tools-preview not found, installing...");
install_llvm_tools()?; install_llvm_tools()?;
already_installed = false; already_installed = false;
} else { } else {
info!("✅ llvm-tools-preview is already installed"); debug!("✅ llvm-tools-preview is already installed");
} }
// Check and install cargo-llvm-cov // Check and install cargo-llvm-cov
if !is_cargo_llvm_cov_installed()? { if !is_cargo_llvm_cov_installed()? {
info!("cargo-llvm-cov not found, installing..."); debug!("cargo-llvm-cov not found, installing...");
install_cargo_llvm_cov()?; install_cargo_llvm_cov()?;
already_installed = false; already_installed = false;
} else { } else {
info!("✅ cargo-llvm-cov is already installed"); debug!("✅ cargo-llvm-cov is already installed");
} }
Ok(already_installed) Ok(already_installed)

View File

@@ -328,7 +328,7 @@ impl AnthropicProvider {
tracing::debug!("create_request_body called: max_tokens={}, disable_thinking={}, thinking_budget_tokens={:?}", max_tokens, disable_thinking, self.thinking_budget_tokens); tracing::debug!("create_request_body called: max_tokens={}, disable_thinking={}, thinking_budget_tokens={:?}", max_tokens, disable_thinking, self.thinking_budget_tokens);
let thinking = if disable_thinking { let thinking = if disable_thinking {
tracing::info!( tracing::debug!(
"Thinking mode explicitly disabled for this request (max_tokens={})", "Thinking mode explicitly disabled for this request (max_tokens={})",
max_tokens max_tokens
); );

View File

@@ -64,7 +64,7 @@ use serde::{Deserialize, Serialize};
use std::time::Duration; use std::time::Duration;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream; use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, warn};
use crate::{ use crate::{
CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message, CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
@@ -166,7 +166,7 @@ impl DatabricksProvider {
.build() .build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;
info!( debug!(
"Initialized Databricks provider with model: {} on host: {}", "Initialized Databricks provider with model: {} on host: {}",
model, host model, host
); );
@@ -196,7 +196,7 @@ impl DatabricksProvider {
.build() .build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;
info!("Initialized Databricks provider '{}' with model: {} on host: {}", name, model, host); debug!("Initialized Databricks provider '{}' with model: {} on host: {}", name, model, host);
Ok(Self { Ok(Self {
client, client,
@@ -220,7 +220,7 @@ impl DatabricksProvider {
.build() .build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;
info!( debug!(
"Initialized Databricks provider with OAuth for model: {} on host: {}", "Initialized Databricks provider with OAuth for model: {} on host: {}",
model, host model, host
); );
@@ -249,7 +249,7 @@ impl DatabricksProvider {
.build() .build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;
info!("Initialized Databricks provider '{}' with OAuth for model: {} on host: {}", name, model, host); debug!("Initialized Databricks provider '{}' with OAuth for model: {} on host: {}", name, model, host);
Ok(Self { Ok(Self {
client, client,
@@ -857,7 +857,7 @@ impl LLMProvider for DatabricksProvider {
if status == reqwest::StatusCode::FORBIDDEN if status == reqwest::StatusCode::FORBIDDEN
&& (error_text.contains("Invalid Token") || error_text.contains("invalid_token")) && (error_text.contains("Invalid Token") || error_text.contains("invalid_token"))
{ {
info!("Received 403 Invalid Token error, attempting to refresh OAuth token"); debug!("Received 403 Invalid Token error, attempting to refresh OAuth token");
// Try to refresh the token if we're using OAuth // Try to refresh the token if we're using OAuth
if let DatabricksAuth::OAuth { .. } = &provider_clone.auth { if let DatabricksAuth::OAuth { .. } = &provider_clone.auth {
@@ -867,7 +867,7 @@ impl LLMProvider for DatabricksProvider {
// Try to get a new token (will attempt refresh or new OAuth flow) // Try to get a new token (will attempt refresh or new OAuth flow)
match provider_clone.auth.get_token().await { match provider_clone.auth.get_token().await {
Ok(_new_token) => { Ok(_new_token) => {
info!("Successfully refreshed OAuth token, retrying request"); debug!("Successfully refreshed OAuth token, retrying request");
// Retry the request with the new token // Retry the request with the new token
response = provider_clone response = provider_clone
@@ -1038,7 +1038,7 @@ impl LLMProvider for DatabricksProvider {
if status == reqwest::StatusCode::FORBIDDEN if status == reqwest::StatusCode::FORBIDDEN
&& (error_text.contains("Invalid Token") || error_text.contains("invalid_token")) && (error_text.contains("Invalid Token") || error_text.contains("invalid_token"))
{ {
info!("Received 403 Invalid Token error, attempting to refresh OAuth token"); debug!("Received 403 Invalid Token error, attempting to refresh OAuth token");
// Try to refresh the token if we're using OAuth // Try to refresh the token if we're using OAuth
if let DatabricksAuth::OAuth { .. } = &provider_clone.auth { if let DatabricksAuth::OAuth { .. } = &provider_clone.auth {
@@ -1048,7 +1048,7 @@ impl LLMProvider for DatabricksProvider {
// Try to get a new token (will attempt refresh or new OAuth flow) // Try to get a new token (will attempt refresh or new OAuth flow)
match provider_clone.auth.get_token().await { match provider_clone.auth.get_token().await {
Ok(_new_token) => { Ok(_new_token) => {
info!("Successfully refreshed OAuth token, retrying streaming request"); debug!("Successfully refreshed OAuth token, retrying streaming request");
// Retry the request with the new token // Retry the request with the new token
response = provider_clone response = provider_clone

View File

@@ -12,7 +12,7 @@ use std::sync::Arc;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tokio_stream::wrappers::ReceiverStream; use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info}; use tracing::{debug, error};
pub struct EmbeddedProvider { pub struct EmbeddedProvider {
session: Arc<Mutex<LlamaSession>>, session: Arc<Mutex<LlamaSession>>,
@@ -32,7 +32,7 @@ impl EmbeddedProvider {
gpu_layers: Option<u32>, gpu_layers: Option<u32>,
threads: Option<u32>, threads: Option<u32>,
) -> Result<Self> { ) -> Result<Self> {
info!("Loading embedded model from: {}", model_path); debug!("Loading embedded model from: {}", model_path);
// Expand tilde in path // Expand tilde in path
let expanded_path = shellexpand::tilde(&model_path); let expanded_path = shellexpand::tilde(&model_path);
@@ -41,7 +41,7 @@ impl EmbeddedProvider {
// If model doesn't exist and it's the default Qwen model, offer to download it // If model doesn't exist and it's the default Qwen model, offer to download it
if !model_path_buf.exists() { if !model_path_buf.exists() {
if model_path.contains("qwen2.5-7b-instruct-q3_k_m.gguf") { if model_path.contains("qwen2.5-7b-instruct-q3_k_m.gguf") {
info!("Model file not found. Attempting to download Qwen 2.5 7B model..."); debug!("Model file not found. Attempting to download Qwen 2.5 7B model...");
Self::download_qwen_model(&model_path_buf)?; Self::download_qwen_model(&model_path_buf)?;
} else { } else {
anyhow::bail!("Model file not found: {}", model_path_buf.display()); anyhow::bail!("Model file not found: {}", model_path_buf.display());
@@ -55,14 +55,14 @@ impl EmbeddedProvider {
if let Some(gpu_layers) = gpu_layers { if let Some(gpu_layers) = gpu_layers {
params.n_gpu_layers = gpu_layers; params.n_gpu_layers = gpu_layers;
info!("Using {} GPU layers", gpu_layers); debug!("Using {} GPU layers", gpu_layers);
} }
let context_size = context_length.unwrap_or(4096); let context_size = context_length.unwrap_or(4096);
info!("Using context length: {}", context_size); debug!("Using context length: {}", context_size);
// Load the model // Load the model
info!("Loading model..."); debug!("Loading model...");
let model = LlamaModel::load_from_file(model_path, params) let model = LlamaModel::load_from_file(model_path, params)
.map_err(|e| anyhow::anyhow!("Failed to load model: {}", e))?; .map_err(|e| anyhow::anyhow!("Failed to load model: {}", e))?;
@@ -79,7 +79,7 @@ impl EmbeddedProvider {
.create_session(session_params) .create_session(session_params)
.map_err(|e| anyhow::anyhow!("Failed to create session: {}", e))?; .map_err(|e| anyhow::anyhow!("Failed to create session: {}", e))?;
info!("Successfully loaded {} model", model_type); debug!("Successfully loaded {} model", model_type);
Ok(Self { Ok(Self {
session: Arc::new(Mutex::new(session)), session: Arc::new(Mutex::new(session)),
@@ -330,7 +330,7 @@ impl EmbeddedProvider {
Ok(inner_result) => match inner_result { Ok(inner_result) => match inner_result {
Ok(task_result) => match task_result { Ok(task_result) => match task_result {
Ok((text, token_count)) => { Ok((text, token_count)) => {
info!( debug!(
"Completed generation: {} tokens (dynamic limit was {})", "Completed generation: {} tokens (dynamic limit was {})",
token_count, dynamic_max_tokens token_count, dynamic_max_tokens
); );
@@ -448,9 +448,9 @@ impl EmbeddedProvider {
fs::create_dir_all(parent)?; fs::create_dir_all(parent)?;
} }
info!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)..."); debug!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)...");
info!("This is a one-time download that may take several minutes depending on your connection."); debug!("This is a one-time download that may take several minutes depending on your connection.");
info!("Downloading to: {}", model_path.display()); debug!("Downloading to: {}", model_path.display());
// Use curl with progress bar for download // Use curl with progress bar for download
let output = Command::new("curl") let output = Command::new("curl")
@@ -497,7 +497,7 @@ impl EmbeddedProvider {
); );
} }
info!("Successfully downloaded Qwen 2.5 7B model ({}MB)", size_mb); debug!("Successfully downloaded Qwen 2.5 7B model ({}MB)", size_mb);
Ok(()) Ok(())
} }
} }

View File

@@ -392,7 +392,7 @@ pub async fn get_oauth_token_async(
if let Err(e) = token_cache.save_token(&new_token) { if let Err(e) = token_cache.save_token(&new_token) {
tracing::warn!("Failed to save refreshed token: {}", e); tracing::warn!("Failed to save refreshed token: {}", e);
} }
tracing::info!("Successfully refreshed token"); tracing::debug!("Successfully refreshed token");
return Ok(new_token.access_token); return Ok(new_token.access_token);
} }
Err(e) => { Err(e) => {

View File

@@ -1,24 +0,0 @@
#!/bin/bash
# Sets up a throwaway environment under /tmp/g3-test-planning for manually
# exercising g3's planning mode. (Recorded here as a deleted file in the diff.)
set -e
# Clean logs first so a subsequent run starts from an empty log directory.
# NOTE(review): assumes the repo lives at ~/RustroverProjects/g3 — confirm.
rm -rf ~/RustroverProjects/g3/logs/*.log ~/RustroverProjects/g3/logs/*.txt 2>/dev/null || true
# Create test requirements file that planning mode will pick up.
mkdir -p /tmp/g3-test-planning/g3-plan
cat > /tmp/g3-test-planning/g3-plan/new_requirements.md <<'EOF'
Simple test task: List all .rs files in the src directory.
EOF
# Initialize git repo for test (planning mode requires git)
cd /tmp/g3-test-planning
if [ ! -d .git ]; then
git init
# Local identity so the initial commit succeeds on machines without git config.
git config user.name "Test User"
git config user.email "test@example.com"
git add .
# "|| true" tolerates an empty tree (nothing staged) without aborting under set -e.
git commit -m "Initial commit" || true
fi
echo "Test environment ready at /tmp/g3-test-planning"
echo "Run: cd /tmp && ~/RustroverProjects/g3/target/release/g3 --planning --codepath /tmp/g3-test-planning --no-git"