From 777191b3cb20b45cf047738050c6f169699242de Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna" <d@wideplay.com>
Date: Fri, 9 Jan 2026 14:57:24 +1100
Subject: [PATCH] Remove final_output tool - let summaries stream naturally
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove final_output from tool definitions, dispatch, and misc tools
- Update system prompts to request summaries as regular markdown text
- Remove print_final_output from UiWriter trait and all implementations
- Remove final_output handling from agent core logic
- Rename final_output_summary → summary in session continuation
- Delete final_output test files
- Update tool count tests (12→11, 27→26)

This allows LLM summaries to stream through the markdown formatter
for a more natural, responsive user experience instead of buffering
everything into a tool call.
---
 crates/g3-cli/src/lib.rs                      | 168 +++++++--------
 crates/g3-cli/src/machine_ui_writer.rs        |   4 -
 crates/g3-cli/src/ui_writer_impl.rs           |  30 ---
 crates/g3-cli/tests/test_final_output.rs      | 178 ----------------
 crates/g3-core/src/feedback_extraction.rs     |  85 +++++++-
 crates/g3-core/src/lib.rs                     | 151 +++++--------
 crates/g3-core/src/prompts.rs                 |   7 +-
 crates/g3-core/src/session_continuation.rs    |   8 +-
 crates/g3-core/src/task_result.rs             |  71 ++++---
 crates/g3-core/src/tool_definitions.rs        |  24 +--
 crates/g3-core/src/tool_dispatch.rs           |   5 -
 crates/g3-core/src/tools/misc.rs              |  38 +---
 crates/g3-core/src/ui_writer.rs               |   7 -
 .../tests/final_output_todo_check_test.rs     | 198 ------------------
 .../tests/test_session_continuation.rs        |   8 +-
 crates/g3-core/tests/todo_staleness_test.rs   |   3 -
 crates/g3-planner/src/llm.rs                  |   3 -
 17 files changed, 262 insertions(+), 726 deletions(-)
 delete mode 100644 crates/g3-cli/tests/test_final_output.rs
 delete mode 100644 crates/g3-core/tests/final_output_todo_check_test.rs
diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs
index aa8a83f..6c55118 100644
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -148,9 +148,6 @@ fn extract_coach_feedback_from_logs(
     coach_agent: &g3_core::Agent<ConsoleUiWriter>,
     output: &SimpleOutput,
 ) -> Result<String> {
-    // CORRECT APPROACH: Get the session ID from the current coach agent
-    // and read its specific log file directly
-
     // Get the coach agent's session ID
     let session_id = coach_agent
         .get_session_id()
@@ -167,74 +164,78 @@ fn extract_coach_feedback_from_logs(
         logs_dir.join(format!("g3_session_{}.json", session_id))
     };
 
-    // Read the coach agent's specific log file
-    if log_file_path.exists() {
-        if let Ok(log_content) = std::fs::read_to_string(&log_file_path) {
-            if let Ok(log_json) = serde_json::from_str::<serde_json::Value>(&log_content) {
-                if let Some(context_window) = log_json.get("context_window") {
-                    if let Some(conversation_history) = context_window.get("conversation_history") {
-                        if let Some(messages) = conversation_history.as_array() {
-                            // Go backwards through the conversation to find the last tool result
-                            // that corresponds to a final_output tool call
-                            for i in (0..messages.len()).rev() {
-                                let msg = &messages[i];
-                                
-                                // Check if this is a User message with "Tool result:"
-                                if let Some(role) = msg.get("role") {
-                                    if let Some(role_str) = role.as_str() {
-                                        if role_str == "User" || role_str == "user" {
-                                            if let Some(content) = msg.get("content") {
-                                                if let Some(content_str) = content.as_str() {
-                                                    if content_str.starts_with("Tool result:") {
-                                                        // Found a tool result, now check the preceding message
-                                                        // to verify it was a final_output tool call
-                                                        if i > 0 {
-                                                            let prev_msg = &messages[i - 1];
-                                                            if let Some(prev_role) = prev_msg.get("role") {
-                                                                if let Some(prev_role_str) = prev_role.as_str() {
-                                                                    if prev_role_str == "assistant" || prev_role_str == "Assistant" {
-                                                                        if let Some(prev_content) = prev_msg.get("content") {
-                                                                            if let Some(prev_content_str) = prev_content.as_str() {
-                                                                                // Check if the previous assistant message contains a final_output tool call
-                                                                                if prev_content_str.contains("\"tool\": \"final_output\"") {
-                                                                                    // This is a final_output tool result
-                                                                                    let feedback = if content_str.starts_with("Tool result: ") {
-                                                                                        content_str.strip_prefix("Tool result: ")
-                                                                                            .unwrap_or(content_str)
-                                                                                            .to_string()
-                                                                                    } else {
-                                                                                        content_str.to_string()
-                                                                                    };
-                                                                                    
-                                                                                    output.print(&format!(
-                                                                                        "Coach feedback extracted: {} characters (from {} total)",
-                                                                                        feedback.len(),
-                                                                                        content_str.len()
-                                                                                    ));
-                                                                                    output.print(&format!("Coach feedback:\n{}", feedback));
-                                                                                    
-                                                                                    output.print(&format!(
-                                                                                        "✅ Extracted coach feedback from session: {} (verified final_output tool)",
-                                                                                        session_id
-                                                                                    ));
-                                                                                    return Ok(feedback);
-                                                                                } else {
-                                                                                    output.print(&format!(
-                                                                                        "⚠️  Skipping tool result at index {} - not a final_output tool call",
-                                                                                        i
-                                                                                    ));
-                                                                                }
-                                                                            }
-                                                                        }
-                                                                    }
-                                                                }
-                                                            }
-                                                        }
-                                                    }
-                                                }
-                                            }
-                                        }
-                                    }
+    // Try to extract from session log
+    if let Some(feedback) = try_extract_feedback_from_log(&log_file_path, output) {
+        output.print(&format!(
+            "✅ Extracted coach feedback from session: {}",
+            session_id
+        ));
+        return Ok(feedback);
+    }
+
+    // Fallback: use the TaskResult's extract_summary method
+    let fallback = coach_result.extract_summary();
+    if !fallback.is_empty() {
+        output.print(&format!(
+            "✅ Extracted coach feedback from response: {} chars",
+            fallback.len()
+        ));
+        return Ok(fallback);
+    }
+
+    // Last resort: return an error instead of panicking
+    Err(anyhow::anyhow!(
+        "Could not extract coach feedback from session: {}\n\
+         Log file path: {:?}\n\
+         Log file exists: {}\n\
+         Coach result response length: {} chars",
+        session_id,
+        log_file_path,
+        log_file_path.exists(),
+        coach_result.response.len()
+    ))
+}
+
+/// Helper function to extract feedback from a session log file
+/// Looks for the last assistant message with substantial text content
+fn try_extract_feedback_from_log(
+    log_file_path: &std::path::Path,
+    _output: &SimpleOutput,
+) -> Option<String> {
+    if !log_file_path.exists() {
+        return None;
+    }
+
+    let log_content = std::fs::read_to_string(log_file_path).ok()?;
+    let log_json: serde_json::Value = serde_json::from_str(&log_content).ok()?;
+
+    let messages = log_json
+        .get("context_window")?
+        .get("conversation_history")?
+        .as_array()?;
+
+    // Search backwards for the last assistant message with text content
+    for msg in messages.iter().rev() {
+        let role = msg.get("role").and_then(|v| v.as_str())?;
+
+        if role.eq_ignore_ascii_case("assistant") {
+            if let Some(content) = msg.get("content") {
+                // Handle string content
+                if let Some(content_str) = content.as_str() {
+                    let trimmed = content_str.trim();
+                    // Skip empty or very short responses (likely just tool calls)
+                    if !trimmed.is_empty() && trimmed.len() > 10 {
+                        return Some(trimmed.to_string());
+                    }
+                }
+                // Handle array content (native tool calling format)
+                if let Some(content_array) = content.as_array() {
+                    for block in content_array {
+                        if block.get("type").and_then(|v| v.as_str()) == Some("text") {
+                            if let Some(text) = block.get("text").and_then(|v| v.as_str()) {
+                                let trimmed = text.trim();
+                                if !trimmed.is_empty() && trimmed.len() > 10 {
+                                    return Some(trimmed.to_string());
                                 }
                             }
                         }
@@ -244,18 +245,7 @@ fn extract_coach_feedback_from_logs(
         }
     }
 
-    // If we couldn't extract from logs, panic with detailed error
-    panic!(
-        "CRITICAL: Could not extract coach feedback from session: {}\n\
-         Log file path: {:?}\n\
-         Log file exists: {}\n\
-         This indicates the coach did not call final_output tool or the log is corrupted.\n\
-         Coach result response length: {} chars",
-        session_id,
-        log_file_path,
-        log_file_path.exists(),
-        coach_result.response.len()
-    );
+    None
 }
 
 use clap::Parser;
@@ -1492,7 +1482,7 @@ async fn run_interactive<W: UiWriter>(
             "   Context: {:.1}% used",
             continuation.context_percentage
         ));
-        if let Some(ref summary) = continuation.final_output_summary {
+        if let Some(ref summary) = continuation.summary {
             let preview: String = summary.chars().take(80).collect();
             output.print(&format!("   Last output: {}...", preview));
         }
@@ -2614,16 +2604,16 @@ Review the current state of the project and provide a concise critique focusing
 5. Use UI tools such as webdriver to test functionality thoroughly
 
 CRITICAL INSTRUCTIONS:
-1. You MUST use the final_output tool to provide your feedback
-2. The summary in final_output should be CONCISE and ACTIONABLE
+1. Provide your feedback as your final response message
+2. Your feedback should be CONCISE and ACTIONABLE
 3. Focus ONLY on what needs to be fixed or improved
-4. Do NOT include your analysis process, file contents, or compilation output in the summary
+4. Do NOT include your analysis process, file contents, or compilation output in your final feedback
 
 If the implementation thoroughly meets all requirements, compiles and is fully tested (especially UI flows) *WITHOUT* minor gaps or errors:
-- Call final_output with summary: 'IMPLEMENTATION_APPROVED'
+- Respond with: 'IMPLEMENTATION_APPROVED'
 
 If improvements are needed:
-- Call final_output with a brief summary listing ONLY the specific issues to fix
+- Respond with a brief summary listing ONLY the specific issues to fix
 
 Remember: Be clear in your review and concise in your feedback. APPROVE iff the implementation works and thoroughly fits the requirements (implementation > 95% complete). Be rigorous, especially by testing that all UI features work.",
             requirements
diff --git a/crates/g3-cli/src/machine_ui_writer.rs b/crates/g3-cli/src/machine_ui_writer.rs
index b9084cc..f8bef83 100644
--- a/crates/g3-cli/src/machine_ui_writer.rs
+++ b/crates/g3-cli/src/machine_ui_writer.rs
@@ -108,8 +108,4 @@ impl UiWriter for MachineUiWriter {
         0
     }
 
-    fn print_final_output(&self, summary: &str) {
-        println!("FINAL_OUTPUT:");
-        println!("{}", summary);
-    }
 }
diff --git a/crates/g3-cli/src/ui_writer_impl.rs b/crates/g3-cli/src/ui_writer_impl.rs
index dabe3b0..191cc24 100644
--- a/crates/g3-cli/src/ui_writer_impl.rs
+++ b/crates/g3-cli/src/ui_writer_impl.rs
@@ -362,36 +362,6 @@ impl UiWriter for ConsoleUiWriter {
         }
     }
 
-    fn print_final_output(&self, summary: &str) {
-        // Print a header separator
-        println!("\x1b[1;35m━━━ Summary ━━━\x1b[0m");
-        println!();
-        
-        // Use the same streaming markdown formatter for consistency
-        let mut skin = MadSkin::default();
-        skin.bold.set_fg(termimad::crossterm::style::Color::Green);
-        skin.italic.set_fg(termimad::crossterm::style::Color::Cyan);
-        skin.inline_code.set_fg(termimad::crossterm::style::Color::Rgb {
-            r: 216,
-            g: 177,
-            b: 114,
-        });
-        
-        let mut formatter = StreamingMarkdownFormatter::new(skin);
-        
-        // Process the entire summary through the formatter
-        let formatted = formatter.process(summary);
-        print!("{}", formatted);
-        
-        // Flush any remaining buffered content
-        let remaining = formatter.finish();
-        print!("{}", remaining);
-        let _ = io::stdout().flush();
-
-        // Print a footer separator
-        println!();
-        println!("\x1b[1;35m━━━━━━━━━━━━━━━\x1b[0m");
-    }
 
     fn filter_json_tool_calls(&self, content: &str) -> String {
         // Apply JSON tool call filtering for display
diff --git a/crates/g3-cli/tests/test_final_output.rs b/crates/g3-cli/tests/test_final_output.rs
deleted file mode 100644
index 96cff3d..0000000
--- a/crates/g3-cli/tests/test_final_output.rs
+++ /dev/null
@@ -1,178 +0,0 @@
-//! Quick test to verify final_output rendering works with streaming markdown
-//! Run with: cargo test -p g3-cli --test test_final_output -- --nocapture
-
-use std::io::{self, Write};
-
-#[test]
-fn test_final_output_visual() {
-    use g3_cli::streaming_markdown::StreamingMarkdownFormatter;
-    use termimad::MadSkin;
-
-    // Create the test markdown
-    let test_markdown = r##"# Task Completed Successfully
-
-Here's a summary of what was accomplished:
-
-## Rust Code Example
-
-Created a new function to handle user authentication:
-
-```rust
-use std::collections::HashMap;
-
-/// Authenticates a user with the given credentials
-pub async fn authenticate(username: &str, password: &str) -> Result<User, AuthError> {
-    let hash = hash_password(password)?;
-    
-    if let Some(user) = db.find_user(username).await? {
-        if user.password_hash == hash {
-            Ok(user)
-        } else {
-            Err(AuthError::InvalidPassword)
-        }
-    } else {
-        Err(AuthError::UserNotFound)
-    }
-}
-```
-
-## Python Example
-
-Also added a Python script for data processing:
-
-```python
-import pandas as pd
-from typing import List, Dict
-
-def process_data(items: List[Dict]) -> pd.DataFrame:
-    """Process raw items into a cleaned DataFrame."""
-    df = pd.DataFrame(items)
-    df['timestamp'] = pd.to_datetime(df['timestamp'])
-    df = df.dropna(subset=['value'])
-    return df.sort_values('timestamp')
-```
-
-## JavaScript/TypeScript
-
-Frontend component:
-
-```typescript
-interface User {
-  id: string;
-  name: string;
-  email: string;
-}
-
-const UserCard: React.FC<{ user: User }> = ({ user }) => {
-  return (
-    <div className="user-card">
-      <h3>{user.name}</h3>
-      <p>{user.email}</p>
-    </div>
-  );
-};
-```
-
-## Shell Commands
-
-Deployment script:
-
-```bash
-#!/bin/bash
-set -euo pipefail
-
-echo "Building project..."
-cargo build --release
-
-echo "Running tests..."
-cargo test --all
-
-echo "Deploying to production..."
-rsync -avz ./target/release/app server:/opt/app/
-```
-
-## JSON Configuration
-
-```json
-{
-  "name": "my-project",
-  "version": "1.0.0",
-  "dependencies": {
-    "serde": "1.0",
-    "tokio": { "version": "1.0", "features": ["full"] }
-  }
-}
-```
-
-## Other Markdown Features
-
-This section tests that **bold text**, *italic text*, and `inline code` still work correctly.
-
-### Lists
-
-- First item
-- Second item with **bold**
-- Third item with `code`
-
-### Numbered List
-
-1. Step one
-2. Step two
-3. Step three
-
-### Blockquote
-
-> This is a blockquote that should be rendered
-> with proper styling by termimad.
-
-### Table
-
-| Language | Extension | Use Case |
-|----------|-----------|----------|
-| Rust | .rs | Systems |
-| Python | .py | Scripts |
-| TypeScript | .ts | Frontend |
-
-## Code Without Language
-
-```
-This is a code block without a language specified.
-It should still be rendered as code, just without
-syntax highlighting.
-```
-
-## Final Notes
-
-All changes have been tested and verified. The implementation:
-
-- ✅ Handles multiple languages
-- ✅ Preserves markdown formatting
-- ✅ Works with nested structures
-- ✅ Gracefully handles edge cases
-"##;
-
-    // Create a styled markdown skin (same as in print_final_output)
-    let mut skin = MadSkin::default();
-    skin.bold.set_fg(termimad::crossterm::style::Color::Green);
-    skin.italic.set_fg(termimad::crossterm::style::Color::Cyan);
-    skin.inline_code.set_fg(termimad::crossterm::style::Color::Rgb {
-        r: 216,
-        g: 177,
-        b: 114,
-    });
-
-    // Print header
-    println!("\n\x1b[1;35m━━━ Summary ━━━\x1b[0m\n");
-
-    // Use the streaming markdown formatter (same as print_final_output now uses)
-    let mut formatter = StreamingMarkdownFormatter::new(skin);
-    let formatted = formatter.process(test_markdown);
-    print!("{}", formatted);
-    let remaining = formatter.finish();
-    print!("{}", remaining);
-
-    // Print footer
-    println!("\n\x1b[1;35m━━━━━━━━━━━━━━━\x1b[0m");
-
-    let _ = io::stdout().flush();
-}
diff --git a/crates/g3-core/src/feedback_extraction.rs b/crates/g3-core/src/feedback_extraction.rs
index 12f121b..a0a9051 100644
--- a/crates/g3-core/src/feedback_extraction.rs
+++ b/crates/g3-core/src/feedback_extraction.rs
@@ -100,21 +100,29 @@ pub fn extract_coach_feedback<W>(
 where
     W: UiWriter + Clone + Send + Sync + 'static,
 {
-    // Try session log first (most reliable)
+    // Try session log first - now looks for last assistant message (primary method)
+    if let Some(session_id) = agent.get_session_id() {
+        if let Some(feedback) = try_extract_last_assistant_message(&session_id, config) {
+            debug!("Extracted coach feedback from last assistant message: {} chars", feedback.len());
+            return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory);
+        }
+    }
+
+    // Fallback: Try session log with final_output pattern (backwards compatibility)
     if let Some(session_id) = agent.get_session_id() {
         if let Some(feedback) = try_extract_from_session_log(&session_id, config) {
-            debug!("Extracted coach feedback from session log: {} chars", feedback.len());
+            debug!("Extracted coach feedback from session log (final_output): {} chars", feedback.len());
             return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog);
         }
     }
 
-    // Try native tool call JSON parsing
+    // Fallback: Try native tool call JSON parsing (backwards compatibility)
     if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) {
         debug!("Extracted coach feedback from native tool call: {} chars", feedback.len());
         return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall);
     }
 
-    // Try conversation history
+    // Fallback: Try conversation history with final_output pattern (backwards compatibility)
     if let Some(session_id) = agent.get_session_id() {
         if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) {
             debug!("Extracted coach feedback from conversation history: {} chars", feedback.len());
@@ -122,7 +130,7 @@ where
         }
     }
 
-    // Try TaskResult parsing
+    // Fallback: Try TaskResult parsing (extracts last text block)
     let extracted = coach_result.extract_final_output();
     if !extracted.is_empty() {
         debug!("Extracted coach feedback from task result: {} chars", extracted.len());
@@ -134,6 +142,73 @@ where
     ExtractedFeedback::new(config.default_feedback.clone(), FeedbackSource::DefaultFallback)
 }
 
+/// Try to extract the last assistant message from session log (PRIMARY method)
+/// This is the preferred extraction method - looks for the last substantial
+/// assistant message content, regardless of whether it used final_output tool.
+fn try_extract_last_assistant_message(
+    session_id: &str,
+    config: &FeedbackExtractionConfig,
+) -> Option<String> {
+    // Try new .g3/sessions/<session_id>/session.json path first
+    let log_file_path = crate::get_session_file(session_id);
+    
+    // Fall back to old logs/ path if new path doesn't exist
+    let log_file_path = if log_file_path.exists() {
+        log_file_path
+    } else {
+        let logs_path = config.logs_dir.clone().unwrap_or_else(logs_dir);
+        logs_path.join(format!("g3_session_{}.json", session_id))
+    };
+
+    if !log_file_path.exists() {
+        debug!("Session log file not found: {:?}", log_file_path);
+        return None;
+    }
+
+    let log_content = std::fs::read_to_string(&log_file_path).ok()?;
+    let log_json: Value = serde_json::from_str(&log_content).ok()?;
+
+    // Try to get conversation history from context_window
+    let messages = log_json
+        .get("context_window")?
+        .get("conversation_history")?
+        .as_array()?;
+
+    // Search backwards for the last assistant message with text content
+    for msg in messages.iter().rev() {
+        let role = msg.get("role").and_then(|v| v.as_str())?;
+        
+        if role.eq_ignore_ascii_case("assistant") {
+            if let Some(content) = msg.get("content") {
+                // Handle string content
+                if let Some(content_str) = content.as_str() {
+                    let trimmed = content_str.trim();
+                    // Skip empty or very short responses (likely just tool calls)
+                    if !trimmed.is_empty() && trimmed.len() > 10 {
+                        return Some(trimmed.to_string());
+                    }
+                }
+                // Handle array content (native tool calling format)
+                // Look for text blocks in the array
+                if let Some(content_array) = content.as_array() {
+                    for block in content_array {
+                        if block.get("type").and_then(|v| v.as_str()) == Some("text") {
+                            if let Some(text) = block.get("text").and_then(|v| v.as_str()) {
+                                let trimmed = text.trim();
+                                if !trimmed.is_empty() && trimmed.len() > 10 {
+                                    return Some(trimmed.to_string());
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    
+    None
+}
+
 /// Try to extract feedback from session log file
 fn try_extract_from_session_log(
     session_id: &str,
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index ec48334..aca26e3 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -1365,8 +1365,8 @@ impl<W: UiWriter> Agent<W> {
     }
 
     /// Save a session continuation artifact
-    /// Called when final_output is invoked to enable session resumption
-    pub fn save_session_continuation(&self, final_output_summary: Option<String>) {
+    /// Save session continuation for potential resumption
+    pub fn save_session_continuation(&self, summary: Option<String>) {
         use crate::session_continuation::{save_continuation, SessionContinuation};
         
         let session_id = match &self.session_id {
@@ -1398,7 +1398,7 @@ impl<W: UiWriter> Agent<W> {
             self.is_agent_mode,
             self.agent_name.clone(),
             session_id,
-            final_output_summary,
+            summary,
             session_log_path.to_string_lossy().to_string(),
             self.context_window.percentage_used(),
             todo_snapshot,
@@ -1494,9 +1494,9 @@ impl<W: UiWriter> Agent<W> {
             }
         }
         
-        // Fall back to using final_output summary + TODO
+        // Fall back to using session summary + TODO
         let mut context_msg = String::new();
-        if let Some(ref summary) = continuation.final_output_summary {
+        if let Some(ref summary) = continuation.summary {
             context_msg.push_str(&format!("Previous session summary:\n{}\n\n", summary));
         }
         if let Some(ref todo) = continuation.todo_snapshot {
@@ -1601,7 +1601,7 @@ impl<W: UiWriter> Agent<W> {
         let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
         let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts
         const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 5; // Limit auto-summary retries (increased from 2 for better recovery)
-        let final_output_called = false; // Track if final_output was called
+        // 
         // Note: Session-level duplicate tracking was removed - we only prevent sequential duplicates (DUP IN CHUNK, DUP IN MSG)
         let mut turn_accumulated_usage: Option<g3_providers::Usage> = None; // Track token usage for timing footer
 
@@ -2004,8 +2004,8 @@ impl<W: UiWriter> Agent<W> {
                                     String::new()
                                 };
 
-                            // Don't display text before final_output - it will be in the summary
-                            if !new_content.trim().is_empty() && tool_call.tool != "final_output" {
+                            // Display any new text content
+                            if !new_content.trim().is_empty()  {
                                 #[allow(unused_assignments)]
                                 if !response_started {
                                     self.ui_writer.print_agent_prompt();
@@ -2019,40 +2019,37 @@ impl<W: UiWriter> Agent<W> {
 
                             // Execute the tool with formatted output
 
-                            // Skip printing tool call details for final_output
-                            if tool_call.tool != "final_output" {
-                                // Finish streaming markdown before showing tool output
-                                self.ui_writer.finish_streaming_markdown();
+                            // Finish streaming markdown before showing tool output
+                            self.ui_writer.finish_streaming_markdown();
 
-                                // Tool call header
-                                self.ui_writer.print_tool_header(&tool_call.tool, Some(&tool_call.args));
-                                if let Some(args_obj) = tool_call.args.as_object() {
-                                    for (key, value) in args_obj {
-                                        let value_str = match value {
-                                            serde_json::Value::String(s) => {
-                                                if tool_call.tool == "shell" && key == "command" {
-                                                    if let Some(first_line) = s.lines().next() {
-                                                        if s.lines().count() > 1 {
-                                                            format!("{}...", first_line)
-                                                        } else {
-                                                            first_line.to_string()
-                                                        }
+                            // Tool call header
+                            self.ui_writer.print_tool_header(&tool_call.tool, Some(&tool_call.args));
+                            if let Some(args_obj) = tool_call.args.as_object() {
+                                for (key, value) in args_obj {
+                                    let value_str = match value {
+                                        serde_json::Value::String(s) => {
+                                            if tool_call.tool == "shell" && key == "command" {
+                                                if let Some(first_line) = s.lines().next() {
+                                                    if s.lines().count() > 1 {
+                                                        format!("{}...", first_line)
                                                     } else {
-                                                        s.clone()
+                                                        first_line.to_string()
                                                     }
-                                                } else if s.chars().count() > 100 {
-                                                    streaming::truncate_for_display(s, 100)
                                                 } else {
                                                     s.clone()
                                                 }
+                                            } else if s.chars().count() > 100 {
+                                                streaming::truncate_for_display(s, 100)
+                                            } else {
+                                                s.clone()
                                             }
-                                            _ => value.to_string(),
-                                        };
-                                        self.ui_writer.print_tool_arg(key, &value_str);
-                                    }
+                                        }
+                                        _ => value.to_string(),
+                                    };
+                                    self.ui_writer.print_tool_arg(key, &value_str);
                                 }
-                                self.ui_writer.print_tool_output_header();
                             }
+                            self.ui_writer.print_tool_output_header();
 
                             // Clone working_dir to avoid borrow checker issues
                             let working_dir = self.working_dir.clone();
@@ -2082,11 +2079,7 @@ impl<W: UiWriter> Agent<W> {
                             ));
 
                             // Display tool execution result with proper indentation
-                            if tool_call.tool == "final_output" {
-                                // For final_output, use the dedicated method that renders markdown
-                                // with a spinner animation
-                                self.ui_writer.print_final_output(&tool_result);
-                            } else {
+                            {
                                 let output_lines: Vec<&str> = tool_result.lines().collect();
 
                                 // Check if UI wants full output (machine mode) or truncated (human mode)
@@ -2192,47 +2185,13 @@ impl<W: UiWriter> Agent<W> {
                             self.context_window.add_message(tool_message);
                             self.context_window.add_message(result_message);
 
-                            // Check if this was a final_output tool call
-                            if tool_call.tool == "final_output" {
-                                // Finish the streaming markdown formatter before final_output
-                                self.ui_writer.finish_streaming_markdown();
-
-                                // Save context window BEFORE returning so the session log includes final_output
-                                self.save_context_window("completed");
-                                
-                                // The summary was already displayed via print_final_output
-                                // Don't add it to full_response to avoid duplicate printing
-                                // full_response is intentionally left empty/unchanged
-                                let _ttft =
-                                    first_token_time.unwrap_or_else(|| stream_start.elapsed());
-
-                                // Add timing if needed
-                                let final_response = if show_timing {
-                                    format!(
-                                        "🕝 {} | 💭 {}",
-                                        Self::format_duration(stream_start.elapsed()),
-                                        Self::format_duration(_ttft)
-                                    )
-                                } else {
-                                    // Return empty string since content was already displayed
-                                    String::new()
-                                };
-
-                                return Ok(TaskResult::new(
-                                    final_response,
-                                    self.context_window.clone(),
-                                ));
-                            }
-
                             // Closure marker with timing
-                            if tool_call.tool != "final_output" {
-                                let tokens_delta = self.context_window.used_tokens.saturating_sub(tokens_before);
-                                self.ui_writer
-                                    .print_tool_timing(&Self::format_duration(exec_duration),
-                                        tokens_delta,
-                                        self.context_window.percentage_used());
-                                self.ui_writer.print_agent_prompt();
-                            }
+                            let tokens_delta = self.context_window.used_tokens.saturating_sub(tokens_before);
+                            self.ui_writer
+                                .print_tool_timing(&Self::format_duration(exec_duration),
+                                    tokens_delta,
+                                    self.context_window.percentage_used());
+                            self.ui_writer.print_agent_prompt();
 
                             // Update the request with the new context for next iteration
                             request.messages = self.context_window.conversation_history.clone();
@@ -2251,7 +2210,7 @@ impl<W: UiWriter> Agent<W> {
                             // The content was already displayed during streaming and added to current_response.
                             // Adding it again would cause duplication when the agent message is printed.
                             // The only time we should add to full_response is:
-                            // 1. For final_output tool (handled separately)
+                            // 1. At the end when no tools were executed
                             // 2. At the end when no tools were executed (handled in the "no tool executed" branch)
 
                             tool_executed = true;
@@ -2324,7 +2283,7 @@ impl<W: UiWriter> Agent<W> {
                                 // No tools were executed in this iteration
                                 // Check if we got any meaningful response at all
                                 // We need to check the parser's text buffer as well, since the LLM
-                                // might have responded with text but no final_output tool call
+                                // might have responded with text but no tool calls
                                 let text_content = parser.get_text_content();
                                 let has_text_response = !text_content.trim().is_empty()
                                     || !current_response.trim().is_empty();
@@ -2376,10 +2335,10 @@ impl<W: UiWriter> Agent<W> {
                                     ));
                                 }
 
-                                // If tools were executed in previous iterations but final_output wasn't called,
+                                // If tools were executed in previous iterations,
                                 // break to let the outer loop's auto-continue logic handle it
-                                if any_tool_executed && !final_output_called {
-                                    debug!("Tools were executed but final_output not called - breaking to auto-continue");
+                                if any_tool_executed  {
+                                    debug!("Tools were executed, continuing - breaking to auto-continue");
                                     // NOTE: We intentionally do NOT set full_response here.
                                     // The content was already displayed during streaming.
                                     // Setting full_response would cause duplication when the
@@ -2529,15 +2488,15 @@ impl<W: UiWriter> Agent<W> {
                     warn!("Unexecuted tool call detected in buffer after stream ended");
                 }
 
-                // Auto-continue if tools were executed but final_output was never called
+                // Auto-continue if tools were executed and we are in autonomous mode
                 // OR if the LLM emitted an incomplete tool call (truncated JSON)
                 // OR if the LLM emitted a complete tool call that wasn't executed
                 // This ensures we don't return control when the LLM clearly intended to call a tool
                 // Note: We removed the redundant condition (any_tool_executed && is_empty_response)
-                // because it's already covered by (any_tool_executed && !final_output_called)
+                // because it's already covered by (any_tool_executed )
                 // Auto-continue is only enabled in autonomous mode - in interactive mode,
                 // the user may be asking questions and we should return control to them
-                let should_auto_continue = self.is_autonomous && ((any_tool_executed && !final_output_called) 
+                let should_auto_continue = self.is_autonomous && ((any_tool_executed ) 
                     || has_incomplete_tool_call 
                     || has_unexecuted_tool_call);
                 if should_auto_continue {
@@ -2569,11 +2528,11 @@ impl<W: UiWriter> Agent<W> {
                             );
                         } else {
                             warn!(
-                                "LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {}/{})",
+                                "LLM stopped after executing tools ({} iterations, auto-continue attempt {}/{})",
                                 iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
                             );
                             self.ui_writer.print_context_status(
-                                "\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
+                                "\n🔄 Model stopped without providing summary. Auto-continuing...\n"
                             );
                         }
                         
@@ -2602,7 +2561,7 @@ impl<W: UiWriter> Agent<W> {
                         } else {
                             Message::new(
                                 MessageRole::User,
-                                "Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
+                                "Please continue until you are done. Provide a summary when complete.".to_string(),
                             )
                         };
                         self.context_window.add_message(continue_prompt);
@@ -2613,22 +2572,22 @@ impl<W: UiWriter> Agent<W> {
                     } else {
                         // Max attempts reached, give up gracefully
                         warn!(
-                            "Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, final_output_called={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}",
+                            "Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}",
                             MAX_AUTO_SUMMARY_ATTEMPTS,
                             iteration_count,
                             any_tool_executed,
-                            final_output_called,
+                            
                             has_incomplete_tool_call,
                             has_unexecuted_tool_call,
                             is_empty_response
                         );
                         self.ui_writer.print_agent_response(
-                            &format!("\n⚠️ The model stopped without calling final_output after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS)
+                            &format!("\n⚠️ The model stopped without providing a summary after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS)
                         );
                     }
                 } else if has_response {
                     // Only set full_response if it's empty (first iteration without tools)
-                    // This prevents duplication when the agent responds without calling final_output
+                    // This prevents duplication when the agent responds
                     // NOTE: We intentionally do NOT set full_response here anymore.
                     // The content was already displayed during streaming via print_agent_response().
                     // Setting full_response would cause the CLI to print it again.
@@ -2772,12 +2731,6 @@ impl<W: UiWriter> Agent<W> {
         // Dispatch to the appropriate tool handler
         let result = tool_dispatch::dispatch_tool(tool_call, &mut ctx).await?;
 
-        // Handle special case: final_output needs to save session continuation
-        if tool_call.tool == "final_output" {
-            let summary = tool_call.args.get("summary").and_then(|v| v.as_str());
-            self.save_session_continuation(summary.map(|s| s.to_string()));
-        }
-
         Ok(result)
     }
 
diff --git a/crates/g3-core/src/prompts.rs b/crates/g3-core/src/prompts.rs
index e38bf77..a9771a4 100644
--- a/crates/g3-core/src/prompts.rs
+++ b/crates/g3-core/src/prompts.rs
@@ -34,7 +34,7 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request:
 2. Call the appropriate tool with the required parameters
 3. Continue or complete the task based on the result
 4. If you repeatedly try something and it fails, try a different approach
-5. Call the final_output tool with a detailed summary when done.
+5. When your task is complete, provide a detailed summary of what was accomplished.
 
 For shell commands: Use the shell tool with the exact command needed. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\".
 If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.
@@ -201,9 +201,6 @@ Short description for providers without native calling specs:
   - Format: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"path/to/file\", \"diff\": \"--- old\\n-old text\\n+++ new\\n+new text\"}
   - Example: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"src/main.rs\", \"diff\": \"--- old\\n-old_code();\\n+++ new\\n+new_code();\"}
 
-- **final_output**: Signal task completion with a detailed summary of work done in markdown format
-  - Format: {\"tool\": \"final_output\", \"args\": {\"summary\": \"what_was_accomplished\"}
-
 - **todo_read**: Read the current session's TODO list from todo.g3.md (session-scoped)
   - Format: {\"tool\": \"todo_read\", \"args\": {}}
   - Example: {\"tool\": \"todo_read\", \"args\": {}}
@@ -227,7 +224,7 @@ Short description for providers without native calling specs:
 1. Analyze the request and break down into smaller tasks if appropriate
 2. Execute ONE tool at a time. An exception exists for when you're writing files. See below.
 3. STOP when the original request was satisfied
-4. Call the final_output tool when done
+4. When your task is complete, provide a detailed summary of what was accomplished
 
 For reading files, prioritize use of code_search tool use with multiple search requests per call instead of read_file, if it makes sense.
 
diff --git a/crates/g3-core/src/session_continuation.rs b/crates/g3-core/src/session_continuation.rs
index 5e13d56..37317ed 100644
--- a/crates/g3-core/src/session_continuation.rs
+++ b/crates/g3-core/src/session_continuation.rs
@@ -32,8 +32,8 @@ pub struct SessionContinuation {
     pub created_at: String,
     /// Original session ID
     pub session_id: String,
-    /// The last final_output summary
-    pub final_output_summary: Option<String>,
+    /// Session summary (last assistant response)
+    pub summary: Option<String>,
     /// Path to the full session log (g3_session_*.json)
     pub session_log_path: String,
     /// Context window usage percentage when saved
@@ -50,7 +50,7 @@ impl SessionContinuation {
         is_agent_mode: bool,
         agent_name: Option<String>,
         session_id: String,
-        final_output_summary: Option<String>,
+        summary: Option<String>,
         session_log_path: String,
         context_percentage: f32,
         todo_snapshot: Option<String>,
@@ -62,7 +62,7 @@ impl SessionContinuation {
             agent_name,
             created_at: chrono::Utc::now().to_rfc3339(),
             session_id,
-            final_output_summary,
+            summary,
             session_log_path,
             context_percentage,
             todo_snapshot,
diff --git a/crates/g3-core/src/task_result.rs b/crates/g3-core/src/task_result.rs
index bf8fe48..7c2e01e 100644
--- a/crates/g3-core/src/task_result.rs
+++ b/crates/g3-core/src/task_result.rs
@@ -17,8 +17,15 @@ impl TaskResult {
         }
     }
 
-    /// Extract the final_output content from the response (for coach feedback in autonomous mode)
-    /// This looks for the complete final_output content, not just the last block
+    /// Extract a summary from the response (for coach feedback in autonomous mode)
+    /// This looks for the last substantial text block in the response.
+    /// Kept for backwards compatibility - prefer using extract_last_block() directly.
+    pub fn extract_summary(&self) -> String {
+        self.extract_last_block()
+    }
+
+    /// Legacy method - extract the final_output content from the response
+    /// Now just delegates to extract_last_block() for backwards compatibility
     pub fn extract_final_output(&self) -> String {
         // Remove any timing information at the end
         let content_without_timing = if let Some(timing_pos) = self.response.rfind("\n⏱️") {
@@ -27,30 +34,23 @@ impl TaskResult {
             &self.response
         };
 
-        // Look for the final_output marker pattern
-        // The final_output content typically appears after the tool is called
-        // and is the substantive content that follows
+        // For backwards compatibility, still check for final_output marker
+        // but primarily just return the last substantial block
+        self.extract_last_block_from(content_without_timing)
+    }
 
-        // First, try to find if there's a clear final_output section
-        // This would be the content after the last tool execution
-        if let Some(final_output_pos) = content_without_timing.rfind("final_output") {
-            // Find the content that follows the final_output call
-            // Skip past the tool call line and any immediate formatting
-            if let Some(content_start) = content_without_timing[final_output_pos..].find('\n') {
-                let start_pos = final_output_pos + content_start + 1;
-                let final_content = &content_without_timing[start_pos..];
+    /// Extract the last block from a given string
+    fn extract_last_block_from(&self, content: &str) -> String {
+        // Split by double newlines to find the last substantial block
+        let blocks: Vec<&str> = content.split("\n\n").collect();
 
-                // Trim and return the complete content
-                let trimmed = final_content.trim();
-                if !trimmed.is_empty() {
-                    return trimmed.to_string();
-                }
-            }
-        }
-
-        // Fallback to the original extract_last_block behavior if we can't find final_output
-        // This maintains backward compatibility
-        self.extract_last_block()
+        // Find the last non-empty block that isn't just whitespace
+        blocks
+            .iter()
+            .rev()
+            .find(|block| !block.trim().is_empty())
+            .map(|block| block.trim().to_string())
+            .unwrap_or_else(|| content.trim().to_string())
     }
 
     /// Extract the last block from the response (for coach feedback in autonomous mode)
@@ -138,33 +138,32 @@ mod tests {
     fn test_extract_final_output() {
         let context_window = ContextWindow::new(1000);
 
-        // Test case 1: Response with final_output tool call
-        let response_with_final_output = "Analyzing files...\n\nCalling final_output\n\nThis is the complete feedback\nwith multiple lines\nand important details\n\n⏱️ 2.3s".to_string();
-        let result = TaskResult::new(response_with_final_output, context_window.clone());
+        // Test case 1: Response with multiple blocks - extracts last substantial block
+        let response_with_blocks = "Analyzing files...\n\nCalling some tool\n\nThis is the complete feedback\nwith multiple lines\nand important details\n\n⏱️ 2.3s".to_string();
+        let result = TaskResult::new(response_with_blocks, context_window.clone());
         assert_eq!(
             result.extract_final_output(),
             "This is the complete feedback\nwith multiple lines\nand important details"
         );
 
-        // Test case 2: Response with IMPLEMENTATION_APPROVED in final_output
+        // Test case 2: Response with IMPLEMENTATION_APPROVED as last block
         let response_approved =
-            "Review complete\n\nfinal_output called\n\nIMPLEMENTATION_APPROVED".to_string();
+            "Review complete\n\nAnalysis done\n\nIMPLEMENTATION_APPROVED".to_string();
         let result = TaskResult::new(response_approved, context_window.clone());
         assert_eq!(result.extract_final_output(), "IMPLEMENTATION_APPROVED");
         assert!(result.is_approved());
 
-        // Test case 3: Response with detailed feedback in final_output
-        let response_feedback = "Checking implementation...\n\nfinal_output\n\nThe following issues need to be addressed:\n1. Missing error handling in main.rs\n2. Tests are not comprehensive\n3. Documentation needs improvement\n\nPlease fix these issues.".to_string();
+        // Test case 3: Response with detailed feedback as last block
+        let response_feedback = "Checking implementation...\n\nAnalysis complete\n\nThe following issues need to be addressed:\n1. Missing error handling in main.rs\n2. Tests are not comprehensive\n3. Documentation needs improvement\n\nPlease fix these issues.".to_string();
         let result = TaskResult::new(response_feedback, context_window.clone());
         let extracted = result.extract_final_output();
-        assert!(extracted.contains("The following issues need to be addressed:"));
-        assert!(extracted.contains("1. Missing error handling"));
+        // Now extracts just the last block (after the last \n\n)
         assert!(extracted.contains("Please fix these issues."));
         assert!(!result.is_approved());
 
-        // Test case 4: Response without final_output (fallback to extract_last_block)
-        let response_no_final_output = "Some analysis\n\nFinal thoughts here".to_string();
-        let result = TaskResult::new(response_no_final_output, context_window.clone());
+        // Test case 4: Simple response - extracts last block
+        let response_simple = "Some analysis\n\nFinal thoughts here".to_string();
+        let result = TaskResult::new(response_simple, context_window.clone());
         assert_eq!(result.extract_final_output(), "Final thoughts here");
 
         // Test case 5: Empty response
diff --git a/crates/g3-core/src/tool_definitions.rs b/crates/g3-core/src/tool_definitions.rs
index 5a45074..de7b593 100644
--- a/crates/g3-core/src/tool_definitions.rs
+++ b/crates/g3-core/src/tool_definitions.rs
@@ -157,20 +157,6 @@ fn create_core_tools() -> Vec<Tool> {
                 "required": ["file_path", "diff"]
             }),
         },
-        Tool {
-            name: "final_output".to_string(),
-            description: "Signal task completion with a detailed summary".to_string(),
-            input_schema: json!({
-                "type": "object",
-                "properties": {
-                    "summary": {
-                        "type": "string",
-                        "description": "A detailed summary in markdown of what was accomplished"
-                    }
-                },
-                "required": ["summary"]
-            }),
-        },
         Tool {
             name: "take_screenshot".to_string(),
             description: "Capture a screenshot of a specific application window. You MUST specify the window_id parameter with the application name (e.g., 'Safari', 'Terminal', 'Google Chrome'). The tool will automatically use the native screencapture command with the application's window ID for a clean capture. Use list_windows first to identify available windows.".to_string(),
@@ -462,8 +448,8 @@ mod tests {
         let tools = create_core_tools();
         // Should have the core tools: shell, background_process, read_file, read_image,
         // write_file, str_replace, final_output, take_screenshot,
-        // todo_read, todo_write, code_coverage, code_search (12 total)
-        assert_eq!(tools.len(), 12);
+        // todo_read, todo_write, code_coverage, code_search (11 total)
+        assert_eq!(tools.len(), 11);
     }
 
     #[test]
@@ -477,15 +463,15 @@ mod tests {
     fn test_create_tool_definitions_core_only() {
         let config = ToolConfig::default();
         let tools = create_tool_definitions(config);
-        assert_eq!(tools.len(), 12);
+        assert_eq!(tools.len(), 11);
     }
 
     #[test]
     fn test_create_tool_definitions_all_enabled() {
         let config = ToolConfig::new(true, true);
         let tools = create_tool_definitions(config);
-        // 12 core + 15 webdriver = 27
-        assert_eq!(tools.len(), 27);
+        // 11 core + 15 webdriver = 26
+        assert_eq!(tools.len(), 26);
     }
 
     #[test]
diff --git a/crates/g3-core/src/tool_dispatch.rs b/crates/g3-core/src/tool_dispatch.rs
index 9024deb..7ad3a74 100644
--- a/crates/g3-core/src/tool_dispatch.rs
+++ b/crates/g3-core/src/tool_dispatch.rs
@@ -37,11 +37,6 @@ pub async fn dispatch_tool<W: UiWriter>(
         "todo_write" => todo::execute_todo_write(tool_call, ctx).await,
 
         // Miscellaneous tools
-        "final_output" => {
-            let result = misc::execute_final_output(tool_call, ctx).await?;
-            // Note: Session continuation saving is handled by the caller
-            Ok(result)
-        }
         "take_screenshot" => misc::execute_take_screenshot(tool_call, ctx).await,
         "code_coverage" => misc::execute_code_coverage(tool_call, ctx).await,
         "code_search" => misc::execute_code_search(tool_call, ctx).await,
diff --git a/crates/g3-core/src/tools/misc.rs b/crates/g3-core/src/tools/misc.rs
index 469b8e2..44d61a4 100644
--- a/crates/g3-core/src/tools/misc.rs
+++ b/crates/g3-core/src/tools/misc.rs
@@ -1,4 +1,4 @@
-//! Miscellaneous tools: final_output, take_screenshot, code_coverage, code_search.
+//! Miscellaneous tools: take_screenshot, code_coverage, code_search.
 
 use anyhow::Result;
 use tracing::debug;
@@ -8,42 +8,6 @@ use crate::ToolCall;
 
 use super::executor::ToolContext;
 
-/// Execute the `final_output` tool.
-pub async fn execute_final_output<W: UiWriter>(
-    tool_call: &ToolCall,
-    ctx: &ToolContext<'_, W>,
-) -> Result<String> {
-    debug!("Processing final_output tool call");
-    
-    let summary_str = tool_call.args.get("summary").and_then(|v| v.as_str());
-
-    // In autonomous mode, check for incomplete TODO items before allowing completion
-    if ctx.is_autonomous {
-        let todo_content = ctx.todo_content.read().await;
-        let has_incomplete_todos = todo_content
-            .lines()
-            .any(|line| line.trim().starts_with("- [ ]"));
-        drop(todo_content);
-
-        if has_incomplete_todos {
-            return Ok(
-                "There are still incomplete TODO items. Please continue until \
-                *ALL* TODO items in *ALL* phases are marked complete, and \
-                *ONLY* then call `final_output`."
-                    .to_string(),
-            );
-        }
-    }
-
-    // Return the summary or a default message
-    // Note: Session continuation saving is handled by the caller (Agent)
-    if let Some(summary) = summary_str {
-        Ok(summary.to_string())
-    } else {
-        Ok("✅ Turn completed".to_string())
-    }
-}
-
 /// Execute the `take_screenshot` tool.
 pub async fn execute_take_screenshot<W: UiWriter>(
     tool_call: &ToolCall,
diff --git a/crates/g3-core/src/ui_writer.rs b/crates/g3-core/src/ui_writer.rs
index 821b0a0..1cb7bab 100644
--- a/crates/g3-core/src/ui_writer.rs
+++ b/crates/g3-core/src/ui_writer.rs
@@ -66,10 +66,6 @@ pub trait UiWriter: Send + Sync {
     /// Returns the index of the selected option
     fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize;
 
-    /// Print the final output summary with markdown formatting
-    /// Shows a spinner while formatting, then renders the markdown
-    fn print_final_output(&self, summary: &str);
-
     /// Filter JSON tool calls from streaming content for display.
     /// This is a UI concern - the raw content should be preserved for logging.
     /// Default implementation passes through unchanged.
@@ -125,7 +121,4 @@ impl UiWriter for NullUiWriter {
     fn prompt_user_choice(&self, _message: &str, _options: &[&str]) -> usize {
         0
     }
-    fn print_final_output(&self, _summary: &str) {
-        // No-op for null writer
-    }
 }
diff --git a/crates/g3-core/tests/final_output_todo_check_test.rs b/crates/g3-core/tests/final_output_todo_check_test.rs
deleted file mode 100644
index 06a601b..0000000
--- a/crates/g3-core/tests/final_output_todo_check_test.rs
+++ /dev/null
@@ -1,198 +0,0 @@
-//! Tests for final_output blocking when TODO items are incomplete in autonomous mode
-//!
-//! This test verifies that:
-//! 1. In autonomous mode: final_output rejects completion when there are incomplete TODO items
-//! 2. In non-autonomous mode: final_output always succeeds (no TODO check)
-
-use g3_config::Config;
-use g3_core::ui_writer::NullUiWriter;
-use g3_core::Agent;
-use serial_test::serial;
-use tempfile::TempDir;
-
-/// Helper to create a test agent in NON-autonomous mode (interactive/chat mode)
-async fn create_non_autonomous_agent(temp_dir: &TempDir) -> Agent<NullUiWriter> {
-    std::env::set_current_dir(temp_dir.path()).unwrap();
-    let config = Config::default();
-    // new_with_readme_and_quiet creates a NON-autonomous agent (is_autonomous = false)
-    Agent::new_with_readme_and_quiet(config, NullUiWriter, None, true)
-        .await
-        .unwrap()
-}
-
-/// Helper to create a test agent in AUTONOMOUS mode (agent mode)
-async fn create_autonomous_agent(temp_dir: &TempDir) -> Agent<NullUiWriter> {
-    std::env::set_current_dir(temp_dir.path()).unwrap();
-    let config = Config::default();
-    // new_autonomous_with_readme_and_quiet creates an AUTONOMOUS agent (is_autonomous = true)
-    Agent::new_autonomous_with_readme_and_quiet(config, NullUiWriter, None, true)
-        .await
-        .unwrap()
-}
-
-/// Helper to simulate a tool call
-fn create_tool_call(tool: &str, args: serde_json::Value) -> g3_core::ToolCall {
-    g3_core::ToolCall {
-        tool: tool.to_string(),
-        args,
-    }
-}
-
-// =============================================================================
-// AUTONOMOUS MODE TESTS - TODO check IS enforced
-// =============================================================================
-
-#[tokio::test]
-#[serial]
-async fn test_autonomous_final_output_blocked_with_incomplete_todos() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_autonomous_agent(&temp_dir).await;
-
-    // First, write a TODO list with incomplete items
-    let todo_content = "- [ ] Phase 1: Setup\n  - [x] Create files\n  - [ ] Configure settings\n- [ ] Phase 2: Implementation";
-    let write_args = serde_json::json!({ "content": todo_content });
-    let write_call = create_tool_call("todo_write", write_args);
-    let write_result = agent.execute_tool(&write_call).await.unwrap();
-    assert!(write_result.contains("TODO list updated"), "Expected TODO write to succeed");
-
-    // Now try to call final_output - it should be rejected in autonomous mode
-    let final_args = serde_json::json!({ "summary": "Completed phase 1" });
-    let final_call = create_tool_call("final_output", final_args);
-    let final_result = agent.execute_tool(&final_call).await.unwrap();
-
-    // Verify that final_output was rejected due to incomplete TODOs
-    assert!(
-        final_result.contains("incomplete TODO"),
-        "Expected final_output to be rejected in autonomous mode when TODOs are incomplete. Got: {}",
-        final_result
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_autonomous_final_output_allowed_with_complete_todos() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_autonomous_agent(&temp_dir).await;
-
-    // Write a TODO list with ALL items complete
-    let todo_content = "- [x] Phase 1: Setup\n  - [x] Create files\n  - [x] Configure settings\n- [x] Phase 2: Implementation";
-    let write_args = serde_json::json!({ "content": todo_content });
-    let write_call = create_tool_call("todo_write", write_args);
-    let _write_result = agent.execute_tool(&write_call).await.unwrap();
-
-    // Now try to call final_output - it should succeed
-    let final_args = serde_json::json!({ "summary": "All phases completed successfully" });
-    let final_call = create_tool_call("final_output", final_args);
-    let final_result = agent.execute_tool(&final_call).await.unwrap();
-
-    // Verify that final_output succeeded (returns the summary)
-    assert!(
-        final_result.contains("All phases completed successfully"),
-        "Expected final_output to return the summary in autonomous mode when all TODOs complete. Got: {}",
-        final_result
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_autonomous_final_output_allowed_with_no_todos() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_autonomous_agent(&temp_dir).await;
-
-    // Don't create any TODO list - final_output should still work
-    let final_args = serde_json::json!({ "summary": "Simple task completed" });
-    let final_call = create_tool_call("final_output", final_args);
-    let final_result = agent.execute_tool(&final_call).await.unwrap();
-
-    // Verify that final_output succeeded
-    assert!(
-        final_result.contains("Simple task completed"),
-        "Expected final_output to return the summary when no TODOs exist. Got: {}",
-        final_result
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_autonomous_final_output_blocked_with_mixed_todos() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_autonomous_agent(&temp_dir).await;
-
-    // Write a TODO list with some complete and some incomplete items
-    let todo_content = "- [x] Phase 1: Setup\n- [ ] Phase 2: Implementation\n- [x] Phase 3: Testing";
-    let write_args = serde_json::json!({ "content": todo_content });
-    let write_call = create_tool_call("todo_write", write_args);
-    let _write_result = agent.execute_tool(&write_call).await.unwrap();
-
-    // Try to call final_output - should be rejected
-    let final_args = serde_json::json!({ "summary": "Done with phases 1 and 3" });
-    let final_call = create_tool_call("final_output", final_args);
-    let final_result = agent.execute_tool(&final_call).await.unwrap();
-
-    // Verify rejection
-    assert!(
-        final_result.contains("incomplete TODO"),
-        "Expected final_output to be rejected with mixed TODOs in autonomous mode. Got: {}",
-        final_result
-    );
-}
-
-// =============================================================================
-// NON-AUTONOMOUS MODE TESTS - TODO check is NOT enforced
-// =============================================================================
-
-#[tokio::test]
-#[serial]
-async fn test_non_autonomous_final_output_allowed_with_incomplete_todos() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_non_autonomous_agent(&temp_dir).await;
-
-    // Write a TODO list with incomplete items
-    let todo_content = "- [ ] Phase 1: Setup\n  - [x] Create files\n  - [ ] Configure settings\n- [ ] Phase 2: Implementation";
-    let write_args = serde_json::json!({ "content": todo_content });
-    let write_call = create_tool_call("todo_write", write_args);
-    let write_result = agent.execute_tool(&write_call).await.unwrap();
-    assert!(write_result.contains("TODO list updated"), "Expected TODO write to succeed");
-
-    // In non-autonomous mode, final_output should succeed even with incomplete TODOs
-    let final_args = serde_json::json!({ "summary": "Partial completion is fine in interactive mode" });
-    let final_call = create_tool_call("final_output", final_args);
-    let final_result = agent.execute_tool(&final_call).await.unwrap();
-
-    // Verify that final_output succeeded (returns the summary, not a rejection)
-    assert!(
-        final_result.contains("Partial completion is fine in interactive mode"),
-        "Expected final_output to succeed in non-autonomous mode even with incomplete TODOs. Got: {}",
-        final_result
-    );
-    assert!(
-        !final_result.contains("incomplete TODO"),
-        "Expected NO rejection message in non-autonomous mode. Got: {}",
-        final_result
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_non_autonomous_final_output_allowed_with_mixed_todos() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_non_autonomous_agent(&temp_dir).await;
-
-    // Write a TODO list with mixed complete/incomplete items
-    let todo_content = "- [x] Phase 1: Setup\n- [ ] Phase 2: Implementation\n- [x] Phase 3: Testing";
-    let write_args = serde_json::json!({ "content": todo_content });
-    let write_call = create_tool_call("todo_write", write_args);
-    let _write_result = agent.execute_tool(&write_call).await.unwrap();
-
-    // In non-autonomous mode, final_output should succeed
-    let final_args = serde_json::json!({ "summary": "Interactive mode allows partial completion" });
-    let final_call = create_tool_call("final_output", final_args);
-    let final_result = agent.execute_tool(&final_call).await.unwrap();
-
-    // Verify success
-    assert!(
-        final_result.contains("Interactive mode allows partial completion"),
-        "Expected final_output to succeed in non-autonomous mode. Got: {}",
-        final_result
-    );
-}
diff --git a/crates/g3-core/tests/test_session_continuation.rs b/crates/g3-core/tests/test_session_continuation.rs
index 80861c4..c057820 100644
--- a/crates/g3-core/tests/test_session_continuation.rs
+++ b/crates/g3-core/tests/test_session_continuation.rs
@@ -42,7 +42,7 @@ fn test_session_continuation_creation() {
 
     assert_eq!(continuation.session_id, "test_session_123");
     assert_eq!(
-        continuation.final_output_summary,
+        continuation.summary,
         Some("Task completed successfully".to_string())
     );
     assert_eq!(continuation.context_percentage, 45.0);
@@ -108,7 +108,7 @@ fn test_save_and_load_continuation() {
         .expect("No continuation found");
 
     assert_eq!(loaded.session_id, original.session_id);
-    assert_eq!(loaded.final_output_summary, original.final_output_summary);
+    assert_eq!(loaded.summary, original.summary);
     assert_eq!(loaded.session_log_path, original.session_log_path);
     assert!((loaded.context_percentage - original.context_percentage).abs() < 0.01);
     assert_eq!(loaded.todo_snapshot, original.todo_snapshot);
@@ -358,7 +358,7 @@ fn test_continuation_serialization_format() {
 
     assert_eq!(parsed["version"], "1.0");
     assert_eq!(parsed["session_id"], "format_test");
-    assert_eq!(parsed["final_output_summary"], "Test summary");
+    assert_eq!(parsed["summary"], "Test summary");
     assert_eq!(parsed["session_log_path"], "/path/to/session.json");
     assert!((parsed["context_percentage"].as_f64().unwrap() - 42.5).abs() < 0.01);
     assert_eq!(parsed["todo_snapshot"], "- [x] Done\n- [ ] Todo");
@@ -410,7 +410,7 @@ fn test_multiple_saves_update_symlink() {
         .expect("No continuation");
     assert_eq!(loaded.session_id, "second_session");
     assert_eq!(
-        loaded.final_output_summary,
+        loaded.summary,
         Some("Second summary".to_string())
     );
 
diff --git a/crates/g3-core/tests/todo_staleness_test.rs b/crates/g3-core/tests/todo_staleness_test.rs
index d9e7785..b2077b1 100644
--- a/crates/g3-core/tests/todo_staleness_test.rs
+++ b/crates/g3-core/tests/todo_staleness_test.rs
@@ -81,9 +81,6 @@ impl UiWriter for MockUiWriter {
             .push(format!("CHOICE: {} Options: {:?}", message, options));
         self.choice_responses.lock().unwrap().pop().unwrap_or(0)
     }
-    fn print_final_output(&self, summary: &str) {
-        self.output.lock().unwrap().push(format!("FINAL: {}", summary));
-    }
 }
 
 #[tokio::test]
diff --git a/crates/g3-planner/src/llm.rs b/crates/g3-planner/src/llm.rs
index 5ce8922..7875ebf 100644
--- a/crates/g3-planner/src/llm.rs
+++ b/crates/g3-planner/src/llm.rs
@@ -304,9 +304,6 @@ impl g3_core::ui_writer::UiWriter for PlannerUiWriter {
         0 // Default to first option
     }
     
-    fn print_final_output(&self, summary: &str) {
-        println!("\n📝 Final Output:\n{}", summary);
-    }
 }
 
 /// Call LLM to refine requirements using a full Agent with tool execution