Remove final_output tool - let summaries stream naturally

- Remove final_output from tool definitions, dispatch, and misc tools - Update system prompts to request summaries as regular markdown text - Remove print_final_output from UiWriter trait and all implementations - Remove final_output handling from agent core logic - Rename final_output_summary → summary in session continuation - Delete final_output test files - Update tool count tests (12→11, 27→26) This allows LLM summaries to stream through the markdown formatter for a more natural, responsive user experience instead of buffering everything into a tool call.
2026-01-09 14:57:24 +11:00
parent bebf04c7bd
commit 777191b3cb
17 changed files with 262 additions and 726 deletions
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -148,9 +148,6 @@ fn extract_coach_feedback_from_logs(
    coach_agent: &g3_core::Agent<ConsoleUiWriter>,
    output: &SimpleOutput,
 ) -> Result<String> {
-    // CORRECT APPROACH: Get the session ID from the current coach agent
-    // and read its specific log file directly
-
    // Get the coach agent's session ID
    let session_id = coach_agent
        .get_session_id()
@@ -167,74 +164,78 @@ fn extract_coach_feedback_from_logs(
        logs_dir.join(format!("g3_session_{}.json", session_id))
    };

-    // Read the coach agent's specific log file
-    if log_file_path.exists() {
-        if let Ok(log_content) = std::fs::read_to_string(&log_file_path) {
-            if let Ok(log_json) = serde_json::from_str::<serde_json::Value>(&log_content) {
-                if let Some(context_window) = log_json.get("context_window") {
-                    if let Some(conversation_history) = context_window.get("conversation_history") {
-                        if let Some(messages) = conversation_history.as_array() {
-                            // Go backwards through the conversation to find the last tool result
-                            // that corresponds to a final_output tool call
-                            for i in (0..messages.len()).rev() {
-                                let msg = &messages[i];
-                                
-                                // Check if this is a User message with "Tool result:"
-                                if let Some(role) = msg.get("role") {
-                                    if let Some(role_str) = role.as_str() {
-                                        if role_str == "User" || role_str == "user" {
-                                            if let Some(content) = msg.get("content") {
-                                                if let Some(content_str) = content.as_str() {
-                                                    if content_str.starts_with("Tool result:") {
-                                                        // Found a tool result, now check the preceding message
-                                                        // to verify it was a final_output tool call
-                                                        if i > 0 {
-                                                            let prev_msg = &messages[i - 1];
-                                                            if let Some(prev_role) = prev_msg.get("role") {
-                                                                if let Some(prev_role_str) = prev_role.as_str() {
-                                                                    if prev_role_str == "assistant" || prev_role_str == "Assistant" {
-                                                                        if let Some(prev_content) = prev_msg.get("content") {
-                                                                            if let Some(prev_content_str) = prev_content.as_str() {
-                                                                                // Check if the previous assistant message contains a final_output tool call
-                                                                                if prev_content_str.contains("\"tool\": \"final_output\"") {
-                                                                                    // This is a final_output tool result
-                                                                                    let feedback = if content_str.starts_with("Tool result: ") {
-                                                                                        content_str.strip_prefix("Tool result: ")
-                                                                                            .unwrap_or(content_str)
-                                                                                            .to_string()
-                                                                                    } else {
-                                                                                        content_str.to_string()
-                                                                                    };
-                                                                                    
-                                                                                    output.print(&format!(
-                                                                                        "Coach feedback extracted: {} characters (from {} total)",
-                                                                                        feedback.len(),
-                                                                                        content_str.len()
-                                                                                    ));
-                                                                                    output.print(&format!("Coach feedback:\n{}", feedback));
-                                                                                    
-                                                                                    output.print(&format!(
-                                                                                        "✅ Extracted coach feedback from session: {} (verified final_output tool)",
-                                                                                        session_id
-                                                                                    ));
-                                                                                    return Ok(feedback);
-                                                                                } else {
-                                                                                    output.print(&format!(
-                                                                                        "⚠️  Skipping tool result at index {} - not a final_output tool call",
-                                                                                        i
-                                                                                    ));
-                                                                                }
-                                                                            }
-                                                                        }
-                                                                    }
-                                                                }
-                                                            }
-                                                        }
-                                                    }
-                                                }
-                                            }
-                                        }
-                                    }
+    // Try to extract from session log
+    if let Some(feedback) = try_extract_feedback_from_log(&log_file_path, output) {
+        output.print(&format!(
+            "✅ Extracted coach feedback from session: {}",
+            session_id
+        ));
+        return Ok(feedback);
+    }
+
+    // Fallback: use the TaskResult's extract_summary method
+    let fallback = coach_result.extract_summary();
+    if !fallback.is_empty() {
+        output.print(&format!(
+            "✅ Extracted coach feedback from response: {} chars",
+            fallback.len()
+        ));
+        return Ok(fallback);
+    }
+
+    // Last resort: return an error instead of panicking
+    Err(anyhow::anyhow!(
+        "Could not extract coach feedback from session: {}\n\
+         Log file path: {:?}\n\
+         Log file exists: {}\n\
+         Coach result response length: {} chars",
+        session_id,
+        log_file_path,
+        log_file_path.exists(),
+        coach_result.response.len()
+    ))
+}
+
+/// Helper function to extract feedback from a session log file
+/// Looks for the last assistant message with substantial text content
+fn try_extract_feedback_from_log(
+    log_file_path: &std::path::Path,
+    _output: &SimpleOutput,
+) -> Option<String> {
+    if !log_file_path.exists() {
+        return None;
+    }
+
+    let log_content = std::fs::read_to_string(log_file_path).ok()?;
+    let log_json: serde_json::Value = serde_json::from_str(&log_content).ok()?;
+
+    let messages = log_json
+        .get("context_window")?
+        .get("conversation_history")?
+        .as_array()?;
+
+    // Search backwards for the last assistant message with text content
+    for msg in messages.iter().rev() {
+        let role = msg.get("role").and_then(|v| v.as_str())?;
+
+        if role.eq_ignore_ascii_case("assistant") {
+            if let Some(content) = msg.get("content") {
+                // Handle string content
+                if let Some(content_str) = content.as_str() {
+                    let trimmed = content_str.trim();
+                    // Skip empty or very short responses (likely just tool calls)
+                    if !trimmed.is_empty() && trimmed.len() > 10 {
+                        return Some(trimmed.to_string());
+                    }
+                }
+                // Handle array content (native tool calling format)
+                if let Some(content_array) = content.as_array() {
+                    for block in content_array {
+                        if block.get("type").and_then(|v| v.as_str()) == Some("text") {
+                            if let Some(text) = block.get("text").and_then(|v| v.as_str()) {
+                                let trimmed = text.trim();
+                                if !trimmed.is_empty() && trimmed.len() > 10 {
+                                    return Some(trimmed.to_string());
                                }
                            }
                        }
@@ -244,18 +245,7 @@ fn extract_coach_feedback_from_logs(
        }
    }

-    // If we couldn't extract from logs, panic with detailed error
-    panic!(
-        "CRITICAL: Could not extract coach feedback from session: {}\n\
-         Log file path: {:?}\n\
-         Log file exists: {}\n\
-         This indicates the coach did not call final_output tool or the log is corrupted.\n\
-         Coach result response length: {} chars",
-        session_id,
-        log_file_path,
-        log_file_path.exists(),
-        coach_result.response.len()
-    );
+    None
 }

 use clap::Parser;
@@ -1492,7 +1482,7 @@ async fn run_interactive<W: UiWriter>(
            "   Context: {:.1}% used",
            continuation.context_percentage
        ));
-        if let Some(ref summary) = continuation.final_output_summary {
+        if let Some(ref summary) = continuation.summary {
            let preview: String = summary.chars().take(80).collect();
            output.print(&format!("   Last output: {}...", preview));
        }
@@ -2614,16 +2604,16 @@ Review the current state of the project and provide a concise critique focusing
 5. Use UI tools such as webdriver to test functionality thoroughly

 CRITICAL INSTRUCTIONS:
-1. You MUST use the final_output tool to provide your feedback
-2. The summary in final_output should be CONCISE and ACTIONABLE
+1. Provide your feedback as your final response message
+2. Your feedback should be CONCISE and ACTIONABLE
 3. Focus ONLY on what needs to be fixed or improved
-4. Do NOT include your analysis process, file contents, or compilation output in the summary
+4. Do NOT include your analysis process, file contents, or compilation output in your final feedback

 If the implementation thoroughly meets all requirements, compiles and is fully tested (especially UI flows) *WITHOUT* minor gaps or errors:
- Call final_output with summary: 'IMPLEMENTATION_APPROVED'
+- Respond with: 'IMPLEMENTATION_APPROVED'

 If improvements are needed:
- Call final_output with a brief summary listing ONLY the specific issues to fix
+- Respond with a brief summary listing ONLY the specific issues to fix

 Remember: Be clear in your review and concise in your feedback. APPROVE iff the implementation works and thoroughly fits the requirements (implementation > 95% complete). Be rigorous, especially by testing that all UI features work.",
            requirements
--- a/crates/g3-cli/src/machine_ui_writer.rs
+++ b/crates/g3-cli/src/machine_ui_writer.rs
@@ -108,8 +108,4 @@ impl UiWriter for MachineUiWriter {
        0
    }

-    fn print_final_output(&self, summary: &str) {
-        println!("FINAL_OUTPUT:");
-        println!("{}", summary);
-    }
 }
--- a/crates/g3-cli/src/ui_writer_impl.rs
+++ b/crates/g3-cli/src/ui_writer_impl.rs
@@ -362,36 +362,6 @@ impl UiWriter for ConsoleUiWriter {
        }
    }

-    fn print_final_output(&self, summary: &str) {
-        // Print a header separator
-        println!("\x1b[1;35m━━━ Summary ━━━\x1b[0m");
-        println!();
-        
-        // Use the same streaming markdown formatter for consistency
-        let mut skin = MadSkin::default();
-        skin.bold.set_fg(termimad::crossterm::style::Color::Green);
-        skin.italic.set_fg(termimad::crossterm::style::Color::Cyan);
-        skin.inline_code.set_fg(termimad::crossterm::style::Color::Rgb {
-            r: 216,
-            g: 177,
-            b: 114,
-        });
-        
-        let mut formatter = StreamingMarkdownFormatter::new(skin);
-        
-        // Process the entire summary through the formatter
-        let formatted = formatter.process(summary);
-        print!("{}", formatted);
-        
-        // Flush any remaining buffered content
-        let remaining = formatter.finish();
-        print!("{}", remaining);
-        let _ = io::stdout().flush();
-
-        // Print a footer separator
-        println!();
-        println!("\x1b[1;35m━━━━━━━━━━━━━━━\x1b[0m");
-    }

    fn filter_json_tool_calls(&self, content: &str) -> String {
        // Apply JSON tool call filtering for display
--- a/crates/g3-cli/tests/test_final_output.rs
+++ b/crates/g3-cli/tests/test_final_output.rs
@@ -1,178 +0,0 @@
-//! Quick test to verify final_output rendering works with streaming markdown
-//! Run with: cargo test -p g3-cli --test test_final_output -- --nocapture
-
-use std::io::{self, Write};
-
-#[test]
-fn test_final_output_visual() {
-    use g3_cli::streaming_markdown::StreamingMarkdownFormatter;
-    use termimad::MadSkin;
-
-    // Create the test markdown
-    let test_markdown = r##"# Task Completed Successfully
-
-Here's a summary of what was accomplished:
-
-## Rust Code Example
-
-Created a new function to handle user authentication:
-
-```rust
-use std::collections::HashMap;
-
-/// Authenticates a user with the given credentials
-pub async fn authenticate(username: &str, password: &str) -> Result<User, AuthError> {
-    let hash = hash_password(password)?;
-    
-    if let Some(user) = db.find_user(username).await? {
-        if user.password_hash == hash {
-            Ok(user)
-        } else {
-            Err(AuthError::InvalidPassword)
-        }
-    } else {
-        Err(AuthError::UserNotFound)
-    }
-}
-```
-
-## Python Example
-
-Also added a Python script for data processing:
-
-```python
-import pandas as pd
-from typing import List, Dict
-
-def process_data(items: List[Dict]) -> pd.DataFrame:
-    """Process raw items into a cleaned DataFrame."""
-    df = pd.DataFrame(items)
-    df['timestamp'] = pd.to_datetime(df['timestamp'])
-    df = df.dropna(subset=['value'])
-    return df.sort_values('timestamp')
-```
-
-## JavaScript/TypeScript
-
-Frontend component:
-
-```typescript
-interface User {
-  id: string;
-  name: string;
-  email: string;
-}
-
-const UserCard: React.FC<{ user: User }> = ({ user }) => {
-  return (
-    <div className="user-card">
-      <h3>{user.name}</h3>
-      <p>{user.email}</p>
-    </div>
-  );
-};
-```
-
-## Shell Commands
-
-Deployment script:
-
-```bash
-#!/bin/bash
-set -euo pipefail
-
-echo "Building project..."
-cargo build --release
-
-echo "Running tests..."
-cargo test --all
-
-echo "Deploying to production..."
-rsync -avz ./target/release/app server:/opt/app/
-```
-
-## JSON Configuration
-
-```json
-{
-  "name": "my-project",
-  "version": "1.0.0",
-  "dependencies": {
-    "serde": "1.0",
-    "tokio": { "version": "1.0", "features": ["full"] }
-  }
-}
-```
-
-## Other Markdown Features
-
-This section tests that **bold text**, *italic text*, and `inline code` still work correctly.
-
-### Lists
-
- First item
- Second item with **bold**
- Third item with `code`
-
-### Numbered List
-
-1. Step one
-2. Step two
-3. Step three
-
-### Blockquote
-
-> This is a blockquote that should be rendered
-> with proper styling by termimad.
-
-### Table
-
-| Language | Extension | Use Case |
-|----------|-----------|----------|
-| Rust | .rs | Systems |
-| Python | .py | Scripts |
-| TypeScript | .ts | Frontend |
-
-## Code Without Language
-
-```
-This is a code block without a language specified.
-It should still be rendered as code, just without
-syntax highlighting.
-```
-
-## Final Notes
-
-All changes have been tested and verified. The implementation:
-
- ✅ Handles multiple languages
- ✅ Preserves markdown formatting
- ✅ Works with nested structures
- ✅ Gracefully handles edge cases
-"##;
-
-    // Create a styled markdown skin (same as in print_final_output)
-    let mut skin = MadSkin::default();
-    skin.bold.set_fg(termimad::crossterm::style::Color::Green);
-    skin.italic.set_fg(termimad::crossterm::style::Color::Cyan);
-    skin.inline_code.set_fg(termimad::crossterm::style::Color::Rgb {
-        r: 216,
-        g: 177,
-        b: 114,
-    });
-
-    // Print header
-    println!("\n\x1b[1;35m━━━ Summary ━━━\x1b[0m\n");
-
-    // Use the streaming markdown formatter (same as print_final_output now uses)
-    let mut formatter = StreamingMarkdownFormatter::new(skin);
-    let formatted = formatter.process(test_markdown);
-    print!("{}", formatted);
-    let remaining = formatter.finish();
-    print!("{}", remaining);
-
-    // Print footer
-    println!("\n\x1b[1;35m━━━━━━━━━━━━━━━\x1b[0m");
-
-    let _ = io::stdout().flush();
-}