make sure coach feedback is extracted.

actually record coach feedback and use it
2025-12-02 22:00:58 +11:00 · 2025-12-02 21:23:50 +11:00
5 changed files with 444 additions and 44 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1377,6 +1377,7 @@ dependencies = [
 "serde",
 "serde_json",
 "sha2",
 "tempfile",
 "termimad",
 "tokio",
 "tokio-util",
--- a/crates/g3-cli/Cargo.toml
+++ b/crates/g3-cli/Cargo.toml
@@ -27,3 +27,6 @@ chrono = { version = "0.4", features = ["serde"] }
 crossterm = "0.29.0"
 ratatui = "0.29"
 termimad = "0.34.0"
 [dev-dependencies]
 tempfile = "3.8"
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -163,15 +163,66 @@ fn extract_coach_feedback_from_logs(
                if let Some(context_window) = log_json.get("context_window") {
                    if let Some(conversation_history) = context_window.get("conversation_history") {
                        if let Some(messages) = conversation_history.as_array() {
-                            // Simply get the last message content - this is the coach's final feedback
+                            // Go backwards through the conversation to find the last tool result
-                            if let Some(last_message) = messages.last() {
+                            // that corresponds to a final_output tool call
-                                if let Some(content) = last_message.get("content") {
+                            for i in (0..messages.len()).rev() {
-                                    if let Some(content_str) = content.as_str() {
+                                let msg = &messages[i];
-                                        output.print(&format!(
+                                
-                                            "✅ Extracted coach feedback from session: {}",
+                                // Check if this is a User message with "Tool result:"
-                                            session_id
+                                if let Some(role) = msg.get("role") {
-                                        ));
+                                    if let Some(role_str) = role.as_str() {
-                                        return Ok(content_str.to_string());
+                                        if role_str == "User" || role_str == "user" {
                                            if let Some(content) = msg.get("content") {
                                                if let Some(content_str) = content.as_str() {
                                                    if content_str.starts_with("Tool result:") {
                                                        // Found a tool result, now check the preceding message
                                                        // to verify it was a final_output tool call
                                                        if i > 0 {
                                                            let prev_msg = &messages[i - 1];
                                                            if let Some(prev_role) = prev_msg.get("role") {
                                                                if let Some(prev_role_str) = prev_role.as_str() {
                                                                    if prev_role_str == "assistant" || prev_role_str == "Assistant" {
                                                                        if let Some(prev_content) = prev_msg.get("content") {
                                                                            if let Some(prev_content_str) = prev_content.as_str() {
                                                                                // Check if the previous assistant message contains a final_output tool call
                                                                                if prev_content_str.contains("\"tool\": \"final_output\"") {
                                                                                    // This is a final_output tool result
                                                                                    let feedback = if content_str.starts_with("Tool result: ") {
                                                                                        content_str.strip_prefix("Tool result: ")
                                                                                            .unwrap_or(content_str)
                                                                                            .to_string()
                                                                                    } else {
                                                                                        content_str.to_string()
                                                                                    };
                                                                                    output.print(&format!(
                                                                                        "Coach feedback extracted: {} characters (from {} total)",
                                                                                        feedback.len(),
                                                                                        content_str.len()
                                                                                    ));
                                                                                    output.print(&format!("Coach feedback:\n{}", feedback));
                                                                                    output.print(&format!(
                                                                                        "✅ Extracted coach feedback from session: {} (verified final_output tool)",
                                                                                        session_id
                                                                                    ));
                                                                                    return Ok(feedback);
                                                                                } else {
                                                                                    output.print(&format!(
                                                                                        "⚠️  Skipping tool result at index {} - not a final_output tool call",
                                                                                        i
                                                                                    ));
                                                                                }
                                                                            }
                                                                        }
                                                                    }
                                                                }
                                                            }
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
@@ -187,7 +238,7 @@ fn extract_coach_feedback_from_logs(
        "CRITICAL: Could not extract coach feedback from session: {}\n\
         Log file path: {:?}\n\
         Log file exists: {}\n\
-         This indicates the coach did not call any tool or the log is corrupted.\n\
+         This indicates the coach did not call final_output tool or the log is corrupted.\n\
         Coach result response length: {} chars",
        session_id,
        log_file_path,
--- a/crates/g3-cli/tests/coach_feedback_extraction_test.rs
+++ b/crates/g3-cli/tests/coach_feedback_extraction_test.rs
@@ -0,0 +1,336 @@
 use serde_json::json;
 use std::fs;
 use tempfile::TempDir;
 #[test]
 fn test_extract_coach_feedback_with_timing_message() {
    // Create a temporary directory for logs
    let temp_dir = TempDir::new().unwrap();
    let logs_dir = temp_dir.path().join("logs");
    fs::create_dir(&logs_dir).unwrap();
    // Create a mock session log with the problematic conversation history
    // where timing message appears after the tool result
    let session_id = "test_session_123";
    let log_file_path = logs_dir.join(format!("g3_session_{}.json", session_id));
    let log_content = json!({
        "session_id": session_id,
        "context_window": {
            "conversation_history": [
                {
                    "role": "assistant",
                    "content": "{\"tool\": \"final_output\", \"args\": {\"summary\":\"IMPLEMENTATION_APPROVED\"}}"
                },
                {
                    "role": "user",
                    "content": "Tool result: IMPLEMENTATION_APPROVED"
                },
                {
                    "role": "assistant",
                    "content": "🕝 27.7s | 💭 7.5s"
                }
            ]
        }
    });
    fs::write(&log_file_path, serde_json::to_string_pretty(&log_content).unwrap()).unwrap();
    // Now test the extraction logic
    let log_content_str = fs::read_to_string(&log_file_path).unwrap();
    let log_json: serde_json::Value = serde_json::from_str(&log_content_str).unwrap();
    if let Some(context_window) = log_json.get("context_window") {
        if let Some(conversation_history) = context_window.get("conversation_history") {
            if let Some(messages) = conversation_history.as_array() {
                // This is the key logic we're testing - find the last USER message with "Tool result:"
                let last_tool_result = messages.iter().rev().find(|msg| {
                    if let Some(role) = msg.get("role") {
                        if let Some(role_str) = role.as_str() {
                            if role_str == "User" || role_str == "user" {
                                if let Some(content) = msg.get("content") {
                                    if let Some(content_str) = content.as_str() {
                                        return content_str.starts_with("Tool result:");
                                    }
                                }
                            }
                        }
                    }
                    false
                });
                // Verify we found the correct message
                assert!(last_tool_result.is_some(), "Should find the tool result message");
                if let Some(last_message) = last_tool_result {
                    if let Some(content) = last_message.get("content") {
                        if let Some(content_str) = content.as_str() {
                            let feedback = if content_str.starts_with("Tool result: ") {
                                content_str.strip_prefix("Tool result: ").unwrap_or(content_str)
                            } else {
                                content_str
                            };
                            // Verify we extracted the correct feedback
                            assert_eq!(feedback, "IMPLEMENTATION_APPROVED", "Should extract the actual feedback, not timing");
                            // Verify the feedback is NOT the timing message
                            assert!(!feedback.contains("🕝"), "Feedback should not be the timing message");
                            println!("✅ Successfully extracted coach feedback: {}", feedback);
                            return;
                        }
                    }
                }
            }
        }
    }
    panic!("Failed to extract coach feedback");
 }
 #[test]
 fn test_extract_only_final_output_tool_results() {
    // Test that we only extract tool results from final_output, not from other tools
    let temp_dir = TempDir::new().unwrap();
    let logs_dir = temp_dir.path().join("logs");
    fs::create_dir(&logs_dir).unwrap();
    let session_id = "test_session_final_output_only";
    let log_file_path = logs_dir.join(format!("g3_session_{}.json", session_id));
    let log_content = json!({
        "session_id": session_id,
        "context_window": {
            "conversation_history": [
                {
                    "role": "assistant",
                    "content": "{\"tool\": \"shell\", \"args\": {\"command\":\"ls\"}}"
                },
                {
                    "role": "user",
                    "content": "Tool result: file1.txt\nfile2.txt"
                },
                {
                    "role": "assistant",
                    "content": "{\"tool\": \"read_file\", \"args\": {\"file_path\":\"test.txt\"}}"
                },
                {
                    "role": "user",
                    "content": "Tool result: This is test content"
                },
                {
                    "role": "assistant",
                    "content": "{\"tool\": \"final_output\", \"args\": {\"summary\":\"APPROVED_RESULT\"}}"
                },
                {
                    "role": "user",
                    "content": "Tool result: APPROVED_RESULT"
                },
                {
                    "role": "assistant",
                    "content": "🕝 20.5s | 💭 5.2s"
                }
            ]
        }
    });
    fs::write(&log_file_path, serde_json::to_string_pretty(&log_content).unwrap()).unwrap();
    // Test the new extraction logic that verifies the tool is final_output
    let log_content_str = fs::read_to_string(&log_file_path).unwrap();
    let log_json: serde_json::Value = serde_json::from_str(&log_content_str).unwrap();
    if let Some(context_window) = log_json.get("context_window") {
        if let Some(conversation_history) = context_window.get("conversation_history") {
            if let Some(messages) = conversation_history.as_array() {
                // Go backwards through messages to find final_output tool result
                for i in (0..messages.len()).rev() {
                    let msg = &messages[i];
                    if let Some(role) = msg.get("role") {
                        if let Some(role_str) = role.as_str() {
                            if role_str == "User" || role_str == "user" {
                                if let Some(content) = msg.get("content") {
                                    if let Some(content_str) = content.as_str() {
                                        if content_str.starts_with("Tool result:") {
                                            // Check if preceding message was final_output
                                            if i > 0 {
                                                let prev_msg = &messages[i - 1];
                                                if let Some(prev_content) = prev_msg.get("content") {
                                                    if let Some(prev_content_str) = prev_content.as_str() {
                                                        if prev_content_str.contains("\"tool\": \"final_output\"") {
                                                            let feedback = content_str.strip_prefix("Tool result: ").unwrap_or(content_str);
                                                            assert_eq!(feedback, "APPROVED_RESULT", "Should extract only final_output result");
                                                            println!("✅ Correctly extracted only final_output tool result: {}", feedback);
                                                            return;
                                                        }
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    panic!("Failed to extract final_output tool result");
 }
 #[test]
 fn test_extract_coach_feedback_without_timing_message() {
    // Create a temporary directory for logs
    let temp_dir = TempDir::new().unwrap();
    let logs_dir = temp_dir.path().join("logs");
    fs::create_dir(&logs_dir).unwrap();
    // Test the case where there's no timing message (backward compatibility)
    let session_id = "test_session_456";
    let log_file_path = logs_dir.join(format!("g3_session_{}.json", session_id));
    let log_content = json!({
        "session_id": session_id,
        "context_window": {
            "conversation_history": [
                {
                    "role": "assistant",
                    "content": "{\"tool\": \"final_output\", \"args\": {\"summary\":\"TEST_FEEDBACK\"}}"
                },
                {
                    "role": "user",
                    "content": "Tool result: TEST_FEEDBACK"
                }
            ]
        }
    });
    fs::write(&log_file_path, serde_json::to_string_pretty(&log_content).unwrap()).unwrap();
    // Test extraction
    let log_content_str = fs::read_to_string(&log_file_path).unwrap();
    let log_json: serde_json::Value = serde_json::from_str(&log_content_str).unwrap();
    if let Some(context_window) = log_json.get("context_window") {
        if let Some(conversation_history) = context_window.get("conversation_history") {
            if let Some(messages) = conversation_history.as_array() {
                let last_tool_result = messages.iter().rev().find(|msg| {
                    if let Some(role) = msg.get("role") {
                        if let Some(role_str) = role.as_str() {
                            if role_str == "User" || role_str == "user" {
                                if let Some(content) = msg.get("content") {
                                    if let Some(content_str) = content.as_str() {
                                        return content_str.starts_with("Tool result:");
                                    }
                                }
                            }
                        }
                    }
                    false
                });
                assert!(last_tool_result.is_some());
                if let Some(last_message) = last_tool_result {
                    if let Some(content) = last_message.get("content") {
                        if let Some(content_str) = content.as_str() {
                            let feedback = content_str.strip_prefix("Tool result: ").unwrap_or(content_str);
                            assert_eq!(feedback, "TEST_FEEDBACK");
                            println!("✅ Successfully extracted coach feedback without timing: {}", feedback);
                            return;
                        }
                    }
                }
            }
        }
    }
    panic!("Failed to extract coach feedback");
 }
 #[test]
 fn test_extract_coach_feedback_with_multiple_tool_results() {
    // Test that we get the LAST tool result when there are multiple
    let temp_dir = TempDir::new().unwrap();
    let logs_dir = temp_dir.path().join("logs");
    fs::create_dir(&logs_dir).unwrap();
    let session_id = "test_session_789";
    let log_file_path = logs_dir.join(format!("g3_session_{}.json", session_id));
    let log_content = json!({
        "session_id": session_id,
        "context_window": {
            "conversation_history": [
                {
                    "role": "assistant",
                    "content": "{\"tool\": \"shell\", \"args\": {\"command\":\"ls\"}}"
                },
                {
                    "role": "user",
                    "content": "Tool result: file1.txt\nfile2.txt"
                },
                {
                    "role": "assistant",
                    "content": "{\"tool\": \"final_output\", \"args\": {\"summary\":\"FINAL_RESULT\"}}"
                },
                {
                    "role": "user",
                    "content": "Tool result: FINAL_RESULT"
                },
                {
                    "role": "assistant",
                    "content": "🕝 15.2s | 💭 3.1s"
                }
            ]
        }
    });
    fs::write(&log_file_path, serde_json::to_string_pretty(&log_content).unwrap()).unwrap();
    // Test extraction
    let log_content_str = fs::read_to_string(&log_file_path).unwrap();
    let log_json: serde_json::Value = serde_json::from_str(&log_content_str).unwrap();
    if let Some(context_window) = log_json.get("context_window") {
        if let Some(conversation_history) = context_window.get("conversation_history") {
            if let Some(messages) = conversation_history.as_array() {
                let last_tool_result = messages.iter().rev().find(|msg| {
                    if let Some(role) = msg.get("role") {
                        if let Some(role_str) = role.as_str() {
                            if role_str == "User" || role_str == "user" {
                                if let Some(content) = msg.get("content") {
                                    if let Some(content_str) = content.as_str() {
                                        return content_str.starts_with("Tool result:");
                                    }
                                }
                            }
                        }
                    }
                    false
                });
                assert!(last_tool_result.is_some());
                if let Some(last_message) = last_tool_result {
                    if let Some(content) = last_message.get("content") {
                        if let Some(content_str) = content.as_str() {
                            let feedback = content_str.strip_prefix("Tool result: ").unwrap_or(content_str);
                            // Should get the LAST tool result (final_output), not the first one (shell)
                            assert_eq!(feedback, "FINAL_RESULT", "Should extract the last tool result");
                            assert!(!feedback.contains("file1.txt"), "Should not extract earlier tool results");
                            println!("✅ Successfully extracted last tool result: {}", feedback);
                            return;
                        }
                    }
                }
            }
        }
    }
    panic!("Failed to extract coach feedback");
 }
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -3604,40 +3604,6 @@ impl<W: UiWriter> Agent<W> {
                                }
                            }
                            // Check if this was a final_output tool call
                            if tool_call.tool == "final_output" {
                                // The summary was already displayed via print_final_output
                                // Don't add it to full_response to avoid duplicate printing
                                // full_response is intentionally left empty/unchanged
                                self.ui_writer.println("");
                                let _ttft =
                                    first_token_time.unwrap_or_else(|| stream_start.elapsed());
                                // Add timing if needed
                                let final_response = if show_timing {
                                    format!(
                                        "🕝 {} | 💭 {}",
                                        Self::format_duration(stream_start.elapsed()),
                                        Self::format_duration(_ttft)
                                    )
                                } else {
                                    // Return empty string since content was already displayed
                                    String::new()
                                };
                                return Ok(TaskResult::new(
                                    final_response,
                                    self.context_window.clone(),
                                ));
                            }
                            // Closure marker with timing
                            if tool_call.tool != "final_output" {
                                self.ui_writer
                                    .print_tool_timing(&Self::format_duration(exec_duration));
                                self.ui_writer.print_agent_prompt();
                            }
                            // Add the tool call and result to the context window using RAW unfiltered content
                            // This ensures the log file contains the true raw content including JSON tool calls
                            let tool_message = if !raw_content_for_log.trim().is_empty() {
@@ -3701,6 +3667,43 @@ impl<W: UiWriter> Agent<W> {
                            self.context_window.add_message(tool_message);
                            self.context_window.add_message(result_message);
                            // Check if this was a final_output tool call
                            if tool_call.tool == "final_output" {
                                // Save context window BEFORE returning so the session log includes final_output
                                self.save_context_window("completed");
                                // The summary was already displayed via print_final_output
                                // Don't add it to full_response to avoid duplicate printing
                                // full_response is intentionally left empty/unchanged
                                self.ui_writer.println("");
                                let _ttft =
                                    first_token_time.unwrap_or_else(|| stream_start.elapsed());
                                // Add timing if needed
                                let final_response = if show_timing {
                                    format!(
                                        "🕝 {} | 💭 {}",
                                        Self::format_duration(stream_start.elapsed()),
                                        Self::format_duration(_ttft)
                                    )
                                } else {
                                    // Return empty string since content was already displayed
                                    String::new()
                                };
                                return Ok(TaskResult::new(
                                    final_response,
                                    self.context_window.clone(),
                                ));
                            }
                            // Closure marker with timing
                            if tool_call.tool != "final_output" {
                                self.ui_writer
                                    .print_tool_timing(&Self::format_duration(exec_duration));
                                self.ui_writer.print_agent_prompt();
                            }
                            // Update the request with the new context for next iteration
                            request.messages = self.context_window.conversation_history.clone();
@@ -3922,6 +3925,9 @@ impl<W: UiWriter> Agent<W> {
                                full_response = String::new();
                                self.ui_writer.println("");
                                // Save context window BEFORE returning
                                self.save_context_window("completed");
                                let _ttft =
                                    first_token_time.unwrap_or_else(|| stream_start.elapsed());
@@ -4060,6 +4066,9 @@ impl<W: UiWriter> Agent<W> {
                    }
                }
                // Save context window BEFORE returning
                self.save_context_window("completed");
                // Add timing if needed
                let final_response = if show_timing {
                    format!(
Author	SHA1	Message	Date
Jochen	0327a6dfdf	make sure coach feedback is extracted.	2025-12-02 22:00:58 +11:00
Jochen	928f2bfa9d	actually record coach feedback and use it	2025-12-02 21:23:50 +11:00