coach mode feedback fix

2025-10-11 16:13:22 +11:00
parent f42e43a0d6
commit 05c21b61df
3 changed files with 89 additions and 5 deletions
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -1238,8 +1238,8 @@ Remember: Be thorough in your review but concise in your feedback. APPROVE if th
        // We have a valid coach result, process it
        let coach_result = coach_result_opt.unwrap();

-        // Extract the coach feedback using the semantic extraction from TaskResult
-        let coach_feedback_text = coach_result.extract_last_block();
+        // Extract the complete coach feedback from final_output
+        let coach_feedback_text = coach_result.extract_final_output();

        // Log the size of the feedback for debugging
        info!(
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -1500,14 +1500,27 @@ The tool will execute immediately and you'll receive the result (success or erro
                            if tool_call.tool != "final_output" {
                                let output_lines: Vec<&str> = tool_result.lines().collect();
                                const MAX_LINES: usize = 5;
+                                const MAX_LINE_WIDTH: usize = 80;

                                if output_lines.len() <= MAX_LINES {
                                    for line in output_lines {
-                                        self.ui_writer.print_tool_output_line(line);
+                                        // Clip line to max width
+                                        let clipped_line = if line.len() > MAX_LINE_WIDTH {
+                                            format!("{}...", &line[..MAX_LINE_WIDTH.saturating_sub(3)])
+                                        } else {
+                                            line.to_string()
+                                        };
+                                        self.ui_writer.print_tool_output_line(&clipped_line);
                                    }
                                } else {
                                    for line in output_lines.iter().take(MAX_LINES) {
-                                        self.ui_writer.print_tool_output_line(line);
+                                        // Clip line to max width
+                                        let clipped_line = if line.len() > MAX_LINE_WIDTH {
+                                            format!("{}...", &line[..MAX_LINE_WIDTH.saturating_sub(3)])
+                                        } else {
+                                            line.to_string()
+                                        };
+                                        self.ui_writer.print_tool_output_line(&clipped_line);
                                    }
                                    let hidden_count = output_lines.len() - MAX_LINES;
                                    self.ui_writer.print_tool_output_summary(hidden_count);
--- a/crates/g3-core/src/task_result.rs
+++ b/crates/g3-core/src/task_result.rs
@@ -17,6 +17,42 @@ impl TaskResult {
        }
    }

+    /// Extract the final_output content from the response (for coach feedback in autonomous mode)
+    /// This looks for the complete final_output content, not just the last block
+    pub fn extract_final_output(&self) -> String {
+        // Remove any timing information at the end
+        let content_without_timing = if let Some(timing_pos) = self.response.rfind("\n⏱️") {
+            &self.response[..timing_pos]
+        } else {
+            &self.response
+        };
+
+        // Look for the final_output marker pattern
+        // The final_output content typically appears after the tool is called
+        // and is the substantive content that follows
+        
+        // First, try to find if there's a clear final_output section
+        // This would be the content after the last tool execution
+        if let Some(final_output_pos) = content_without_timing.rfind("final_output") {
+            // Find the content that follows the final_output call
+            // Skip past the tool call line and any immediate formatting
+            if let Some(content_start) = content_without_timing[final_output_pos..].find('\n') {
+                let start_pos = final_output_pos + content_start + 1;
+                let final_content = &content_without_timing[start_pos..];
+                
+                // Trim and return the complete content
+                let trimmed = final_content.trim();
+                if !trimmed.is_empty() {
+                    return trimmed.to_string();
+                }
+            }
+        }
+        
+        // Fallback to the original extract_last_block behavior if we can't find final_output
+        // This maintains backward compatibility
+        self.extract_last_block()
+    }
+
    /// Extract the last block from the response (for coach feedback in autonomous mode)
    /// This looks for the final_output content which is the last substantial block
    pub fn extract_last_block(&self) -> String {
@@ -43,7 +79,7 @@ impl TaskResult {

    /// Check if the response contains an approval (for autonomous mode)
    pub fn is_approved(&self) -> bool {
-        self.extract_last_block().contains("IMPLEMENTATION_APPROVED")
+        self.extract_final_output().contains("IMPLEMENTATION_APPROVED")
    }
 }

@@ -94,4 +130,39 @@ mod tests {
        let result = TaskResult::new(multiple_empty, context_window);
        assert_eq!(result.extract_last_block(), "Some content");
    }
+
+    #[test]
+    fn test_extract_final_output() {
+        let context_window = ContextWindow::new(1000);
+        
+        // Test case 1: Response with final_output tool call
+        let response_with_final_output = "Analyzing files...\n\nCalling final_output\n\nThis is the complete feedback\nwith multiple lines\nand important details\n\n⏱️ 2.3s".to_string();
+        let result = TaskResult::new(response_with_final_output, context_window.clone());
+        assert_eq!(result.extract_final_output(), "This is the complete feedback\nwith multiple lines\nand important details");
+        
+        // Test case 2: Response with IMPLEMENTATION_APPROVED in final_output
+        let response_approved = "Review complete\n\nfinal_output called\n\nIMPLEMENTATION_APPROVED".to_string();
+        let result = TaskResult::new(response_approved, context_window.clone());
+        assert_eq!(result.extract_final_output(), "IMPLEMENTATION_APPROVED");
+        assert!(result.is_approved());
+        
+        // Test case 3: Response with detailed feedback in final_output
+        let response_feedback = "Checking implementation...\n\nfinal_output\n\nThe following issues need to be addressed:\n1. Missing error handling in main.rs\n2. Tests are not comprehensive\n3. Documentation needs improvement\n\nPlease fix these issues.".to_string();
+        let result = TaskResult::new(response_feedback, context_window.clone());
+        let extracted = result.extract_final_output();
+        assert!(extracted.contains("The following issues need to be addressed:"));
+        assert!(extracted.contains("1. Missing error handling"));
+        assert!(extracted.contains("Please fix these issues."));
+        assert!(!result.is_approved());
+        
+        // Test case 4: Response without final_output (fallback to extract_last_block)
+        let response_no_final_output = "Some analysis\n\nFinal thoughts here".to_string();
+        let result = TaskResult::new(response_no_final_output, context_window.clone());
+        assert_eq!(result.extract_final_output(), "Final thoughts here");
+        
+        // Test case 5: Empty response
+        let empty_response = "".to_string();
+        let result = TaskResult::new(empty_response, context_window);
+        assert_eq!(result.extract_final_output(), "");
+    }
 }