final output fix for auto mode

2025-10-09 09:21:19 +11:00
parent cd489fb235
commit 9d1eef82b9
1 changed files with 56 additions and 16 deletions
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -690,8 +690,8 @@ async fn run_autonomous(
            )
        } else {
            format!(
-                "You are G3 in implementation mode. You need to address the coach's feedback and improve your implementation.\n\nORIGINAL REQUIREMENTS:\n{}\n\nCOACH FEEDBACK TO ADDRESS:\n{}\n\nPlease make the necessary improvements to address the coach's feedback while ensuring all original requirements are met.",
+                "You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.",
-                requirements, coach_feedback
+                coach_feedback, requirements
            )
        };
@@ -731,7 +731,7 @@ async fn run_autonomous(
        // Coach mode: critique the implementation
        let coach_prompt = format!(
-            "You are G3 in coach mode. Your role is to critique and review implementations against requirements.
+            "You are G3 in coach mode. Your role is to critique and review implementations against requirements and provide concise, actionable feedback.
 REQUIREMENTS:
 {}
@@ -743,12 +743,19 @@ Review the current state of the project and provide a concise critique focusing
 3. What requirements are missing or incorrect
 4. Specific improvements needed to satisfy requirements
-IMPORTANT: You MUST use the final_output tool to provide your feedback.
+CRITICAL INSTRUCTIONS:
 1. You MUST use the final_output tool to provide your feedback
 2. The summary in final_output should be CONCISE and ACTIONABLE
 3. Focus ONLY on what needs to be fixed or improved
 4. Do NOT include your analysis process, file contents, or compilation output in the summary
-If the implementation correctly meets all requirements, call final_output with summary: 'IMPLEMENTATION_APPROVED'
+If the implementation correctly meets all requirements and compiles without errors:
-If improvements are needed, call final_output with specific actionable feedback as the summary.
+- Call final_output with summary: 'IMPLEMENTATION_APPROVED'
-Be thorough but don't be overly critical. APPROVE the implementation if it doesn't have compile errors, glaring omissions and generally fits the bill.",
+If improvements are needed:
 - Call final_output with a brief summary listing ONLY the specific issues to fix
 Remember: Be thorough in your review but concise in your feedback. APPROVE if the implementation works and generally fits the requirements.",
            requirements
        );
@@ -760,20 +767,53 @@ Be thorough but don't be overly critical. APPROVE the implementation if it doesn
        output.print("🎓 Coach review completed");
        // Extract the actual feedback text from the coach result
-        // The coach_result might contain timing information at the end (⏱️ ... | 💭 ...)
+        // IMPORTANT: We only want the final_output summary, not the entire conversation
-        // We need to extract just the feedback content
+        // The coach_result contains the full conversation including file reads, analysis, etc.
-        let coach_feedback_text = if let Some(timing_pos) = coach_result.rfind("\n⏱️") {
+        // We need to extract ONLY the final_output content
-            coach_result[..timing_pos].trim().to_string()
+        
        let coach_feedback_text = {
            // Look for the final_output content in the coach's response
            // In autonomous mode, the final_output is returned without the "=> " prefix
            // The coach result should end with the summary content from final_output
            // First, remove any timing information at the end
            let content_without_timing = if let Some(timing_pos) = coach_result.rfind("\n⏱️") {
                &coach_result[..timing_pos]
            } else {
-            coach_result.trim().to_string()
+                &coach_result
            };
            // The final_output content is typically the last substantial text in the response
            // after all tool executions. Look for it after the last tool execution marker
            // or take the last paragraph if no clear markers
            // Split by double newlines to find the last substantial block
            let blocks: Vec<&str> = content_without_timing.split("\n\n").collect();
            // Find the last non-empty block that isn't just whitespace
            let final_block = blocks.iter()
                .rev()
                .find(|block| !block.trim().is_empty())
                .map(|block| block.trim().to_string())
                .unwrap_or_else(|| {
                    // Fallback: if we can't find a clear block, take the whole thing
                    // but this shouldn't happen if the coach properly calls final_output
                    content_without_timing.trim().to_string()
                });
            final_block
        };
        // Log the size of the feedback for debugging
        info!(
            "Coach feedback extracted: {} characters (from {} total)",
            coach_feedback_text.len(),
            coach_result.len()
        );
        // Check if we got empty feedback (this can happen if the coach doesn't call final_output)
        if coach_feedback_text.is_empty() {
            output.print("⚠️ Coach did not provide feedback. This may be a model issue.");
            output.print("   Retrying with a more explicit prompt...");
            // For now, we'll treat empty feedback as needing improvements
            coach_feedback = "The implementation needs review. Please ensure all requirements are met and the code compiles without errors.".to_string();
            turn += 1;
            continue;