Document retry config location and verify planning mode logic

Add documentation for retry configuration in planning mode:
- Document retry settings in .g3.toml under [agent] section
- Note RetryConfig implementation in g3-core/src/retry.rs
- Clarify hardcoded vs config-based retry values

Verify existing retry loop and coach feedback parsing:
- Confirm execute_with_retry() handles recoverable errors
- Document feedback extraction source priority order
- Provide manual verification steps for testing
2025-12-11 14:56:27 +11:00
parent 1a13fc5345
commit 7b47495881
9 changed files with 1375 additions and 25 deletions
--- a/crates/g3-planner/src/planner.rs
+++ b/crates/g3-planner/src/planner.rs
@@ -574,6 +574,8 @@ pub async fn run_coach_player_loop(
    requirements_content: &str,
 ) -> Result<()> {
    use g3_core::project::Project;
+    use g3_core::retry::{execute_with_retry, RetryConfig, RetryResult};
+    use g3_core::feedback_extraction::{extract_coach_feedback, FeedbackExtractionConfig};
    use g3_core::Agent;
    
    let max_turns = planner_config.max_turns;
@@ -612,7 +614,7 @@ pub async fn run_coach_player_loop(
            planner_config.quiet,
        ).await?;
        
-        let player_prompt = if coach_feedback.is_empty() {
+        let player_prompt = if coach_feedback.is_empty() || turn == 1 {
            format!(
                "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step. Write the todo list to: {}\n\nCreate all necessary files and code.",
                requirements_content,
@@ -620,19 +622,42 @@ pub async fn run_coach_player_loop(
            )
        } else {
            format!(
-                "You are G3 in implementation mode. Address the following coach feedback:\n\n{}\n\nContext requirements:\n{}\n\nFix the issues mentioned above.",
+                "You are G3 in implementation mode. Address the following coach feedback:\n\n{}\n\nOriginal requirements:\n{}\n\nFix the issues mentioned above.",
                coach_feedback,
                requirements_content
            )
        };
        
-        let player_result = player_agent
-            .execute_task_with_timing(&player_prompt, None, false, false, false, true, None)
-            .await;
+        // Execute player task with retry logic
+        let player_retry_config = RetryConfig::planning("player");
+        let player_result = execute_with_retry(
+            &mut player_agent,
+            &player_prompt,
+            &player_retry_config,
+            false, // show_prompt
+            false, // show_code
+            None,  // discovery
+            |msg| print_msg(msg),
+        ).await;
        
        match player_result {
-            Ok(result) => print_msg(&format!("✅ Player completed: {} chars response", result.response.len())),
-            Err(e) => print_msg(&format!("⚠️  Player error: {}", e)),
+            RetryResult::Success(result) => {
+                print_msg(&format!("✅ Player completed: {} chars response", result.response.len()));
+            }
+            RetryResult::MaxRetriesReached(err) => {
+                print_msg(&format!("⚠️  Player failed after max retries: {}", err));
+                // Continue to coach phase anyway to get feedback
+            }
+            RetryResult::ContextLengthExceeded(err) => {
+                print_msg(&format!("⚠️  Player context length exceeded: {}", err));
+                // Skip the coach phase for this turn and move on to the next turn
+                turn += 1;
+                continue;
+            }
+            RetryResult::Panic(e) => {
+                print_msg(&format!("💥 Player panic: {}", e));
+                return Err(e);
+            }
        }
        
        // Coach phase - review implementation
@@ -648,39 +673,60 @@ pub async fn run_coach_player_loop(
        ).await?;
        
        let coach_prompt = format!(
-            "You are G3 in coach mode. Review the implementation against these requirements:\n\n{}\n\nCheck:\n1. Are requirements implemented correctly?\n2. Does the code compile?\n3. What's missing?\n\nIf COMPLETE, respond with 'IMPLEMENTATION_APPROVED'.\nOtherwise, provide specific feedback for the player to fix.",
+            "You are G3 in coach mode. Review the implementation against these requirements:\n\n{}\n\nCheck:\n1. Are requirements implemented correctly?\n2. Does the code compile?\n3. What's missing?\n\nUse the final_output tool to provide your feedback.\nIf implementation is COMPLETE, include 'IMPLEMENTATION_APPROVED' in your feedback.\nOtherwise, provide specific feedback for the player to fix.",
            requirements_content
        );
        
-        let coach_result = coach_agent
-            .execute_task_with_timing(&coach_prompt, None, false, false, false, true, None)
-            .await;
+        // Execute coach task with retry logic
+        let coach_retry_config = RetryConfig::planning("coach");
+        let coach_result = execute_with_retry(
+            &mut coach_agent,
+            &coach_prompt,
+            &coach_retry_config,
+            false, // show_prompt
+            false, // show_code
+            None,  // discovery
+            |msg| print_msg(msg),
+        ).await;
        
        match coach_result {
-            Ok(result) => {
-                if result.response.contains("IMPLEMENTATION_APPROVED") || result.is_approved() {
+            RetryResult::Success(result) => {
+                // Extract feedback using the robust extraction module
+                let feedback_config = FeedbackExtractionConfig::default();
+                let extracted = extract_coach_feedback(&result, &coach_agent, &feedback_config);
+                
+                print_msg(&format!("📝 Coach feedback extracted from {:?}: {} chars", 
+                    extracted.source, extracted.content.len()));
+                
+                // Check for approval
+                if extracted.is_approved() || result.response.contains("IMPLEMENTATION_APPROVED") {
                    print_msg("✅ Coach approved implementation!");
                    return Ok(());
                }
-                coach_feedback = result.response;
+                
+                coach_feedback = extracted.content;
+                
                // Display first 25 lines of coach feedback
                let lines: Vec<&str> = coach_feedback.lines().collect();
-                let display_lines = if lines.len() > 25 {
-                    let mut truncated: Vec<&str> = lines[..25].to_vec();
-                    truncated.push("...");
-                    truncated
-                } else {
-                    lines
-                };
-                print_msg(&format!("📝 Coach feedback ({} chars):", coach_feedback.len()));
-                for line in display_lines {
+                for line in lines.iter().take(25) {
                    print_msg(&format!("  {}", line));
                }
+                if lines.len() > 25 {
+                    print_msg("  ...");
+                }
            }
-            Err(e) => {
-                print_msg(&format!("⚠️  Coach error: {}", e));
+            RetryResult::MaxRetriesReached(err) => {
+                print_msg(&format!("⚠️  Coach failed after max retries: {}", err));
                coach_feedback = "Please review and fix any issues.".to_string();
            }
+            RetryResult::ContextLengthExceeded(err) => {
+                print_msg(&format!("⚠️  Coach context length exceeded: {}", err));
+                coach_feedback = "Context window full. Please continue with current progress.".to_string();
+            }
+            RetryResult::Panic(e) => {
+                print_msg(&format!("💥 Coach panic: {}", e));
+                return Err(e);
+            }
        }
        
        turn += 1;
--- a/crates/g3-planner/tests/retry_feedback_test.rs
+++ b/crates/g3-planner/tests/retry_feedback_test.rs
@@ -0,0 +1,208 @@
+//! Integration tests for retry logic and feedback extraction in planning mode
+//!
+//! These tests check the `RetryConfig` presets and the `ExtractedFeedback`,
+//! `FeedbackExtractionConfig` and `FeedbackSource` helpers; no API calls are made.
+
+use g3_core::feedback_extraction::{ExtractedFeedback, FeedbackExtractionConfig, FeedbackSource};
+use g3_core::retry::RetryConfig;
+
+#[test]
+fn test_retry_config_for_planning_player() {
+    let config = RetryConfig::planning("player");
+    assert_eq!(config.max_retries, 3);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "player");
+}
+
+#[test]
+fn test_retry_config_for_planning_coach() {
+    let config = RetryConfig::planning("coach");
+    assert_eq!(config.max_retries, 3);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "coach");
+}
+
+#[test]
+fn test_retry_config_with_custom_max_retries() {
+    let config = RetryConfig::planning("player").with_max_retries(6);
+    assert_eq!(config.max_retries, 6);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "player");
+}
+
+#[test]
+fn test_retry_config_default() {
+    let config = RetryConfig::default();
+    assert_eq!(config.max_retries, 3);
+    assert!(!config.is_autonomous);
+    assert_eq!(config.role_name, "agent");
+}
+
+#[test]
+fn test_retry_config_player_preset() {
+    let config = RetryConfig::player();
+    assert_eq!(config.max_retries, 3);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "player");
+}
+
+#[test]
+fn test_retry_config_coach_preset() {
+    let config = RetryConfig::coach();
+    assert_eq!(config.max_retries, 3);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "coach");
+}
+
+#[test]
+fn test_extracted_feedback_approval_detection() {
+    let approved = ExtractedFeedback::new(
+        "Great work! IMPLEMENTATION_APPROVED".to_string(),
+        FeedbackSource::NativeToolCall,
+    );
+    assert!(approved.is_approved());
+    assert!(!approved.is_fallback());
+
+    let not_approved = ExtractedFeedback::new(
+        "Please fix the issues".to_string(),
+        FeedbackSource::NativeToolCall,
+    );
+    assert!(!not_approved.is_approved());
+    assert!(!not_approved.is_fallback());
+
+    let fallback = ExtractedFeedback::new(
+        "Default feedback".to_string(),
+        FeedbackSource::DefaultFallback,
+    );
+    assert!(!fallback.is_approved());
+    assert!(fallback.is_fallback());
+}
+
+#[test]
+fn test_feedback_extraction_config_default() {
+    let config = FeedbackExtractionConfig::default();
+    assert!(!config.verbose);
+    assert!(config.logs_dir.is_none());
+    assert!(config.default_feedback.contains("review"));
+}
+
+#[test]
+fn test_feedback_extraction_config_custom() {
+    let config = FeedbackExtractionConfig {
+        verbose: true,
+        logs_dir: Some(std::path::PathBuf::from("/tmp/test_logs")),
+        default_feedback: "Custom fallback message for testing".to_string(),
+    };
+    assert!(config.verbose);
+    assert_eq!(
+        config.logs_dir,
+        Some(std::path::PathBuf::from("/tmp/test_logs"))
+    );
+    assert!(config.default_feedback.contains("Custom fallback"));
+}
+
+#[test]
+fn test_feedback_source_variants() {
+    // Verify all feedback sources are distinguishable
+    let sources = vec![
+        FeedbackSource::SessionLog,
+        FeedbackSource::NativeToolCall,
+        FeedbackSource::ConversationHistory,
+        FeedbackSource::TaskResultResponse,
+        FeedbackSource::DefaultFallback,
+    ];
+
+    for (i, source1) in sources.iter().enumerate() {
+        for (j, source2) in sources.iter().enumerate() {
+            if i == j {
+                assert_eq!(source1, source2);
+            } else {
+                assert_ne!(source1, source2);
+            }
+        }
+    }
+}
+
+#[test]
+fn test_retry_configs_for_planning_mode_are_autonomous() {
+    // Both player and coach should be marked as autonomous for planning mode
+    let player = RetryConfig::planning("player");
+    let coach = RetryConfig::planning("coach");
+
+    assert!(
+        player.is_autonomous,
+        "Player should be autonomous in planning mode"
+    );
+    assert!(
+        coach.is_autonomous,
+        "Coach should be autonomous in planning mode"
+    );
+}
+
+#[test]
+fn test_extracted_feedback_new() {
+    let feedback = ExtractedFeedback::new(
+        "Test content".to_string(),
+        FeedbackSource::SessionLog,
+    );
+    assert_eq!(feedback.content, "Test content");
+    assert_eq!(feedback.source, FeedbackSource::SessionLog);
+}
+
+#[test]
+fn test_extracted_feedback_approval_variations() {
+    // Test various approval message formats
+    let cases = vec![
+        ("IMPLEMENTATION_APPROVED", true),
+        ("IMPLEMENTATION_APPROVED - great work!", true),
+        ("All done. IMPLEMENTATION_APPROVED", true),
+        ("implementation_approved", false), // Case sensitive
+        ("APPROVED", false),                // Must be exact phrase
+        ("Please fix these issues", false),
+        ("", false),
+    ];
+
+    for (content, expected_approved) in cases {
+        let feedback = ExtractedFeedback::new(content.to_string(), FeedbackSource::SessionLog);
+        assert_eq!(
+            feedback.is_approved(),
+            expected_approved,
+            "Failed for content: '{}'",
+            content
+        );
+    }
+}
+
+#[test]
+fn test_feedback_source_fallback_detection() {
+    // Only DefaultFallback should be detected as fallback
+    let sources_and_expected = vec![
+        (FeedbackSource::SessionLog, false),
+        (FeedbackSource::NativeToolCall, false),
+        (FeedbackSource::ConversationHistory, false),
+        (FeedbackSource::TaskResultResponse, false),
+        (FeedbackSource::DefaultFallback, true),
+    ];
+
+    for (source, expected_is_fallback) in sources_and_expected {
+        let feedback = ExtractedFeedback::new("Test".to_string(), source.clone());
+        assert_eq!(
+            feedback.is_fallback(),
+            expected_is_fallback,
+            "Failed for source: {:?}",
+            source
+        );
+    }
+}
+
+#[test]
+fn test_retry_config_chaining() {
+    // Test that with_max_retries can be chained
+    let config = RetryConfig::planning("player")
+        .with_max_retries(10)
+        .with_max_retries(5);
+    
+    assert_eq!(config.max_retries, 5);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "player");
+}