Document retry config location and verify planning mode logic

Add documentation for retry configuration in planning mode: - Document retry settings in .g3.toml under [agent] section - Note RetryConfig implementation in g3-core/src/retry.rs - Clarify hardcoded vs config-based retry values Verify existing retry loop and coach feedback parsing: - Confirm execute_with_retry() handles recoverable errors - Document feedback extraction source priority order - Provide manual verification steps for testing
2025-12-11 14:56:27 +11:00
parent 1a13fc5345
commit 7b47495881
9 changed files with 1375 additions and 25 deletions
--- a/crates/g3-core/src/feedback_extraction.rs
+++ b/crates/g3-core/src/feedback_extraction.rs
@@ -0,0 +1,567 @@
+//! Coach feedback extraction module
+//!
+//! This module provides robust extraction of coach feedback from various sources:
+//! - Session log files (JSON format)
+//! - Native tool calling JSON format
+//! - Conversation history
+//! - TaskResult response fallback
+//!
+//! Used by both autonomous mode (g3-cli) and planning mode (g3-planner).
+
+use crate::{logs_dir, Agent, TaskResult};
+use crate::ui_writer::UiWriter;
+use serde_json::Value;
+use std::path::PathBuf;
+use tracing::{debug, info, warn};
+
+/// Result of feedback extraction with source information
+#[derive(Debug, Clone)]
+pub struct ExtractedFeedback {
+    /// The extracted feedback text
+    pub content: String,
+    /// The source where feedback was found
+    pub source: FeedbackSource,
+}
+
+/// Source of the extracted feedback
+#[derive(Debug, Clone, PartialEq)]
+pub enum FeedbackSource {
+    /// From session log file (verified final_output tool call)
+    SessionLog,
+    /// From native tool call JSON in response
+    NativeToolCall,
+    /// From conversation history in agent
+    ConversationHistory,
+    /// From TaskResult response (fallback)
+    TaskResultResponse,
+    /// Default fallback message
+    DefaultFallback,
+}
+
+impl ExtractedFeedback {
+    /// Create a new extracted feedback
+    pub fn new(content: String, source: FeedbackSource) -> Self {
+        Self { content, source }
+    }
+
+    /// Check if the feedback indicates approval
+    pub fn is_approved(&self) -> bool {
+        self.content.contains("IMPLEMENTATION_APPROVED")
+    }
+
+    /// Check if the feedback is a fallback/default
+    pub fn is_fallback(&self) -> bool {
+        self.source == FeedbackSource::DefaultFallback
+    }
+}
+
+/// Configuration for feedback extraction
+#[derive(Debug, Clone)]
+pub struct FeedbackExtractionConfig {
+    /// Whether to print debug information
+    pub verbose: bool,
+    /// Custom logs directory (overrides default)
+    pub logs_dir: Option<PathBuf>,
+    /// Default feedback message if extraction fails
+    pub default_feedback: String,
+}
+
+impl Default for FeedbackExtractionConfig {
+    fn default() -> Self {
+        Self {
+            verbose: false,
+            logs_dir: None,
+            default_feedback: "The implementation needs review. Please ensure all requirements are met and the code compiles without errors.".to_string(),
+        }
+    }
+}
+
+/// Extract coach feedback using multiple fallback methods
+///
+/// Tries extraction in this order:
+/// 1. Session log file (most reliable for final_output tool calls)
+/// 2. Native tool call JSON in the response
+/// 3. Conversation history from the agent
+/// 4. TaskResult response parsing
+/// 5. Default fallback message
+///
+/// # Arguments
+/// * `coach_result` - The task result from coach execution
+/// * `agent` - The coach agent (for session ID and conversation history)
+/// * `config` - Extraction configuration
+///
+/// # Returns
+/// Extracted feedback with source information, never fails
+pub fn extract_coach_feedback<W>(
+    coach_result: &TaskResult,
+    agent: &Agent<W>,
+    config: &FeedbackExtractionConfig,
+) -> ExtractedFeedback
+where
+    W: UiWriter + Clone + Send + Sync + 'static,
+{
+    // Try session log first (most reliable)
+    if let Some(session_id) = agent.get_session_id() {
+        if let Some(feedback) = try_extract_from_session_log(&session_id, config) {
+            info!("Extracted coach feedback from session log: {} chars", feedback.len());
+            return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog);
+        }
+    }
+
+    // Try native tool call JSON parsing
+    if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) {
+        info!("Extracted coach feedback from native tool call: {} chars", feedback.len());
+        return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall);
+    }
+
+    // Try conversation history
+    if let Some(session_id) = agent.get_session_id() {
+        if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) {
+            info!("Extracted coach feedback from conversation history: {} chars", feedback.len());
+            return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory);
+        }
+    }
+
+    // Try TaskResult parsing
+    let extracted = coach_result.extract_final_output();
+    if !extracted.is_empty() {
+        info!("Extracted coach feedback from task result: {} chars", extracted.len());
+        return ExtractedFeedback::new(extracted, FeedbackSource::TaskResultResponse);
+    }
+
+    // Fallback to default
+    warn!("Could not extract coach feedback, using default");
+    ExtractedFeedback::new(config.default_feedback.clone(), FeedbackSource::DefaultFallback)
+}
+
+/// Try to extract feedback from session log file
+fn try_extract_from_session_log(
+    session_id: &str,
+    config: &FeedbackExtractionConfig,
+) -> Option<String> {
+    let logs_path = config.logs_dir.clone().unwrap_or_else(logs_dir);
+    let log_file_path = logs_path.join(format!("g3_session_{}.json", session_id));
+
+    if !log_file_path.exists() {
+        debug!("Session log file not found: {:?}", log_file_path);
+        return None;
+    }
+
+    let log_content = std::fs::read_to_string(&log_file_path).ok()?;
+    let log_json: Value = serde_json::from_str(&log_content).ok()?;
+
+    // Try to get conversation history from context_window
+    let messages = log_json
+        .get("context_window")?
+        .get("conversation_history")?
+        .as_array()?;
+
+    // Search backwards for final_output tool result
+    extract_final_output_from_messages(messages)
+}
+
+/// Try to extract feedback from native tool call JSON in response
+fn try_extract_from_native_tool_call(response: &str) -> Option<String> {
+    // Look for various patterns of final_output tool calls
+    
+    // Pattern 1: JSON tool call with "tool": "final_output"
+    if let Some(feedback) = try_extract_json_tool_call(response) {
+        return Some(feedback);
+    }
+
+    // Pattern 2: Anthropic-style native tool use block
+    if let Some(feedback) = try_extract_anthropic_tool_use(response) {
+        return Some(feedback);
+    }
+
+    // Pattern 3: OpenAI-style function call
+    if let Some(feedback) = try_extract_openai_function_call(response) {
+        return Some(feedback);
+    }
+
+    None
+}
+
+/// Extract JSON tool call pattern
+fn try_extract_json_tool_call(response: &str) -> Option<String> {
+    // Look for {"tool": "final_output", "args": {"summary": "..."}}
+    let mut search_pos = 0;
+    while let Some(pos) = response[search_pos..].find("\"tool\"") {
+        let actual_pos = search_pos + pos;
+        
+        // Find the start of the JSON object
+        let json_start = response[..actual_pos].rfind('{')?;
+        
+        // Try to find matching closing brace
+        if let Some(json_str) = extract_balanced_json(&response[json_start..]) {
+            if let Ok(json) = serde_json::from_str::<Value>(&json_str) {
+                if json.get("tool").and_then(|v| v.as_str()) == Some("final_output") {
+                    if let Some(args) = json.get("args") {
+                        if let Some(summary) = args.get("summary").and_then(|v| v.as_str()) {
+                            return Some(summary.to_string());
+                        }
+                    }
+                }
+            }
+        }
+        
+        search_pos = actual_pos + 1;
+    }
+    
+    None
+}
+
+/// Extract Anthropic-style tool use block
+fn try_extract_anthropic_tool_use(response: &str) -> Option<String> {
+    // Look for content_block with type "tool_use" and name "final_output"
+    if !response.contains("tool_use") || !response.contains("final_output") {
+        return None;
+    }
+
+    // Try to parse as JSON array of content blocks
+    if let Some(start) = response.find('[') {
+        if let Some(json_str) = extract_balanced_json(&response[start..]) {
+            if let Ok(blocks) = serde_json::from_str::<Vec<Value>>(&json_str) {
+                for block in blocks {
+                    if block.get("type").and_then(|v| v.as_str()) == Some("tool_use") {
+                        if block.get("name").and_then(|v| v.as_str()) == Some("final_output") {
+                            if let Some(input) = block.get("input") {
+                                if let Some(summary) = input.get("summary").and_then(|v| v.as_str()) {
+                                    return Some(summary.to_string());
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    None
+}
+
+/// Extract OpenAI-style function call
+fn try_extract_openai_function_call(response: &str) -> Option<String> {
+    // Look for function_call or tool_calls with final_output
+    if !response.contains("final_output") {
+        return None;
+    }
+
+    // Try to find function call JSON
+    if let Some(pos) = response.find("\"function_call\"") {
+        if let Some(json_start) = response[pos..].find('{') {
+            let start = pos + json_start;
+            if let Some(json_str) = extract_balanced_json(&response[start..]) {
+                if let Ok(json) = serde_json::from_str::<Value>(&json_str) {
+                    if json.get("name").and_then(|v| v.as_str()) == Some("final_output") {
+                        if let Some(args_str) = json.get("arguments").and_then(|v| v.as_str()) {
+                            if let Ok(args) = serde_json::from_str::<Value>(args_str) {
+                                if let Some(summary) = args.get("summary").and_then(|v| v.as_str()) {
+                                    return Some(summary.to_string());
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    None
+}
+
+/// Try to extract from conversation history in session log
+fn try_extract_from_conversation_history(
+    session_id: &str,
+    config: &FeedbackExtractionConfig,
+) -> Option<String> {
+    let logs_path = config.logs_dir.clone().unwrap_or_else(logs_dir);
+    let log_file_path = logs_path.join(format!("g3_session_{}.json", session_id));
+
+    if !log_file_path.exists() {
+        return None;
+    }
+
+    let log_content = std::fs::read_to_string(&log_file_path).ok()?;
+    let log_json: Value = serde_json::from_str(&log_content).ok()?;
+
+    // Check for tool_calls array in the log
+    if let Some(tool_calls) = log_json.get("tool_calls").and_then(|v| v.as_array()) {
+        // Look backwards for final_output
+        for call in tool_calls.iter().rev() {
+            if call.get("tool").and_then(|v| v.as_str()) == Some("final_output") {
+                if let Some(args) = call.get("args") {
+                    if let Some(summary) = args.get("summary").and_then(|v| v.as_str()) {
+                        return Some(summary.to_string());
+                    }
+                }
+            }
+        }
+    }
+
+    None
+}
+
+/// Extract final_output from message array
+fn extract_final_output_from_messages(messages: &[Value]) -> Option<String> {
+    // Go backwards through conversation to find the last final_output tool result
+    for i in (0..messages.len()).rev() {
+        let msg = &messages[i];
+        let role = msg.get("role").and_then(|v| v.as_str())?;
+        
+        // Check for User message with "Tool result:"
+        if role.eq_ignore_ascii_case("user") {
+            if let Some(content) = msg.get("content").and_then(|v| v.as_str()) {
+                if content.starts_with("Tool result:") {
+                    // Verify preceding message was a final_output tool call
+                    if i > 0 && is_final_output_tool_call(&messages[i - 1]) {
+                        let feedback = content
+                            .strip_prefix("Tool result: ")
+                            .or_else(|| content.strip_prefix("Tool result:"))
+                            .unwrap_or(content)
+                            .to_string();
+                        return Some(feedback);
+                    }
+                }
+            }
+        }
+        
+        // Also check for native tool results in assistant messages
+        if role.eq_ignore_ascii_case("assistant") {
+            if let Some(content) = msg.get("content") {
+                // Could be string or array (for native tool calling)
+                if let Some(content_str) = content.as_str() {
+                    if let Some(feedback) = try_extract_from_native_tool_call(content_str) {
+                        return Some(feedback);
+                    }
+                } else if let Some(content_array) = content.as_array() {
+                    for block in content_array {
+                        if block.get("type").and_then(|v| v.as_str()) == Some("tool_use") {
+                            if block.get("name").and_then(|v| v.as_str()) == Some("final_output") {
+                                if let Some(input) = block.get("input") {
+                                    if let Some(summary) = input.get("summary").and_then(|v| v.as_str()) {
+                                        return Some(summary.to_string());
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    
+    None
+}
+
+/// Check if a message is a final_output tool call
+fn is_final_output_tool_call(msg: &Value) -> bool {
+    let role = match msg.get("role").and_then(|v| v.as_str()) {
+        Some(r) => r,
+        None => return false,
+    };
+    
+    if !role.eq_ignore_ascii_case("assistant") {
+        return false;
+    }
+    
+    if let Some(content) = msg.get("content") {
+        // Check string content
+        if let Some(content_str) = content.as_str() {
+            if content_str.contains("\"tool\": \"final_output\"") 
+               || content_str.contains("\"tool\":\"final_output\"") {
+                return true;
+            }
+        }
+        
+        // Check array content (native tool calling)
+        if let Some(content_array) = content.as_array() {
+            for block in content_array {
+                if block.get("type").and_then(|v| v.as_str()) == Some("tool_use") {
+                    if block.get("name").and_then(|v| v.as_str()) == Some("final_output") {
+                        return true;
+                    }
+                }
+            }
+        }
+    }
+    
+    // Check tool_calls field (OpenAI format)
+    if let Some(tool_calls) = msg.get("tool_calls").and_then(|v| v.as_array()) {
+        for call in tool_calls {
+            if let Some(function) = call.get("function") {
+                if function.get("name").and_then(|v| v.as_str()) == Some("final_output") {
+                    return true;
+                }
+            }
+        }
+    }
+    
+    false
+}
+
+/// Extract a balanced JSON object/array from a string
+fn extract_balanced_json(s: &str) -> Option<String> {
+    let chars: Vec<char> = s.chars().collect();
+    if chars.is_empty() {
+        return None;
+    }
+    
+    let opener = chars[0];
+    let closer = match opener {
+        '{' => '}',
+        '[' => ']',
+        _ => return None,
+    };
+    
+    let mut depth = 0;
+    let mut in_string = false;
+    let mut escape_next = false;
+    
+    for (i, &c) in chars.iter().enumerate() {
+        if escape_next {
+            escape_next = false;
+            continue;
+        }
+        
+        if c == '\\' && in_string {
+            escape_next = true;
+            continue;
+        }
+        
+        if c == '"' {
+            in_string = !in_string;
+            continue;
+        }
+        
+        if in_string {
+            continue;
+        }
+        
+        if c == opener {
+            depth += 1;
+        } else if c == closer {
+            depth -= 1;
+            if depth == 0 {
+                return Some(chars[..=i].iter().collect());
+            }
+        }
+    }
+    
+    None
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_extract_balanced_json_object() {
+        let input = r#"{"tool": "test", "args": {"key": "value"}} extra"#;
+        let result = extract_balanced_json(input);
+        assert_eq!(result, Some(r#"{"tool": "test", "args": {"key": "value"}}"#.to_string()));
+    }
+
+    #[test]
+    fn test_extract_balanced_json_array() {
+        let input = r#"[{"type": "test"}, {"type": "test2"}] extra"#;
+        let result = extract_balanced_json(input);
+        assert_eq!(result, Some(r#"[{"type": "test"}, {"type": "test2"}]"#.to_string()));
+    }
+
+    #[test]
+    fn test_extract_balanced_json_with_strings() {
+        let input = r#"{"message": "hello {world}", "count": 1}"#;
+        let result = extract_balanced_json(input);
+        assert_eq!(result, Some(input.to_string()));
+    }
+
+    #[test]
+    fn test_try_extract_json_tool_call() {
+        let response = r#"Some text {"tool": "final_output", "args": {"summary": "Test feedback"}} more text"#;
+        let result = try_extract_json_tool_call(response);
+        assert_eq!(result, Some("Test feedback".to_string()));
+    }
+
+    #[test]
+    fn test_try_extract_json_tool_call_not_final_output() {
+        let response = r#"{"tool": "shell", "args": {"command": "ls"}}"#;
+        let result = try_extract_json_tool_call(response);
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn test_is_final_output_tool_call_string() {
+        let msg = serde_json::json!({
+            "role": "assistant",
+            "content": r#"{"tool": "final_output", "args": {"summary": "done"}}"#
+        });
+        assert!(is_final_output_tool_call(&msg));
+    }
+
+    #[test]
+    fn test_is_final_output_tool_call_native() {
+        let msg = serde_json::json!({
+            "role": "assistant",
+            "content": [{
+                "type": "tool_use",
+                "name": "final_output",
+                "input": {"summary": "done"}
+            }]
+        });
+        assert!(is_final_output_tool_call(&msg));
+    }
+
+    #[test]
+    fn test_is_final_output_tool_call_openai() {
+        let msg = serde_json::json!({
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [{
+                "function": {
+                    "name": "final_output",
+                    "arguments": r#"{"summary": "done"}"#
+                }
+            }]
+        });
+        assert!(is_final_output_tool_call(&msg));
+    }
+
+    #[test]
+    fn test_extracted_feedback_is_approved() {
+        let feedback = ExtractedFeedback::new(
+            "IMPLEMENTATION_APPROVED - great work!".to_string(),
+            FeedbackSource::SessionLog,
+        );
+        assert!(feedback.is_approved());
+
+        let feedback = ExtractedFeedback::new(
+            "Please fix the following issues".to_string(),
+            FeedbackSource::SessionLog,
+        );
+        assert!(!feedback.is_approved());
+    }
+
+    #[test]
+    fn test_extracted_feedback_is_fallback() {
+        let feedback = ExtractedFeedback::new(
+            "Default message".to_string(),
+            FeedbackSource::DefaultFallback,
+        );
+        assert!(feedback.is_fallback());
+
+        let feedback = ExtractedFeedback::new(
+            "Real feedback".to_string(),
+            FeedbackSource::SessionLog,
+        );
+        assert!(!feedback.is_fallback());
+    }
+
+    #[test]
+    fn test_feedback_extraction_config_default() {
+        let config = FeedbackExtractionConfig::default();
+        assert!(!config.verbose);
+        assert!(config.logs_dir.is_none());
+        assert!(config.default_feedback.contains("review"));
+    }
+}
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -1,10 +1,14 @@
 pub mod code_search;
 pub mod error_handling;
+pub mod feedback_extraction;
 pub mod project;
+pub mod retry;
 pub mod task_result;
 pub mod ui_writer;

 pub use task_result::TaskResult;
+pub use retry::{RetryConfig, RetryResult, execute_with_retry, retry_operation};
+pub use feedback_extraction::{ExtractedFeedback, FeedbackSource, FeedbackExtractionConfig, extract_coach_feedback};

 #[cfg(test)]
 mod task_result_comprehensive_tests;
--- a/crates/g3-core/src/retry.rs
+++ b/crates/g3-core/src/retry.rs
@@ -0,0 +1,356 @@
+//! Retry infrastructure for agent task execution
+//!
+//! This module provides reusable retry logic for executing agent tasks,
+//! including error classification, exponential backoff, and configurable retry strategies.
+//!
+//! Used by both autonomous mode (g3-cli) and planning mode (g3-planner).
+
+use crate::error_handling::{calculate_retry_delay, classify_error, ErrorType, RecoverableError};
+use crate::ui_writer::UiWriter;
+use crate::{Agent, DiscoveryOptions, TaskResult};
+use anyhow::Result;
+use std::time::Instant;
+use tracing::{info, warn};
+
+/// Configuration for retry behavior
+#[derive(Debug, Clone)]
+pub struct RetryConfig {
+    /// Maximum number of retry attempts
+    pub max_retries: u32,
+    /// Whether this is autonomous mode (affects backoff timing)
+    pub is_autonomous: bool,
+    /// Role name for logging (e.g., "player", "coach")
+    pub role_name: String,
+}
+
+impl Default for RetryConfig {
+    fn default() -> Self {
+        Self {
+            max_retries: 3,
+            is_autonomous: false,
+            role_name: "agent".to_string(),
+        }
+    }
+}
+
+impl RetryConfig {
+    /// Create a retry config for player agent
+    pub fn player() -> Self {
+        Self {
+            max_retries: 3,
+            is_autonomous: true,
+            role_name: "player".to_string(),
+        }
+    }
+
+    /// Create a retry config for coach agent
+    pub fn coach() -> Self {
+        Self {
+            max_retries: 3,
+            is_autonomous: true,
+            role_name: "coach".to_string(),
+        }
+    }
+
+    /// Create a retry config for planning mode
+    pub fn planning(role: &str) -> Self {
+        Self {
+            max_retries: 3,
+            is_autonomous: true,
+            role_name: role.to_string(),
+        }
+    }
+
+    /// Set custom max retries
+    pub fn with_max_retries(mut self, max_retries: u32) -> Self {
+        self.max_retries = max_retries;
+        self
+    }
+}
+
+/// Result of a retry operation
+#[derive(Debug)]
+pub enum RetryResult {
+    /// Task succeeded with result
+    Success(TaskResult),
+    /// Task failed after max retries (contains last error message)
+    MaxRetriesReached(String),
+    /// Context length exceeded - should end current turn
+    ContextLengthExceeded(String),
+    /// Panic detected - should terminate
+    Panic(anyhow::Error),
+}
+
+impl RetryResult {
+    /// Check if the result is a success
+    pub fn is_success(&self) -> bool {
+        matches!(self, RetryResult::Success(_))
+    }
+
+    /// Get the task result if successful
+    pub fn into_result(self) -> Option<TaskResult> {
+        match self {
+            RetryResult::Success(result) => Some(result),
+            _ => None,
+        }
+    }
+}
+
+/// Callback for handling context length exceeded errors
+pub type ContextExceededCallback<W> = Box<dyn FnOnce(&Agent<W>, &anyhow::Error, u32) + Send>;
+
+/// Execute an agent task with retry logic
+///
+/// This function handles:
+/// - Error classification (timeout, rate limit, server error, etc.)
+/// - Exponential backoff between retries
+/// - Context length exceeded errors (ends turn gracefully)
+/// - Panic detection (terminates execution)
+///
+/// # Arguments
+/// * `agent` - The agent to execute the task
+/// * `prompt` - The task prompt
+/// * `config` - Retry configuration
+/// * `show_prompt` - Whether to show the prompt
+/// * `show_code` - Whether to show code in output
+/// * `discovery` - Optional discovery options
+/// * `print_fn` - Function to print status messages
+///
+/// # Returns
+/// A `RetryResult` indicating success, failure, or special conditions
+pub async fn execute_with_retry<W, F>(
+    agent: &mut Agent<W>,
+    prompt: &str,
+    config: &RetryConfig,
+    show_prompt: bool,
+    show_code: bool,
+    discovery: Option<DiscoveryOptions<'_>>,
+    mut print_fn: F,
+) -> RetryResult
+where
+    W: UiWriter + Clone + Send + Sync + 'static,
+    F: FnMut(&str),
+{
+    let mut retry_count = 0;
+    let start_time = Instant::now();
+
+    loop {
+        let result = agent
+            .execute_task_with_timing(prompt, None, false, show_prompt, show_code, true, discovery.clone())
+            .await;
+
+        match result {
+            Ok(task_result) => {
+                if retry_count > 0 {
+                    info!(
+                        "{} task succeeded after {} retries (elapsed: {:?})",
+                        config.role_name,
+                        retry_count,
+                        start_time.elapsed()
+                    );
+                }
+                return RetryResult::Success(task_result);
+            }
+            Err(e) => {
+                let error_type = classify_error(&e);
+
+                // Check for context length exceeded
+                if matches!(
+                    error_type,
+                    ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)
+                ) {
+                    let msg = format!(
+                        "⚠️ Context length exceeded in {} turn: {}",
+                        config.role_name, e
+                    );
+                    print_fn(&msg);
+                    print_fn("📝 Logging error to session and ending current turn...");
+
+                    // Log to session with forensic context
+                    let forensic_context = format!(
+                        "Role: {}\nContext tokens: {}\nTotal available: {}\nPercentage used: {:.1}%\nPrompt length: {} chars\nError occurred at: {}",
+                        config.role_name,
+                        agent.get_context_window().used_tokens,
+                        agent.get_context_window().total_tokens,
+                        agent.get_context_window().percentage_used(),
+                        prompt.len(),
+                        chrono::Utc::now().to_rfc3339()
+                    );
+                    agent.log_error_to_session(&e, "assistant", Some(forensic_context));
+
+                    return RetryResult::ContextLengthExceeded(e.to_string());
+                }
+
+                // Check for panic
+                if e.to_string().contains("panic") {
+                    print_fn(&format!("💥 {} panic detected: {}", config.role_name, e));
+                    return RetryResult::Panic(e);
+                }
+
+                // Check if error is recoverable
+                match error_type {
+                    ErrorType::Recoverable(ref recoverable_type) => {
+                        retry_count += 1;
+
+                        if retry_count >= config.max_retries {
+                            let msg = format!(
+                                "🔄 Max retries ({}) reached for {}",
+                                config.max_retries, config.role_name
+                            );
+                            print_fn(&msg);
+                            return RetryResult::MaxRetriesReached(e.to_string());
+                        }
+
+                        // Calculate backoff delay
+                        let delay = calculate_retry_delay(retry_count, config.is_autonomous);
+
+                        let msg = format!(
+                            "⚠️ {} error (attempt {}/{}): {:?} - {}",
+                            config.role_name, retry_count, config.max_retries, recoverable_type, e
+                        );
+                        print_fn(&msg);
+
+                        let retry_msg = format!(
+                            "🔄 Retrying {} in {:?}...",
+                            config.role_name, delay
+                        );
+                        print_fn(&retry_msg);
+
+                        warn!(
+                            "Recoverable error ({:?}) in {} (attempt {}/{}). Retrying in {:?}...",
+                            recoverable_type, config.role_name, retry_count, config.max_retries, delay
+                        );
+
+                        tokio::time::sleep(delay).await;
+                    }
+                    ErrorType::NonRecoverable => {
+                        let msg = format!(
+                            "❌ Non-recoverable error in {}: {}",
+                            config.role_name, e
+                        );
+                        print_fn(&msg);
+                        return RetryResult::MaxRetriesReached(e.to_string());
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Execute a simple async operation with retry (for non-agent tasks)
+///
+/// This is a simpler retry wrapper for operations like LLM API calls
+/// that don't involve the full agent machinery.
+pub async fn retry_operation<F, Fut, T, P>(
+    operation_name: &str,
+    mut operation: F,
+    max_retries: u32,
+    is_autonomous: bool,
+    mut print_fn: P,
+) -> Result<T>
+where
+    F: FnMut() -> Fut,
+    Fut: std::future::Future<Output = Result<T>>,
+    P: FnMut(&str),
+{
+    let mut retry_count = 0;
+
+    loop {
+        match operation().await {
+            Ok(result) => {
+                if retry_count > 0 {
+                    info!(
+                        "Operation '{}' succeeded after {} retries",
+                        operation_name, retry_count
+                    );
+                }
+                return Ok(result);
+            }
+            Err(e) => {
+                let error_type = classify_error(&e);
+
+                match error_type {
+                    ErrorType::Recoverable(ref recoverable_type) => {
+                        retry_count += 1;
+
+                        if retry_count >= max_retries {
+                            let msg = format!(
+                                "❌ Operation '{}' failed after {} retries: {}",
+                                operation_name, retry_count, e
+                            );
+                            print_fn(&msg);
+                            return Err(e);
+                        }
+
+                        let delay = calculate_retry_delay(retry_count, is_autonomous);
+                        let msg = format!(
+                            "⚠️ {} error in '{}' (attempt {}/{}), retrying in {:?}...",
+                            format!("{:?}", recoverable_type),
+                            operation_name,
+                            retry_count,
+                            max_retries,
+                            delay
+                        );
+                        print_fn(&msg);
+
+                        tokio::time::sleep(delay).await;
+                    }
+                    ErrorType::NonRecoverable => {
+                        let msg = format!(
+                            "❌ Non-recoverable error in '{}': {}",
+                            operation_name, e
+                        );
+                        print_fn(&msg);
+                        return Err(e);
+                    }
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_retry_config_defaults() {
+        let config = RetryConfig::default();
+        assert_eq!(config.max_retries, 3);
+        assert!(!config.is_autonomous);
+        assert_eq!(config.role_name, "agent");
+    }
+
+    #[test]
+    fn test_retry_config_player() {
+        let config = RetryConfig::player();
+        assert_eq!(config.max_retries, 3);
+        assert!(config.is_autonomous);
+        assert_eq!(config.role_name, "player");
+    }
+
+    #[test]
+    fn test_retry_config_coach() {
+        let config = RetryConfig::coach();
+        assert_eq!(config.max_retries, 3);
+        assert!(config.is_autonomous);
+        assert_eq!(config.role_name, "coach");
+    }
+
+    #[test]
+    fn test_retry_config_with_max_retries() {
+        let config = RetryConfig::player().with_max_retries(5);
+        assert_eq!(config.max_retries, 5);
+    }
+
+    #[test]
+    fn test_retry_result_is_success() {
+        use crate::ContextWindow;
+        let ctx = ContextWindow::new(1000);
+        let result = RetryResult::Success(TaskResult::new("test".to_string(), ctx));
+        assert!(result.is_success());
+
+        let failed = RetryResult::MaxRetriesReached("error".to_string());
+        assert!(!failed.is_success());
+    }
+}
--- a/crates/g3-planner/src/planner.rs
+++ b/crates/g3-planner/src/planner.rs
@@ -574,6 +574,8 @@ pub async fn run_coach_player_loop(
    requirements_content: &str,
 ) -> Result<()> {
    use g3_core::project::Project;
+    use g3_core::retry::{execute_with_retry, RetryConfig, RetryResult};
+    use g3_core::feedback_extraction::{extract_coach_feedback, FeedbackExtractionConfig};
    use g3_core::Agent;
    
    let max_turns = planner_config.max_turns;
@@ -612,7 +614,7 @@ pub async fn run_coach_player_loop(
            planner_config.quiet,
        ).await?;
        
-        let player_prompt = if coach_feedback.is_empty() {
+        let player_prompt = if coach_feedback.is_empty() || turn == 1 {
            format!(
                "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step. Write the todo list to: {}\n\nCreate all necessary files and code.",
                requirements_content,
@@ -620,19 +622,42 @@ pub async fn run_coach_player_loop(
            )
        } else {
            format!(
-                "You are G3 in implementation mode. Address the following coach feedback:\n\n{}\n\nContext requirements:\n{}\n\nFix the issues mentioned above.",
+                "You are G3 in implementation mode. Address the following coach feedback:\n\n{}\n\nOriginal requirements:\n{}\n\nFix the issues mentioned above.",
                coach_feedback,
                requirements_content
            )
        };
        
-        let player_result = player_agent
-            .execute_task_with_timing(&player_prompt, None, false, false, false, true, None)
-            .await;
+        // Execute player task with retry logic
+        let player_retry_config = RetryConfig::planning("player");
+        let player_result = execute_with_retry(
+            &mut player_agent,
+            &player_prompt,
+            &player_retry_config,
+            false, // show_prompt
+            false, // show_code
+            None,  // discovery
+            |msg| print_msg(msg),
+        ).await;
        
        match player_result {
-            Ok(result) => print_msg(&format!("✅ Player completed: {} chars response", result.response.len())),
-            Err(e) => print_msg(&format!("⚠️  Player error: {}", e)),
+            RetryResult::Success(result) => {
+                print_msg(&format!("✅ Player completed: {} chars response", result.response.len()));
+            }
+            RetryResult::MaxRetriesReached(err) => {
+                print_msg(&format!("⚠️  Player failed after max retries: {}", err));
+                // Continue to coach phase anyway to get feedback
+            }
+            RetryResult::ContextLengthExceeded(err) => {
+                print_msg(&format!("⚠️  Player context length exceeded: {}", err));
+                // Continue to next turn
+                turn += 1;
+                continue;
+            }
+            RetryResult::Panic(e) => {
+                print_msg(&format!("💥 Player panic: {}", e));
+                return Err(e);
+            }
        }
        
        // Coach phase - review implementation
@@ -648,39 +673,60 @@ pub async fn run_coach_player_loop(
        ).await?;
        
        let coach_prompt = format!(
-            "You are G3 in coach mode. Review the implementation against these requirements:\n\n{}\n\nCheck:\n1. Are requirements implemented correctly?\n2. Does the code compile?\n3. What's missing?\n\nIf COMPLETE, respond with 'IMPLEMENTATION_APPROVED'.\nOtherwise, provide specific feedback for the player to fix.",
+            "You are G3 in coach mode. Review the implementation against these requirements:\n\n{}\n\nCheck:\n1. Are requirements implemented correctly?\n2. Does the code compile?\n3. What's missing?\n\nUse the final_output tool to provide your feedback.\nIf implementation is COMPLETE, include 'IMPLEMENTATION_APPROVED' in your feedback.\nOtherwise, provide specific feedback for the player to fix.",
            requirements_content
        );
        
-        let coach_result = coach_agent
-            .execute_task_with_timing(&coach_prompt, None, false, false, false, true, None)
-            .await;
+        // Execute coach task with retry logic
+        let coach_retry_config = RetryConfig::planning("coach");
+        let coach_result = execute_with_retry(
+            &mut coach_agent,
+            &coach_prompt,
+            &coach_retry_config,
+            false, // show_prompt
+            false, // show_code
+            None,  // discovery
+            |msg| print_msg(msg),
+        ).await;
        
        match coach_result {
-            Ok(result) => {
-                if result.response.contains("IMPLEMENTATION_APPROVED") || result.is_approved() {
+            RetryResult::Success(result) => {
+                // Extract feedback using the robust extraction module
+                let feedback_config = FeedbackExtractionConfig::default();
+                let extracted = extract_coach_feedback(&result, &coach_agent, &feedback_config);
+                
+                print_msg(&format!("📝 Coach feedback extracted from {:?}: {} chars", 
+                    extracted.source, extracted.content.len()));
+                
+                // Check for approval
+                if extracted.is_approved() || result.response.contains("IMPLEMENTATION_APPROVED") {
                    print_msg("✅ Coach approved implementation!");
                    return Ok(());
                }
-                coach_feedback = result.response;
+                
+                coach_feedback = extracted.content;
+                
                // Display first 25 lines of coach feedback
                let lines: Vec<&str> = coach_feedback.lines().collect();
-                let display_lines = if lines.len() > 25 {
-                    let mut truncated: Vec<&str> = lines[..25].to_vec();
-                    truncated.push("...");
-                    truncated
-                } else {
-                    lines
-                };
-                print_msg(&format!("📝 Coach feedback ({} chars):", coach_feedback.len()));
-                for line in display_lines {
+                for line in lines.iter().take(25) {
                    print_msg(&format!("  {}", line));
                }
+                if lines.len() > 25 {
+                    print_msg("  ...");
+                }
            }
-            Err(e) => {
-                print_msg(&format!("⚠️  Coach error: {}", e));
+            RetryResult::MaxRetriesReached(err) => {
+                print_msg(&format!("⚠️  Coach failed after max retries: {}", err));
                coach_feedback = "Please review and fix any issues.".to_string();
            }
+            RetryResult::ContextLengthExceeded(err) => {
+                print_msg(&format!("⚠️  Coach context length exceeded: {}", err));
+                coach_feedback = "Context window full. Please continue with current progress.".to_string();
+            }
+            RetryResult::Panic(e) => {
+                print_msg(&format!("💥 Coach panic: {}", e));
+                return Err(e);
+            }
        }
        
        turn += 1;
--- a/crates/g3-planner/tests/retry_feedback_test.rs
+++ b/crates/g3-planner/tests/retry_feedback_test.rs
@@ -0,0 +1,208 @@
+//! Integration tests for retry logic and feedback extraction in planning mode
+//!
+//! These tests verify that the retry infrastructure and coach feedback extraction
+//! work correctly together, without requiring actual API calls.
+
+use g3_core::feedback_extraction::{ExtractedFeedback, FeedbackExtractionConfig, FeedbackSource};
+use g3_core::retry::RetryConfig;
+
+#[test]
+fn test_retry_config_for_planning_player() {
+    let config = RetryConfig::planning("player");
+    assert_eq!(config.max_retries, 3);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "player");
+}
+
+#[test]
+fn test_retry_config_for_planning_coach() {
+    let config = RetryConfig::planning("coach");
+    assert_eq!(config.max_retries, 3);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "coach");
+}
+
+#[test]
+fn test_retry_config_with_custom_max_retries() {
+    let config = RetryConfig::planning("player").with_max_retries(6);
+    assert_eq!(config.max_retries, 6);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "player");
+}
+
+#[test]
+fn test_retry_config_default() {
+    let config = RetryConfig::default();
+    assert_eq!(config.max_retries, 3);
+    assert!(!config.is_autonomous);
+    assert_eq!(config.role_name, "agent");
+}
+
+#[test]
+fn test_retry_config_player_preset() {
+    let config = RetryConfig::player();
+    assert_eq!(config.max_retries, 3);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "player");
+}
+
+#[test]
+fn test_retry_config_coach_preset() {
+    let config = RetryConfig::coach();
+    assert_eq!(config.max_retries, 3);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "coach");
+}
+
+#[test]
+fn test_extracted_feedback_approval_detection() {
+    let approved = ExtractedFeedback::new(
+        "Great work! IMPLEMENTATION_APPROVED".to_string(),
+        FeedbackSource::NativeToolCall,
+    );
+    assert!(approved.is_approved());
+    assert!(!approved.is_fallback());
+
+    let not_approved = ExtractedFeedback::new(
+        "Please fix the issues".to_string(),
+        FeedbackSource::NativeToolCall,
+    );
+    assert!(!not_approved.is_approved());
+    assert!(!not_approved.is_fallback());
+
+    let fallback = ExtractedFeedback::new(
+        "Default feedback".to_string(),
+        FeedbackSource::DefaultFallback,
+    );
+    assert!(!fallback.is_approved());
+    assert!(fallback.is_fallback());
+}
+
+#[test]
+fn test_feedback_extraction_config_default() {
+    let config = FeedbackExtractionConfig::default();
+    assert!(!config.verbose);
+    assert!(config.logs_dir.is_none());
+    assert!(config.default_feedback.contains("review"));
+}
+
+#[test]
+fn test_feedback_extraction_config_custom() {
+    let config = FeedbackExtractionConfig {
+        verbose: true,
+        logs_dir: Some(std::path::PathBuf::from("/tmp/test_logs")),
+        default_feedback: "Custom fallback message for testing".to_string(),
+    };
+    assert!(config.verbose);
+    assert_eq!(
+        config.logs_dir,
+        Some(std::path::PathBuf::from("/tmp/test_logs"))
+    );
+    assert!(config.default_feedback.contains("Custom fallback"));
+}
+
+#[test]
+fn test_feedback_source_variants() {
+    // Verify all feedback sources are distinguishable
+    let sources = vec![
+        FeedbackSource::SessionLog,
+        FeedbackSource::NativeToolCall,
+        FeedbackSource::ConversationHistory,
+        FeedbackSource::TaskResultResponse,
+        FeedbackSource::DefaultFallback,
+    ];
+
+    for (i, source1) in sources.iter().enumerate() {
+        for (j, source2) in sources.iter().enumerate() {
+            if i == j {
+                assert_eq!(source1, source2);
+            } else {
+                assert_ne!(source1, source2);
+            }
+        }
+    }
+}
+
+#[test]
+fn test_retry_configs_for_planning_mode_are_autonomous() {
+    // Both player and coach should be marked as autonomous for planning mode
+    let player = RetryConfig::planning("player");
+    let coach = RetryConfig::planning("coach");
+
+    assert!(
+        player.is_autonomous,
+        "Player should be autonomous in planning mode"
+    );
+    assert!(
+        coach.is_autonomous,
+        "Coach should be autonomous in planning mode"
+    );
+}
+
+#[test]
+fn test_extracted_feedback_new() {
+    let feedback = ExtractedFeedback::new(
+        "Test content".to_string(),
+        FeedbackSource::SessionLog,
+    );
+    assert_eq!(feedback.content, "Test content");
+    assert_eq!(feedback.source, FeedbackSource::SessionLog);
+}
+
+#[test]
+fn test_extracted_feedback_approval_variations() {
+    // Test various approval message formats
+    let cases = vec![
+        ("IMPLEMENTATION_APPROVED", true),
+        ("IMPLEMENTATION_APPROVED - great work!", true),
+        ("All done. IMPLEMENTATION_APPROVED", true),
+        ("implementation_approved", false), // Case sensitive
+        ("APPROVED", false),                // Must be exact phrase
+        ("Please fix these issues", false),
+        ("", false),
+    ];
+
+    for (content, expected_approved) in cases {
+        let feedback = ExtractedFeedback::new(content.to_string(), FeedbackSource::SessionLog);
+        assert_eq!(
+            feedback.is_approved(),
+            expected_approved,
+            "Failed for content: '{}'",
+            content
+        );
+    }
+}
+
+#[test]
+fn test_feedback_source_fallback_detection() {
+    // Only DefaultFallback should be detected as fallback
+    let sources_and_expected = vec![
+        (FeedbackSource::SessionLog, false),
+        (FeedbackSource::NativeToolCall, false),
+        (FeedbackSource::ConversationHistory, false),
+        (FeedbackSource::TaskResultResponse, false),
+        (FeedbackSource::DefaultFallback, true),
+    ];
+
+    for (source, expected_is_fallback) in sources_and_expected {
+        let feedback = ExtractedFeedback::new("Test".to_string(), source.clone());
+        assert_eq!(
+            feedback.is_fallback(),
+            expected_is_fallback,
+            "Failed for source: {:?}",
+            source
+        );
+    }
+}
+
+#[test]
+fn test_retry_config_chaining() {
+    // Test that with_max_retries can be chained
+    let config = RetryConfig::planning("player")
+        .with_max_retries(10)
+        .with_max_retries(5);
+    
+    assert_eq!(config.max_retries, 5);
+    assert!(config.is_autonomous);
+    assert_eq!(config.role_name, "player");
+}