diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index 4f2e2cd..1b408fa 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -9,6 +9,7 @@ use std::path::Path; use tokio_util::sync::CancellationToken; use tracing::{error, info}; +use g3_core::error_handling::{classify_error, ErrorType, RecoverableError}; mod retro_tui; mod tui; mod ui_writer_impl; @@ -173,7 +174,7 @@ pub async fn run() -> Result<()> { let result = agent .execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true) .await?; - output.print_markdown(&result); + output.print_markdown(&result.response); } else { // Interactive mode (default) if !cli.retro { @@ -382,25 +383,55 @@ async fn run_interactive_retro(config: Config, show_prompt: bool, show_code: boo // Execute the task tui.output(&format!("> {}", input)); tui.status("PROCESSING"); - - match agent - .execute_task_with_timing( - &input, - None, - false, - show_prompt, - show_code, - true, - ) - .await - { - Ok(response) => { - tui.output(&response); - tui.status("READY"); - } - Err(e) => { - tui.error(&format!("Task execution failed: {}", e)); - tui.status("ERROR"); + + const MAX_TIMEOUT_RETRIES: u32 = 3; + let mut attempt = 0; + + loop { + attempt += 1; + + match agent + .execute_task_with_timing( + &input, + None, + false, + show_prompt, + show_code, + true, + ) + .await + { + Ok(result) => { + if attempt > 1 { + tui.output(&format!("SYSTEM: REQUEST SUCCEEDED AFTER {} ATTEMPTS", attempt)); + } + tui.output(&result.response); + tui.status("READY"); + break; + } + Err(e) => { + // Check if this is a timeout error that we should retry + let error_type = classify_error(&e); + + if matches!(error_type, ErrorType::Recoverable(RecoverableError::Timeout)) && attempt < MAX_TIMEOUT_RETRIES { + // Calculate retry delay with exponential backoff + let delay_ms = 1000 * (2_u64.pow(attempt - 1)); + let delay = std::time::Duration::from_millis(delay_ms); + + tui.output(&format!("SYSTEM: TIMEOUT ERROR (ATTEMPT {}/{}). RETRYING IN {:?}...", + attempt, MAX_TIMEOUT_RETRIES, delay)); + tui.status("RETRYING"); + + // Wait before retrying + tokio::time::sleep(delay).await; + continue; + } + + // For non-timeout errors or after max retries + tui.error(&format!("Task execution failed: {}", e)); + tui.status("ERROR"); + break; + } } } } @@ -597,6 +628,8 @@ async fn execute_task( show_code: bool, output: &SimpleOutput, ) { + const MAX_TIMEOUT_RETRIES: u32 = 3; + let mut attempt = 0; // Show thinking indicator immediately output.print("šŸ¤” Thinking..."); // Note: flush is handled internally by println @@ -605,56 +638,94 @@ async fn execute_task( let cancellation_token = CancellationToken::new(); let cancel_token_clone = cancellation_token.clone(); - // Execute task with cancellation support - let execution_result = tokio::select! { - result = agent.execute_task_with_timing_cancellable( - input, None, false, show_prompt, show_code, true, cancellation_token - ) => { - result - } - _ = tokio::signal::ctrl_c() => { - cancel_token_clone.cancel(); - output.print("\nāš ļø Operation cancelled by user (Ctrl+C)"); - return; - } - }; + loop { + attempt += 1; + + // Execute task with cancellation support + let execution_result = tokio::select! { + result = agent.execute_task_with_timing_cancellable( + input, None, false, show_prompt, show_code, true, cancellation_token.clone() + ) => { + result + } + _ = tokio::signal::ctrl_c() => { + cancel_token_clone.cancel(); + output.print("\nāš ļø Operation cancelled by user (Ctrl+C)"); + return; + } + }; - match execution_result { - Ok(response) => output.print_markdown(&response), - Err(e) => { - if e.to_string().contains("cancelled") { - output.print("āš ļø Operation cancelled by user"); - } else { - // Enhanced error logging with detailed information - error!("=== TASK EXECUTION ERROR ==="); - error!("Error: {}", e); - - // Log error chain - let mut source = e.source(); - let mut depth = 1; - while let Some(err) = source { - error!(" Caused by [{}]: {}", depth, err); - source = err.source(); - depth += 1; + match execution_result { + Ok(result) => { + if attempt > 1 { + output.print(&format!("āœ… Request succeeded after {} attempts", attempt)); } - - // Log additional context - error!("Task input: {}", input); - error!("Error type: {}", std::any::type_name_of_val(&e)); - - // Display user-friendly error message - output.print(&format!("āŒ Error: {}", e)); - - // If it's a stream error, provide helpful guidance - if e.to_string().contains("No response received") { - output.print("šŸ’” This may be a temporary issue. Please try again or check the logs for more details."); - output.print(" Log files are saved in the 'logs/' directory."); + output.print_markdown(&result.response); + return; + } + Err(e) => { + if e.to_string().contains("cancelled") { + output.print("āš ļø Operation cancelled by user"); + return; } + + // Check if this is a timeout error that we should retry + let error_type = classify_error(&e); + + if matches!(error_type, ErrorType::Recoverable(RecoverableError::Timeout)) && attempt < MAX_TIMEOUT_RETRIES { + // Calculate retry delay with exponential backoff + let delay_ms = 1000 * (2_u64.pow(attempt - 1)); + let delay = std::time::Duration::from_millis(delay_ms); + + output.print(&format!( + "ā±ļø Timeout error detected (attempt {}/{}). Retrying in {:?}...", + attempt, MAX_TIMEOUT_RETRIES, delay + )); + + // Wait before retrying + tokio::time::sleep(delay).await; + continue; + } + + // For non-timeout errors or after max retries, handle as before + handle_execution_error(&e, input, output, attempt); + return; } } } } +fn handle_execution_error(e: &anyhow::Error, input: &str, output: &SimpleOutput, attempt: u32) { + // Enhanced error logging with detailed information + error!("=== TASK EXECUTION ERROR ==="); + error!("Error: {}", e); + if attempt > 1 { + error!("Failed after {} attempts", attempt); + } + + // Log error chain + let mut source = e.source(); + let mut depth = 1; + while let Some(err) = source { + error!(" Caused by [{}]: {}", depth, err); + source = err.source(); + depth += 1; + } + + // Log additional context + error!("Task input: {}", input); + error!("Error type: {}", std::any::type_name_of_val(&e)); + + // Display user-friendly error message + output.print(&format!("āŒ Error: {}", e)); + + // If it's a stream error, provide helpful guidance + if e.to_string().contains("No response received") || e.to_string().contains("timed out") { + output.print("šŸ’” This may be a temporary issue. Please try again or check the logs for more details."); + output.print(" Log files are saved in the 'logs/' directory."); + } +} + fn display_context_progress(agent: &Agent, output: &SimpleOutput) { let context = agent.get_context_window(); output.print_context( @@ -728,50 +799,63 @@ async fn run_autonomous( output.print("šŸ“‹ Requirements loaded from requirements.md"); output.print("šŸ”„ Starting coach-player feedback loop..."); + // Check if implementation files already exist + let skip_first_player = project.has_implementation_files(); + if skip_first_player { + output.print("šŸ“‚ Detected existing implementation files in workspace"); + output.print("ā­ļø Skipping first player turn - proceeding directly to coach review"); + } else { + output.print("šŸ“‚ No existing implementation files detected"); + output.print("šŸŽÆ Starting with player implementation"); + } + let mut turn = 1; let mut coach_feedback = String::new(); let mut implementation_approved = false; loop { - output.print(&format!( - "\n=== TURN {}/{} - PLAYER MODE ===", - turn, max_turns - )); + // Skip player turn if it's the first turn and implementation files exist + if !(turn == 1 && skip_first_player) { + output.print(&format!( + "\n=== TURN {}/{} - PLAYER MODE ===", + turn, max_turns + )); - // Player mode: implement requirements (with coach feedback if available) - let player_prompt = if coach_feedback.is_empty() { - format!( - "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.", - requirements - ) - } else { - format!( - "You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.", - coach_feedback, requirements - ) - }; + // Player mode: implement requirements (with coach feedback if available) + let player_prompt = if coach_feedback.is_empty() { + format!( + "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.", + requirements + ) + } else { + format!( + "You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.", + coach_feedback, requirements + ) + }; - output.print("šŸŽÆ Starting player implementation..."); + output.print("šŸŽÆ Starting player implementation..."); - // Execute player task and handle the result properly - match agent - .execute_task_with_timing(&player_prompt, None, false, show_prompt, show_code, true) - .await - { - Ok(player_result) => { - // Display player's implementation result - output.print("šŸ“ Player implementation completed:"); - output.print_markdown(&player_result); - } - Err(e) => { - output.print(&format!("āŒ Player implementation failed: {}", e)); - // Continue to coach review even if player had an error + // Execute player task and handle the result properly + match agent + .execute_task_with_timing(&player_prompt, None, false, show_prompt, show_code, true) + .await + { + Ok(result) => { + // Display player's implementation result + output.print("šŸ“ Player implementation completed:"); + output.print_markdown(&result.response); + } + Err(e) => { + output.print(&format!("āŒ Player implementation failed: {}", e)); + // Continue to coach review even if player had an error + } } + + // Give some time for file operations to complete + tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; } - // Give some time for file operations to complete - tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; - // Create a new agent instance for coach mode to ensure fresh context let config = g3_config::Config::load(None)?; let ui_writer = ConsoleUiWriter::new(); @@ -822,49 +906,14 @@ Remember: Be thorough in your review but concise in your feedback. APPROVE if th output.print("šŸŽ“ Coach review completed"); - // Extract the actual feedback text from the coach result - // IMPORTANT: We only want the final_output summary, not the entire conversation - // The coach_result contains the full conversation including file reads, analysis, etc. - // We need to extract ONLY the final_output content - - let coach_feedback_text = { - // Look for the final_output content in the coach's response - // In autonomous mode, the final_output is returned without the "=> " prefix - // The coach result should end with the summary content from final_output - - // First, remove any timing information at the end - let content_without_timing = if let Some(timing_pos) = coach_result.rfind("\nā±ļø") { - &coach_result[..timing_pos] - } else { - &coach_result - }; - - // The final_output content is typically the last substantial text in the response - // after all tool executions. Look for it after the last tool execution marker - // or take the last paragraph if no clear markers - - // Split by double newlines to find the last substantial block - let blocks: Vec<&str> = content_without_timing.split("\n\n").collect(); - - // Find the last non-empty block that isn't just whitespace - let final_block = blocks.iter() - .rev() - .find(|block| !block.trim().is_empty()) - .map(|block| block.trim().to_string()) - .unwrap_or_else(|| { - // Fallback: if we can't find a clear block, take the whole thing - // but this shouldn't happen if the coach properly calls final_output - content_without_timing.trim().to_string() - }); - - final_block - }; + // Extract the coach feedback using the semantic extraction from TaskResult + let coach_feedback_text = coach_result.extract_last_block(); // Log the size of the feedback for debugging info!( "Coach feedback extracted: {} characters (from {} total)", coach_feedback_text.len(), - coach_result.len() + coach_result.response.len() ); // Check if we got empty feedback (this can happen if the coach doesn't call final_output) @@ -878,7 +927,7 @@ Remember: Be thorough in your review but concise in your feedback. APPROVE if th output.print(&format!("Coach feedback:\n{}", coach_feedback_text)); // Check if coach approved the implementation - if coach_feedback_text.contains("IMPLEMENTATION_APPROVED") { + if coach_result.is_approved() { output.print("\n=== SESSION COMPLETED - IMPLEMENTATION APPROVED ==="); output.print("āœ… Coach approved the implementation!"); implementation_approved = true; diff --git a/crates/g3-core/src/error_handling.rs b/crates/g3-core/src/error_handling.rs index 9911f42..fe810d3 100644 --- a/crates/g3-core/src/error_handling.rs +++ b/crates/g3-core/src/error_handling.rs @@ -206,7 +206,12 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType { return ErrorType::Recoverable(RecoverableError::ModelBusy); } - if error_str.contains("timeout") || error_str.contains("timed out") { + // Enhanced timeout detection - check for various timeout patterns + if error_str.contains("timeout") || + error_str.contains("timed out") || + error_str.contains("operation timed out") || + error_str.contains("request or response body error") || // Common timeout pattern + error_str.contains("stream error") && error_str.contains("timed out") { return ErrorType::Recoverable(RecoverableError::Timeout); } diff --git a/crates/g3-core/src/error_handling_test.rs b/crates/g3-core/src/error_handling_test.rs index b0b55d5..1fde9e9 100644 --- a/crates/g3-core/src/error_handling_test.rs +++ b/crates/g3-core/src/error_handling_test.rs @@ -2,8 +2,7 @@ #[cfg(test)] mod tests { - use super::super::error_handling::*; - use anyhow::anyhow; + use crate::error_handling::*; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::Arc; @@ -28,7 +27,7 @@ mod tests { let count = counter.fetch_add(1, Ordering::SeqCst); if count < 2 { // Fail with recoverable error on first two attempts - Err(anyhow!("Rate limit exceeded")) + Err(anyhow::anyhow!("Rate limit exceeded")) } else { // Succeed on third attempt Ok("Success") @@ -65,7 +64,7 @@ mod tests { async move { counter.fetch_add(1, Ordering::SeqCst); // Always fail with non-recoverable error - Err(anyhow!("Invalid API key")) + Err(anyhow::anyhow!("Invalid API key")) } }, &context, @@ -97,7 +96,7 @@ mod tests { async move { counter.fetch_add(1, Ordering::SeqCst); // Always fail with recoverable error - Err(anyhow!("Network connection failed")) + Err(anyhow::anyhow!("Network connection failed")) } }, &context, diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 8979d01..c1c745f 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -1,6 +1,11 @@ pub mod error_handling; pub mod project; pub mod ui_writer; +pub mod task_result; +pub use task_result::TaskResult; + +#[cfg(test)] +mod task_result_comprehensive_tests; use crate::ui_writer::UiWriter; #[cfg(test)] @@ -565,7 +570,7 @@ impl Agent { description: &str, language: Option<&str>, _auto_execute: bool, - ) -> Result { + ) -> Result { self.execute_task_with_options(description, language, false, false, false) .await } @@ -577,7 +582,7 @@ impl Agent { _auto_execute: bool, show_prompt: bool, show_code: bool, - ) -> Result { + ) -> Result { self.execute_task_with_timing( description, language, @@ -597,7 +602,7 @@ impl Agent { show_prompt: bool, show_code: bool, show_timing: bool, - ) -> Result { + ) -> Result { // Create a cancellation token that never cancels for backward compatibility let cancellation_token = CancellationToken::new(); self.execute_task_with_timing_cancellable( @@ -621,7 +626,7 @@ impl Agent { show_code: bool, show_timing: bool, cancellation_token: CancellationToken, - ) -> Result { + ) -> Result { // Execute the task directly without splitting self.execute_single_task( description, @@ -640,7 +645,7 @@ impl Agent { _show_code: bool, show_timing: bool, cancellation_token: CancellationToken, - ) -> Result { + ) -> Result { // Generate session ID based on the initial prompt if this is a new session if self.session_id.is_none() { self.session_id = Some(self.generate_session_id(description)); @@ -778,7 +783,7 @@ The tool will execute immediately and you'll receive the result (success or erro // Time the LLM call with cancellation support and streaming let llm_start = Instant::now(); let result = tokio::select! { - result = self.stream_completion(request) => result, + result = self.stream_completion(request, show_timing) => result, _ = cancellation_token.cancelled() => { // Save context window on cancellation self.save_context_window("cancelled"); @@ -786,8 +791,8 @@ The tool will execute immediately and you'll receive the result (success or erro } }; - let (response_content, think_time) = match result { - Ok(content) => content, + let task_result = match result { + Ok(result) => result, Err(e) => { // Save context window on error self.save_context_window("error"); @@ -795,7 +800,8 @@ The tool will execute immediately and you'll receive the result (success or erro } }; - let llm_duration = llm_start.elapsed(); + let response_content = task_result.response.clone(); + let _llm_duration = llm_start.elapsed(); // Create a mock usage for now (we'll need to track this during streaming) let mock_usage = g3_providers::Usage { @@ -822,18 +828,8 @@ The tool will execute immediately and you'll receive the result (success or erro // Save context window at the end of successful interaction self.save_context_window("completed"); - // With streaming tool execution, we don't need separate code execution - // The tools are already executed during streaming - if show_timing { - let timing_summary = format!( - "\nā±ļø {} | šŸ’­ {}", - Self::format_duration(llm_duration), - Self::format_duration(think_time) - ); - Ok(format!("{}\n{}", response_content, timing_summary)) - } else { - Ok(response_content) - } + // Return the task result which already includes timing if needed + Ok(task_result) } /// Generate a session ID based on the initial prompt @@ -919,8 +915,9 @@ The tool will execute immediately and you'll receive the result (success or erro async fn stream_completion( &mut self, request: CompletionRequest, - ) -> Result<(String, Duration)> { - self.stream_completion_with_tools(request).await + show_timing: bool, + ) -> Result { + self.stream_completion_with_tools(request, show_timing).await } /// Create tool definitions for native tool calling providers @@ -1076,7 +1073,8 @@ The tool will execute immediately and you'll receive the result (success or erro async fn stream_completion_with_tools( &mut self, mut request: CompletionRequest, - ) -> Result<(String, Duration)> { + show_timing: bool, + ) -> Result { use crate::error_handling::ErrorContext; use tokio_stream::StreamExt; @@ -1473,9 +1471,17 @@ The tool will execute immediately and you'll receive the result (success or erro } } self.ui_writer.println(""); - let ttft = + let _ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed()); - return Ok((full_response, ttft)); + + // Add timing if needed + let final_response = if show_timing { + format!("{}\n\nā±ļø {} | šŸ’­ {}", full_response, Self::format_duration(total_execution_time), Self::format_duration(_ttft)) + } else { + full_response + }; + + return Ok(TaskResult::new(final_response, self.context_window.clone())); } // Closure marker with timing @@ -1680,9 +1686,17 @@ The tool will execute immediately and you'll receive the result (success or erro } self.ui_writer.println(""); - let ttft = + let _ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed()); - return Ok((full_response, ttft)); + + // Add timing if needed + let final_response = if show_timing { + format!("{}\n\nā±ļø {} | šŸ’­ {}", full_response, Self::format_duration(total_execution_time), Self::format_duration(_ttft)) + } else { + full_response + }; + + return Ok(TaskResult::new(final_response, self.context_window.clone())); } break; // Tool was executed, break to continue outer loop } @@ -1747,16 +1761,32 @@ The tool will execute immediately and you'll receive the result (success or erro self.ui_writer.println(""); } - let ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed()); - return Ok((full_response, ttft)); + let _ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed()); + + // Add timing if needed + let final_response = if show_timing { + format!("{}\n\nā±ļø {} | šŸ’­ {}", full_response, Self::format_duration(total_execution_time), Self::format_duration(_ttft)) + } else { + full_response + }; + + return Ok(TaskResult::new(final_response, self.context_window.clone())); } // Continue the loop to start a new stream with updated context } // If we exit the loop due to max iterations - let ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed()); - Ok((full_response, ttft)) + let _ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed()); + + // Add timing if needed + let final_response = if show_timing { + format!("{}\n\nā±ļø {} | šŸ’­ {}", full_response, Self::format_duration(total_execution_time), Self::format_duration(_ttft)) + } else { + full_response + }; + + Ok(TaskResult::new(final_response, self.context_window.clone())) } async fn execute_tool(&self, tool_call: &ToolCall) -> Result { diff --git a/crates/g3-core/src/project.rs b/crates/g3-core/src/project.rs index fe487bf..242b8ce 100644 --- a/crates/g3-core/src/project.rs +++ b/crates/g3-core/src/project.rs @@ -81,6 +81,48 @@ impl Project { self.requirements_path.is_some() } + /// Check if implementation files exist in the workspace + pub fn has_implementation_files(&self) -> bool { + self.check_dir_for_implementation_files(&self.workspace_dir) + } + + /// Recursively check a directory for implementation files + fn check_dir_for_implementation_files(&self, dir: &Path) -> bool { + // Common source file extensions + let extensions = vec![ + "swift", "rs", "py", "js", "ts", "java", "cpp", "c", + "go", "rb", "php", "cs", "kt", "scala", "m", "h" + ]; + + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + + if path.is_file() { + // Check if it's a source file + if let Some(ext) = path.extension() { + if let Some(ext_str) = ext.to_str() { + if extensions.contains(&ext_str) { + return true; + } + } + } + } else if path.is_dir() { + // Skip hidden directories and common non-source directories + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + if !name.starts_with('.') && name != "logs" && name != "target" && name != "node_modules" { + // Recursively check subdirectories + if self.check_dir_for_implementation_files(&path) { + return true; + } + } + } + } + } + } + false + } + /// Read the requirements file content pub fn read_requirements(&self) -> Result> { if let Some(ref path) = self.requirements_path { diff --git a/crates/g3-core/src/task_result.rs b/crates/g3-core/src/task_result.rs new file mode 100644 index 0000000..ac8dfb9 --- /dev/null +++ b/crates/g3-core/src/task_result.rs @@ -0,0 +1,97 @@ +use crate::ContextWindow; + +/// Result of a task execution containing both the response and the context window +#[derive(Debug, Clone)] +pub struct TaskResult { + /// The actual response content from the task execution + pub response: String, + /// The complete context window at the time of completion + pub context_window: ContextWindow, +} + +impl TaskResult { + pub fn new(response: String, context_window: ContextWindow) -> Self { + Self { + response, + context_window, + } + } + + /// Extract the last block from the response (for coach feedback in autonomous mode) + /// This looks for the final_output content which is the last substantial block + pub fn extract_last_block(&self) -> String { + // Remove any timing information at the end + let content_without_timing = if let Some(timing_pos) = self.response.rfind("\nā±ļø") { + &self.response[..timing_pos] + } else { + &self.response + }; + + // Split by double newlines to find the last substantial block + let blocks: Vec<&str> = content_without_timing.split("\n\n").collect(); + + // Find the last non-empty block that isn't just whitespace + blocks.iter() + .rev() + .find(|block| !block.trim().is_empty()) + .map(|block| block.trim().to_string()) + .unwrap_or_else(|| { + // Fallback: if we can't find a clear block, take the whole thing + content_without_timing.trim().to_string() + }) + } + + /// Check if the response contains an approval (for autonomous mode) + pub fn is_approved(&self) -> bool { + self.extract_last_block().contains("IMPLEMENTATION_APPROVED") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_last_block() { + // Test case 1: Response with timing info + let context_window = ContextWindow::new(1000); + let response_with_timing = "Some initial content\n\nFinal block content\n\nā±ļø 2.3s | šŸ’­ 1.2s".to_string(); + let result = TaskResult::new(response_with_timing, context_window.clone()); + assert_eq!(result.extract_last_block(), "Final block content"); + + // Test case 2: Response without timing + let response_no_timing = "Some initial content\n\nFinal block content".to_string(); + let result = TaskResult::new(response_no_timing, context_window.clone()); + assert_eq!(result.extract_last_block(), "Final block content"); + + // Test case 3: Response with IMPLEMENTATION_APPROVED + let response_approved = "Some content\n\nIMPLEMENTATION_APPROVED".to_string(); + let result = TaskResult::new(response_approved, context_window.clone()); + assert!(result.is_approved()); + + // Test case 4: Response without approval + let response_not_approved = "Some content\n\nNeeds more work".to_string(); + let result = TaskResult::new(response_not_approved, context_window); + assert!(!result.is_approved()); + } + + #[test] + fn test_extract_last_block_edge_cases() { + let context_window = ContextWindow::new(1000); + + // Test empty response + let empty_response = "".to_string(); + let result = TaskResult::new(empty_response, context_window.clone()); + assert_eq!(result.extract_last_block(), ""); + + // Test single block + let single_block = "Just one block".to_string(); + let result = TaskResult::new(single_block, context_window.clone()); + assert_eq!(result.extract_last_block(), "Just one block"); + + // Test multiple empty blocks + let multiple_empty = "\n\n\n\nSome content\n\n\n\n".to_string(); + let result = TaskResult::new(multiple_empty, context_window); + assert_eq!(result.extract_last_block(), "Some content"); + } +} diff --git a/crates/g3-core/src/task_result_comprehensive_tests.rs b/crates/g3-core/src/task_result_comprehensive_tests.rs new file mode 100644 index 0000000..f15b1f3 --- /dev/null +++ b/crates/g3-core/src/task_result_comprehensive_tests.rs @@ -0,0 +1,276 @@ +use crate::{ContextWindow, TaskResult}; +use g3_providers::{Message, MessageRole}; +use std::sync::Arc; + +#[test] +fn test_task_result_basic_functionality() { + // Create a context window with some messages + let mut context = ContextWindow::new(10000); + context.add_message(Message { + role: MessageRole::User, + content: "Test message 1".to_string(), + }); + context.add_message(Message { + role: MessageRole::Assistant, + content: "Response 1".to_string(), + }); + + // Create a TaskResult + let response = "This is the response\n\nFinal output block".to_string(); + let result = TaskResult::new(response.clone(), context.clone()); + + // Test basic properties + assert_eq!(result.response, response); + assert_eq!(result.context_window.conversation_history.len(), 2); + assert_eq!(result.context_window.total_tokens, 10000); +} + +#[test] +fn test_extract_last_block_various_formats() { + let context = ContextWindow::new(1000); + + // Test 1: Standard format with multiple blocks + let response1 = "First block\n\nSecond block\n\nThird block".to_string(); + let result1 = TaskResult::new(response1, context.clone()); + assert_eq!(result1.extract_last_block(), "Third block"); + + // Test 2: With timing information + let response2 = "Content\n\nFinal block\n\nā±ļø 2.3s | šŸ’­ 1.2s".to_string(); + let result2 = TaskResult::new(response2, context.clone()); + assert_eq!(result2.extract_last_block(), "Final block"); + + // Test 3: Single line response + let response3 = "Single line response".to_string(); + let result3 = TaskResult::new(response3, context.clone()); + assert_eq!(result3.extract_last_block(), "Single line response"); + + // Test 4: Empty response + let response4 = "".to_string(); + let result4 = TaskResult::new(response4, context.clone()); + assert_eq!(result4.extract_last_block(), ""); + + // Test 5: Only whitespace + let response5 = "\n\n\n \n\n".to_string(); + let result5 = TaskResult::new(response5, context.clone()); + assert_eq!(result5.extract_last_block(), ""); + + // Test 6: Multiple blocks with empty ones + let response6 = "First\n\n\n\n\n\nLast block here".to_string(); + let result6 = TaskResult::new(response6, context.clone()); + assert_eq!(result6.extract_last_block(), "Last block here"); +} + +#[test] +fn test_is_approved_detection() { + let context = ContextWindow::new(1000); + + // Test approved cases + let approved_responses = vec![ + "Analysis complete\n\nIMPLEMENTATION_APPROVED", + "Some content\n\nThe implementation is good. IMPLEMENTATION_APPROVED", + "IMPLEMENTATION_APPROVED", + "Review done\n\nāœ… IMPLEMENTATION_APPROVED - All tests pass", + ]; + + for response in approved_responses { + let result = TaskResult::new(response.to_string(), context.clone()); + assert!(result.is_approved(), "Failed to detect approval in: {}", response); + } + + // Test not approved cases + let not_approved_responses = vec![ + "Needs more work", + "Implementation needs fixes", + "IMPLEMENTATION_REJECTED", + "Almost there but not APPROVED", + "", + ]; + + for response in not_approved_responses { + let result = TaskResult::new(response.to_string(), context.clone()); + assert!(!result.is_approved(), "Incorrectly detected approval in: {}", response); + } +} + +#[test] +fn test_context_window_preservation() { + // Create a context window with specific state + let mut context = ContextWindow::new(5000); + context.used_tokens = 1234; + + // Add some messages + for i in 0..5 { + context.add_message(Message { + role: if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant }, + content: format!("Message {}", i), + }); + } + + // Create TaskResult + let result = TaskResult::new("Response".to_string(), context.clone()); + + // Verify context is preserved + assert_eq!(result.context_window.total_tokens, 5000); + assert!(result.context_window.used_tokens > 1234); // Should have increased + assert_eq!(result.context_window.conversation_history.len(), 5); + + // Verify messages are preserved correctly + for i in 0..5 { + let is_user = matches!(result.context_window.conversation_history[i].role, MessageRole::User); + let expected_is_user = i % 2 == 0; + assert_eq!(is_user, expected_is_user, "Message {} has wrong role", i); + assert_eq!(result.context_window.conversation_history[i].content, format!("Message {}", i)); + } +} + +#[test] +fn test_coach_feedback_extraction_scenarios() { + let context = ContextWindow::new(1000); + + // Scenario 1: Coach feedback with file operations and analysis + let coach_response = r#"Reading file: src/main.rs +šŸ“„ File content (23 lines): +fn main() { + println!("Hello"); +} + +Analyzing implementation... + +The implementation needs the following fixes: +1. Add error handling +2. Implement missing functions +3. Add tests"#; + + let result = TaskResult::new(coach_response.to_string(), context.clone()); + let feedback = result.extract_last_block(); + assert!(feedback.contains("Add error handling")); + assert!(feedback.contains("Implement missing functions")); + assert!(feedback.contains("Add tests")); + + // Scenario 2: Coach approval + let approval_response = r#"Checking compilation... +āœ… Build successful + +Running tests... +āœ… All tests pass + +IMPLEMENTATION_APPROVED"#; + + let result = TaskResult::new(approval_response.to_string(), context.clone()); + assert!(result.is_approved()); + assert_eq!(result.extract_last_block(), "IMPLEMENTATION_APPROVED"); + + // Scenario 3: Complex feedback with timing + let complex_response = r#"Tool execution log... + +Analysis complete. + +The following issues were found: +- Memory leak in process_data() +- Missing input validation + +ā±ļø 5.2s | šŸ’­ 2.1s"#; + + let result = TaskResult::new(complex_response.to_string(), context.clone()); + let feedback = result.extract_last_block(); + assert!(feedback.contains("Memory leak")); + assert!(feedback.contains("Missing input validation")); + assert!(!feedback.contains("ā±ļø")); // Timing should be stripped +} + +#[test] +fn test_edge_cases_and_special_characters() { + let context = ContextWindow::new(1000); + + // Test with special characters and emojis + let response_with_emojis = "First part šŸš€\n\nāœ… Final part with emojis šŸŽ‰".to_string(); + let result = TaskResult::new(response_with_emojis, context.clone()); + assert_eq!(result.extract_last_block(), "āœ… Final part with emojis šŸŽ‰"); + + // Test with code blocks + let response_with_code = "Explanation\n\n```rust\nfn main() {}\n```\n\nFinal comment".to_string(); + let result = TaskResult::new(response_with_code, context.clone()); + assert_eq!(result.extract_last_block(), "Final comment"); + + // Test with mixed newlines + let mixed_newlines = "Part 1\r\n\r\nPart 2\n\nPart 3".to_string(); + let result = TaskResult::new(mixed_newlines, context.clone()); + assert_eq!(result.extract_last_block(), "Part 3"); +} + +#[test] +fn test_large_response_handling() { + let context = ContextWindow::new(100000); + + // Create a large response + let mut large_response = String::new(); + for i in 0..100 { + large_response.push_str(&format!("Block {} with some content\n\n", i)); + } + large_response.push_str("This is the final block after 100 other blocks"); + + let result = TaskResult::new(large_response, context); + assert_eq!(result.extract_last_block(), "This is the final block after 100 other blocks"); +} + +#[test] +fn test_concurrent_access() { + use std::thread; + + let context = ContextWindow::new(1000); + let result = Arc::new(TaskResult::new( + "Concurrent test\n\nFinal block".to_string(), + context, + )); + + let mut handles = vec![]; + + // Spawn multiple threads to access the TaskResult + for _ in 0..10 { + let result_clone = Arc::clone(&result); + let handle = thread::spawn(move || { + // Each thread extracts the last block + let block = result_clone.extract_last_block(); + assert_eq!(block, "Final block"); + + // Check approval status + assert!(!result_clone.is_approved()); + }); + handles.push(handle); + } + + // Wait for all threads to complete + for handle in handles { + handle.join().unwrap(); + } +} + +fn main() { + println!("Running TaskResult comprehensive tests..."); + + test_task_result_basic_functionality(); + println!("āœ… Basic functionality test passed"); + + test_extract_last_block_various_formats(); + println!("āœ… Extract last block test passed"); + + test_is_approved_detection(); + println!("āœ… Approval detection test passed"); + + test_context_window_preservation(); + println!("āœ… Context window preservation test passed"); + + test_coach_feedback_extraction_scenarios(); + println!("āœ… Coach feedback extraction test passed"); + + test_edge_cases_and_special_characters(); + println!("āœ… Edge cases test passed"); + + test_large_response_handling(); + println!("āœ… Large response handling test passed"); + + test_concurrent_access(); + println!("āœ… Concurrent access test passed"); + + println!("\nšŸŽ‰ All TaskResult tests passed successfully!"); +} diff --git a/crates/g3-core/src/task_result_tests.rs b/crates/g3-core/src/task_result_tests.rs new file mode 100644 index 0000000..f49ebd4 --- /dev/null +++ b/crates/g3-core/src/task_result_tests.rs @@ -0,0 +1,48 @@ +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_last_block() { + // Test case 1: Response with timing info + let context_window = ContextWindow::new(1000); + let response_with_timing = "Some initial content\n\nFinal block content\n\nā±ļø 2.3s | šŸ’­ 1.2s".to_string(); + let result = TaskResult::new(response_with_timing, context_window.clone()); + assert_eq!(result.extract_last_block(), "Final block content"); + + // Test case 2: Response without timing + let response_no_timing = "Some initial content\n\nFinal block content".to_string(); + let result = TaskResult::new(response_no_timing, context_window.clone()); + assert_eq!(result.extract_last_block(), "Final block content"); + + // Test case 3: Response with IMPLEMENTATION_APPROVED + let response_approved = "Some content\n\nIMPLEMENTATION_APPROVED".to_string(); + let result = TaskResult::new(response_approved, context_window.clone()); + assert!(result.is_approved()); + + // Test case 4: Response without approval + let response_not_approved = "Some content\n\nNeeds more work".to_string(); + let result = TaskResult::new(response_not_approved, context_window); + assert!(!result.is_approved()); + } + + #[test] + fn test_extract_last_block_edge_cases() { + let context_window = ContextWindow::new(1000); + + // Test empty response + let empty_response = "".to_string(); + let result = TaskResult::new(empty_response, context_window.clone()); + assert_eq!(result.extract_last_block(), ""); + + // Test single block + let single_block = "Just one block".to_string(); + let result = TaskResult::new(single_block, context_window.clone()); + assert_eq!(result.extract_last_block(), "Just one block"); + + // Test multiple empty blocks + let multiple_empty = "\n\n\n\nSome content\n\n\n\n".to_string(); + let result = TaskResult::new(multiple_empty, context_window); + assert_eq!(result.extract_last_block(), "Some content"); + } +}