error recovery and tests
This commit is contained in:
@@ -9,6 +9,7 @@ use std::path::Path;
|
|||||||
use tokio_util::sync::CancellationToken;
|
use tokio_util::sync::CancellationToken;
|
||||||
use tracing::{error, info};
|
use tracing::{error, info};
|
||||||
|
|
||||||
|
use g3_core::error_handling::{classify_error, ErrorType, RecoverableError};
|
||||||
mod retro_tui;
|
mod retro_tui;
|
||||||
mod tui;
|
mod tui;
|
||||||
mod ui_writer_impl;
|
mod ui_writer_impl;
|
||||||
@@ -173,7 +174,7 @@ pub async fn run() -> Result<()> {
|
|||||||
let result = agent
|
let result = agent
|
||||||
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
|
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
|
||||||
.await?;
|
.await?;
|
||||||
output.print_markdown(&result);
|
output.print_markdown(&result.response);
|
||||||
} else {
|
} else {
|
||||||
// Interactive mode (default)
|
// Interactive mode (default)
|
||||||
if !cli.retro {
|
if !cli.retro {
|
||||||
@@ -383,24 +384,54 @@ async fn run_interactive_retro(config: Config, show_prompt: bool, show_code: boo
|
|||||||
tui.output(&format!("> {}", input));
|
tui.output(&format!("> {}", input));
|
||||||
tui.status("PROCESSING");
|
tui.status("PROCESSING");
|
||||||
|
|
||||||
match agent
|
const MAX_TIMEOUT_RETRIES: u32 = 3;
|
||||||
.execute_task_with_timing(
|
let mut attempt = 0;
|
||||||
&input,
|
|
||||||
None,
|
loop {
|
||||||
false,
|
attempt += 1;
|
||||||
show_prompt,
|
|
||||||
show_code,
|
match agent
|
||||||
true,
|
.execute_task_with_timing(
|
||||||
)
|
&input,
|
||||||
.await
|
None,
|
||||||
{
|
false,
|
||||||
Ok(response) => {
|
show_prompt,
|
||||||
tui.output(&response);
|
show_code,
|
||||||
tui.status("READY");
|
true,
|
||||||
}
|
)
|
||||||
Err(e) => {
|
.await
|
||||||
tui.error(&format!("Task execution failed: {}", e));
|
{
|
||||||
tui.status("ERROR");
|
Ok(result) => {
|
||||||
|
if attempt > 1 {
|
||||||
|
tui.output(&format!("SYSTEM: REQUEST SUCCEEDED AFTER {} ATTEMPTS", attempt));
|
||||||
|
}
|
||||||
|
tui.output(&result.response);
|
||||||
|
tui.status("READY");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
// Check if this is a timeout error that we should retry
|
||||||
|
let error_type = classify_error(&e);
|
||||||
|
|
||||||
|
if matches!(error_type, ErrorType::Recoverable(RecoverableError::Timeout)) && attempt < MAX_TIMEOUT_RETRIES {
|
||||||
|
// Calculate retry delay with exponential backoff
|
||||||
|
let delay_ms = 1000 * (2_u64.pow(attempt - 1));
|
||||||
|
let delay = std::time::Duration::from_millis(delay_ms);
|
||||||
|
|
||||||
|
tui.output(&format!("SYSTEM: TIMEOUT ERROR (ATTEMPT {}/{}). RETRYING IN {:?}...",
|
||||||
|
attempt, MAX_TIMEOUT_RETRIES, delay));
|
||||||
|
tui.status("RETRYING");
|
||||||
|
|
||||||
|
// Wait before retrying
|
||||||
|
tokio::time::sleep(delay).await;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// For non-timeout errors or after max retries
|
||||||
|
tui.error(&format!("Task execution failed: {}", e));
|
||||||
|
tui.status("ERROR");
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -597,6 +628,8 @@ async fn execute_task<W: UiWriter>(
|
|||||||
show_code: bool,
|
show_code: bool,
|
||||||
output: &SimpleOutput,
|
output: &SimpleOutput,
|
||||||
) {
|
) {
|
||||||
|
const MAX_TIMEOUT_RETRIES: u32 = 3;
|
||||||
|
let mut attempt = 0;
|
||||||
// Show thinking indicator immediately
|
// Show thinking indicator immediately
|
||||||
output.print("🤔 Thinking...");
|
output.print("🤔 Thinking...");
|
||||||
// Note: flush is handled internally by println
|
// Note: flush is handled internally by println
|
||||||
@@ -605,56 +638,94 @@ async fn execute_task<W: UiWriter>(
|
|||||||
let cancellation_token = CancellationToken::new();
|
let cancellation_token = CancellationToken::new();
|
||||||
let cancel_token_clone = cancellation_token.clone();
|
let cancel_token_clone = cancellation_token.clone();
|
||||||
|
|
||||||
// Execute task with cancellation support
|
loop {
|
||||||
let execution_result = tokio::select! {
|
attempt += 1;
|
||||||
result = agent.execute_task_with_timing_cancellable(
|
|
||||||
input, None, false, show_prompt, show_code, true, cancellation_token
|
|
||||||
) => {
|
|
||||||
result
|
|
||||||
}
|
|
||||||
_ = tokio::signal::ctrl_c() => {
|
|
||||||
cancel_token_clone.cancel();
|
|
||||||
output.print("\n⚠️ Operation cancelled by user (Ctrl+C)");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
match execution_result {
|
// Execute task with cancellation support
|
||||||
Ok(response) => output.print_markdown(&response),
|
let execution_result = tokio::select! {
|
||||||
Err(e) => {
|
result = agent.execute_task_with_timing_cancellable(
|
||||||
if e.to_string().contains("cancelled") {
|
input, None, false, show_prompt, show_code, true, cancellation_token.clone()
|
||||||
output.print("⚠️ Operation cancelled by user");
|
) => {
|
||||||
} else {
|
result
|
||||||
// Enhanced error logging with detailed information
|
}
|
||||||
error!("=== TASK EXECUTION ERROR ===");
|
_ = tokio::signal::ctrl_c() => {
|
||||||
error!("Error: {}", e);
|
cancel_token_clone.cancel();
|
||||||
|
output.print("\n⚠️ Operation cancelled by user (Ctrl+C)");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Log error chain
|
match execution_result {
|
||||||
let mut source = e.source();
|
Ok(result) => {
|
||||||
let mut depth = 1;
|
if attempt > 1 {
|
||||||
while let Some(err) = source {
|
output.print(&format!("✅ Request succeeded after {} attempts", attempt));
|
||||||
error!(" Caused by [{}]: {}", depth, err);
|
}
|
||||||
source = err.source();
|
output.print_markdown(&result.response);
|
||||||
depth += 1;
|
return;
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
if e.to_string().contains("cancelled") {
|
||||||
|
output.print("⚠️ Operation cancelled by user");
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Log additional context
|
// Check if this is a timeout error that we should retry
|
||||||
error!("Task input: {}", input);
|
let error_type = classify_error(&e);
|
||||||
error!("Error type: {}", std::any::type_name_of_val(&e));
|
|
||||||
|
|
||||||
// Display user-friendly error message
|
if matches!(error_type, ErrorType::Recoverable(RecoverableError::Timeout)) && attempt < MAX_TIMEOUT_RETRIES {
|
||||||
output.print(&format!("❌ Error: {}", e));
|
// Calculate retry delay with exponential backoff
|
||||||
|
let delay_ms = 1000 * (2_u64.pow(attempt - 1));
|
||||||
|
let delay = std::time::Duration::from_millis(delay_ms);
|
||||||
|
|
||||||
// If it's a stream error, provide helpful guidance
|
output.print(&format!(
|
||||||
if e.to_string().contains("No response received") {
|
"⏱️ Timeout error detected (attempt {}/{}). Retrying in {:?}...",
|
||||||
output.print("💡 This may be a temporary issue. Please try again or check the logs for more details.");
|
attempt, MAX_TIMEOUT_RETRIES, delay
|
||||||
output.print(" Log files are saved in the 'logs/' directory.");
|
));
|
||||||
|
|
||||||
|
// Wait before retrying
|
||||||
|
tokio::time::sleep(delay).await;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For non-timeout errors or after max retries, handle as before
|
||||||
|
handle_execution_error(&e, input, output, attempt);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn handle_execution_error(e: &anyhow::Error, input: &str, output: &SimpleOutput, attempt: u32) {
|
||||||
|
// Enhanced error logging with detailed information
|
||||||
|
error!("=== TASK EXECUTION ERROR ===");
|
||||||
|
error!("Error: {}", e);
|
||||||
|
if attempt > 1 {
|
||||||
|
error!("Failed after {} attempts", attempt);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log error chain
|
||||||
|
let mut source = e.source();
|
||||||
|
let mut depth = 1;
|
||||||
|
while let Some(err) = source {
|
||||||
|
error!(" Caused by [{}]: {}", depth, err);
|
||||||
|
source = err.source();
|
||||||
|
depth += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Log additional context
|
||||||
|
error!("Task input: {}", input);
|
||||||
|
error!("Error type: {}", std::any::type_name_of_val(&e));
|
||||||
|
|
||||||
|
// Display user-friendly error message
|
||||||
|
output.print(&format!("❌ Error: {}", e));
|
||||||
|
|
||||||
|
// If it's a stream error, provide helpful guidance
|
||||||
|
if e.to_string().contains("No response received") || e.to_string().contains("timed out") {
|
||||||
|
output.print("💡 This may be a temporary issue. Please try again or check the logs for more details.");
|
||||||
|
output.print(" Log files are saved in the 'logs/' directory.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn display_context_progress<W: UiWriter>(agent: &Agent<W>, output: &SimpleOutput) {
|
fn display_context_progress<W: UiWriter>(agent: &Agent<W>, output: &SimpleOutput) {
|
||||||
let context = agent.get_context_window();
|
let context = agent.get_context_window();
|
||||||
output.print_context(
|
output.print_context(
|
||||||
@@ -728,50 +799,63 @@ async fn run_autonomous(
|
|||||||
output.print("📋 Requirements loaded from requirements.md");
|
output.print("📋 Requirements loaded from requirements.md");
|
||||||
output.print("🔄 Starting coach-player feedback loop...");
|
output.print("🔄 Starting coach-player feedback loop...");
|
||||||
|
|
||||||
|
// Check if implementation files already exist
|
||||||
|
let skip_first_player = project.has_implementation_files();
|
||||||
|
if skip_first_player {
|
||||||
|
output.print("📂 Detected existing implementation files in workspace");
|
||||||
|
output.print("⏭️ Skipping first player turn - proceeding directly to coach review");
|
||||||
|
} else {
|
||||||
|
output.print("📂 No existing implementation files detected");
|
||||||
|
output.print("🎯 Starting with player implementation");
|
||||||
|
}
|
||||||
|
|
||||||
let mut turn = 1;
|
let mut turn = 1;
|
||||||
let mut coach_feedback = String::new();
|
let mut coach_feedback = String::new();
|
||||||
let mut implementation_approved = false;
|
let mut implementation_approved = false;
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
output.print(&format!(
|
// Skip player turn if it's the first turn and implementation files exist
|
||||||
"\n=== TURN {}/{} - PLAYER MODE ===",
|
if !(turn == 1 && skip_first_player) {
|
||||||
turn, max_turns
|
output.print(&format!(
|
||||||
));
|
"\n=== TURN {}/{} - PLAYER MODE ===",
|
||||||
|
turn, max_turns
|
||||||
|
));
|
||||||
|
|
||||||
// Player mode: implement requirements (with coach feedback if available)
|
// Player mode: implement requirements (with coach feedback if available)
|
||||||
let player_prompt = if coach_feedback.is_empty() {
|
let player_prompt = if coach_feedback.is_empty() {
|
||||||
format!(
|
format!(
|
||||||
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.",
|
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.",
|
||||||
requirements
|
requirements
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
format!(
|
format!(
|
||||||
"You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.",
|
"You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.",
|
||||||
coach_feedback, requirements
|
coach_feedback, requirements
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
output.print("🎯 Starting player implementation...");
|
output.print("🎯 Starting player implementation...");
|
||||||
|
|
||||||
// Execute player task and handle the result properly
|
// Execute player task and handle the result properly
|
||||||
match agent
|
match agent
|
||||||
.execute_task_with_timing(&player_prompt, None, false, show_prompt, show_code, true)
|
.execute_task_with_timing(&player_prompt, None, false, show_prompt, show_code, true)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
Ok(player_result) => {
|
Ok(result) => {
|
||||||
// Display player's implementation result
|
// Display player's implementation result
|
||||||
output.print("📝 Player implementation completed:");
|
output.print("📝 Player implementation completed:");
|
||||||
output.print_markdown(&player_result);
|
output.print_markdown(&result.response);
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
output.print(&format!("❌ Player implementation failed: {}", e));
|
output.print(&format!("❌ Player implementation failed: {}", e));
|
||||||
// Continue to coach review even if player had an error
|
// Continue to coach review even if player had an error
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Give some time for file operations to complete
|
||||||
|
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Give some time for file operations to complete
|
|
||||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
|
||||||
|
|
||||||
// Create a new agent instance for coach mode to ensure fresh context
|
// Create a new agent instance for coach mode to ensure fresh context
|
||||||
let config = g3_config::Config::load(None)?;
|
let config = g3_config::Config::load(None)?;
|
||||||
let ui_writer = ConsoleUiWriter::new();
|
let ui_writer = ConsoleUiWriter::new();
|
||||||
@@ -822,49 +906,14 @@ Remember: Be thorough in your review but concise in your feedback. APPROVE if th
|
|||||||
|
|
||||||
output.print("🎓 Coach review completed");
|
output.print("🎓 Coach review completed");
|
||||||
|
|
||||||
// Extract the actual feedback text from the coach result
|
// Extract the coach feedback using the semantic extraction from TaskResult
|
||||||
// IMPORTANT: We only want the final_output summary, not the entire conversation
|
let coach_feedback_text = coach_result.extract_last_block();
|
||||||
// The coach_result contains the full conversation including file reads, analysis, etc.
|
|
||||||
// We need to extract ONLY the final_output content
|
|
||||||
|
|
||||||
let coach_feedback_text = {
|
|
||||||
// Look for the final_output content in the coach's response
|
|
||||||
// In autonomous mode, the final_output is returned without the "=> " prefix
|
|
||||||
// The coach result should end with the summary content from final_output
|
|
||||||
|
|
||||||
// First, remove any timing information at the end
|
|
||||||
let content_without_timing = if let Some(timing_pos) = coach_result.rfind("\n⏱️") {
|
|
||||||
&coach_result[..timing_pos]
|
|
||||||
} else {
|
|
||||||
&coach_result
|
|
||||||
};
|
|
||||||
|
|
||||||
// The final_output content is typically the last substantial text in the response
|
|
||||||
// after all tool executions. Look for it after the last tool execution marker
|
|
||||||
// or take the last paragraph if no clear markers
|
|
||||||
|
|
||||||
// Split by double newlines to find the last substantial block
|
|
||||||
let blocks: Vec<&str> = content_without_timing.split("\n\n").collect();
|
|
||||||
|
|
||||||
// Find the last non-empty block that isn't just whitespace
|
|
||||||
let final_block = blocks.iter()
|
|
||||||
.rev()
|
|
||||||
.find(|block| !block.trim().is_empty())
|
|
||||||
.map(|block| block.trim().to_string())
|
|
||||||
.unwrap_or_else(|| {
|
|
||||||
// Fallback: if we can't find a clear block, take the whole thing
|
|
||||||
// but this shouldn't happen if the coach properly calls final_output
|
|
||||||
content_without_timing.trim().to_string()
|
|
||||||
});
|
|
||||||
|
|
||||||
final_block
|
|
||||||
};
|
|
||||||
|
|
||||||
// Log the size of the feedback for debugging
|
// Log the size of the feedback for debugging
|
||||||
info!(
|
info!(
|
||||||
"Coach feedback extracted: {} characters (from {} total)",
|
"Coach feedback extracted: {} characters (from {} total)",
|
||||||
coach_feedback_text.len(),
|
coach_feedback_text.len(),
|
||||||
coach_result.len()
|
coach_result.response.len()
|
||||||
);
|
);
|
||||||
|
|
||||||
// Check if we got empty feedback (this can happen if the coach doesn't call final_output)
|
// Check if we got empty feedback (this can happen if the coach doesn't call final_output)
|
||||||
@@ -878,7 +927,7 @@ Remember: Be thorough in your review but concise in your feedback. APPROVE if th
|
|||||||
output.print(&format!("Coach feedback:\n{}", coach_feedback_text));
|
output.print(&format!("Coach feedback:\n{}", coach_feedback_text));
|
||||||
|
|
||||||
// Check if coach approved the implementation
|
// Check if coach approved the implementation
|
||||||
if coach_feedback_text.contains("IMPLEMENTATION_APPROVED") {
|
if coach_result.is_approved() {
|
||||||
output.print("\n=== SESSION COMPLETED - IMPLEMENTATION APPROVED ===");
|
output.print("\n=== SESSION COMPLETED - IMPLEMENTATION APPROVED ===");
|
||||||
output.print("✅ Coach approved the implementation!");
|
output.print("✅ Coach approved the implementation!");
|
||||||
implementation_approved = true;
|
implementation_approved = true;
|
||||||
|
|||||||
@@ -206,7 +206,12 @@ pub fn classify_error(error: &anyhow::Error) -> ErrorType {
|
|||||||
return ErrorType::Recoverable(RecoverableError::ModelBusy);
|
return ErrorType::Recoverable(RecoverableError::ModelBusy);
|
||||||
}
|
}
|
||||||
|
|
||||||
if error_str.contains("timeout") || error_str.contains("timed out") {
|
// Enhanced timeout detection - check for various timeout patterns
|
||||||
|
if error_str.contains("timeout") ||
|
||||||
|
error_str.contains("timed out") ||
|
||||||
|
error_str.contains("operation timed out") ||
|
||||||
|
error_str.contains("request or response body error") || // Common timeout pattern
|
||||||
|
error_str.contains("stream error") && error_str.contains("timed out") {
|
||||||
return ErrorType::Recoverable(RecoverableError::Timeout);
|
return ErrorType::Recoverable(RecoverableError::Timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -2,8 +2,7 @@
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::super::error_handling::*;
|
use crate::error_handling::*;
|
||||||
use anyhow::anyhow;
|
|
||||||
use std::sync::atomic::{AtomicU32, Ordering};
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
@@ -28,7 +27,7 @@ mod tests {
|
|||||||
let count = counter.fetch_add(1, Ordering::SeqCst);
|
let count = counter.fetch_add(1, Ordering::SeqCst);
|
||||||
if count < 2 {
|
if count < 2 {
|
||||||
// Fail with recoverable error on first two attempts
|
// Fail with recoverable error on first two attempts
|
||||||
Err(anyhow!("Rate limit exceeded"))
|
Err(anyhow::anyhow!("Rate limit exceeded"))
|
||||||
} else {
|
} else {
|
||||||
// Succeed on third attempt
|
// Succeed on third attempt
|
||||||
Ok("Success")
|
Ok("Success")
|
||||||
@@ -65,7 +64,7 @@ mod tests {
|
|||||||
async move {
|
async move {
|
||||||
counter.fetch_add(1, Ordering::SeqCst);
|
counter.fetch_add(1, Ordering::SeqCst);
|
||||||
// Always fail with non-recoverable error
|
// Always fail with non-recoverable error
|
||||||
Err(anyhow!("Invalid API key"))
|
Err(anyhow::anyhow!("Invalid API key"))
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
&context,
|
&context,
|
||||||
@@ -97,7 +96,7 @@ mod tests {
|
|||||||
async move {
|
async move {
|
||||||
counter.fetch_add(1, Ordering::SeqCst);
|
counter.fetch_add(1, Ordering::SeqCst);
|
||||||
// Always fail with recoverable error
|
// Always fail with recoverable error
|
||||||
Err(anyhow!("Network connection failed"))
|
Err(anyhow::anyhow!("Network connection failed"))
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
&context,
|
&context,
|
||||||
|
|||||||
@@ -1,6 +1,11 @@
|
|||||||
pub mod error_handling;
|
pub mod error_handling;
|
||||||
pub mod project;
|
pub mod project;
|
||||||
pub mod ui_writer;
|
pub mod ui_writer;
|
||||||
|
pub mod task_result;
|
||||||
|
pub use task_result::TaskResult;
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod task_result_comprehensive_tests;
|
||||||
use crate::ui_writer::UiWriter;
|
use crate::ui_writer::UiWriter;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@@ -565,7 +570,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
description: &str,
|
description: &str,
|
||||||
language: Option<&str>,
|
language: Option<&str>,
|
||||||
_auto_execute: bool,
|
_auto_execute: bool,
|
||||||
) -> Result<String> {
|
) -> Result<TaskResult> {
|
||||||
self.execute_task_with_options(description, language, false, false, false)
|
self.execute_task_with_options(description, language, false, false, false)
|
||||||
.await
|
.await
|
||||||
}
|
}
|
||||||
@@ -577,7 +582,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
_auto_execute: bool,
|
_auto_execute: bool,
|
||||||
show_prompt: bool,
|
show_prompt: bool,
|
||||||
show_code: bool,
|
show_code: bool,
|
||||||
) -> Result<String> {
|
) -> Result<TaskResult> {
|
||||||
self.execute_task_with_timing(
|
self.execute_task_with_timing(
|
||||||
description,
|
description,
|
||||||
language,
|
language,
|
||||||
@@ -597,7 +602,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
show_prompt: bool,
|
show_prompt: bool,
|
||||||
show_code: bool,
|
show_code: bool,
|
||||||
show_timing: bool,
|
show_timing: bool,
|
||||||
) -> Result<String> {
|
) -> Result<TaskResult> {
|
||||||
// Create a cancellation token that never cancels for backward compatibility
|
// Create a cancellation token that never cancels for backward compatibility
|
||||||
let cancellation_token = CancellationToken::new();
|
let cancellation_token = CancellationToken::new();
|
||||||
self.execute_task_with_timing_cancellable(
|
self.execute_task_with_timing_cancellable(
|
||||||
@@ -621,7 +626,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
show_code: bool,
|
show_code: bool,
|
||||||
show_timing: bool,
|
show_timing: bool,
|
||||||
cancellation_token: CancellationToken,
|
cancellation_token: CancellationToken,
|
||||||
) -> Result<String> {
|
) -> Result<TaskResult> {
|
||||||
// Execute the task directly without splitting
|
// Execute the task directly without splitting
|
||||||
self.execute_single_task(
|
self.execute_single_task(
|
||||||
description,
|
description,
|
||||||
@@ -640,7 +645,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
_show_code: bool,
|
_show_code: bool,
|
||||||
show_timing: bool,
|
show_timing: bool,
|
||||||
cancellation_token: CancellationToken,
|
cancellation_token: CancellationToken,
|
||||||
) -> Result<String> {
|
) -> Result<TaskResult> {
|
||||||
// Generate session ID based on the initial prompt if this is a new session
|
// Generate session ID based on the initial prompt if this is a new session
|
||||||
if self.session_id.is_none() {
|
if self.session_id.is_none() {
|
||||||
self.session_id = Some(self.generate_session_id(description));
|
self.session_id = Some(self.generate_session_id(description));
|
||||||
@@ -778,7 +783,7 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
// Time the LLM call with cancellation support and streaming
|
// Time the LLM call with cancellation support and streaming
|
||||||
let llm_start = Instant::now();
|
let llm_start = Instant::now();
|
||||||
let result = tokio::select! {
|
let result = tokio::select! {
|
||||||
result = self.stream_completion(request) => result,
|
result = self.stream_completion(request, show_timing) => result,
|
||||||
_ = cancellation_token.cancelled() => {
|
_ = cancellation_token.cancelled() => {
|
||||||
// Save context window on cancellation
|
// Save context window on cancellation
|
||||||
self.save_context_window("cancelled");
|
self.save_context_window("cancelled");
|
||||||
@@ -786,8 +791,8 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let (response_content, think_time) = match result {
|
let task_result = match result {
|
||||||
Ok(content) => content,
|
Ok(result) => result,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
// Save context window on error
|
// Save context window on error
|
||||||
self.save_context_window("error");
|
self.save_context_window("error");
|
||||||
@@ -795,7 +800,8 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let llm_duration = llm_start.elapsed();
|
let response_content = task_result.response.clone();
|
||||||
|
let _llm_duration = llm_start.elapsed();
|
||||||
|
|
||||||
// Create a mock usage for now (we'll need to track this during streaming)
|
// Create a mock usage for now (we'll need to track this during streaming)
|
||||||
let mock_usage = g3_providers::Usage {
|
let mock_usage = g3_providers::Usage {
|
||||||
@@ -822,18 +828,8 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
// Save context window at the end of successful interaction
|
// Save context window at the end of successful interaction
|
||||||
self.save_context_window("completed");
|
self.save_context_window("completed");
|
||||||
|
|
||||||
// With streaming tool execution, we don't need separate code execution
|
// Return the task result which already includes timing if needed
|
||||||
// The tools are already executed during streaming
|
Ok(task_result)
|
||||||
if show_timing {
|
|
||||||
let timing_summary = format!(
|
|
||||||
"\n⏱️ {} | 💭 {}",
|
|
||||||
Self::format_duration(llm_duration),
|
|
||||||
Self::format_duration(think_time)
|
|
||||||
);
|
|
||||||
Ok(format!("{}\n{}", response_content, timing_summary))
|
|
||||||
} else {
|
|
||||||
Ok(response_content)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate a session ID based on the initial prompt
|
/// Generate a session ID based on the initial prompt
|
||||||
@@ -919,8 +915,9 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
async fn stream_completion(
|
async fn stream_completion(
|
||||||
&mut self,
|
&mut self,
|
||||||
request: CompletionRequest,
|
request: CompletionRequest,
|
||||||
) -> Result<(String, Duration)> {
|
show_timing: bool,
|
||||||
self.stream_completion_with_tools(request).await
|
) -> Result<TaskResult> {
|
||||||
|
self.stream_completion_with_tools(request, show_timing).await
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create tool definitions for native tool calling providers
|
/// Create tool definitions for native tool calling providers
|
||||||
@@ -1076,7 +1073,8 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
async fn stream_completion_with_tools(
|
async fn stream_completion_with_tools(
|
||||||
&mut self,
|
&mut self,
|
||||||
mut request: CompletionRequest,
|
mut request: CompletionRequest,
|
||||||
) -> Result<(String, Duration)> {
|
show_timing: bool,
|
||||||
|
) -> Result<TaskResult> {
|
||||||
use crate::error_handling::ErrorContext;
|
use crate::error_handling::ErrorContext;
|
||||||
use tokio_stream::StreamExt;
|
use tokio_stream::StreamExt;
|
||||||
|
|
||||||
@@ -1473,9 +1471,17 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
self.ui_writer.println("");
|
self.ui_writer.println("");
|
||||||
let ttft =
|
let _ttft =
|
||||||
first_token_time.unwrap_or_else(|| stream_start.elapsed());
|
first_token_time.unwrap_or_else(|| stream_start.elapsed());
|
||||||
return Ok((full_response, ttft));
|
|
||||||
|
// Add timing if needed
|
||||||
|
let final_response = if show_timing {
|
||||||
|
format!("{}\n\n⏱️ {} | 💭 {}", full_response, Self::format_duration(total_execution_time), Self::format_duration(_ttft))
|
||||||
|
} else {
|
||||||
|
full_response
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(TaskResult::new(final_response, self.context_window.clone()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Closure marker with timing
|
// Closure marker with timing
|
||||||
@@ -1680,9 +1686,17 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
}
|
}
|
||||||
|
|
||||||
self.ui_writer.println("");
|
self.ui_writer.println("");
|
||||||
let ttft =
|
let _ttft =
|
||||||
first_token_time.unwrap_or_else(|| stream_start.elapsed());
|
first_token_time.unwrap_or_else(|| stream_start.elapsed());
|
||||||
return Ok((full_response, ttft));
|
|
||||||
|
// Add timing if needed
|
||||||
|
let final_response = if show_timing {
|
||||||
|
format!("{}\n\n⏱️ {} | 💭 {}", full_response, Self::format_duration(total_execution_time), Self::format_duration(_ttft))
|
||||||
|
} else {
|
||||||
|
full_response
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(TaskResult::new(final_response, self.context_window.clone()));
|
||||||
}
|
}
|
||||||
break; // Tool was executed, break to continue outer loop
|
break; // Tool was executed, break to continue outer loop
|
||||||
}
|
}
|
||||||
@@ -1747,16 +1761,32 @@ The tool will execute immediately and you'll receive the result (success or erro
|
|||||||
self.ui_writer.println("");
|
self.ui_writer.println("");
|
||||||
}
|
}
|
||||||
|
|
||||||
let ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed());
|
let _ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed());
|
||||||
return Ok((full_response, ttft));
|
|
||||||
|
// Add timing if needed
|
||||||
|
let final_response = if show_timing {
|
||||||
|
format!("{}\n\n⏱️ {} | 💭 {}", full_response, Self::format_duration(total_execution_time), Self::format_duration(_ttft))
|
||||||
|
} else {
|
||||||
|
full_response
|
||||||
|
};
|
||||||
|
|
||||||
|
return Ok(TaskResult::new(final_response, self.context_window.clone()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Continue the loop to start a new stream with updated context
|
// Continue the loop to start a new stream with updated context
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we exit the loop due to max iterations
|
// If we exit the loop due to max iterations
|
||||||
let ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed());
|
let _ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed());
|
||||||
Ok((full_response, ttft))
|
|
||||||
|
// Add timing if needed
|
||||||
|
let final_response = if show_timing {
|
||||||
|
format!("{}\n\n⏱️ {} | 💭 {}", full_response, Self::format_duration(total_execution_time), Self::format_duration(_ttft))
|
||||||
|
} else {
|
||||||
|
full_response
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(TaskResult::new(final_response, self.context_window.clone()))
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn execute_tool(&self, tool_call: &ToolCall) -> Result<String> {
|
async fn execute_tool(&self, tool_call: &ToolCall) -> Result<String> {
|
||||||
|
|||||||
@@ -81,6 +81,48 @@ impl Project {
|
|||||||
self.requirements_path.is_some()
|
self.requirements_path.is_some()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if implementation files exist in the workspace
|
||||||
|
pub fn has_implementation_files(&self) -> bool {
|
||||||
|
self.check_dir_for_implementation_files(&self.workspace_dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recursively check a directory for implementation files
|
||||||
|
fn check_dir_for_implementation_files(&self, dir: &Path) -> bool {
|
||||||
|
// Common source file extensions
|
||||||
|
let extensions = vec![
|
||||||
|
"swift", "rs", "py", "js", "ts", "java", "cpp", "c",
|
||||||
|
"go", "rb", "php", "cs", "kt", "scala", "m", "h"
|
||||||
|
];
|
||||||
|
|
||||||
|
if let Ok(entries) = std::fs::read_dir(dir) {
|
||||||
|
for entry in entries.flatten() {
|
||||||
|
let path = entry.path();
|
||||||
|
|
||||||
|
if path.is_file() {
|
||||||
|
// Check if it's a source file
|
||||||
|
if let Some(ext) = path.extension() {
|
||||||
|
if let Some(ext_str) = ext.to_str() {
|
||||||
|
if extensions.contains(&ext_str) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if path.is_dir() {
|
||||||
|
// Skip hidden directories and common non-source directories
|
||||||
|
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
||||||
|
if !name.starts_with('.') && name != "logs" && name != "target" && name != "node_modules" {
|
||||||
|
// Recursively check subdirectories
|
||||||
|
if self.check_dir_for_implementation_files(&path) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
/// Read the requirements file content
|
/// Read the requirements file content
|
||||||
pub fn read_requirements(&self) -> Result<Option<String>> {
|
pub fn read_requirements(&self) -> Result<Option<String>> {
|
||||||
if let Some(ref path) = self.requirements_path {
|
if let Some(ref path) = self.requirements_path {
|
||||||
|
|||||||
97
crates/g3-core/src/task_result.rs
Normal file
97
crates/g3-core/src/task_result.rs
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
use crate::ContextWindow;
|
||||||
|
|
||||||
|
/// Result of a task execution containing both the response and the context window
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct TaskResult {
|
||||||
|
/// The actual response content from the task execution
|
||||||
|
pub response: String,
|
||||||
|
/// The complete context window at the time of completion
|
||||||
|
pub context_window: ContextWindow,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TaskResult {
|
||||||
|
pub fn new(response: String, context_window: ContextWindow) -> Self {
|
||||||
|
Self {
|
||||||
|
response,
|
||||||
|
context_window,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extract the last block from the response (for coach feedback in autonomous mode)
|
||||||
|
/// This looks for the final_output content which is the last substantial block
|
||||||
|
pub fn extract_last_block(&self) -> String {
|
||||||
|
// Remove any timing information at the end
|
||||||
|
let content_without_timing = if let Some(timing_pos) = self.response.rfind("\n⏱️") {
|
||||||
|
&self.response[..timing_pos]
|
||||||
|
} else {
|
||||||
|
&self.response
|
||||||
|
};
|
||||||
|
|
||||||
|
// Split by double newlines to find the last substantial block
|
||||||
|
let blocks: Vec<&str> = content_without_timing.split("\n\n").collect();
|
||||||
|
|
||||||
|
// Find the last non-empty block that isn't just whitespace
|
||||||
|
blocks.iter()
|
||||||
|
.rev()
|
||||||
|
.find(|block| !block.trim().is_empty())
|
||||||
|
.map(|block| block.trim().to_string())
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
// Fallback: if we can't find a clear block, take the whole thing
|
||||||
|
content_without_timing.trim().to_string()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the response contains an approval (for autonomous mode)
|
||||||
|
pub fn is_approved(&self) -> bool {
|
||||||
|
self.extract_last_block().contains("IMPLEMENTATION_APPROVED")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_last_block() {
|
||||||
|
// Test case 1: Response with timing info
|
||||||
|
let context_window = ContextWindow::new(1000);
|
||||||
|
let response_with_timing = "Some initial content\n\nFinal block content\n\n⏱️ 2.3s | 💭 1.2s".to_string();
|
||||||
|
let result = TaskResult::new(response_with_timing, context_window.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "Final block content");
|
||||||
|
|
||||||
|
// Test case 2: Response without timing
|
||||||
|
let response_no_timing = "Some initial content\n\nFinal block content".to_string();
|
||||||
|
let result = TaskResult::new(response_no_timing, context_window.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "Final block content");
|
||||||
|
|
||||||
|
// Test case 3: Response with IMPLEMENTATION_APPROVED
|
||||||
|
let response_approved = "Some content\n\nIMPLEMENTATION_APPROVED".to_string();
|
||||||
|
let result = TaskResult::new(response_approved, context_window.clone());
|
||||||
|
assert!(result.is_approved());
|
||||||
|
|
||||||
|
// Test case 4: Response without approval
|
||||||
|
let response_not_approved = "Some content\n\nNeeds more work".to_string();
|
||||||
|
let result = TaskResult::new(response_not_approved, context_window);
|
||||||
|
assert!(!result.is_approved());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_last_block_edge_cases() {
|
||||||
|
let context_window = ContextWindow::new(1000);
|
||||||
|
|
||||||
|
// Test empty response
|
||||||
|
let empty_response = "".to_string();
|
||||||
|
let result = TaskResult::new(empty_response, context_window.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "");
|
||||||
|
|
||||||
|
// Test single block
|
||||||
|
let single_block = "Just one block".to_string();
|
||||||
|
let result = TaskResult::new(single_block, context_window.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "Just one block");
|
||||||
|
|
||||||
|
// Test multiple empty blocks
|
||||||
|
let multiple_empty = "\n\n\n\nSome content\n\n\n\n".to_string();
|
||||||
|
let result = TaskResult::new(multiple_empty, context_window);
|
||||||
|
assert_eq!(result.extract_last_block(), "Some content");
|
||||||
|
}
|
||||||
|
}
|
||||||
276
crates/g3-core/src/task_result_comprehensive_tests.rs
Normal file
276
crates/g3-core/src/task_result_comprehensive_tests.rs
Normal file
@@ -0,0 +1,276 @@
|
|||||||
|
use crate::{ContextWindow, TaskResult};
|
||||||
|
use g3_providers::{Message, MessageRole};
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_task_result_basic_functionality() {
|
||||||
|
// Create a context window with some messages
|
||||||
|
let mut context = ContextWindow::new(10000);
|
||||||
|
context.add_message(Message {
|
||||||
|
role: MessageRole::User,
|
||||||
|
content: "Test message 1".to_string(),
|
||||||
|
});
|
||||||
|
context.add_message(Message {
|
||||||
|
role: MessageRole::Assistant,
|
||||||
|
content: "Response 1".to_string(),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create a TaskResult
|
||||||
|
let response = "This is the response\n\nFinal output block".to_string();
|
||||||
|
let result = TaskResult::new(response.clone(), context.clone());
|
||||||
|
|
||||||
|
// Test basic properties
|
||||||
|
assert_eq!(result.response, response);
|
||||||
|
assert_eq!(result.context_window.conversation_history.len(), 2);
|
||||||
|
assert_eq!(result.context_window.total_tokens, 10000);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_last_block_various_formats() {
|
||||||
|
let context = ContextWindow::new(1000);
|
||||||
|
|
||||||
|
// Test 1: Standard format with multiple blocks
|
||||||
|
let response1 = "First block\n\nSecond block\n\nThird block".to_string();
|
||||||
|
let result1 = TaskResult::new(response1, context.clone());
|
||||||
|
assert_eq!(result1.extract_last_block(), "Third block");
|
||||||
|
|
||||||
|
// Test 2: With timing information
|
||||||
|
let response2 = "Content\n\nFinal block\n\n⏱️ 2.3s | 💭 1.2s".to_string();
|
||||||
|
let result2 = TaskResult::new(response2, context.clone());
|
||||||
|
assert_eq!(result2.extract_last_block(), "Final block");
|
||||||
|
|
||||||
|
// Test 3: Single line response
|
||||||
|
let response3 = "Single line response".to_string();
|
||||||
|
let result3 = TaskResult::new(response3, context.clone());
|
||||||
|
assert_eq!(result3.extract_last_block(), "Single line response");
|
||||||
|
|
||||||
|
// Test 4: Empty response
|
||||||
|
let response4 = "".to_string();
|
||||||
|
let result4 = TaskResult::new(response4, context.clone());
|
||||||
|
assert_eq!(result4.extract_last_block(), "");
|
||||||
|
|
||||||
|
// Test 5: Only whitespace
|
||||||
|
let response5 = "\n\n\n \n\n".to_string();
|
||||||
|
let result5 = TaskResult::new(response5, context.clone());
|
||||||
|
assert_eq!(result5.extract_last_block(), "");
|
||||||
|
|
||||||
|
// Test 6: Multiple blocks with empty ones
|
||||||
|
let response6 = "First\n\n\n\n\n\nLast block here".to_string();
|
||||||
|
let result6 = TaskResult::new(response6, context.clone());
|
||||||
|
assert_eq!(result6.extract_last_block(), "Last block here");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_is_approved_detection() {
|
||||||
|
let context = ContextWindow::new(1000);
|
||||||
|
|
||||||
|
// Test approved cases
|
||||||
|
let approved_responses = vec![
|
||||||
|
"Analysis complete\n\nIMPLEMENTATION_APPROVED",
|
||||||
|
"Some content\n\nThe implementation is good. IMPLEMENTATION_APPROVED",
|
||||||
|
"IMPLEMENTATION_APPROVED",
|
||||||
|
"Review done\n\n✅ IMPLEMENTATION_APPROVED - All tests pass",
|
||||||
|
];
|
||||||
|
|
||||||
|
for response in approved_responses {
|
||||||
|
let result = TaskResult::new(response.to_string(), context.clone());
|
||||||
|
assert!(result.is_approved(), "Failed to detect approval in: {}", response);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test not approved cases
|
||||||
|
let not_approved_responses = vec![
|
||||||
|
"Needs more work",
|
||||||
|
"Implementation needs fixes",
|
||||||
|
"IMPLEMENTATION_REJECTED",
|
||||||
|
"Almost there but not APPROVED",
|
||||||
|
"",
|
||||||
|
];
|
||||||
|
|
||||||
|
for response in not_approved_responses {
|
||||||
|
let result = TaskResult::new(response.to_string(), context.clone());
|
||||||
|
assert!(!result.is_approved(), "Incorrectly detected approval in: {}", response);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_context_window_preservation() {
|
||||||
|
// Create a context window with specific state
|
||||||
|
let mut context = ContextWindow::new(5000);
|
||||||
|
context.used_tokens = 1234;
|
||||||
|
|
||||||
|
// Add some messages
|
||||||
|
for i in 0..5 {
|
||||||
|
context.add_message(Message {
|
||||||
|
role: if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant },
|
||||||
|
content: format!("Message {}", i),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create TaskResult
|
||||||
|
let result = TaskResult::new("Response".to_string(), context.clone());
|
||||||
|
|
||||||
|
// Verify context is preserved
|
||||||
|
assert_eq!(result.context_window.total_tokens, 5000);
|
||||||
|
assert!(result.context_window.used_tokens > 1234); // Should have increased
|
||||||
|
assert_eq!(result.context_window.conversation_history.len(), 5);
|
||||||
|
|
||||||
|
// Verify messages are preserved correctly
|
||||||
|
for i in 0..5 {
|
||||||
|
let is_user = matches!(result.context_window.conversation_history[i].role, MessageRole::User);
|
||||||
|
let expected_is_user = i % 2 == 0;
|
||||||
|
assert_eq!(is_user, expected_is_user, "Message {} has wrong role", i);
|
||||||
|
assert_eq!(result.context_window.conversation_history[i].content, format!("Message {}", i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_coach_feedback_extraction_scenarios() {
|
||||||
|
let context = ContextWindow::new(1000);
|
||||||
|
|
||||||
|
// Scenario 1: Coach feedback with file operations and analysis
|
||||||
|
let coach_response = r#"Reading file: src/main.rs
|
||||||
|
📄 File content (23 lines):
|
||||||
|
fn main() {
|
||||||
|
println!("Hello");
|
||||||
|
}
|
||||||
|
|
||||||
|
Analyzing implementation...
|
||||||
|
|
||||||
|
The implementation needs the following fixes:
|
||||||
|
1. Add error handling
|
||||||
|
2. Implement missing functions
|
||||||
|
3. Add tests"#;
|
||||||
|
|
||||||
|
let result = TaskResult::new(coach_response.to_string(), context.clone());
|
||||||
|
let feedback = result.extract_last_block();
|
||||||
|
assert!(feedback.contains("Add error handling"));
|
||||||
|
assert!(feedback.contains("Implement missing functions"));
|
||||||
|
assert!(feedback.contains("Add tests"));
|
||||||
|
|
||||||
|
// Scenario 2: Coach approval
|
||||||
|
let approval_response = r#"Checking compilation...
|
||||||
|
✅ Build successful
|
||||||
|
|
||||||
|
Running tests...
|
||||||
|
✅ All tests pass
|
||||||
|
|
||||||
|
IMPLEMENTATION_APPROVED"#;
|
||||||
|
|
||||||
|
let result = TaskResult::new(approval_response.to_string(), context.clone());
|
||||||
|
assert!(result.is_approved());
|
||||||
|
assert_eq!(result.extract_last_block(), "IMPLEMENTATION_APPROVED");
|
||||||
|
|
||||||
|
// Scenario 3: Complex feedback with timing
|
||||||
|
let complex_response = r#"Tool execution log...
|
||||||
|
|
||||||
|
Analysis complete.
|
||||||
|
|
||||||
|
The following issues were found:
|
||||||
|
- Memory leak in process_data()
|
||||||
|
- Missing input validation
|
||||||
|
|
||||||
|
⏱️ 5.2s | 💭 2.1s"#;
|
||||||
|
|
||||||
|
let result = TaskResult::new(complex_response.to_string(), context.clone());
|
||||||
|
let feedback = result.extract_last_block();
|
||||||
|
assert!(feedback.contains("Memory leak"));
|
||||||
|
assert!(feedback.contains("Missing input validation"));
|
||||||
|
assert!(!feedback.contains("⏱️")); // Timing should be stripped
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_edge_cases_and_special_characters() {
|
||||||
|
let context = ContextWindow::new(1000);
|
||||||
|
|
||||||
|
// Test with special characters and emojis
|
||||||
|
let response_with_emojis = "First part 🚀\n\n✅ Final part with emojis 🎉".to_string();
|
||||||
|
let result = TaskResult::new(response_with_emojis, context.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "✅ Final part with emojis 🎉");
|
||||||
|
|
||||||
|
// Test with code blocks
|
||||||
|
let response_with_code = "Explanation\n\n```rust\nfn main() {}\n```\n\nFinal comment".to_string();
|
||||||
|
let result = TaskResult::new(response_with_code, context.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "Final comment");
|
||||||
|
|
||||||
|
// Test with mixed newlines
|
||||||
|
let mixed_newlines = "Part 1\r\n\r\nPart 2\n\nPart 3".to_string();
|
||||||
|
let result = TaskResult::new(mixed_newlines, context.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "Part 3");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_large_response_handling() {
|
||||||
|
let context = ContextWindow::new(100000);
|
||||||
|
|
||||||
|
// Create a large response
|
||||||
|
let mut large_response = String::new();
|
||||||
|
for i in 0..100 {
|
||||||
|
large_response.push_str(&format!("Block {} with some content\n\n", i));
|
||||||
|
}
|
||||||
|
large_response.push_str("This is the final block after 100 other blocks");
|
||||||
|
|
||||||
|
let result = TaskResult::new(large_response, context);
|
||||||
|
assert_eq!(result.extract_last_block(), "This is the final block after 100 other blocks");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_concurrent_access() {
|
||||||
|
use std::thread;
|
||||||
|
|
||||||
|
let context = ContextWindow::new(1000);
|
||||||
|
let result = Arc::new(TaskResult::new(
|
||||||
|
"Concurrent test\n\nFinal block".to_string(),
|
||||||
|
context,
|
||||||
|
));
|
||||||
|
|
||||||
|
let mut handles = vec![];
|
||||||
|
|
||||||
|
// Spawn multiple threads to access the TaskResult
|
||||||
|
for _ in 0..10 {
|
||||||
|
let result_clone = Arc::clone(&result);
|
||||||
|
let handle = thread::spawn(move || {
|
||||||
|
// Each thread extracts the last block
|
||||||
|
let block = result_clone.extract_last_block();
|
||||||
|
assert_eq!(block, "Final block");
|
||||||
|
|
||||||
|
// Check approval status
|
||||||
|
assert!(!result_clone.is_approved());
|
||||||
|
});
|
||||||
|
handles.push(handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for all threads to complete
|
||||||
|
for handle in handles {
|
||||||
|
handle.join().unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
println!("Running TaskResult comprehensive tests...");
|
||||||
|
|
||||||
|
test_task_result_basic_functionality();
|
||||||
|
println!("✅ Basic functionality test passed");
|
||||||
|
|
||||||
|
test_extract_last_block_various_formats();
|
||||||
|
println!("✅ Extract last block test passed");
|
||||||
|
|
||||||
|
test_is_approved_detection();
|
||||||
|
println!("✅ Approval detection test passed");
|
||||||
|
|
||||||
|
test_context_window_preservation();
|
||||||
|
println!("✅ Context window preservation test passed");
|
||||||
|
|
||||||
|
test_coach_feedback_extraction_scenarios();
|
||||||
|
println!("✅ Coach feedback extraction test passed");
|
||||||
|
|
||||||
|
test_edge_cases_and_special_characters();
|
||||||
|
println!("✅ Edge cases test passed");
|
||||||
|
|
||||||
|
test_large_response_handling();
|
||||||
|
println!("✅ Large response handling test passed");
|
||||||
|
|
||||||
|
test_concurrent_access();
|
||||||
|
println!("✅ Concurrent access test passed");
|
||||||
|
|
||||||
|
println!("\n🎉 All TaskResult tests passed successfully!");
|
||||||
|
}
|
||||||
48
crates/g3-core/src/task_result_tests.rs
Normal file
48
crates/g3-core/src/task_result_tests.rs
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_last_block() {
|
||||||
|
// Test case 1: Response with timing info
|
||||||
|
let context_window = ContextWindow::new(1000);
|
||||||
|
let response_with_timing = "Some initial content\n\nFinal block content\n\n⏱️ 2.3s | 💭 1.2s".to_string();
|
||||||
|
let result = TaskResult::new(response_with_timing, context_window.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "Final block content");
|
||||||
|
|
||||||
|
// Test case 2: Response without timing
|
||||||
|
let response_no_timing = "Some initial content\n\nFinal block content".to_string();
|
||||||
|
let result = TaskResult::new(response_no_timing, context_window.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "Final block content");
|
||||||
|
|
||||||
|
// Test case 3: Response with IMPLEMENTATION_APPROVED
|
||||||
|
let response_approved = "Some content\n\nIMPLEMENTATION_APPROVED".to_string();
|
||||||
|
let result = TaskResult::new(response_approved, context_window.clone());
|
||||||
|
assert!(result.is_approved());
|
||||||
|
|
||||||
|
// Test case 4: Response without approval
|
||||||
|
let response_not_approved = "Some content\n\nNeeds more work".to_string();
|
||||||
|
let result = TaskResult::new(response_not_approved, context_window);
|
||||||
|
assert!(!result.is_approved());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_extract_last_block_edge_cases() {
|
||||||
|
let context_window = ContextWindow::new(1000);
|
||||||
|
|
||||||
|
// Test empty response
|
||||||
|
let empty_response = "".to_string();
|
||||||
|
let result = TaskResult::new(empty_response, context_window.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "");
|
||||||
|
|
||||||
|
// Test single block
|
||||||
|
let single_block = "Just one block".to_string();
|
||||||
|
let result = TaskResult::new(single_block, context_window.clone());
|
||||||
|
assert_eq!(result.extract_last_block(), "Just one block");
|
||||||
|
|
||||||
|
// Test multiple empty blocks
|
||||||
|
let multiple_empty = "\n\n\n\nSome content\n\n\n\n".to_string();
|
||||||
|
let result = TaskResult::new(multiple_empty, context_window);
|
||||||
|
assert_eq!(result.extract_last_block(), "Some content");
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user