Tune coach pickiness down

This commit is contained in:
Dhanji Prasanna
2025-10-17 10:19:36 +11:00
parent dd211fab1c
commit e42c76f3b9

View File

@@ -1,7 +1,5 @@
use anyhow::Result; use anyhow::Result;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
/// Extract coach feedback by reading from the coach agent's specific log file
/// Uses the coach agent's session ID to find the exact log file
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct TurnMetrics { struct TurnMetrics {
@@ -99,12 +97,19 @@ fn generate_turn_histogram(turn_metrics: &[TurnMetrics]) -> String {
histogram histogram
} }
fn extract_coach_feedback_from_logs(_coach_result: &g3_core::TaskResult, coach_agent: &g3_core::Agent<ConsoleUiWriter>, output: &SimpleOutput) -> Result<String> { /// Extract coach feedback by reading from the coach agent's specific log file
/// Uses the coach agent's session ID to find the exact log file
fn extract_coach_feedback_from_logs(
_coach_result: &g3_core::TaskResult,
coach_agent: &g3_core::Agent<ConsoleUiWriter>,
output: &SimpleOutput,
) -> Result<String> {
// CORRECT APPROACH: Get the session ID from the current coach agent // CORRECT APPROACH: Get the session ID from the current coach agent
// and read its specific log file directly // and read its specific log file directly
// Get the coach agent's session ID // Get the coach agent's session ID
let session_id = coach_agent.get_session_id() let session_id = coach_agent
.get_session_id()
.ok_or_else(|| anyhow::anyhow!("Coach agent has no session ID"))?; .ok_or_else(|| anyhow::anyhow!("Coach agent has no session ID"))?;
// Construct the log file path for this specific coach session // Construct the log file path for this specific coach session
@@ -122,7 +127,10 @@ fn extract_coach_feedback_from_logs(_coach_result: &g3_core::TaskResult, coach_a
if let Some(last_message) = messages.last() { if let Some(last_message) = messages.last() {
if let Some(content) = last_message.get("content") { if let Some(content) = last_message.get("content") {
if let Some(content_str) = content.as_str() { if let Some(content_str) = content.as_str() {
output.print(&format!("✅ Extracted coach feedback from session: {}", session_id)); output.print(&format!(
"✅ Extracted coach feedback from session: {}",
session_id
));
return Ok(content_str.to_string()); return Ok(content_str.to_string());
} }
} }
@@ -134,7 +142,10 @@ fn extract_coach_feedback_from_logs(_coach_result: &g3_core::TaskResult, coach_a
} }
} }
Err(anyhow::anyhow!("Could not extract feedback from coach session: {}", session_id)) Err(anyhow::anyhow!(
"Could not extract feedback from coach session: {}",
session_id
))
} }
use clap::Parser; use clap::Parser;
@@ -316,7 +327,8 @@ pub async fn run() -> Result<()> {
if !valid_providers.contains(&provider.as_str()) { if !valid_providers.contains(&provider.as_str()) {
return Err(anyhow::anyhow!( return Err(anyhow::anyhow!(
"Invalid provider '{}'. Valid options: {:?}", "Invalid provider '{}'. Valid options: {:?}",
provider, valid_providers provider,
valid_providers
)); ));
} }
} }
@@ -335,9 +347,21 @@ pub async fn run() -> Result<()> {
}; };
let mut agent = if cli.autonomous { let mut agent = if cli.autonomous {
Agent::new_autonomous_with_readme_and_quiet(config.clone(), ui_writer, combined_content.clone(), cli.quiet).await? Agent::new_autonomous_with_readme_and_quiet(
config.clone(),
ui_writer,
combined_content.clone(),
cli.quiet,
)
.await?
} else { } else {
Agent::new_with_readme_and_quiet(config.clone(), ui_writer, combined_content.clone(), cli.quiet).await? Agent::new_with_readme_and_quiet(
config.clone(),
ui_writer,
combined_content.clone(),
cli.quiet,
)
.await?
}; };
// Execute task, autonomous mode, or start interactive mode // Execute task, autonomous mode, or start interactive mode
@@ -374,7 +398,7 @@ pub async fn run() -> Result<()> {
if cli.retro { if cli.retro {
// Use retro terminal UI // Use retro terminal UI
run_interactive_retro( run_interactive_retro(
config, // Already has overrides applied config, // Already has overrides applied
cli.show_prompt, cli.show_prompt,
cli.show_code, cli.show_code,
cli.theme, cli.theme,
@@ -1119,7 +1143,10 @@ async fn run_autonomous(
output.print("❌ Error: requirements.md not found in workspace directory"); output.print("❌ Error: requirements.md not found in workspace directory");
output.print(" Please either:"); output.print(" Please either:");
output.print(" 1. Create a requirements.md file with your project requirements at:"); output.print(" 1. Create a requirements.md file with your project requirements at:");
output.print(&format!(" {}/requirements.md", project.workspace().display())); output.print(&format!(
" {}/requirements.md",
project.workspace().display()
));
output.print(" 2. Or use the --requirements flag to provide requirements text directly:"); output.print(" 2. Or use the --requirements flag to provide requirements text directly:");
output.print(" g3 --autonomous --requirements \"Your requirements here\""); output.print(" g3 --autonomous --requirements \"Your requirements here\"");
output.print(""); output.print("");
@@ -1254,11 +1281,17 @@ async fn run_autonomous(
// If there's no coach feedback on subsequent turns, this is an error // If there's no coach feedback on subsequent turns, this is an error
if coach_feedback.is_empty() { if coach_feedback.is_empty() {
if turn > 1 { if turn > 1 {
return Err(anyhow::anyhow!("Player mode error: No coach feedback received on turn {}", turn)); return Err(anyhow::anyhow!(
"Player mode error: No coach feedback received on turn {}",
turn
));
} }
output.print("📋 Player starting initial implementation (no prior coach feedback)"); output.print("📋 Player starting initial implementation (no prior coach feedback)");
} else { } else {
output.print(&format!("📋 Player received coach feedback ({} chars):", coach_feedback.len())); output.print(&format!(
"📋 Player received coach feedback ({} chars):",
coach_feedback.len()
));
output.print(&format!("{}", coach_feedback)); output.print(&format!("{}", coach_feedback));
} }
output.print(""); // Empty line for readability output.print(""); // Empty line for readability
@@ -1384,7 +1417,8 @@ async fn run_autonomous(
// Use the same config with overrides that was passed to the player agent // Use the same config with overrides that was passed to the player agent
let config = agent.get_config().clone(); let config = agent.get_config().clone();
let ui_writer = ConsoleUiWriter::new(); let ui_writer = ConsoleUiWriter::new();
let mut coach_agent = Agent::new_autonomous_with_readme_and_quiet(config, ui_writer, None, quiet).await?; let mut coach_agent =
Agent::new_autonomous_with_readme_and_quiet(config, ui_writer, None, quiet).await?;
// Ensure coach agent is also in the workspace directory // Ensure coach agent is also in the workspace directory
project.enter_workspace()?; project.enter_workspace()?;
@@ -1414,13 +1448,13 @@ CRITICAL INSTRUCTIONS:
3. Focus ONLY on what needs to be fixed or improved 3. Focus ONLY on what needs to be fixed or improved
4. Do NOT include your analysis process, file contents, or compilation output in the summary 4. Do NOT include your analysis process, file contents, or compilation output in the summary
If the implementation correctly meets all requirements and compiles without errors: If the implementation generally meets all requirements and compiles without errors:
- Call final_output with summary: 'IMPLEMENTATION_APPROVED' - Call final_output with summary: 'IMPLEMENTATION_APPROVED'
If improvements are needed: If improvements are needed:
- Call final_output with a brief summary listing ONLY the specific issues to fix - Call final_output with a brief summary listing ONLY the specific issues to fix
Remember: Be thorough in your review but concise in your feedback. APPROVE if the implementation works and generally fits the requirements.", Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and generally fits the requirements. Don't be picky.",
requirements requirements
); );
@@ -1531,7 +1565,8 @@ Remember: Be thorough in your review but concise in your feedback. APPROVE if th
let coach_result = coach_result_opt.unwrap(); let coach_result = coach_result_opt.unwrap();
// Extract the complete coach feedback from final_output // Extract the complete coach feedback from final_output
let coach_feedback_text = extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?; let coach_feedback_text =
extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?;
// Log the size of the feedback for debugging // Log the size of the feedback for debugging
info!( info!(