From 2008a81193ae9e44e19818160a3982964d2aa82c Mon Sep 17 00:00:00 2001 From: Dhanji Prasanna Date: Mon, 20 Oct 2025 14:12:08 +1100 Subject: [PATCH] fix to pass feedback to player (broken by todo system) --- crates/g3-cli/src/lib.rs | 142 ++++++++++++++++++++++++++++++++------ crates/g3-core/src/lib.rs | 36 ++++++++-- 2 files changed, 152 insertions(+), 26 deletions(-) diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index bf1aa84..3b1a50d 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -100,17 +100,14 @@ fn generate_turn_histogram(turn_metrics: &[TurnMetrics]) -> String { /// Extract coach feedback by reading from the coach agent's specific log file /// Uses the coach agent's session ID to find the exact log file fn extract_coach_feedback_from_logs( - _coach_result: &g3_core::TaskResult, + coach_result: &g3_core::TaskResult, coach_agent: &g3_core::Agent, output: &SimpleOutput, -) -> Result { - // CORRECT APPROACH: Get the session ID from the current coach agent - // and read its specific log file directly - +) -> String { // Get the coach agent's session ID let session_id = coach_agent .get_session_id() - .ok_or_else(|| anyhow::anyhow!("Coach agent has no session ID"))?; + .expect("Coach agent has no session ID"); // Construct the log file path for this specific coach session let logs_dir = std::path::Path::new("logs"); @@ -123,15 +120,75 @@ fn extract_coach_feedback_from_logs( if let Some(context_window) = log_json.get("context_window") { if let Some(conversation_history) = context_window.get("conversation_history") { if let Some(messages) = conversation_history.as_array() { - // Simply get the last message content - this is the coach's final feedback - if let Some(last_message) = messages.last() { - if let Some(content) = last_message.get("content") { - if let Some(content_str) = content.as_str() { - output.print(&format!( - "✅ Extracted coach feedback from session: {}", - session_id - )); - return Ok(content_str.to_string()); + // Look for the last assistant message (regardless of tool used) + for message in messages.iter().rev() { + if let Some(role) = message.get("role") { + if role.as_str() == Some("assistant") { + if let Some(content) = message.get("content") { + if let Some(content_str) = content.as_str() { + // First, check if this is plain text feedback (no tool call) + // This happens when the coach returns final feedback directly + if !content_str.contains("{\"tool\"") { + let trimmed = content_str.trim(); + if !trimmed.is_empty() { + output.print(&format!( + "✅ Extracted coach feedback from session: {} ({} chars) [plain text]", + session_id, + trimmed.len() + )); + return trimmed.to_string(); + } + } + + // Look for ANY tool call in the message + // Pattern: {"tool": "...", "args": {...}} + if let Some(tool_start) = content_str.find("{\"tool\"") { + let json_part = &content_str[tool_start..]; + + // Find the end of the JSON object + if let Some(json_end) = find_json_end(json_part) { + let json_str = &json_part[..json_end]; + + if let Ok(tool_call) = serde_json::from_str::(json_str) { + if let Some(args) = tool_call.get("args") { + // Try to extract feedback from different possible fields + let feedback = if let Some(summary) = args.get("summary") { + // final_output tool uses "summary" + summary.as_str().map(|s| s.to_string()) + } else if let Some(content) = args.get("content") { + // todo_write and other tools might use "content" + content.as_str().map(|s| s.to_string()) + } else { + // Fallback: use the entire args as JSON string + Some(serde_json::to_string_pretty(args).unwrap_or_default()) + }; + + if let Some(feedback_str) = feedback { + if !feedback_str.trim().is_empty() { + output.print(&format!( + "✅ Extracted coach feedback from session: {} ({} chars)", + session_id, + feedback_str.len() + )); + + // Validate feedback length + if feedback_str.len() < 80 && !feedback_str.contains("IMPLEMENTATION_APPROVED") { + panic!( + "Coach feedback is too short ({} chars): '{}'", + feedback_str.len(), + feedback_str + ); + } + + return feedback_str; + } + } + } + } + } + } + } + } } } } @@ -142,10 +199,47 @@ fn extract_coach_feedback_from_logs( } } - Err(anyhow::anyhow!( - "Could not extract feedback from coach session: {}", - session_id - )) + // If we couldn't extract from logs, panic with detailed error + panic!( + "CRITICAL: Could not extract coach feedback from session: {}\n\ + Log file path: {:?}\n\ + Log file exists: {}\n\ + This indicates the coach did not call any tool or the log is corrupted.\n\ + Coach result response length: {} chars", + session_id, + log_file_path, + log_file_path.exists(), + coach_result.response.len() + ); +} + +/// Helper function to find the end of a JSON object using brace counting +fn find_json_end(json_str: &str) -> Option { + let mut depth = 0; + let mut in_string = false; + let mut escape_next = false; + + for (i, ch) in json_str.char_indices() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' if in_string => escape_next = true, + '"' => in_string = !in_string, + '{' if !in_string => depth += 1, + '}' if !in_string => { + depth -= 1; + if depth == 0 { + return Some(i + 1); + } + } + _ => {} + } + } + + None } use clap::Parser; @@ -1255,6 +1349,10 @@ async fn run_autonomous( loop { let turn_start_time = Instant::now(); let turn_start_tokens = agent.get_context_window().used_tokens; + + // Reset filter suppression state at the start of each turn + g3_core::fixed_filter_json::reset_fixed_json_tool_state(); + // Skip player turn if it's the first turn and implementation files exist if !(turn == 1 && skip_first_player) { output.print(&format!( @@ -1416,6 +1514,10 @@ async fn run_autonomous( // Create a new agent instance for coach mode to ensure fresh context // Use the same config with overrides that was passed to the player agent let config = agent.get_config().clone(); + + // Reset filter suppression state before creating coach agent + g3_core::fixed_filter_json::reset_fixed_json_tool_state(); + let ui_writer = ConsoleUiWriter::new(); let mut coach_agent = Agent::new_autonomous_with_readme_and_quiet(config, ui_writer, None, quiet).await?; @@ -1566,7 +1668,7 @@ Remember: Be clear in your review and concise in your feedback. APPROVE if the i // Extract the complete coach feedback from final_output let coach_feedback_text = - extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?; + extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output); // Log the size of the feedback for debugging info!( diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 3268841..3050f1e 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -8,7 +8,8 @@ pub use task_result::TaskResult; mod task_result_comprehensive_tests; use crate::ui_writer::UiWriter; -mod fixed_filter_json; +// Make fixed_filter_json public so it can be accessed from g3-cli +pub mod fixed_filter_json; #[cfg(test)] mod fixed_filter_tests; @@ -1688,6 +1689,9 @@ The tool will execute immediately and you'll receive the result (success or erro .replace("[/INST]", "") .replace("<>", ""); + // Store the raw content BEFORE filtering for the context window log + let raw_content_for_log = clean_content.clone(); + // Filter out JSON tool calls from the display let filtered_content = fixed_filter_json::fixed_filter_json_tool_calls(&clean_content); @@ -1858,20 +1862,20 @@ The tool will execute immediately and you'll receive the result (success or erro self.ui_writer.print_agent_prompt(); } - // Add the tool call and result to the context window - // Only include the text content if it's not already in full_response - let tool_message = if !full_response.contains(final_display_content) { + // Add the tool call and result to the context window using RAW unfiltered content + // This ensures the log file contains the true raw content including JSON tool calls + let tool_message = if !full_response.contains(final_display_content) && !raw_content_for_log.trim().is_empty() { Message { role: MessageRole::Assistant, content: format!( "{}\n\n{{\"tool\": \"{}\", \"args\": {}}}", - final_display_content.trim(), + raw_content_for_log.trim(), tool_call.tool, tool_call.args ), } } else { - // If we've already added the text, just include the tool call + // If we've already added the text or there's no text, just include the tool call Message { role: MessageRole::Assistant, content: format!( @@ -2187,6 +2191,26 @@ The tool will execute immediately and you'll receive the result (success or erro let _ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed()); + // Add the RAW unfiltered response to context window before returning + // This ensures the log contains the true raw content including any JSON + if !full_response.trim().is_empty() { + // Get the raw text from the parser (before filtering) + let raw_text = parser.get_text_content(); + let raw_clean = raw_text + .replace("<|im_end|>", "") + .replace("", "") + .replace("[/INST]", "") + .replace("<>", ""); + + if !raw_clean.trim().is_empty() { + let assistant_message = Message { + role: MessageRole::Assistant, + content: raw_clean, + }; + self.context_window.add_message(assistant_message); + } + } + // Add timing if needed let final_response = if show_timing { format!(