Compare commits

..

10 Commits

Author SHA1 Message Date
Jochen
bbeaaea2e3 temporarily disable codebase_fast_start
it seems the llm gets "lazy" and assumes all the tool
calls meant it's done most of the work.
I need to revise this approach.
2025-11-27 16:36:40 +11:00
Jochen
7e1ce36a4b Merge pull request #35 from dhanji/jochen_write_existing_file
remove check for whether a file exists in the workspace
2025-11-27 13:44:45 +11:00
Jochen
9f6592efc2 remove redundant 'if' 2025-11-27 13:34:54 +11:00
Jochen
99125fc39e completely remove the skipping first player logic 2025-11-27 13:21:40 +11:00
Jochen
a2a82a2526 Merge pull request #36 from dhanji/jochen_fix_cache_control_if
add cache_control to user messages
2025-11-27 13:13:54 +11:00
Jochen
5170744099 add cache_control to user messages 2025-11-27 13:12:42 +11:00
Jochen
fb0aabb5c4 Merge pull request #34 from dhanji/jochen-g3-ensemble-fork
a fixed fork of dhanji/g3-ensembles
2025-11-27 11:41:23 +11:00
Jochen
4655516c15 Merge pull request #33 from dhanji/jochen_fix_multi_cache
never add more than 4 cache controls
2025-11-27 11:41:05 +11:00
Jochen
c58aa80932 explain what file was found in workspace 2025-11-26 21:43:59 +11:00
Jochen
c837308148 never add more than 4 cache controls
Anthropic API throws errors otherwise.
2025-11-26 18:38:30 +11:00
3 changed files with 231 additions and 275 deletions

View File

@@ -183,6 +183,7 @@ use rustyline::error::ReadlineError;
use rustyline::DefaultEditor; use rustyline::DefaultEditor;
use std::path::Path; use std::path::Path;
use std::path::PathBuf; use std::path::PathBuf;
use std::process::exit;
use sha2::{Digest, Sha256}; use sha2::{Digest, Sha256};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{error, info}; use tracing::{error, info};
@@ -298,6 +299,10 @@ pub async fn run() -> Result<()> {
return run_flock_mode(project_dir.clone(), flock_workspace.clone(), num_segments, cli.flock_max_turns).await; return run_flock_mode(project_dir.clone(), flock_workspace.clone(), num_segments, cli.flock_max_turns).await;
} }
if cli.codebase_fast_start.is_some() {
print!("codebase_fast_start is temporarily disabled.");
exit(1);
}
// Otherwise, continue with normal mode // Otherwise, continue with normal mode
// Only initialize logging if not in retro mode // Only initialize logging if not in retro mode
@@ -1761,16 +1766,6 @@ async fn run_autonomous(
let loop_start = Instant::now(); let loop_start = Instant::now();
output.print("🔄 Starting coach-player feedback loop..."); output.print("🔄 Starting coach-player feedback loop...");
// Check if implementation files already exist
let skip_first_player = project.has_implementation_files();
if skip_first_player {
output.print("📂 Detected existing implementation files in workspace");
output.print("⏭️ Skipping first player turn - proceeding directly to coach review");
} else {
output.print("📂 No existing implementation files detected");
output.print("🎯 Starting with player implementation");
}
// Load fast-discovery messages before the loop starts (if enabled) // Load fast-discovery messages before the loop starts (if enabled)
let (discovery_messages, discovery_working_dir): (Vec<g3_providers::Message>, Option<String>) = let (discovery_messages, discovery_working_dir): (Vec<g3_providers::Message>, Option<String>) =
if let Some(ref codebase_path) = codebase_fast_start { if let Some(ref codebase_path) = codebase_fast_start {
@@ -1811,203 +1806,201 @@ async fn run_autonomous(
loop { loop {
let turn_start_time = Instant::now(); let turn_start_time = Instant::now();
let turn_start_tokens = agent.get_context_window().used_tokens; let turn_start_tokens = agent.get_context_window().used_tokens;
// Skip player turn if it's the first turn and implementation files exist
if !(turn == 1 && skip_first_player) {
output.print(&format!(
"\n=== TURN {}/{} - PLAYER MODE ===",
turn, max_turns
));
// Surface provider info for player agent output.print(&format!(
agent.print_provider_banner("Player"); "\n=== TURN {}/{} - PLAYER MODE ===",
turn, max_turns
));
// Player mode: implement requirements (with coach feedback if available) // Surface provider info for player agent
let player_prompt = if coach_feedback.is_empty() { agent.print_provider_banner("Player");
format!(
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nRequirements SHA256: {}\n\nImplement this step by step, creating all necessary files and code.",
requirements, requirements_sha
)
} else {
format!(
"You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.",
coach_feedback, requirements
)
};
output.print(&format!("🎯 Starting player implementation... (elapsed: {})", format_elapsed_time(loop_start.elapsed()))); // Player mode: implement requirements (with coach feedback if available)
let player_prompt = if coach_feedback.is_empty() {
format!(
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nRequirements SHA256: {}\n\nImplement this step by step, creating all necessary files and code.",
requirements, requirements_sha
)
} else {
format!(
"You are G3 in implementation mode. Address the following specific feedback from the coach:\n\n{}\n\nContext: You are improving an implementation based on these requirements:\n{}\n\nFocus on fixing the issues mentioned in the coach feedback above.",
coach_feedback, requirements
)
};
// Display what feedback the player is receiving output.print(&format!("🎯 Starting player implementation... (elapsed: {})", format_elapsed_time(loop_start.elapsed())));
// If there's no coach feedback on subsequent turns, this is an error
if coach_feedback.is_empty() {
if turn > 1 {
return Err(anyhow::anyhow!(
"Player mode error: No coach feedback received on turn {}",
turn
));
}
output.print("📋 Player starting initial implementation (no prior coach feedback)");
} else {
output.print(&format!(
"📋 Player received coach feedback ({} chars):",
coach_feedback.len()
));
output.print(&coach_feedback.to_string());
}
output.print(""); // Empty line for readability
// Execute player task with retry on error // Display what feedback the player is receiving
let mut _player_retry_count = 0; // If there's no coach feedback on subsequent turns, this is an error
const MAX_PLAYER_RETRIES: u32 = 3; if coach_feedback.is_empty() {
let mut player_failed = false; if turn > 1 {
return Err(anyhow::anyhow!(
loop { "Player mode error: No coach feedback received on turn {}",
match agent
.execute_task_with_timing(
&player_prompt,
None,
false,
show_prompt,
show_code,
true,
if has_discovery {
Some(DiscoveryOptions {
messages: &discovery_messages,
fast_start_path: discovery_working_dir.as_deref(),
})
} else { None },
)
.await
{
Ok(result) => {
// Display player's implementation result
output.print("📝 Player implementation completed:");
output.print_smart(&result.response);
break;
}
Err(e) => {
// Check if this is a context length exceeded error
use g3_core::error_handling::{classify_error, ErrorType, RecoverableError};
let error_type = classify_error(&e);
if matches!(error_type, ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)) {
output.print(&format!("⚠️ Context length exceeded in player turn: {}", e));
output.print("📝 Logging error to session and ending current turn...");
// Build forensic context
let forensic_context = format!(
"Turn: {}\n\
Role: Player\n\
Context tokens: {}\n\
Total available: {}\n\
Percentage used: {:.1}%\n\
Prompt length: {} chars\n\
Error occurred at: {}",
turn,
agent.get_context_window().used_tokens,
agent.get_context_window().total_tokens,
agent.get_context_window().percentage_used(),
player_prompt.len(),
chrono::Utc::now().to_rfc3339()
);
// Log to session JSON
agent.log_error_to_session(&e, "assistant", Some(forensic_context));
// Mark turn as failed and continue to next turn
player_failed = true;
break;
} else if e.to_string().contains("panic") {
output.print(&format!("💥 Player panic detected: {}", e));
// Generate final report even for panic
let elapsed = start_time.elapsed();
let context_window = agent.get_context_window();
output.print(&format!("\n{}", "=".repeat(60)));
output.print("📊 AUTONOMOUS MODE SESSION REPORT");
output.print(&"=".repeat(60));
output.print(&format!(
"⏱️ Total Duration: {:.2}s",
elapsed.as_secs_f64()
));
output.print(&format!("🔄 Turns Taken: {}/{}", turn, max_turns));
output.print("📝 Final Status: 💥 PLAYER PANIC");
output.print("\n📈 Token Usage Statistics:");
output.print(&format!(
" • Used Tokens: {}",
context_window.used_tokens
));
output.print(&format!(
" • Total Available: {}",
context_window.total_tokens
));
output.print(&format!(
" • Cumulative Tokens: {}",
context_window.cumulative_tokens
));
output.print(&format!(
" • Usage Percentage: {:.1}%",
context_window.percentage_used()
));
// Add per-turn histogram
output.print(&generate_turn_histogram(&turn_metrics));
output.print(&"=".repeat(60));
return Err(e);
}
_player_retry_count += 1;
output.print(&format!(
"⚠️ Player error (attempt {}/{}): {}",
_player_retry_count, MAX_PLAYER_RETRIES, e
));
if _player_retry_count >= MAX_PLAYER_RETRIES {
output.print(
"🔄 Max retries reached for player, marking turn as failed...",
);
player_failed = true;
break; // Exit retry loop
}
output.print("🔄 Retrying player implementation...");
}
}
}
// If player failed after max retries, increment turn and continue
if player_failed {
output.print(&format!(
"⚠️ Player turn {} failed after max retries. Moving to next turn.",
turn turn
)); ));
// Record turn metrics before incrementing }
let turn_duration = turn_start_time.elapsed(); output.print("📋 Player starting initial implementation (no prior coach feedback)");
let turn_tokens = agent.get_context_window().used_tokens.saturating_sub(turn_start_tokens); } else {
turn_metrics.push(TurnMetrics { output.print(&format!(
turn_number: turn, "📋 Player received coach feedback ({} chars):",
tokens_used: turn_tokens, coach_feedback.len()
wall_clock_time: turn_duration, ));
}); output.print(&coach_feedback.to_string());
turn += 1; }
output.print(""); // Empty line for readability
// Check if we've reached max turns // Execute player task with retry on error
if turn > max_turns { let mut _player_retry_count = 0;
output.print("\n=== SESSION COMPLETED - MAX TURNS REACHED ==="); const MAX_PLAYER_RETRIES: u32 = 3;
output.print(&format!("⏰ Maximum turns ({}) reached", max_turns)); let mut player_failed = false;
loop {
match agent
.execute_task_with_timing(
&player_prompt,
None,
false,
show_prompt,
show_code,
true,
if has_discovery {
Some(DiscoveryOptions {
messages: &discovery_messages,
fast_start_path: discovery_working_dir.as_deref(),
})
} else { None },
)
.await
{
Ok(result) => {
// Display player's implementation result
output.print("📝 Player implementation completed:");
output.print_smart(&result.response);
break; break;
} }
Err(e) => {
// Check if this is a context length exceeded error
use g3_core::error_handling::{classify_error, ErrorType, RecoverableError};
let error_type = classify_error(&e);
// Continue to next iteration with empty feedback (restart from scratch) if matches!(error_type, ErrorType::Recoverable(RecoverableError::ContextLengthExceeded)) {
coach_feedback = String::new(); output.print(&format!("⚠️ Context length exceeded in player turn: {}", e));
continue; output.print("📝 Logging error to session and ending current turn...");
// Build forensic context
let forensic_context = format!(
"Turn: {}\n\
Role: Player\n\
Context tokens: {}\n\
Total available: {}\n\
Percentage used: {:.1}%\n\
Prompt length: {} chars\n\
Error occurred at: {}",
turn,
agent.get_context_window().used_tokens,
agent.get_context_window().total_tokens,
agent.get_context_window().percentage_used(),
player_prompt.len(),
chrono::Utc::now().to_rfc3339()
);
// Log to session JSON
agent.log_error_to_session(&e, "assistant", Some(forensic_context));
// Mark turn as failed and continue to next turn
player_failed = true;
break;
} else if e.to_string().contains("panic") {
output.print(&format!("💥 Player panic detected: {}", e));
// Generate final report even for panic
let elapsed = start_time.elapsed();
let context_window = agent.get_context_window();
output.print(&format!("\n{}", "=".repeat(60)));
output.print("📊 AUTONOMOUS MODE SESSION REPORT");
output.print(&"=".repeat(60));
output.print(&format!(
"⏱️ Total Duration: {:.2}s",
elapsed.as_secs_f64()
));
output.print(&format!("🔄 Turns Taken: {}/{}", turn, max_turns));
output.print("📝 Final Status: 💥 PLAYER PANIC");
output.print("\n📈 Token Usage Statistics:");
output.print(&format!(
" • Used Tokens: {}",
context_window.used_tokens
));
output.print(&format!(
" • Total Available: {}",
context_window.total_tokens
));
output.print(&format!(
" • Cumulative Tokens: {}",
context_window.cumulative_tokens
));
output.print(&format!(
" • Usage Percentage: {:.1}%",
context_window.percentage_used()
));
// Add per-turn histogram
output.print(&generate_turn_histogram(&turn_metrics));
output.print(&"=".repeat(60));
return Err(e);
}
_player_retry_count += 1;
output.print(&format!(
"⚠️ Player error (attempt {}/{}): {}",
_player_retry_count, MAX_PLAYER_RETRIES, e
));
if _player_retry_count >= MAX_PLAYER_RETRIES {
output.print(
"🔄 Max retries reached for player, marking turn as failed...",
);
player_failed = true;
break; // Exit retry loop
}
output.print("🔄 Retrying player implementation...");
}
}
}
// If player failed after max retries, increment turn and continue
if player_failed {
output.print(&format!(
"⚠️ Player turn {} failed after max retries. Moving to next turn.",
turn
));
// Record turn metrics before incrementing
let turn_duration = turn_start_time.elapsed();
let turn_tokens = agent.get_context_window().used_tokens.saturating_sub(turn_start_tokens);
turn_metrics.push(TurnMetrics {
turn_number: turn,
tokens_used: turn_tokens,
wall_clock_time: turn_duration,
});
turn += 1;
// Check if we've reached max turns
if turn > max_turns {
output.print("\n=== SESSION COMPLETED - MAX TURNS REACHED ===");
output.print(&format!("⏰ Maximum turns ({}) reached", max_turns));
break;
} }
// Give some time for file operations to complete // Continue to next iteration with empty feedback (restart from scratch)
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await; coach_feedback = String::new();
continue;
} }
// Give some time for file operations to complete
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
// Create a new agent instance for coach mode to ensure fresh context // Create a new agent instance for coach mode to ensure fresh context
// Use the same config with overrides that was passed to the player agent // Use the same config with overrides that was passed to the player agent
let base_config = agent.get_config().clone(); let base_config = agent.get_config().clone();

View File

@@ -1087,6 +1087,14 @@ impl<W: UiWriter> Agent<W> {
} }
} }
/// Number of messages in the conversation history that already carry a
/// `cache_control` annotation (used to stay under the provider's limit).
fn count_cache_controls_in_history(&self) -> usize {
    self.context_window
        .conversation_history
        .iter()
        .map(|message| usize::from(message.cache_control.is_some()))
        .sum()
}
/// Get the configured max_tokens for a provider from top-level config /// Get the configured max_tokens for a provider from top-level config
fn provider_max_tokens(config: &Config, provider_name: &str) -> Option<u32> { fn provider_max_tokens(config: &Config, provider_name: &str) -> Option<u32> {
match provider_name { match provider_name {
@@ -1404,7 +1412,21 @@ impl<W: UiWriter> Agent<W> {
} }
// Add user message to context window // Add user message to context window
let user_message = Message::new(MessageRole::User, format!("Task: {}", description)); let user_message = {
// Check if we should use cache control (every 10 tool calls)
// But only if we haven't already added 4 cache_control annotations
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::User, format!("Task: {}", description), cache_config, provider)
} else {
Message::new(MessageRole::User, format!("Task: {}", description))
}
};
self.context_window.add_message(user_message); self.context_window.add_message(user_message);
// Execute fast-discovery tool calls if provided (immediately after user message) // Execute fast-discovery tool calls if provided (immediately after user message)
@@ -1426,7 +1448,7 @@ impl<W: UiWriter> Agent<W> {
// Add cache_control to the last user message if provider supports it (anthropic) // Add cache_control to the last user message if provider supports it (anthropic)
let is_last = idx == message_count - 1; let is_last = idx == message_count - 1;
let result_message = if is_last && supports_cache { let result_message = if supports_cache && is_last && self.count_cache_controls_in_history() < 4 {
Message::with_cache_control( Message::with_cache_control(
MessageRole::User, MessageRole::User,
format!("Tool result: {}", result), format!("Tool result: {}", result),
@@ -1506,24 +1528,7 @@ impl<W: UiWriter> Agent<W> {
// Add assistant response to context window only if not empty // Add assistant response to context window only if not empty
// This prevents the "Skipping empty message" warning when only tools were executed // This prevents the "Skipping empty message" warning when only tools were executed
if !response_content.trim().is_empty() { if !response_content.trim().is_empty() {
let assistant_message = { let assistant_message = Message::new(MessageRole::Assistant, response_content.clone());
// Check if we should use cache control (every 10 tool calls)
if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::Assistant, response_content.clone(), cache_config, provider)
} else {
Message::new(MessageRole::Assistant, response_content.clone())
}
} else {
Message::new(MessageRole::Assistant, response_content.clone())
}
};
self.context_window.add_message(assistant_message); self.context_window.add_message(assistant_message);
} else { } else {
debug!("Assistant response was empty (likely only tool execution), skipping message addition"); debug!("Assistant response was empty (likely only tool execution), skipping message addition");
@@ -3372,7 +3377,25 @@ impl<W: UiWriter> Agent<W> {
tool_call.tool, tool_call.args tool_call.tool, tool_call.args
)) ))
}; };
let result_message = Message::new(MessageRole::User, format!("Tool result: {}", tool_result)); let result_message = {
// Check if we should use cache control (every 10 tool calls)
// But only if we haven't already added 4 cache_control annotations
if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 && self.count_cache_controls_in_history() < 4 {
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::User, format!("Tool result: {}", tool_result), cache_config, provider)
} else {
Message::new(MessageRole::User, format!("Tool result: {}", tool_result))
}
} else {
Message::new(MessageRole::User, format!("Tool result: {}", tool_result))
}
};
self.context_window.add_message(tool_message); self.context_window.add_message(tool_message);
self.context_window.add_message(result_message); self.context_window.add_message(result_message);
@@ -3722,24 +3745,7 @@ impl<W: UiWriter> Agent<W> {
.replace("<</SYS>>", ""); .replace("<</SYS>>", "");
if !raw_clean.trim().is_empty() { if !raw_clean.trim().is_empty() {
let assistant_message = { let assistant_message = Message::new(MessageRole::Assistant, raw_clean);
// Check if we should use cache control (every 10 tool calls)
if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::Assistant, raw_clean, cache_config, provider)
} else {
Message::new(MessageRole::Assistant, raw_clean)
}
} else {
Message::new(MessageRole::Assistant, raw_clean)
}
};
self.context_window.add_message(assistant_message); self.context_window.add_message(assistant_message);
} }
} }

View File

@@ -98,49 +98,6 @@ impl Project {
self.requirements_text.is_some() || self.requirements_path.is_some() self.requirements_text.is_some() || self.requirements_path.is_some()
} }
/// Returns `true` when the workspace directory (searched recursively)
/// contains at least one recognized source file.
pub fn has_implementation_files(&self) -> bool {
    let workspace = &self.workspace_dir;
    self.check_dir_for_implementation_files(workspace)
}
/// Recursively check a directory for implementation files.
///
/// A file counts as an implementation file when its extension matches one of
/// the common source-code extensions below. Hidden directories and well-known
/// non-source directories (`logs`, `target`, `node_modules`) are skipped.
/// A directory that cannot be read is treated as empty rather than reported
/// as an error — this is a best-effort scan.
#[allow(clippy::only_used_in_recursion)]
fn check_dir_for_implementation_files(&self, dir: &Path) -> bool {
    // Common source file extensions. A `const` slice avoids re-allocating a
    // fresh `Vec` on every call — the original allocated one per recursive
    // descent into each subdirectory.
    const EXTENSIONS: &[&str] = &[
        "swift", "rs", "py", "js", "ts", "java", "cpp", "c",
        "go", "rb", "php", "cs", "kt", "scala", "m", "h",
    ];

    if let Ok(entries) = std::fs::read_dir(dir) {
        for entry in entries.flatten() {
            let path = entry.path();
            if path.is_file() {
                // A file matches when its extension is valid UTF-8 and is in
                // the known-extensions list.
                let is_source = path
                    .extension()
                    .and_then(|ext| ext.to_str())
                    .map_or(false, |ext| EXTENSIONS.contains(&ext));
                if is_source {
                    return true;
                }
            } else if path.is_dir() {
                // Skip hidden directories and common non-source directories;
                // a non-UTF-8 directory name is also skipped (map_or(false)).
                let descend = path
                    .file_name()
                    .and_then(|name| name.to_str())
                    .map_or(false, |name| {
                        !name.starts_with('.')
                            && name != "logs"
                            && name != "target"
                            && name != "node_modules"
                    });
                if descend && self.check_dir_for_implementation_files(&path) {
                    return true;
                }
            }
        }
    }
    false
}
/// Read the requirements file content /// Read the requirements file content
pub fn read_requirements(&self) -> Result<Option<String>> { pub fn read_requirements(&self) -> Result<Option<String>> {
// Prioritize requirements text override // Prioritize requirements text override