Compare commits

...

15 Commits

Author SHA1 Message Date
Jochen
5170744099 add cache_control to user messages 2025-11-27 13:12:42 +11:00
Jochen
fb0aabb5c4 Merge pull request #34 from dhanji/jochen-g3-ensemble-fork
a fixed fork of dhanji/g3-ensembles
2025-11-27 11:41:23 +11:00
Jochen
4655516c15 Merge pull request #33 from dhanji/jochen_fix_multi_cache
never add more than 4 cache controls
2025-11-27 11:41:05 +11:00
Jochen
fdb3080fc2 fix partitions parser 2025-11-26 21:07:45 +11:00
Jochen
c837308148 never add more than 4 cache controls
Anthropic API throws errors otherwise.
2025-11-26 18:38:30 +11:00
Jochen
9bbedd869a Fixed JSON encoding in partition 2025-11-26 18:08:12 +11:00
Dhanji Prasanna
4cfa0147ca first cut of horizontal partitioning
# Conflicts:
#	Cargo.lock

# Conflicts:
#	Cargo.lock
#	crates/g3-cli/src/lib.rs
2025-11-26 17:12:07 +11:00
Jochen
c6c35bf2ca Merge pull request #31 from dhanji/jochen_fast_start
add code exploration fast start
2025-11-26 17:10:42 +11:00
Jochen
c9fde4ecef Merge pull request #32 from dhanji/jochen_reorder_system_prompt
minor change: reorder system prompt
2025-11-26 11:07:08 +11:00
Jochen
1e1702001c Add logging for discovery 2025-11-26 10:41:35 +11:00
Jochen
c419833ddf updated the prompt 2025-11-26 10:26:52 +11:00
Jochen
c19127f809 make sure user requirements are included 2025-11-26 10:26:52 +11:00
Jochen
bd29addefa reorder system prompt 2025-11-26 10:26:52 +11:00
Jochen
2e252cd298 added timer 2025-11-25 22:51:33 +11:00
Jochen
ad198a8501 add code exploration fast start
This tries to short-circuit multiple round-trips to llm for reading code.
It's a precursor to trying to context engineer tailored to specific tasks.
In initial experiments, it's only marginally faster than regular mode, and burns more tokens.
2025-11-25 22:51:32 +11:00
28 changed files with 4084 additions and 174 deletions

33
Cargo.lock generated
View File

@@ -1365,6 +1365,9 @@ dependencies = [
"dirs 5.0.1",
"g3-config",
"g3-core",
"g3-ensembles",
"g3-planner",
"g3-providers",
"hex",
"indicatif",
"ratatui",
@@ -1486,6 +1489,23 @@ dependencies = [
"walkdir",
]
[[package]]
name = "g3-ensembles"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"clap",
"g3-config",
"g3-core",
"serde",
"serde_json",
"tempfile",
"tokio",
"tracing",
"uuid",
]
[[package]]
name = "g3-execution"
version = "0.1.0"
@@ -1499,6 +1519,19 @@ dependencies = [
"tracing",
]
[[package]]
name = "g3-planner"
version = "0.1.0"
dependencies = [
"anyhow",
"chrono",
"const_format",
"g3-providers",
"serde",
"serde_json",
"tokio",
]
[[package]]
name = "g3-providers"
version = "0.1.0"

View File

@@ -2,11 +2,13 @@
members = [
"crates/g3-cli",
"crates/g3-core",
"crates/g3-planner",
"crates/g3-providers",
"crates/g3-config",
"crates/g3-execution",
"crates/g3-computer-control",
"crates/g3-console"
"crates/g3-console",
"crates/g3-ensembles"
]
resolver = "2"

View File

@@ -96,6 +96,7 @@ These commands give you fine-grained control over context management, allowing y
- Window listing and identification
- **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript, Go, Java, C, C++) - see [Code Search Guide](docs/CODE_SEARCH.md)
- **Final Output**: Formatted result presentation
- **Flock Mode**: Parallel multi-agent development for large projects - see [Flock Mode Guide](docs/FLOCK_MODE.md)
### Provider Flexibility
- Support for multiple LLM providers through a unified interface
@@ -129,6 +130,7 @@ G3 is designed for:
- API integration and testing
- Documentation generation
- Complex multi-step workflows
- Parallel development of modular architectures
- Desktop application automation and testing
## Getting Started

View File

@@ -7,7 +7,10 @@ description = "CLI interface for G3 AI coding agent"
[dependencies]
g3-core = { path = "../g3-core" }
g3-config = { path = "../g3-config" }
g3-planner = { path = "../g3-planner" }
g3-providers = { path = "../g3-providers" }
clap = { workspace = true }
g3-ensembles = { path = "../g3-ensembles" }
tokio = { workspace = true }
anyhow = { workspace = true }
tracing = { workspace = true }

View File

@@ -98,6 +98,25 @@ fn generate_turn_histogram(turn_metrics: &[TurnMetrics]) -> String {
histogram
}
/// Render a `Duration` as a compact human-readable elapsed-time string.
///
/// Chooses the largest applicable unit: `"1h 23m 45s"`, `"5m 30s"`, `"45s"`,
/// or — for sub-second durations — milliseconds (e.g. `"250ms"`).
fn format_elapsed_time(duration: Duration) -> String {
    let secs = duration.as_secs();
    // Decompose once; (secs / 60) % 60 is equivalent to (secs % 3600) / 60.
    let (hours, minutes, seconds) = (secs / 3600, (secs / 60) % 60, secs % 60);
    match (hours, minutes, seconds) {
        // Sub-second: fall back to milliseconds so very short spans are visible.
        (0, 0, 0) => format!("{}ms", duration.as_millis()),
        (0, 0, s) => format!("{}s", s),
        (0, m, s) => format!("{}m {}s", m, s),
        (h, m, s) => format!("{}h {}m {}s", h, m, s),
    }
}
/// Extract coach feedback by reading from the coach agent's specific log file
/// Uses the coach agent's session ID to find the exact log file
fn extract_coach_feedback_from_logs(
@@ -159,7 +178,7 @@ fn extract_coach_feedback_from_logs(
use clap::Parser;
use g3_config::Config;
use g3_core::{project::Project, ui_writer::UiWriter, Agent};
use g3_core::{project::Project, ui_writer::UiWriter, Agent, DiscoveryOptions};
use rustyline::error::ReadlineError;
use rustyline::DefaultEditor;
use std::path::Path;
@@ -247,11 +266,40 @@ pub struct Cli {
/// Enable WebDriver browser automation tools
#[arg(long)]
pub webdriver: bool,
/// Enable flock mode - parallel multi-agent development
#[arg(long, requires = "flock_workspace", requires = "segments")]
pub project: Option<PathBuf>,
/// Flock workspace directory (where segment copies will be created)
#[arg(long, requires = "project")]
pub flock_workspace: Option<PathBuf>,
/// Number of segments to partition work into (for flock mode)
#[arg(long, requires = "project")]
pub segments: Option<usize>,
/// Maximum turns per segment in flock mode (default: 5)
#[arg(long, default_value = "5")]
pub flock_max_turns: usize,
/// Enable fast codebase discovery before first LLM turn
#[arg(long, value_name = "PATH")]
pub codebase_fast_start: Option<PathBuf>
}
pub async fn run() -> Result<()> {
let cli = Cli::parse();
// Check if flock mode is enabled
if let (Some(project_dir), Some(flock_workspace), Some(num_segments)) =
(&cli.project, &cli.flock_workspace, cli.segments) {
// Run flock mode
return run_flock_mode(project_dir.clone(), flock_workspace.clone(), num_segments, cli.flock_max_turns).await;
}
// Otherwise, continue with normal mode
// Only initialize logging if not in retro mode
if !cli.machine {
// Initialize logging with filtering
@@ -440,6 +488,39 @@ pub async fn run() -> Result<()> {
Ok(())
}
/// Run flock mode - parallel multi-agent development.
///
/// Partitions work on `project_dir` into `num_segments` independent segments
/// under `flock_workspace`, each driven for at most `max_turns` turns.
///
/// # Errors
/// Returns an error if the flock configuration is invalid, if `FlockMode`
/// fails to initialize, or if the run itself fails. (Previously a failed run
/// was printed but still returned `Ok(())`, so the process exited with status
/// 0 even on failure.)
async fn run_flock_mode(
    project_dir: PathBuf,
    flock_workspace: PathBuf,
    num_segments: usize,
    max_turns: usize,
) -> Result<()> {
    let output = SimpleOutput::new();

    output.print("");
    output.print("🦅 G3 FLOCK MODE - Parallel Multi-Agent Development");
    output.print("");
    output.print(&format!("📁 Project: {}", project_dir.display()));
    output.print(&format!("🗂️ Workspace: {}", flock_workspace.display()));
    output.print(&format!("🔢 Segments: {}", num_segments));
    output.print(&format!("🔄 Max Turns per Segment: {}", max_turns));
    output.print("");

    // Create flock configuration
    let config = g3_ensembles::FlockConfig::new(project_dir, flock_workspace, num_segments)?
        .with_max_turns(max_turns);

    // Create and run flock mode
    let mut flock = g3_ensembles::FlockMode::new(config)?;
    match flock.run().await {
        Ok(_) => {
            output.print("\n✅ Flock mode completed successfully");
            Ok(())
        }
        Err(e) => {
            output.print(&format!("\n❌ Flock mode failed: {}", e));
            // Propagate the failure so callers (and the process exit code)
            // reflect it instead of silently reporting success.
            Err(e)
        }
    }
}
/// Accumulative autonomous mode: accumulates requirements from user input
/// and runs autonomous mode after each input
async fn run_accumulative_mode(
@@ -676,6 +757,7 @@ async fn run_accumulative_mode(
cli.show_code,
cli.max_turns,
cli.quiet,
cli.codebase_fast_start.clone(),
) => result,
_ = tokio::signal::ctrl_c() => {
output.print("\n⚠️ Autonomous run cancelled by user (Ctrl+C)");
@@ -727,6 +809,7 @@ async fn run_autonomous_machine(
show_code: bool,
max_turns: usize,
_quiet: bool,
_codebase_fast_start: Option<PathBuf>,
) -> Result<()> {
println!("AUTONOMOUS_MODE_STARTED");
println!("WORKSPACE: {}", project.workspace().display());
@@ -757,7 +840,7 @@ async fn run_autonomous_machine(
);
println!("TASK_START");
let result = agent.execute_task_with_timing(&task, None, false, show_prompt, show_code, true).await?;
let result = agent.execute_task_with_timing(&task, None, false, show_prompt, show_code, true, None).await?;
println!("AGENT_RESPONSE:");
println!("{}", result.response);
println!("END_AGENT_RESPONSE");
@@ -784,13 +867,14 @@ async fn run_with_console_mode(
cli.show_code,
cli.max_turns,
cli.quiet,
cli.codebase_fast_start.clone(),
)
.await?;
} else if let Some(task) = cli.task {
// Single-shot mode
let output = SimpleOutput::new();
let result = agent
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true, None)
.await?;
output.print_smart(&result.response);
} else {
@@ -815,12 +899,13 @@ async fn run_with_machine_mode(
cli.show_code,
cli.max_turns,
cli.quiet,
cli.codebase_fast_start.clone(),
)
.await?;
} else if let Some(task) = cli.task {
// Single-shot mode
let result = agent
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
.execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true, None)
.await?;
println!("AGENT_RESPONSE:");
println!("{}", result.response);
@@ -1212,7 +1297,7 @@ async fn execute_task<W: UiWriter>(
// Execute task with cancellation support
let execution_result = tokio::select! {
result = agent.execute_task_with_timing_cancellable(
input, None, false, show_prompt, show_code, true, cancellation_token.clone()
input, None, false, show_prompt, show_code, true, cancellation_token.clone(), None
) => {
result
}
@@ -1403,7 +1488,7 @@ async fn execute_task_machine(
// Execute task with cancellation support
let execution_result = tokio::select! {
result = agent.execute_task_with_timing_cancellable(
input, None, false, show_prompt, show_code, true, cancellation_token.clone()
input, None, false, show_prompt, show_code, true, cancellation_token.clone(), None
) => {
result
}
@@ -1552,6 +1637,7 @@ async fn run_autonomous(
show_code: bool,
max_turns: usize,
quiet: bool,
codebase_fast_start: Option<PathBuf>,
) -> Result<()> {
let start_time = std::time::Instant::now();
let output = SimpleOutput::new();
@@ -1672,6 +1758,7 @@ async fn run_autonomous(
// Pass SHA to agent for staleness checking
agent.set_requirements_sha(requirements_sha.clone());
let loop_start = Instant::now();
output.print("🔄 Starting coach-player feedback loop...");
// Check if implementation files already exist
@@ -1684,6 +1771,39 @@ async fn run_autonomous(
output.print("🎯 Starting with player implementation");
}
// Load fast-discovery messages before the loop starts (if enabled)
let (discovery_messages, discovery_working_dir): (Vec<g3_providers::Message>, Option<String>) =
if let Some(ref codebase_path) = codebase_fast_start {
// Canonicalize the path to ensure it's absolute
let canonical_path = codebase_path.canonicalize().unwrap_or_else(|_| codebase_path.clone());
let path_str = canonical_path.to_string_lossy();
output.print(&format!("🔍 Fast-discovery mode: will explore codebase at {}", path_str));
// Get the provider from the agent and use async LLM-based discovery
match agent.get_provider() {
Ok(provider) => {
// Create a status callback that prints to output
let output_clone = output.clone();
let status_callback: g3_planner::StatusCallback = Box::new(move |msg: &str| {
output_clone.print(msg);
});
match g3_planner::get_initial_discovery_messages(&path_str, Some(&requirements), provider, Some(&status_callback)).await {
Ok(messages) => (messages, Some(path_str.to_string())),
Err(e) => {
output.print(&format!("⚠️ LLM discovery failed: {}, skipping fast-start", e));
(Vec::new(), None)
}
}
}
Err(e) => {
output.print(&format!("⚠️ Could not get provider: {}, skipping fast-start", e));
(Vec::new(), None)
}
}
} else {
(Vec::new(), None)
};
let has_discovery = !discovery_messages.is_empty();
let mut turn = 1;
let mut coach_feedback = String::new();
let mut implementation_approved = false;
@@ -1714,7 +1834,7 @@ async fn run_autonomous(
)
};
output.print("🎯 Starting player implementation...");
output.print(&format!("🎯 Starting player implementation... (elapsed: {})", format_elapsed_time(loop_start.elapsed())));
// Display what feedback the player is receiving
// If there's no coach feedback on subsequent turns, this is an error
@@ -1749,6 +1869,12 @@ async fn run_autonomous(
show_prompt,
show_code,
true,
if has_discovery {
Some(DiscoveryOptions {
messages: &discovery_messages,
fast_start_path: discovery_working_dir.as_deref(),
})
} else { None },
)
.await
{
@@ -1936,7 +2062,7 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
requirements
);
output.print("🎓 Starting coach review...");
output.print(&format!("🎓 Starting coach review... (elapsed: {})", format_elapsed_time(loop_start.elapsed())));
// Execute coach task with retry on error
let mut coach_retry_count = 0;
@@ -1946,7 +2072,13 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
loop {
match coach_agent
.execute_task_with_timing(&coach_prompt, None, false, show_prompt, show_code, true)
.execute_task_with_timing(&coach_prompt, None, false, show_prompt, show_code, true,
if has_discovery {
Some(DiscoveryOptions {
messages: &discovery_messages,
fast_start_path: discovery_working_dir.as_deref(),
})
} else { None })
.await
{
Ok(result) => {
@@ -2176,9 +2308,9 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
output.print(&"=".repeat(60));
if implementation_approved {
output.print("\n🎉 Autonomous mode completed successfully");
output.print(&format!("\n🎉 Autonomous mode completed successfully (total loop time: {})", format_elapsed_time(loop_start.elapsed())));
} else {
output.print("\n🔄 Autonomous mode terminated (max iterations)");
output.print(&format!("\n🔄 Autonomous mode terminated (max iterations) (total loop time: {})", format_elapsed_time(loop_start.elapsed())));
}
Ok(())

View File

@@ -1,4 +1,5 @@
/// Simple output helper for printing messages
#[derive(Clone)]
pub struct SimpleOutput {
machine_mode: bool,
}

View File

@@ -46,6 +46,13 @@ pub struct ToolCall {
pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments
}
/// Options for fast-start discovery execution
#[derive(Debug, Clone)]
pub struct DiscoveryOptions<'a> {
    /// Pre-recorded discovery messages; each one is expected to contain a
    /// serialized `ToolCall` that is parsed and replayed into the context
    /// window before the first LLM turn.
    pub messages: &'a [Message],
    /// Working directory in which the replayed tool calls execute (set from
    /// `--codebase-fast-start`). When `None`, tools run without an overridden
    /// working directory.
    pub fast_start_path: Option<&'a str>,
}
#[derive(Debug, Clone)]
pub enum StreamState {
Generating,
@@ -760,6 +767,8 @@ pub struct Agent<W: UiWriter> {
std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
tool_call_count: usize,
requirements_sha: Option<String>,
/// Working directory for tool execution (set by --codebase-fast-start)
working_dir: Option<String>,
}
impl<W: UiWriter> Agent<W> {
@@ -1032,6 +1041,7 @@ impl<W: UiWriter> Agent<W> {
},
tool_call_count: 0,
requirements_sha: None,
working_dir: None,
})
}
@@ -1077,6 +1087,14 @@ impl<W: UiWriter> Agent<W> {
}
}
/// Number of messages in the conversation history that carry a
/// `cache_control` annotation (the Anthropic API allows at most 4).
fn count_cache_controls_in_history(&self) -> usize {
    self.context_window
        .conversation_history
        .iter()
        .fold(0, |total, msg| total + usize::from(msg.cache_control.is_some()))
}
/// Get the configured max_tokens for a provider from top-level config
fn provider_max_tokens(config: &Config, provider_name: &str) -> Option<u32> {
match provider_name {
@@ -1282,6 +1300,11 @@ impl<W: UiWriter> Agent<W> {
Ok((provider.name().to_string(), provider.model().to_string()))
}
/// Get the default LLM provider
///
/// Convenience accessor for callers that need the provider outside the
/// agent's own execution loop (e.g. the planner's fast-start discovery).
/// Propagates any error from the provider registry lookup.
pub fn get_provider(&self) -> Result<&dyn g3_providers::LLMProvider> {
    self.providers.get(None)
}
/// Get the current session ID for this agent
pub fn get_session_id(&self) -> Option<&str> {
self.session_id.as_deref()
@@ -1293,7 +1316,7 @@ impl<W: UiWriter> Agent<W> {
language: Option<&str>,
_auto_execute: bool,
) -> Result<TaskResult> {
self.execute_task_with_options(description, language, false, false, false)
self.execute_task_with_options(description, language, false, false, false, None)
.await
}
@@ -1304,6 +1327,7 @@ impl<W: UiWriter> Agent<W> {
_auto_execute: bool,
show_prompt: bool,
show_code: bool,
discovery_options: Option<DiscoveryOptions<'_>>,
) -> Result<TaskResult> {
self.execute_task_with_timing(
description,
@@ -1312,6 +1336,7 @@ impl<W: UiWriter> Agent<W> {
show_prompt,
show_code,
false,
discovery_options,
)
.await
}
@@ -1324,6 +1349,7 @@ impl<W: UiWriter> Agent<W> {
show_prompt: bool,
show_code: bool,
show_timing: bool,
discovery_options: Option<DiscoveryOptions<'_>>,
) -> Result<TaskResult> {
// Create a cancellation token that never cancels for backward compatibility
let cancellation_token = CancellationToken::new();
@@ -1335,6 +1361,7 @@ impl<W: UiWriter> Agent<W> {
show_code,
show_timing,
cancellation_token,
discovery_options,
)
.await
}
@@ -1349,6 +1376,7 @@ impl<W: UiWriter> Agent<W> {
show_code: bool,
show_timing: bool,
cancellation_token: CancellationToken,
discovery_options: Option<DiscoveryOptions<'_>>,
) -> Result<TaskResult> {
// Execute the task directly without splitting
self.execute_single_task(
@@ -1357,6 +1385,7 @@ impl<W: UiWriter> Agent<W> {
show_code,
show_timing,
cancellation_token,
discovery_options,
)
.await
}
@@ -1368,6 +1397,7 @@ impl<W: UiWriter> Agent<W> {
_show_code: bool,
show_timing: bool,
cancellation_token: CancellationToken,
discovery_options: Option<DiscoveryOptions<'_>>,
) -> Result<TaskResult> {
// Reset the JSON tool call filter state at the start of each new task
// This prevents the filter from staying in suppression mode between user interactions
@@ -1382,9 +1412,56 @@ impl<W: UiWriter> Agent<W> {
}
// Add user message to context window
let user_message = Message::new(MessageRole::User, format!("Task: {}", description));
let user_message = {
// Check if we should use cache control (every 10 tool calls)
// But only if we haven't already added 4 cache_control annotations
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::User, format!("Task: {}", description), cache_config, provider)
} else {
Message::new(MessageRole::User, format!("Task: {}", description))
}
};
self.context_window.add_message(user_message);
// Execute fast-discovery tool calls if provided (immediately after user message)
if let Some(ref options) = discovery_options {
self.ui_writer.println("▶️ Playing back discovery commands...");
// Store the working directory for subsequent tool calls in the streaming loop
if let Some(path) = options.fast_start_path {
self.working_dir = Some(path.to_string());
}
let provider = self.providers.get(None)?;
let supports_cache = provider.supports_cache_control();
let message_count = options.messages.len();
for (idx, discovery_msg) in options.messages.iter().enumerate() {
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(&discovery_msg.content) {
self.add_message_to_context(discovery_msg.clone());
let result = self.execute_tool_call_in_dir(&tool_call, options.fast_start_path).await
.unwrap_or_else(|e| format!("Error: {}", e));
// Add cache_control to the last user message if provider supports it (anthropic)
let is_last = idx == message_count - 1;
let result_message = if supports_cache && is_last && self.count_cache_controls_in_history() < 4 {
Message::with_cache_control(
MessageRole::User,
format!("Tool result: {}", result),
CacheControl::ephemeral(),
)
} else {
Message::new(MessageRole::User, format!("Tool result: {}", result))
};
self.add_message_to_context(result_message);
}
}
}
// Use the complete conversation history for the request
let messages = self.context_window.conversation_history.clone();
@@ -1451,24 +1528,7 @@ impl<W: UiWriter> Agent<W> {
// Add assistant response to context window only if not empty
// This prevents the "Skipping empty message" warning when only tools were executed
if !response_content.trim().is_empty() {
let assistant_message = {
// Check if we should use cache control (every 10 tool calls)
if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::Assistant, response_content.clone(), cache_config, provider)
} else {
Message::new(MessageRole::Assistant, response_content.clone())
}
} else {
Message::new(MessageRole::Assistant, response_content.clone())
}
};
let assistant_message = Message::new(MessageRole::Assistant, response_content.clone());
self.context_window.add_message(assistant_message);
} else {
debug!("Assistant response was empty (likely only tool execution), skipping message addition");
@@ -1575,6 +1635,24 @@ impl<W: UiWriter> Agent<W> {
&self.context_window
}
/// Add a message directly to the context window.
/// Used for injecting discovery messages before the first LLM turn.
///
/// No `cache_control` annotation is attached here; callers attach one to
/// `message` beforehand if needed.
pub fn add_message_to_context(&mut self, message: Message) {
    self.context_window.add_message(message);
}
/// Execute a tool call and return the result.
/// This is a public wrapper around execute_tool for use by external callers
/// like the planner's fast-discovery feature.
///
/// Note: delegates to `execute_tool`, which also increments the agent's
/// internal tool-call counter.
pub async fn execute_tool_call(&mut self, tool_call: &ToolCall) -> Result<String> {
    self.execute_tool(tool_call).await
}
/// Execute a tool call with an optional working directory (for discovery commands)
///
/// NOTE(review): `execute_tool_in_dir` only increments the tool-call counter
/// when `working_dir` is `Some`, so calls through here with `None` are not
/// counted — confirm this matches the intended cache-control cadence.
pub async fn execute_tool_call_in_dir(&mut self, tool_call: &ToolCall, working_dir: Option<&str>) -> Result<String> {
    self.execute_tool_in_dir(tool_call, working_dir).await
}
/// Log an error message to the session JSON file as the last message
/// This is used in autonomous mode to record context length exceeded errors
pub fn log_error_to_session(
@@ -3157,11 +3235,14 @@ impl<W: UiWriter> Agent<W> {
self.ui_writer.print_tool_output_header();
}
// Clone working_dir to avoid borrow checker issues
let working_dir = self.working_dir.clone();
let exec_start = Instant::now();
// Add 8-minute timeout for tool execution
let tool_result = match tokio::time::timeout(
Duration::from_secs(8 * 60), // 8 minutes
self.execute_tool(&tool_call),
// Use working_dir if set (from --codebase-fast-start)
self.execute_tool_in_dir(&tool_call, working_dir.as_deref()),
)
.await
{
@@ -3296,7 +3377,25 @@ impl<W: UiWriter> Agent<W> {
tool_call.tool, tool_call.args
))
};
let result_message = Message::new(MessageRole::User, format!("Tool result: {}", tool_result));
let result_message = {
// Check if we should use cache control (every 10 tool calls)
// But only if we haven't already added 4 cache_control annotations
if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 && self.count_cache_controls_in_history() < 4 {
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::User, format!("Tool result: {}", tool_result), cache_config, provider)
} else {
Message::new(MessageRole::User, format!("Tool result: {}", tool_result))
}
} else {
Message::new(MessageRole::User, format!("Tool result: {}", tool_result))
}
};
self.context_window.add_message(tool_message);
self.context_window.add_message(result_message);
@@ -3646,24 +3745,7 @@ impl<W: UiWriter> Agent<W> {
.replace("<</SYS>>", "");
if !raw_clean.trim().is_empty() {
let assistant_message = {
// Check if we should use cache control (every 10 tool calls)
if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
let provider = self.providers.get(None)?;
if let Some(cache_config) = match provider.name() {
"anthropic" => self.config.providers.anthropic.as_ref()
.and_then(|c| c.cache_config.as_ref())
.and_then(|config| Self::parse_cache_control(config)),
_ => None,
} {
Message::with_cache_control_validated(MessageRole::Assistant, raw_clean, cache_config, provider)
} else {
Message::new(MessageRole::Assistant, raw_clean)
}
} else {
Message::new(MessageRole::Assistant, raw_clean)
}
};
let assistant_message = Message::new(MessageRole::Assistant, raw_clean);
self.context_window.add_message(assistant_message);
}
}
@@ -3707,8 +3789,17 @@ impl<W: UiWriter> Agent<W> {
/// Execute a tool call with no working-directory override.
///
/// The counter is incremented here because `execute_tool_in_dir` skips its
/// own increment when `working_dir` is `None` (to avoid double counting on
/// this delegation path).
///
/// NOTE(review): this split counting protocol is fragile — a direct call to
/// `execute_tool_in_dir(.., None)` (as in the streaming loop) bypasses the
/// counter entirely. Consider centralizing the increment in one place.
pub async fn execute_tool(&mut self, tool_call: &ToolCall) -> Result<String> {
    // Increment tool call count
    self.tool_call_count += 1;
    self.execute_tool_in_dir(tool_call, None).await
}
let result = self.execute_tool_inner(tool_call).await;
/// Execute a tool with an optional working directory (for discovery commands)
pub async fn execute_tool_in_dir(&mut self, tool_call: &ToolCall, working_dir: Option<&str>) -> Result<String> {
// Only increment tool call count if not already incremented by execute_tool
if working_dir.is_some() {
self.tool_call_count += 1;
}
let result = self.execute_tool_inner_in_dir(tool_call, working_dir).await;
let log_str = match &result {
Ok(s) => s.clone(),
Err(e) => format!("ERROR: {}", e),
@@ -3717,9 +3808,10 @@ impl<W: UiWriter> Agent<W> {
result
}
async fn execute_tool_inner(&mut self, tool_call: &ToolCall) -> Result<String> {
async fn execute_tool_inner_in_dir(&mut self, tool_call: &ToolCall, working_dir: Option<&str>) -> Result<String> {
debug!("=== EXECUTING TOOL ===");
debug!("Tool name: {}", tool_call.tool);
debug!("Working directory passed to execute_tool_inner_in_dir: {:?}", working_dir);
debug!("Tool args (raw): {:?}", tool_call.args);
debug!(
"Tool args (JSON): {}",
@@ -3754,9 +3846,11 @@ impl<W: UiWriter> Agent<W> {
let receiver = ToolOutputReceiver {
ui_writer: &self.ui_writer,
};
debug!("ABOUT TO CALL execute_bash_streaming_in_dir: escaped_command='{}', working_dir={:?}", escaped_command, working_dir);
match executor
.execute_bash_streaming(&escaped_command, &receiver)
.execute_bash_streaming_in_dir(&escaped_command, &receiver, working_dir)
.await
{
Ok(result) => {

View File

@@ -0,0 +1,20 @@
[package]
name = "g3-ensembles"
version = "0.1.0"
edition = "2021"
description = "Multi-agent ensemble functionality for G3"
[dependencies]
g3-core = { path = "../g3-core" }
g3-config = { path = "../g3-config" }
clap = { workspace = true }
tokio = { workspace = true }
anyhow = { workspace = true }
tracing = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
chrono = { version = "0.4", features = ["serde"] }
uuid = { workspace = true }
[dev-dependencies]
tempfile = "3.8"

View File

@@ -0,0 +1,422 @@
# G3 Ensembles Testing Documentation
This document describes the comprehensive test suite for the g3-ensembles crate (Flock Mode).
## Test Coverage
### Unit Tests (`src/tests.rs`)
Unit tests cover the core data structures and logic:
#### Status Module Tests
1. **`test_segment_state_display`**
- Verifies that `SegmentState` enum displays correctly with emojis
- Tests all states: Pending, Running, Completed, Failed, Cancelled
2. **`test_flock_status_creation`**
- Tests creation of `FlockStatus` with correct initial values
- Verifies session ID, segment count, and zero metrics
3. **`test_segment_status_update`**
- Tests updating a single segment's status
- Verifies metrics are correctly aggregated
4. **`test_multiple_segment_updates`**
- Tests updating multiple segments
- Verifies aggregate metrics (tokens, tool calls, errors) are summed correctly
5. **`test_is_complete`**
- Tests the completion detection logic
- Verifies that flock is only complete when all segments are in terminal states
- Tests various scenarios: no segments, partial completion, full completion
6. **`test_count_by_state`**
- Tests counting segments by their state
- Verifies correct counts for each state type
7. **`test_status_serialization`**
- Tests JSON serialization and deserialization
- Verifies round-trip conversion preserves all data
8. **`test_report_generation`**
- Tests the comprehensive report generation
- Verifies all expected sections are present
- Checks that metrics are correctly displayed
**Run unit tests:**
```bash
cargo test -p g3-ensembles --lib
```
### Integration Tests (`tests/integration_tests.rs`)
Integration tests verify end-to-end functionality with real file system and git operations:
#### Configuration Tests
1. **`test_flock_config_validation`**
- Tests validation of project directory requirements
- Verifies error messages for:
- Non-existent directory
- Non-git repository
- Missing flock-requirements.md
- Verifies successful creation with valid inputs
2. **`test_flock_config_builder`**
- Tests the builder pattern for `FlockConfig`
- Verifies `with_max_turns()` and `with_g3_binary()` methods
3. **`test_workspace_creation`**
- Tests creation of `FlockMode` instance
- Verifies project structure is valid
#### Git Operations Tests
4. **`test_git_clone_functionality`**
- Tests git cloning of project repository
- Verifies cloned repository structure:
- `.git` directory exists
- All files are present
- Git history is preserved
5. **`test_multiple_segment_clones`**
- Tests cloning multiple segments (2 segments)
- Verifies each segment is independent
- Tests that modifications in one segment don't affect others
6. **`test_git_repo_independence`**
- Comprehensive test of segment independence
- Creates commits in different segments
- Verifies git histories diverge correctly
- Ensures files in one segment don't appear in others
#### Segment Management Tests
7. **`test_segment_requirements_creation`**
- Tests creation of `segment-requirements.md` files
- Verifies content is written correctly
8. **`test_requirements_file_content`**
- Tests the structure of flock-requirements.md
- Verifies content contains expected sections
#### Status File Tests
9. **`test_status_file_operations`**
- Tests saving and loading `flock-status.json`
- Verifies JSON serialization to file
- Tests deserialization from file
#### JSON Processing Tests
10. **`test_json_extraction`**
- Tests extraction of JSON arrays from text output
- Verifies handling of various formats:
- Plain JSON
- JSON in markdown code blocks
- JSON with surrounding text
- Invalid input (no JSON)
11. **`test_partition_json_parsing`**
- Tests parsing of partition JSON structure
- Verifies module names, requirements, and dependencies are extracted correctly
**Run integration tests:**
```bash
cargo test -p g3-ensembles --test integration_tests
```
### End-to-End Test Script (`scripts/test-flock-mode.sh`)
A comprehensive bash script that tests the complete flock mode workflow:
#### Test Scenarios
1. **Project Creation**
- Creates a temporary test project
- Initializes git repository
- Creates flock-requirements.md with realistic content
- Makes initial commit
2. **Project Structure Validation**
- Verifies `.git` directory exists
- Verifies `flock-requirements.md` exists
3. **Git Operations**
- Tests cloning project to segment directories
- Verifies cloned repositories are valid
- Tests git log to ensure history is preserved
4. **Segment Independence**
- Creates two segments
- Modifies one segment
- Verifies other segment is unaffected
5. **Segment Requirements**
- Creates `segment-requirements.md` in segments
- Verifies content is written correctly
6. **Status File Operations**
- Creates `flock-status.json`
- Validates JSON structure (if `jq` is available)
**Run end-to-end test:**
```bash
./scripts/test-flock-mode.sh
```
## Test Results
### Current Status
**All tests passing**
- **Unit tests**: 8/8 passed
- **Integration tests**: 11/11 passed
- **End-to-end test**: All scenarios passed
### Test Execution Time
- Unit tests: ~0.01s
- Integration tests: ~0.35s (includes git operations)
- End-to-end test: ~1-2s (includes cleanup)
## Running All Tests
### Run all tests for g3-ensembles:
```bash
cargo test -p g3-ensembles
```
### Run with verbose output:
```bash
cargo test -p g3-ensembles -- --nocapture
```
### Run specific test:
```bash
cargo test -p g3-ensembles test_git_clone_functionality
```
### Run tests with coverage (requires cargo-tarpaulin):
```bash
cargo tarpaulin -p g3-ensembles
```
## Test Helpers
### `create_test_project(name: &str) -> TempDir`
Helper function in integration tests that creates a complete test project:
- Initializes git repository
- Configures git user
- Creates flock-requirements.md with two modules
- Creates README.md
- Makes initial commit
- Returns `TempDir` that auto-cleans on drop
**Usage:**
```rust
let project_dir = create_test_project("my-test");
// Use project_dir.path() to access the directory
// Automatically cleaned up when project_dir goes out of scope
```
### `extract_json_array(output: &str) -> Option<String>`
Helper function that extracts JSON arrays from text output:
- Finds first `[` and last `]`
- Returns content between them
- Returns `None` if no valid JSON array found
## Test Data
### Sample Requirements
The test suite uses realistic requirements for a calculator project:
**Module A: Core Library**
- Arithmetic operations (add, sub, mul, div)
- Error handling for division by zero
- Unit tests
- Documentation
**Module B: CLI Application**
- Command-line interface using clap
- Subcommands for each operation
- User-friendly output
- Error handling
This structure tests the partitioning logic with:
- Clear module boundaries
- Dependency relationship (CLI depends on Core)
- Realistic implementation requirements
## Continuous Integration
To integrate these tests into CI/CD:
### GitHub Actions Example
```yaml
name: Test G3 Ensembles
on: [push, pull_request]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- name: Run unit tests
run: cargo test -p g3-ensembles --lib
- name: Run integration tests
run: cargo test -p g3-ensembles --test integration_tests
- name: Run end-to-end test
run: ./scripts/test-flock-mode.sh
```
## Test Coverage Goals
### Current Coverage
- ✅ Status data structures: 100%
- ✅ Configuration validation: 100%
- ✅ Git operations: 100%
- ✅ Segment independence: 100%
- ✅ JSON processing: 100%
- ⚠️ Full flock execution: Requires LLM access (tested manually)
### Future Test Additions
1. **Mock LLM Tests**
- Mock the partitioning agent response
- Test full flock workflow without real LLM calls
2. **Performance Tests**
- Test with large numbers of segments (10+)
- Measure memory usage
- Test concurrent segment execution
3. **Error Handling Tests**
- Test behavior when git operations fail
- Test behavior when segments fail
- Test recovery scenarios
4. **Edge Cases**
- Empty requirements file
- Single segment (degenerate case)
- Very large requirements file
- Binary files in project
## Debugging Tests
### Enable debug logging:
```bash
RUST_LOG=debug cargo test -p g3-ensembles -- --nocapture
```
### Keep test artifacts:
```bash
# Modify test to not cleanup
# Or inspect TEST_DIR before cleanup in end-to-end test
export TEST_DIR=/tmp/my-test
./scripts/test-flock-mode.sh
ls -la $TEST_DIR
```
### Run single test with backtrace:
```bash
RUST_BACKTRACE=1 cargo test -p g3-ensembles test_git_clone_functionality -- --nocapture
```
## Contributing Tests
When adding new features to g3-ensembles:
1. **Add unit tests** for new data structures and logic
2. **Add integration tests** for new file/git operations
3. **Update end-to-end test** if workflow changes
4. **Document tests** in this file
5. **Ensure all tests pass** before submitting PR
### Test Naming Convention
- Unit tests: `test_<functionality>`
- Integration tests: `test_<feature>_<scenario>`
- Use descriptive names that explain what is being tested
### Test Structure
```rust
#[test]
fn test_feature_name() {
// Arrange: Set up test data
let data = create_test_data();
// Act: Perform the operation
let result = perform_operation(data);
// Assert: Verify the result
assert_eq!(result, expected_value);
assert!(result.is_ok());
}
```
## Troubleshooting
### Tests fail with "git not found"
**Solution**: Install git:
```bash
# macOS
brew install git
# Ubuntu/Debian
sudo apt-get install git
# Windows
choco install git
```
### Tests fail with permission errors
**Solution**: Ensure test directories are writable:
```bash
chmod -R u+w /tmp
```
### Integration tests are slow
**Cause**: Git operations and file I/O take time
**Solution**: Run only unit tests for quick feedback:
```bash
cargo test -p g3-ensembles --lib
```
### Test artifacts not cleaned up
**Cause**: Test panicked before cleanup
**Solution**: Manually clean temp directories:
```bash
rm -rf /tmp/tmp.*
```
## Summary
The g3-ensembles test suite provides comprehensive coverage of:
- ✅ Core data structures and logic
- ✅ Configuration validation
- ✅ Git repository operations
- ✅ Segment independence
- ✅ Status tracking and reporting
- ✅ JSON processing
- ✅ End-to-end workflow
All tests are automated, fast, and reliable. The test suite ensures that flock mode works correctly across different scenarios and edge cases.

View File

@@ -0,0 +1,911 @@
//! Flock mode implementation - parallel multi-agent development
use anyhow::{Context, Result};
use chrono::Utc;
use g3_config::Config;
use std::path::{Path, PathBuf};
use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tracing::{debug, error, info, warn};
use uuid::Uuid;
use crate::status::{FlockStatus, SegmentState, SegmentStatus};
/// Configuration for flock mode
///
/// Built with [`FlockConfig::new`] (which validates the project layout) and
/// optionally refined through the `with_*` builder methods.
#[derive(Debug, Clone)]
pub struct FlockConfig {
    /// Project directory (must be a git repo with flock-requirements.md)
    pub project_dir: PathBuf,
    /// Flock workspace directory where segments will be created
    pub flock_workspace: PathBuf,
    /// Number of segments to partition work into
    pub num_segments: usize,
    /// Maximum turns per segment (for autonomous mode)
    pub max_turns: usize,
    /// G3 configuration to use for agents
    pub g3_config: Config,
    /// Path to g3 binary (defaults to current executable)
    pub g3_binary: Option<PathBuf>,
}
impl FlockConfig {
    /// Build a flock configuration after validating the project layout.
    ///
    /// Fails when the project directory is missing, is not a git repository,
    /// or does not contain a `flock-requirements.md` file. The g3
    /// configuration is loaded from its default location.
    pub fn new(
        project_dir: PathBuf,
        flock_workspace: PathBuf,
        num_segments: usize,
    ) -> Result<Self> {
        // The project directory itself must exist...
        if !project_dir.exists() {
            anyhow::bail!("Project directory does not exist: {}", project_dir.display());
        }
        // ...must be a git repository (segments are created via `git clone`)...
        if !project_dir.join(".git").exists() {
            anyhow::bail!("Project directory must be a git repository: {}", project_dir.display());
        }
        // ...and must carry the top-level requirements file to partition.
        if !project_dir.join("flock-requirements.md").exists() {
            anyhow::bail!(
                "Project directory must contain flock-requirements.md: {}",
                project_dir.display()
            );
        }
        let g3_config = Config::load(None)?;
        Ok(Self {
            project_dir,
            flock_workspace,
            num_segments,
            max_turns: 5, // Default
            g3_config,
            g3_binary: None,
        })
    }
    /// Override the per-segment turn limit (builder style).
    pub fn with_max_turns(mut self, max_turns: usize) -> Self {
        self.max_turns = max_turns;
        self
    }
    /// Point at a specific g3 binary instead of the current executable.
    pub fn with_g3_binary(mut self, binary: PathBuf) -> Self {
        self.g3_binary = Some(binary);
        self
    }
    /// Replace the previously loaded g3 configuration.
    pub fn with_config(mut self, config: Config) -> Self {
        self.g3_config = config;
        self
    }
}
/// Flock mode orchestrator
///
/// Owns the run-wide configuration plus the live status that is persisted to
/// `flock-status.json` in the flock workspace as segments make progress.
pub struct FlockMode {
    // Immutable settings for this run.
    config: FlockConfig,
    // Aggregated per-segment status, updated as workers report in.
    status: FlockStatus,
    // Unique id for this flock run (a UUID v4 string).
    session_id: String,
}
impl FlockMode {
/// Create a new flock mode instance with a freshly generated session id.
pub fn new(config: FlockConfig) -> Result<Self> {
    let session_id = Uuid::new_v4().to_string();
    // Seed the status tracker from the run parameters so progress can be
    // persisted from the very first segment update.
    let status = FlockStatus::new(
        session_id.clone(),
        config.project_dir.clone(),
        config.flock_workspace.clone(),
        config.num_segments,
    );
    Ok(Self {
        config,
        status,
        session_id,
    })
}
/// Run flock mode
///
/// Drives the four phases end to end: partition the requirements, clone a
/// workspace per segment, run all segments concurrently, then persist the
/// final status and print a summary report.
pub async fn run(&mut self) -> Result<()> {
    info!("Starting flock mode with {} segments", self.config.num_segments);

    // Phase 1: ask an agent to split the requirements into modules.
    println!("\n🧠 Step 1: Partitioning requirements into {} segments...", self.config.num_segments);
    let partitions = self.partition_requirements().await?;

    // Phase 2: one cloned workspace per partition.
    println!("\n📁 Step 2: Creating segment workspaces...");
    self.create_segment_workspaces(&partitions).await?;

    // Phase 3: run every segment worker concurrently.
    println!("\n🚀 Step 3: Running {} segments in parallel...", self.config.num_segments);
    self.run_segments_parallel().await?;

    // Phase 4: record completion and report.
    println!("\n📊 Step 4: Generating final report...");
    self.status.completed_at = Some(Utc::now());
    self.save_status()?;
    println!("{}", self.status.generate_report());

    Ok(())
}
/// Partition requirements using an AI agent
///
/// Reads `flock-requirements.md`, runs a single-shot g3 agent in a scratch
/// `_partition` workspace with a prompt asking for `num_segments` modules as
/// a marked JSON array, then renders each parsed partition into a
/// `segment-requirements.md`-style markdown document. Returns one markdown
/// string per partition.
async fn partition_requirements(&mut self) -> Result<Vec<String>> {
    let requirements_path = self.config.project_dir.join("flock-requirements.md");
    let requirements_content = std::fs::read_to_string(&requirements_path)
        .context("Failed to read flock-requirements.md")?;
    // Create a temporary workspace for the partitioning agent
    let partition_workspace = self.config.flock_workspace.join("_partition");
    std::fs::create_dir_all(&partition_workspace)?;
    // Create the partitioning prompt. The doubled braces ({{ }}) are format!
    // escapes; the agent is told to emit a '{PARTITION JSON}' marker followed
    // by a fenced JSON block, which extract_json_from_output() looks for.
    let partition_prompt = format!(
        "You are a software architect tasked with partitioning project requirements into {} logical, \
        largely non-overlapping modules that can grow into separate architectural components \
        (e.g., crates, services, or packages).\n\n\
        REQUIREMENTS:\n{}\n\n\
        INSTRUCTIONS:\n\
        1. Analyze the requirements carefully\n\
        2. Identify {} distinct architectural modules that:\n\
        - Have minimal overlap and dependencies\n\
        - Can be developed largely independently\n\
        - Represent logical architectural boundaries\n\
        - Could eventually become separate crates or services\n\
        3. For each module, provide:\n\
        - A clear module name\n\
        - The specific requirements that belong to this module\n\
        - Any dependencies on other modules\n\n\
        4. Return your final partitioning exactly once, prefixed by the marker '{{PARTITION JSON}}' followed by a fenced code block that starts with \"```json\" and ends with \"```\". Place only the JSON array inside the fence.\n\
        5. Use the final_output tool to provide your partitioning as a JSON array of objects, where each object has:\n\
        - \"module_name\": string\n\
        - \"requirements\": string (the requirements text for this module)\n\
        - \"dependencies\": array of strings (names of other modules this depends on)\n\n\
        Example format:\n\
        {{{{PARTITION JSON}}}}\n\
        ```json\n\
        [\n\
        {{\n\
        \"module_name\": \"core-engine\",\n\
        \"requirements\": \"Implement the core processing engine...\",\n\
        \"dependencies\": []\n\
        }},\n\
        {{\n\
        \"module_name\": \"api-server\",\n\
        \"requirements\": \"Create REST API endpoints...\",\n\
        \"dependencies\": [\"core-engine\"]\n\
        }}\n\
        ]\n\
        ```\n\n\
        Be thoughtful and strategic in your partitioning. The goal is to enable parallel development.",
        self.config.num_segments,
        requirements_content,
        self.config.num_segments
    );
    // Get g3 binary path
    let g3_binary = self.get_g3_binary()?;
    // Run g3 in single-shot mode to partition requirements
    println!(" Analyzing requirements and creating partitions...");
    let output = Command::new(&g3_binary)
        .arg("--workspace")
        .arg(&partition_workspace)
        .arg("--quiet") // Disable logging for partitioning agent
        .arg(&partition_prompt)
        .output()
        .await
        .context("Failed to run g3 for partitioning")?;
    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        anyhow::bail!("Partitioning agent failed: {}", stderr);
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    debug!("Partitioning agent output: {}", stdout);
    // Extract JSON from the output
    let partitions_json = Self::extract_json_from_output(&stdout)
        .context("Failed to extract partition JSON from agent output")?;
    // Parse the partitions
    let partitions: Vec<serde_json::Value> = serde_json::from_str(&partitions_json)
        .context("Failed to parse partition JSON")?;
    if partitions.len() != self.config.num_segments {
        // NOTE(review): despite the "Adjusting..." wording, no adjustment is
        // performed — the run simply proceeds with however many partitions
        // the agent returned.
        warn!(
            "Expected {} partitions but got {}. Adjusting...",
            self.config.num_segments,
            partitions.len()
        );
    }
    // Extract requirements text from each partition and render it as a small
    // markdown document (module name, dependencies, requirements).
    let mut partition_texts = Vec::new();
    for (i, partition) in partitions.iter().enumerate() {
        let default_name = format!("module-{}", i + 1);
        let module_name = partition["module_name"]
            .as_str()
            .unwrap_or(&default_name);
        // "requirements" is the only mandatory field per partition.
        let requirements = partition["requirements"]
            .as_str()
            .context("Missing requirements field in partition")?;
        let dependencies = partition["dependencies"]
            .as_array()
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.as_str())
                    .collect::<Vec<_>>()
                    .join(", ")
            })
            .unwrap_or_default();
        let partition_text = format!(
            "# Module: {}\n\n## Dependencies\n{}\n\n## Requirements\n\n{}",
            module_name,
            if dependencies.is_empty() {
                "None".to_string()
            } else {
                dependencies
            },
            requirements
        );
        partition_texts.push(partition_text);
        println!(" ✓ Created partition {}: {}", i + 1, module_name);
    }
    Ok(partition_texts)
}
/// Extract JSON from agent output (looks for JSON array in output)
///
/// Scans for every occurrence of a partition marker (`{{PARTITION JSON}}`
/// or `{PARTITION JSON}`), collects the contents of the first ``` fence
/// that follows each marker, and returns the first candidate that parses as
/// valid JSON. Fails if no marker/fence pair exists or no candidate parses.
fn extract_json_from_output(output: &str) -> Result<String> {
    // The agent may emit the marker in escaped ({{...}}) or literal form.
    const MARKERS: &[&str] = &["{{PARTITION JSON}}", "{PARTITION JSON}"];
    let mut candidates = Vec::new();
    // Find all marker occurrences
    for &marker in MARKERS {
        let mut search_start = 0;
        while let Some(marker_index) = output[search_start..].find(marker) {
            let absolute_index = search_start + marker_index;
            let after_marker = &output[absolute_index + marker.len()..];
            // Try to find a code fence after this marker
            if let Some(fence_start) = after_marker.find("```") {
                let after_fence = &after_marker[fence_start + 3..];
                // Skip the optional "json" language identifier.
                let content_start = after_fence
                    .strip_prefix("json")
                    .unwrap_or(after_fence)
                    .trim_start();
                // Find closing fence
                if let Some(fence_end) = content_start.find("```") {
                    let json_candidate = content_start[..fence_end].trim();
                    candidates.push(json_candidate.to_string());
                }
            }
            // Move search position forward
            search_start = absolute_index + marker.len();
        }
    }
    if candidates.is_empty() {
        anyhow::bail!("Could not find any partition JSON markers with code fences in agent output");
    }
    // Try to parse each candidate and return the first valid JSON
    let mut last_error = None;
    for (i, candidate) in candidates.iter().enumerate() {
        match serde_json::from_str::<serde_json::Value>(candidate) {
            Ok(_) => {
                debug!("Successfully parsed JSON from candidate {} of {}", i + 1, candidates.len());
                return Ok(candidate.clone());
            }
            Err(e) => {
                debug!("Failed to parse candidate {} of {}: {}", i + 1, candidates.len(), e);
                last_error = Some(e);
            }
        }
    }
    // Every candidate failed to parse. `last_error` is necessarily Some here
    // because the loop above records an error for each failed candidate, so
    // the old unconditional "No valid JSON found" fallback was unreachable.
    let err = last_error
        .expect("non-empty candidate list with no successes must have recorded an error");
    anyhow::bail!(
        "Found {} JSON candidate(s) but none were valid JSON. Last error: {}",
        candidates.len(),
        err
    )
}
/// Create segment workspaces by copying project directory
///
/// For each partition, clones the project into `segment-N/` under the flock
/// workspace and writes the partition's text to `segment-requirements.md`.
async fn create_segment_workspaces(&mut self, partitions: &[String]) -> Result<()> {
    // The workspace root may not exist yet on the first run.
    std::fs::create_dir_all(&self.config.flock_workspace)?;
    for (index, partition) in partitions.iter().enumerate() {
        let segment_id = index + 1;
        let segment_dir = self
            .config
            .flock_workspace
            .join(format!("segment-{}", segment_id));
        println!(" Creating segment {} workspace...", segment_id);
        // A git clone gives the segment an independent copy of the history.
        self.copy_git_repo(&self.config.project_dir, &segment_dir)
            .await
            .context(format!("Failed to copy project to segment {}", segment_id))?;
        // Drop this partition's requirements next to the cloned sources.
        std::fs::write(segment_dir.join("segment-requirements.md"), partition)
            .context(format!("Failed to write requirements for segment {}", segment_id))?;
        println!(" ✓ Segment {} workspace ready at {}", segment_id, segment_dir.display());
    }
    Ok(())
}
/// Copy a git repository to a new location
///
/// Uses `git clone` so the destination gets a full, independent repository
/// (history included) rather than a plain file copy.
async fn copy_git_repo(&self, source: &Path, dest: &Path) -> Result<()> {
    let clone = Command::new("git")
        .arg("clone")
        .arg(source)
        .arg(dest)
        .output()
        .await
        .context("Failed to run git clone")?;
    if clone.status.success() {
        Ok(())
    } else {
        let stderr = String::from_utf8_lossy(&clone.stderr);
        anyhow::bail!("Git clone failed: {}", stderr)
    }
}
/// Run all segments in parallel
///
/// Spawns one tokio task per segment, waits for all of them, and records the
/// final state of each segment (including worker failures and task panics)
/// in the shared status file.
async fn run_segments_parallel(&mut self) -> Result<()> {
    let mut handles = Vec::new();
    for segment_id in 1..=self.config.num_segments {
        let segment_dir = self.config.flock_workspace.join(format!("segment-{}", segment_id));
        let max_turns = self.config.max_turns;
        let g3_binary = self.get_g3_binary()?;
        let status_file = self.get_status_file_path();
        let session_id = self.session_id.clone();
        // Record the segment as running before its worker starts, so the
        // status file always reflects every spawned segment.
        let segment_status = SegmentStatus {
            segment_id,
            workspace: segment_dir.clone(),
            state: SegmentState::Running,
            started_at: Utc::now(),
            completed_at: None,
            tokens_used: 0,
            tool_calls: 0,
            errors: 0,
            current_turn: 0,
            max_turns,
            last_message: Some("Starting...".to_string()),
            error_message: None,
        };
        self.status.update_segment(segment_id, segment_status);
        self.save_status()?;
        // Spawn a task for this segment
        let handle = tokio::spawn(async move {
            run_segment(
                segment_id,
                segment_dir,
                max_turns,
                g3_binary,
                status_file,
                session_id,
            )
            .await
        });
        handles.push((segment_id, handle));
    }
    // Wait for all segments to complete
    for (segment_id, handle) in handles {
        match handle.await {
            Ok(Ok(final_status)) => {
                println!("\n✅ Segment {} completed", segment_id);
                self.status.update_segment(segment_id, final_status);
                self.save_status()?;
            }
            Ok(Err(e)) => {
                // The worker ran but returned an error.
                error!("Segment {} failed: {}", segment_id, e);
                self.record_segment_failure(segment_id, e.to_string())?;
            }
            Err(e) => {
                // The tokio task itself panicked or was cancelled.
                error!("Segment {} task panicked: {}", segment_id, e);
                self.record_segment_failure(segment_id, format!("Task panicked: {}", e))?;
            }
        }
    }
    Ok(())
}
/// Mark a segment as failed with the given message and persist the status.
///
/// Reuses the segment's existing status entry (preserving its metrics) or
/// synthesizes a fresh one when the segment never reported in.
fn record_segment_failure(&mut self, segment_id: usize, error_message: String) -> Result<()> {
    let mut segment_status = self
        .status
        .segments
        .get(&segment_id)
        .cloned()
        .unwrap_or_else(|| SegmentStatus {
            segment_id,
            workspace: self.config.flock_workspace.join(format!("segment-{}", segment_id)),
            state: SegmentState::Failed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 0,
            tool_calls: 0,
            errors: 1,
            current_turn: 0,
            max_turns: self.config.max_turns,
            last_message: None,
            error_message: Some(error_message.clone()),
        });
    segment_status.state = SegmentState::Failed;
    segment_status.completed_at = Some(Utc::now());
    segment_status.error_message = Some(error_message);
    segment_status.errors += 1;
    self.status.update_segment(segment_id, segment_status);
    self.save_status()
}
/// Get the g3 binary path
///
/// Prefers an explicitly configured binary; otherwise falls back to the
/// currently running executable (flock re-invokes itself for workers).
fn get_g3_binary(&self) -> Result<PathBuf> {
    match self.config.g3_binary {
        Some(ref binary) => Ok(binary.clone()),
        None => std::env::current_exe().context("Failed to get current executable path"),
    }
}
/// Get the status file path
///
/// The single shared `flock-status.json` lives at the root of the flock
/// workspace; both the orchestrator and workers read/write this file.
fn get_status_file_path(&self) -> PathBuf {
    self.config.flock_workspace.join("flock-status.json")
}
/// Save current status to file
///
/// Persists the orchestrator's in-memory status to `flock-status.json`.
fn save_status(&self) -> Result<()> {
    let status_file = self.get_status_file_path();
    self.status.save_to_file(&status_file)
}
}
/// Run a single segment worker
async fn run_segment(
segment_id: usize,
segment_dir: PathBuf,
max_turns: usize,
g3_binary: PathBuf,
status_file: PathBuf,
session_id: String,
) -> Result<SegmentStatus> {
info!("Starting segment {} in {}", segment_id, segment_dir.display());
let mut segment_status = SegmentStatus {
segment_id,
workspace: segment_dir.clone(),
state: SegmentState::Running,
started_at: Utc::now(),
completed_at: None,
tokens_used: 0,
tool_calls: 0,
errors: 0,
current_turn: 0,
max_turns,
last_message: Some("Starting autonomous mode...".to_string()),
error_message: None,
};
// Run g3 in autonomous mode with segment-requirements.md
let mut child = Command::new(&g3_binary)
.arg("--workspace")
.arg(&segment_dir)
.arg("--autonomous")
.arg("--max-turns")
.arg(max_turns.to_string())
.arg("--requirements")
.arg(std::fs::read_to_string(segment_dir.join("segment-requirements.md"))?)
.arg("--quiet") // Disable session logging for workers
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.context("Failed to spawn g3 process")?;
// Stream output and update status
let stdout = child.stdout.take().context("Failed to get stdout")?;
let stderr = child.stderr.take().context("Failed to get stderr")?;
let stdout_reader = BufReader::new(stdout);
let stderr_reader = BufReader::new(stderr);
let mut stdout_lines = stdout_reader.lines();
let mut stderr_lines = stderr_reader.lines();
// Read output and update status
loop {
tokio::select! {
line = stdout_lines.next_line() => {
match line {
Ok(Some(line)) => {
println!("[Segment {}] {}", segment_id, line);
// Parse output for status updates
if line.contains("TURN") {
// Extract turn number if possible
if let Some(turn_str) = line.split("TURN").nth(1) {
if let Ok(turn) = turn_str.trim().split('/').next().unwrap_or("0").parse::<usize>() {
segment_status.current_turn = turn;
}
}
}
segment_status.last_message = Some(line);
update_status_file(&status_file, &session_id, segment_status.clone())?;
}
Ok(None) => break,
Err(e) => {
error!("Error reading stdout for segment {}: {}", segment_id, e);
break;
}
}
}
line = stderr_lines.next_line() => {
match line {
Ok(Some(line)) => {
eprintln!("[Segment {} ERROR] {}", segment_id, line);
segment_status.errors += 1;
update_status_file(&status_file, &session_id, segment_status.clone())?;
}
Ok(None) => break,
Err(e) => {
error!("Error reading stderr for segment {}: {}", segment_id, e);
break;
}
}
}
}
}
// Wait for process to complete
let status = child.wait().await.context("Failed to wait for g3 process")?;
segment_status.completed_at = Some(Utc::now());
if status.success() {
segment_status.state = SegmentState::Completed;
segment_status.last_message = Some("Completed successfully".to_string());
} else {
segment_status.state = SegmentState::Failed;
segment_status.error_message = Some(format!("Process exited with status: {}", status));
segment_status.errors += 1;
}
// Try to extract metrics from session log if available
let log_dir = segment_dir.join("logs");
if log_dir.exists() {
if let Ok(entries) = std::fs::read_dir(&log_dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.extension().and_then(|s| s.to_str()) == Some("json") {
if let Ok(log_content) = std::fs::read_to_string(&path) {
if let Ok(log_json) = serde_json::from_str::<serde_json::Value>(&log_content) {
// Extract token usage
if let Some(context) = log_json.get("context_window") {
if let Some(cumulative) = context.get("cumulative_tokens") {
if let Some(tokens) = cumulative.as_u64() {
segment_status.tokens_used = tokens;
}
}
}
// Count tool calls from conversation history
if let Some(context) = log_json.get("context_window") {
if let Some(history) = context.get("conversation_history") {
if let Some(messages) = history.as_array() {
let tool_call_count = messages
.iter()
.filter(|msg| {
msg.get("role")
.and_then(|r| r.as_str())
== Some("tool")
})
.count();
segment_status.tool_calls = tool_call_count as u64;
}
}
}
}
}
}
}
}
}
update_status_file(&status_file, &session_id, segment_status.clone())?;
Ok(segment_status)
}
/// Update the status file with new segment status
///
/// Loads the current flock status from disk (or synthesizes an empty one if
/// the file is unexpectedly missing), merges in the segment's status, and
/// writes the file back.
fn update_status_file(
    status_file: &PathBuf,
    session_id: &str,
    segment_status: SegmentStatus,
) -> Result<()> {
    let mut flock_status = match status_file.exists() {
        true => FlockStatus::load_from_file(status_file)?,
        // The orchestrator writes the file before workers start, so this
        // branch should be unreachable — degrade gracefully regardless.
        false => FlockStatus::new(session_id.to_string(), PathBuf::new(), PathBuf::new(), 0),
    };
    flock_status.update_segment(segment_status.segment_id, segment_status);
    flock_status.save_to_file(status_file)
}
#[cfg(test)]
mod tests {
use super::FlockMode;
#[test]
fn extract_json_from_output_handles_partition_marker_and_fences() {
    // Realistic agent output preamble: ANSI-escaped terminal rendering of a
    // markdown answer, including a decoy ```json fence that appears BEFORE
    // any partition marker and therefore must be ignored by the extractor.
    const NOISY_PREFIX: &str = concat!(
        "\u{001b}[2m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m# Requirements Partitioning into 2 Architectural Modules\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m## Analysis\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m```json\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m[\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m {\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m }\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m]\u{001b}[0m\n",
        "\u{001b}[1A\u{001b}[2K│ \u{001b}[2m```\u{001b}[0m\n",
        "\n",
        "# Requirements Partitioning into 2 Architectural Modules\n",
        "\n",
        "## Analysis\n",
        "\n",
        "The requirements have been partitioned into two logical, largely non-overlapping modules based on architectural concerns:\n",
        "\n",
        "1. **Message Protocol Module** - Handles message identity, formatting, and LLM communication\n",
        "2. **Observability Module** - Handles logging, summarization, and monitoring of message history\n",
        "\n",
        "## Module Partitioning\n",
        "\n"
    );
    // The exact JSON payload we expect back, verbatim (the extractor trims
    // only the outer whitespace of the fenced content).
    let expected_json = r#"[
{
"module_name": "message-protocol",
"requirements": "For all messages sent in the message history, unique ID that is not longer than six characters they need to be alphanumeric and can be case sensitive. Double check the message format specification for Open AI message formats. Write tests to make sure the LLM works, so make sure it's an integration test.",
"dependencies": []
},
{
"module_name": "observability",
"requirements": "Add functionality that will summarise the entire message history every time it is sent to LLM. Put it in the logs directory the same as the workspace logs for message history. Call it \"context_window_<suffix>\" where the suffix is the same name as will be used for logging the message history, for example \"g3_session_you_are_g3_in_coach_f79be2a46ac40c35.json\". Look at the code that generates that file name in G3 and use the same code. This file name changes every time and new agent is created, so follow the same pattern with the context window summary. Whenever the file name changes, update a symlink called \"current_context_window\" to that new file. Every time the message history is sent to the LLM, rewrite the entire file. Each message should only take up one line. The format is: date&time, estimated number of tokens of the entire message (use the token estimator code in G3, write it in a compact way for example 1K, 2M, 100b, 200K, colour code it graded from bright green to dark red where 200b is bright green and 50K is dark red), message ID, role (e.g. \"user\", \"assistant\"), the first hundred characters of \"content\".",
"dependencies": ["message-protocol"]
}
]"#;
    // Assemble: noise, then marker + fenced JSON (closing fence deliberately
    // placed with no trailing newline to exercise the trim logic).
    let mut output = String::from(NOISY_PREFIX);
    output.push_str("{{PARTITION JSON}}\n```json\n");
    output.push_str(expected_json);
    output.push_str("```");
    let extracted = FlockMode::extract_json_from_output(&output)
        .expect("should extract JSON between markers");
    assert_eq!(extracted, expected_json);
}
#[test]
fn extract_json_from_output_handles_multiple_markers_and_invalid_json() {
// This is the actual output from the LLM that was failing
let output = r#"[2m[0m
[1A[2K│ [2m# Requirements Partitioning into 2 Architectural Modules[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m## Analysis[0m
[1A[2K│ [2m[0m
[1A[2K│ [2mThe requirements have been partitioned into two logical, largely non-overlapping modules based on architectural concerns:[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m1. **Message Protocol Module** - Handles message identity, formatting, and LLM communication[0m
[1A[2K│ [2m2. **Observability Module** - Handles logging, summarization, and monitoring of message history[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m## Module Partitioning[0m
[1A[2K│ [2m[0m{PARTITION JSON}
[1A[2K│ [2m```json[0m
[1A[2K│ [2m[[0m
[1A[2K│ [2m {[0m
[1A[2K│ [2m "module_name": "message-protocol",[0m
[1A[2K│ [2m "requirements": "For all messages sent in the message history, unique ID that is not longer than six characters they need to be alphanumeric and can be case sensitive. Double check the message format specification for Open AI message formats. Write tests to make sure the LLM works, so make sure it's an integration test.",[0m
[1A[2K│ [2m "dependencies": [][0m
[1A[2K│ [2m },[0m
[1A[2K│ [2m {[0m
[1A[2K│ [2m "module_name": "observability",[0m
[1A[2K│ [2m "requirements": "Add functionality that will summarise the entire message history every time it is sent to LLM. Put it in the logs directory the same as the workspace logs for message history. Call it \"context_window_<suffix>\" where the suffix is the same name as will be used for logging the message history, for example \"g3_session_you_are_g3_in_coach_f79be2a46ac40c35.json\". Look at the code that generates that file name in G3 and use the same code. This file name changes every time and new agent is created, so follow the same pattern with the context window summary. Whenever the file name changes, update a symlink called \"current_context_window\" to that new file. Every time the message history is sent to the LLM, rewrite the entire file. Each message should only take up one line. The format is: date&time, estimated number of tokens of the entire message (use the token estimator code in G3, write it in a compact way for example 1K, 2M, 100b, 200K, colour code it graded from bright green to dark red where 200b is bright green and 50K is dark red), message ID, role (e.g. \"user\", \"assistant\"), the first hundred characters of \"content\".",[0m
[1A[2K│ [2m "dependencies": ["message-protocol"][0m
[1A[2K│ [2m }[0m
[1A[2K│ [2m][0m
[1A[2K│ [2m```[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m## Rationale[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m### Module 1: message-protocol[0m
[1A[2K│ [2m**Purpose**: Core messaging infrastructure and LLM communication layer[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m**Responsibilities**:[0m
[1A[2K│ [2m- Generate unique 6-character alphanumeric message IDs[0m
[1A[2K│ [2m- Ensure OpenAI message format compliance[0m
[1A[2K│ [2m- Handle LLM request/response cycles[0m
[1A[2K│ [2m- Integration testing of LLM functionality[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m**Why it's independent**: This module defines the fundamental message structure and communication protocol. It can be developed and tested independently as a core library.[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m**Future evolution**: Could become a separate crate (e.g., `g3-message-protocol`) or even a standalone service if message routing becomes complex.[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m### Module 2: observability[0m
[1A[2K│ [2m**Purpose**: Monitoring, logging, and visualization of system activity[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m**Responsibilities**:[0m
[1A[2K│ [2m- Summarize message history on each LLM interaction[0m
[1A[2K│ [2m- Generate context window summary files with specific naming conventions[0m
[1A[2K│ [2m- Manage symlinks to current summary files[0m
[1A[2K│ [2m- Format one-line summaries with timestamps, token counts, message IDs, roles, and content previews[0m
[1A[2K│ [2m- Color-code token estimates for visual monitoring[0m
[1A[2K│ [2m- Integrate with existing G3 logging infrastructure[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m**Why it depends on message-protocol**: Needs access to message IDs, message content, and token estimation utilities. However, the core messaging system doesn't need to know about observability.[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m**Future evolution**: Could become a separate crate (e.g., `g3-observability`) or monitoring service that subscribes to message events.[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m## Benefits of This Partitioning[0m
[1A[2K│ [2m[0m
[1A[2K│ [2m1. **Separation of Concerns**: Core messaging logic is isolated from monitoring/logging concerns[0m
[1A[2K│ [2m2. **Parallel Development**: Teams can work independently on message protocol vs. observability features[0m
[1A[2K│ [2m3. **Testability**: Each module can be tested in isolation[0m
[1A[2K│ [2m4. **Maintainability**: Changes to logging/monitoring don't affect core message handling[0m
[1A[2K│ [2m5. **Scalability**: Observability could be extracted to a separate service for distributed systems[0m
[1A[2K│ [2m6. **Dependency Direction**: Clean one-way dependency (observability → message-protocol) prevents circular dependencies[0m
# Requirements Partitioning into 2 Architectural Modules
## Analysis
The requirements have been partitioned into two logical, largely non-overlapping modules based on architectural concerns:
1. **Message Protocol Module** - Handles message identity, formatting, and LLM communication
2. **Observability Module** - Handles logging, summarization, and monitoring of message history
## Module Partitioning
{{PARTITION JSON}}
```json
[
{
"module_name": "message-protocol",
"requirements": "For all messages sent in the message history, unique ID that is not longer than six characters they need to be alphanumeric and can be case sensitive. Double check the message format specification for Open AI message formats. Write tests to make sure the LLM works, so make sure it's an integration test.",
"dependencies": []
},
{
"module_name": "observability",
"requirements": "Add functionality that will summarise the entire message history every time it is sent to LLM. Put it in the logs directory the same as the workspace logs for message history. Call it \"context_window_<suffix>\" where the suffix is the same name as will be used for logging the message history, for example \"g3_session_you_are_g3_in_coach_f79be2a46ac40c35.json\". Look at the code that generates that file name in G3 and use the same code. This file name changes every time and new agent is created, so follow the same pattern with the context window summary. Whenever the file name changes, update a symlink called \"current_context_window\" to that new file. Every time the message history is sent to the LLM, rewrite the entire file. Each message should only take up one line. The format is: date&time, estimated number of tokens of the entire message (use the token estimator code in G3, write it in a compact way for example 1K, 2M, 100b, 200K, colour code it graded from bright green to dark red where 200b is bright green and 50K is dark red), message ID, role (e.g. \"user\", \"assistant\"), the first hundred characters of \"content\".",
"dependencies": ["message-protocol"]
}
]
```
## Rationale
### Module 1: message-protocol
**Purpose**: Core messaging infrastructure and LLM communication layer
**Responsibilities**:
- Generate unique 6-character alphanumeric message IDs
- Ensure OpenAI message format compliance
- Handle LLM request/response cycles
- Integration testing of LLM functionality
**Why it's independent**: This module defines the fundamental message structure and communication protocol. It can be developed and tested independently as a core library.
**Future evolution**: Could become a separate crate (e.g., `g3-message-protocol`) or even a standalone service if message routing becomes complex.
### Module 2: observability
**Purpose**: Monitoring, logging, and visualization of system activity
**Responsibilities**:
- Summarize message history on each LLM interaction
- Generate context window summary files with specific naming conventions
- Manage symlinks to current summary files
- Format one-line summaries with timestamps, token counts, message IDs, roles, and content previews
- Color-code token estimates for visual monitoring
- Integrate with existing G3 logging infrastructure
**Why it depends on message-protocol**: Needs access to message IDs, message content, and token estimation utilities. However, the core messaging system doesn't need to know about observability.
**Future evolution**: Could become a separate crate (e.g., `g3-observability`) or monitoring service that subscribes to message events.
## Benefits of This Partitioning
1. **Separation of Concerns**: Core messaging logic is isolated from monitoring/logging concerns
2. **Parallel Development**: Teams can work independently on message protocol vs. observability features
3. **Testability**: Each module can be tested in isolation
4. **Maintainability**: Changes to logging/monitoring don't affect core message handling
5. **Scalability**: Observability could be extracted to a separate service for distributed systems
6. **Dependency Direction**: Clean one-way dependency (observability → message-protocol) prevents circular dependencies"#;
let extracted = FlockMode::extract_json_from_output(output)
.expect("should extract valid JSON from output with multiple markers");
// Should be able to parse as JSON
let parsed: serde_json::Value = serde_json::from_str(&extracted)
.expect("extracted content should be valid JSON");
// Verify it's an array with 2 elements
assert!(parsed.is_array());
let arr = parsed.as_array().unwrap();
assert_eq!(arr.len(), 2);
// Verify the structure
assert_eq!(arr[0]["module_name"], "message-protocol");
assert_eq!(arr[1]["module_name"], "observability");
}
}

View File

@@ -0,0 +1,12 @@
//! G3 Ensembles - Multi-agent ensemble functionality
//!
//! This crate provides functionality for running multiple G3 agents in coordination,
//! enabling parallel development across different architectural modules.

pub mod flock;
pub mod status;

// Unit tests live in their own module file; the file itself gates its
// contents behind #[cfg(test)].
mod tests;

/// Re-export main types for convenience.
/// `SegmentState` is included because `SegmentStatus::state` exposes it,
/// so downstream users need it to construct or match on segment statuses.
pub use flock::{FlockConfig, FlockMode};
pub use status::{FlockStatus, SegmentState, SegmentStatus};

View File

@@ -0,0 +1,240 @@
//! Status tracking for flock mode
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
/// Status of an individual segment worker.
///
/// One record is kept per segment; records are aggregated inside
/// [`FlockStatus`] and serialized with it when the session status is
/// written to disk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SegmentStatus {
    /// Segment number
    pub segment_id: usize,
    /// Segment workspace directory
    pub workspace: PathBuf,
    /// Current state of the segment
    pub state: SegmentState,
    /// Start time
    pub started_at: DateTime<Utc>,
    /// Completion time (if finished)
    pub completed_at: Option<DateTime<Utc>>,
    /// Total tokens used
    pub tokens_used: u64,
    /// Number of tool calls made
    pub tool_calls: u64,
    /// Number of errors encountered
    pub errors: u64,
    /// Current turn number (for autonomous mode)
    pub current_turn: usize,
    /// Maximum turns allowed
    pub max_turns: usize,
    /// Last status message
    pub last_message: Option<String>,
    /// Error message (if failed)
    pub error_message: Option<String>,
}
/// State of a segment worker.
///
/// `Completed`, `Failed`, and `Cancelled` are terminal states; a flock is
/// considered finished once every segment reaches one of them (see
/// `FlockStatus::is_complete`).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum SegmentState {
    /// Waiting to start
    Pending,
    /// Currently running
    Running,
    /// Completed successfully
    Completed,
    /// Failed with error
    Failed,
    /// Cancelled by user
    Cancelled,
}
impl std::fmt::Display for SegmentState {
    /// Render the state as an emoji-prefixed, human-readable label.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            SegmentState::Pending => "⏳ Pending",
            SegmentState::Running => "🔄 Running",
            SegmentState::Completed => "✅ Completed",
            SegmentState::Failed => "❌ Failed",
            SegmentState::Cancelled => "⚠️ Cancelled",
        };
        f.write_str(label)
    }
}
/// Overall flock status.
///
/// Tracks the session as a whole plus one `SegmentStatus` per segment.
/// The `total_*` fields are derived: they are recomputed from the segment
/// map by `recalculate_totals` whenever a segment is updated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FlockStatus {
    /// Flock session ID
    pub session_id: String,
    /// Project directory
    pub project_dir: PathBuf,
    /// Flock workspace directory
    pub flock_workspace: PathBuf,
    /// Number of segments
    pub num_segments: usize,
    /// Start time
    pub started_at: DateTime<Utc>,
    /// Completion time (if finished)
    pub completed_at: Option<DateTime<Utc>>,
    /// Status of each segment
    pub segments: HashMap<usize, SegmentStatus>,
    /// Total tokens used across all segments
    pub total_tokens: u64,
    /// Total tool calls across all segments
    pub total_tool_calls: u64,
    /// Total errors across all segments
    pub total_errors: u64,
}
impl FlockStatus {
/// Create a new flock status
pub fn new(
session_id: String,
project_dir: PathBuf,
flock_workspace: PathBuf,
num_segments: usize,
) -> Self {
Self {
session_id,
project_dir,
flock_workspace,
num_segments,
started_at: Utc::now(),
completed_at: None,
segments: HashMap::new(),
total_tokens: 0,
total_tool_calls: 0,
total_errors: 0,
}
}
/// Update segment status
pub fn update_segment(&mut self, segment_id: usize, status: SegmentStatus) {
self.segments.insert(segment_id, status);
self.recalculate_totals();
}
/// Recalculate total metrics
fn recalculate_totals(&mut self) {
self.total_tokens = self.segments.values().map(|s| s.tokens_used).sum();
self.total_tool_calls = self.segments.values().map(|s| s.tool_calls).sum();
self.total_errors = self.segments.values().map(|s| s.errors).sum();
}
/// Check if all segments are complete
pub fn is_complete(&self) -> bool {
self.segments.len() == self.num_segments
&& self.segments.values().all(|s| {
matches!(
s.state,
SegmentState::Completed | SegmentState::Failed | SegmentState::Cancelled
)
})
}
/// Get count of segments by state
pub fn count_by_state(&self, state: SegmentState) -> usize {
self.segments.values().filter(|s| s.state == state).count()
}
/// Save status to file
pub fn save_to_file(&self, path: &PathBuf) -> anyhow::Result<()> {
let json = serde_json::to_string_pretty(self)?;
std::fs::write(path, json)?;
Ok(())
}
/// Load status from file
pub fn load_from_file(path: &PathBuf) -> anyhow::Result<Self> {
let json = std::fs::read_to_string(path)?;
let status = serde_json::from_str(&json)?;
Ok(status)
}
/// Generate a summary report
pub fn generate_report(&self) -> String {
let mut report = String::new();
report.push_str(&format!("\n{}", "=".repeat(80)));
report.push_str(&format!("\n📊 FLOCK MODE SESSION REPORT"));
report.push_str(&format!("\n{}", "=".repeat(80)));
report.push_str(&format!("\n\n🆔 Session ID: {}", self.session_id));
report.push_str(&format!("\n📁 Project: {}", self.project_dir.display()));
report.push_str(&format!("\n🗂️ Workspace: {}", self.flock_workspace.display()));
report.push_str(&format!("\n🔢 Segments: {}", self.num_segments));
let duration = if let Some(completed) = self.completed_at {
completed.signed_duration_since(self.started_at)
} else {
Utc::now().signed_duration_since(self.started_at)
};
report.push_str(&format!("\n⏱️ Duration: {:.2}s", duration.num_milliseconds() as f64 / 1000.0));
// Segment status summary
report.push_str(&format!("\n\n📈 Segment Status:"));
report.push_str(&format!("\n • Completed: {}", self.count_by_state(SegmentState::Completed)));
report.push_str(&format!("\n • Running: {}", self.count_by_state(SegmentState::Running)));
report.push_str(&format!("\n • Failed: {}", self.count_by_state(SegmentState::Failed)));
report.push_str(&format!("\n • Pending: {}", self.count_by_state(SegmentState::Pending)));
report.push_str(&format!("\n • Cancelled: {}", self.count_by_state(SegmentState::Cancelled)));
// Metrics
report.push_str(&format!("\n\n📊 Aggregate Metrics:"));
report.push_str(&format!("\n • Total Tokens: {}", self.total_tokens));
report.push_str(&format!("\n • Total Tool Calls: {}", self.total_tool_calls));
report.push_str(&format!("\n • Total Errors: {}", self.total_errors));
// Per-segment details
report.push_str(&format!("\n\n🔍 Segment Details:"));
let mut segments: Vec<_> = self.segments.iter().collect();
segments.sort_by_key(|(id, _)| *id);
for (id, segment) in segments {
report.push_str(&format!("\n\n Segment {}:", id));
report.push_str(&format!("\n Status: {}", segment.state));
report.push_str(&format!("\n Workspace: {}", segment.workspace.display()));
report.push_str(&format!("\n Tokens: {}", segment.tokens_used));
report.push_str(&format!("\n Tool Calls: {}", segment.tool_calls));
report.push_str(&format!("\n Errors: {}", segment.errors));
report.push_str(&format!("\n Turn: {}/{}", segment.current_turn, segment.max_turns));
if let Some(ref msg) = segment.last_message {
report.push_str(&format!("\n Last Message: {}", msg));
}
if let Some(ref err) = segment.error_message {
report.push_str(&format!("\n Error: {}", err));
}
}
report.push_str(&format!("\n\n{}", "=".repeat(80)));
report
}
}

View File

@@ -0,0 +1,331 @@
//! Unit tests for g3-ensembles
#[cfg(test)]
mod tests {
    use crate::status::{FlockStatus, SegmentState, SegmentStatus};
    use chrono::Utc;
    use std::path::PathBuf;

    /// Build a `SegmentStatus` fixture for the given id/state/metrics.
    ///
    /// Terminal states get a completion timestamp; `last_message` and
    /// `error_message` are left unset because no assertion below depends
    /// on them, and `max_turns` is fixed at 10 as in the original fixtures.
    fn seg(
        id: usize,
        state: SegmentState,
        tokens: u64,
        calls: u64,
        errors: u64,
        turn: usize,
    ) -> SegmentStatus {
        let finished = !matches!(state, SegmentState::Pending | SegmentState::Running);
        SegmentStatus {
            segment_id: id,
            workspace: PathBuf::from(format!("/test/workspace/segment-{}", id)),
            state,
            started_at: Utc::now(),
            completed_at: if finished { Some(Utc::now()) } else { None },
            tokens_used: tokens,
            tool_calls: calls,
            errors,
            current_turn: turn,
            max_turns: 10,
            last_message: None,
            error_message: None,
        }
    }

    /// A fresh `FlockStatus` with the fixed test session/paths.
    fn flock(num_segments: usize) -> FlockStatus {
        FlockStatus::new(
            "test-session".to_string(),
            PathBuf::from("/test/project"),
            PathBuf::from("/test/workspace"),
            num_segments,
        )
    }

    /// Display renders the emoji-prefixed label for every state.
    #[test]
    fn test_segment_state_display() {
        assert_eq!(format!("{}", SegmentState::Pending), "⏳ Pending");
        assert_eq!(format!("{}", SegmentState::Running), "🔄 Running");
        assert_eq!(format!("{}", SegmentState::Completed), "✅ Completed");
        assert_eq!(format!("{}", SegmentState::Failed), "❌ Failed");
        assert_eq!(format!("{}", SegmentState::Cancelled), "⚠️ Cancelled");
    }

    /// A new status starts empty, with zeroed totals and no completion time.
    #[test]
    fn test_flock_status_creation() {
        let status = flock(3);
        assert_eq!(status.session_id, "test-session");
        assert_eq!(status.num_segments, 3);
        assert_eq!(status.segments.len(), 0);
        assert_eq!(status.total_tokens, 0);
        assert_eq!(status.total_tool_calls, 0);
        assert_eq!(status.total_errors, 0);
        assert!(status.completed_at.is_none());
    }

    /// A single update is stored and reflected in the aggregate totals.
    #[test]
    fn test_segment_status_update() {
        let mut status = flock(2);
        status.update_segment(1, seg(1, SegmentState::Completed, 1000, 50, 2, 5));
        assert_eq!(status.segments.len(), 1);
        assert_eq!(status.total_tokens, 1000);
        assert_eq!(status.total_tool_calls, 50);
        assert_eq!(status.total_errors, 2);
    }

    /// Totals sum across all reported segments.
    #[test]
    fn test_multiple_segment_updates() {
        let mut status = flock(2);
        status.update_segment(1, seg(1, SegmentState::Completed, 1000, 50, 2, 5));
        status.update_segment(2, seg(2, SegmentState::Failed, 500, 25, 5, 3));
        assert_eq!(status.segments.len(), 2);
        assert_eq!(status.total_tokens, 1500);
        assert_eq!(status.total_tool_calls, 75);
        assert_eq!(status.total_errors, 7);
    }

    /// is_complete requires every expected segment to be in a terminal state.
    #[test]
    fn test_is_complete() {
        let mut status = flock(2);
        // Not complete - no segments reported yet
        assert!(!status.is_complete());
        status.update_segment(1, seg(1, SegmentState::Completed, 1000, 50, 0, 5));
        // Still not complete - only 1 of 2 segments
        assert!(!status.is_complete());
        status.update_segment(2, seg(2, SegmentState::Running, 500, 25, 0, 3));
        // Still not complete - segment 2 is running
        assert!(!status.is_complete());
        status.update_segment(2, seg(2, SegmentState::Completed, 500, 25, 0, 5));
        // Now complete
        assert!(status.is_complete());
    }

    /// count_by_state tallies only segments in exactly the given state.
    #[test]
    fn test_count_by_state() {
        let mut status = flock(3);
        status.update_segment(1, seg(1, SegmentState::Completed, 1000, 50, 0, 5));
        status.update_segment(2, seg(2, SegmentState::Failed, 500, 25, 5, 3));
        status.update_segment(3, seg(3, SegmentState::Completed, 800, 40, 1, 4));
        assert_eq!(status.count_by_state(SegmentState::Completed), 2);
        assert_eq!(status.count_by_state(SegmentState::Failed), 1);
        assert_eq!(status.count_by_state(SegmentState::Running), 0);
        assert_eq!(status.count_by_state(SegmentState::Pending), 0);
    }

    /// The status round-trips through serde JSON with fields intact.
    #[test]
    fn test_status_serialization() {
        let mut status = flock(1);
        status.update_segment(1, seg(1, SegmentState::Completed, 1000, 50, 2, 5));
        // Serialize to JSON
        let json = serde_json::to_string(&status).expect("Failed to serialize");
        assert!(json.contains("test-session"));
        assert!(json.contains("segment_id"));
        assert!(json.contains("Completed"));
        // Deserialize back
        let deserialized: FlockStatus =
            serde_json::from_str(&json).expect("Failed to deserialize");
        assert_eq!(deserialized.session_id, "test-session");
        assert_eq!(deserialized.segments.len(), 1);
        assert_eq!(deserialized.total_tokens, 1000);
    }

    /// The generated report contains every section and the aggregate metrics.
    #[test]
    fn test_report_generation() {
        let mut status = flock(2);
        status.update_segment(1, seg(1, SegmentState::Completed, 1000, 50, 2, 5));
        let report = status.generate_report();
        // Check that report contains expected sections
        assert!(report.contains("FLOCK MODE SESSION REPORT"));
        assert!(report.contains("test-session"));
        assert!(report.contains("Segment Status:"));
        assert!(report.contains("Aggregate Metrics:"));
        assert!(report.contains("Segment Details:"));
        assert!(report.contains("Total Tokens: 1000"));
        assert!(report.contains("Total Tool Calls: 50"));
        assert!(report.contains("Total Errors: 2"));
    }
}

View File

@@ -0,0 +1,443 @@
//! Integration tests for g3-ensembles flock mode
use g3_ensembles::{FlockConfig, FlockMode};
use std::fs;
use std::path::PathBuf;
use std::process::Command;
use tempfile::TempDir;
/// Helper to create a test git repository with flock-requirements.md
///
/// Initializes a repo in a fresh temp dir, writes a two-module requirements
/// file plus a README, and commits everything so clones see a full snapshot.
fn create_test_project(name: &str) -> TempDir {
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let project_path = temp_dir.path();

    // One closure for every git invocation so each call reads the same way;
    // `err` keeps the original per-call panic messages.
    let git = |args: &[&str], err: &str| {
        Command::new("git")
            .args(args)
            .current_dir(project_path)
            .output()
            .expect(err)
    };

    // Initialize git repo
    let init = git(&["init"], "Failed to run git init");
    assert!(init.status.success(), "git init failed");

    // Configure git user (required for commits)
    git(
        &["config", "user.email", "test@example.com"],
        "Failed to configure git email",
    );
    git(
        &["config", "user.name", "Test User"],
        "Failed to configure git name",
    );

    // Create flock-requirements.md
    let requirements = format!(
        "# {} Test Project\n\n\
         ## Module A\n\
         - Create a simple Rust library\n\
         - Add a function that returns \"Hello from Module A\"\n\
         - Write a unit test for the function\n\n\
         ## Module B\n\
         - Create another Rust library\n\
         - Add a function that returns \"Hello from Module B\"\n\
         - Write a unit test for the function\n",
        name
    );
    fs::write(project_path.join("flock-requirements.md"), requirements)
        .expect("Failed to write requirements");

    // Create a simple README
    fs::write(project_path.join("README.md"), format!("# {}\n", name))
        .expect("Failed to write README");

    // Create initial commit
    git(&["add", "."], "Failed to git add");
    let commit = git(&["commit", "-m", "Initial commit"], "Failed to git commit");
    assert!(commit.status.success(), "git commit failed");

    temp_dir
}
/// FlockConfig::new must reject a non-git directory, then a git repo
/// without flock-requirements.md, and accept once both preconditions hold.
#[test]
fn test_flock_config_validation() {
    let temp_dir = TempDir::new().unwrap();
    let project_path = temp_dir.path().to_path_buf();
    let workspace_path = temp_dir.path().join("workspace");
    // Should fail - not a git repo
    let result = FlockConfig::new(project_path.clone(), workspace_path.clone(), 2);
    assert!(result.is_err());
    assert!(result
        .unwrap_err()
        .to_string()
        .contains("must be a git repository"));
    // Initialize git repo
    Command::new("git")
        .arg("init")
        .current_dir(&project_path)
        .output()
        .expect("Failed to run git init");
    // Should fail - no flock-requirements.md
    let result = FlockConfig::new(project_path.clone(), workspace_path.clone(), 2);
    assert!(result.is_err());
    assert!(result
        .unwrap_err()
        .to_string()
        .contains("flock-requirements.md"));
    // Create flock-requirements.md
    fs::write(project_path.join("flock-requirements.md"), "# Test\n")
        .expect("Failed to write requirements");
    // Should succeed now
    let result = FlockConfig::new(project_path, workspace_path, 2);
    assert!(result.is_ok());
}
/// The builder-style setters on FlockConfig should record the max-turn
/// limit and a custom g3 binary path.
#[test]
fn test_flock_config_builder() {
    let project_dir = create_test_project("builder-test");
    let workspace_dir = TempDir::new().unwrap();
    let config = FlockConfig::new(
        project_dir.path().to_path_buf(),
        workspace_dir.path().to_path_buf(),
        2,
    )
    .expect("Failed to create config")
    .with_max_turns(15)
    .with_g3_binary(PathBuf::from("/custom/g3"));
    assert_eq!(config.num_segments, 2);
    assert_eq!(config.max_turns, 15);
    assert_eq!(config.g3_binary, Some(PathBuf::from("/custom/g3")));
}
/// Constructing a FlockMode on a valid project must succeed; only the
/// setup preconditions are checked since a full run needs LLM access.
#[test]
fn test_workspace_creation() {
    let project_dir = create_test_project("workspace-test");
    let workspace_dir = TempDir::new().unwrap();
    let config = FlockConfig::new(
        project_dir.path().to_path_buf(),
        workspace_dir.path().to_path_buf(),
        2,
    )
    .expect("Failed to create config");
    // Create FlockMode instance
    let _flock = FlockMode::new(config).expect("Failed to create FlockMode");
    // Verify workspace directory structure will be created
    // (We can't run the full flock without LLM access, but we can test the setup)
    assert!(project_dir.path().join(".git").exists());
    assert!(project_dir.path().join("flock-requirements.md").exists());
}
/// Cloning the project into a segment directory (what flock mode does per
/// segment) must reproduce the repo, its files, and its commit history.
#[test]
fn test_git_clone_functionality() {
    let project_dir = create_test_project("clone-test");
    let workspace_dir = TempDir::new().unwrap();
    // Manually test git cloning (what flock mode does internally)
    let segment_dir = workspace_dir.path().join("segment-1");
    let output = Command::new("git")
        .arg("clone")
        .arg(project_dir.path())
        .arg(&segment_dir)
        .output()
        .expect("Failed to run git clone");
    assert!(output.status.success(), "git clone failed: {:?}", output);
    // Verify the clone
    assert!(segment_dir.exists());
    assert!(segment_dir.join(".git").exists());
    assert!(segment_dir.join("flock-requirements.md").exists());
    assert!(segment_dir.join("README.md").exists());
    // Verify it's a proper git repo
    let output = Command::new("git")
        .args(["log", "--oneline"])
        .current_dir(&segment_dir)
        .output()
        .expect("Failed to run git log");
    assert!(output.status.success());
    let log = String::from_utf8_lossy(&output.stdout);
    assert!(log.contains("Initial commit"));
}
/// Multiple segment clones are independent working copies: a file written
/// into one must not appear in the other.
#[test]
fn test_multiple_segment_clones() {
    let project_dir = create_test_project("multi-clone-test");
    let workspace_dir = TempDir::new().unwrap();
    // Clone multiple segments
    for i in 1..=2 {
        let segment_dir = workspace_dir.path().join(format!("segment-{}", i));
        let output = Command::new("git")
            .arg("clone")
            .arg(project_dir.path())
            .arg(&segment_dir)
            .output()
            .expect("Failed to run git clone");
        assert!(output.status.success(), "git clone {} failed", i);
        assert!(segment_dir.exists());
        assert!(segment_dir.join(".git").exists());
        assert!(segment_dir.join("flock-requirements.md").exists());
    }
    // Verify both segments exist and are independent
    let segment1 = workspace_dir.path().join("segment-1");
    let segment2 = workspace_dir.path().join("segment-2");
    assert!(segment1.exists());
    assert!(segment2.exists());
    // Modify segment 1
    fs::write(segment1.join("test.txt"), "segment 1")
        .expect("Failed to write to segment 1");
    // Verify segment 2 is unaffected
    assert!(!segment2.join("test.txt").exists());
}
/// Flock mode writes a per-segment requirements file into each clone;
/// verify the file is created and its content reads back.
#[test]
fn test_segment_requirements_creation() {
    let project_dir = create_test_project("segment-req-test");
    let workspace_dir = TempDir::new().unwrap();
    // Clone a segment
    let segment_dir = workspace_dir.path().join("segment-1");
    Command::new("git")
        .arg("clone")
        .arg(project_dir.path())
        .arg(&segment_dir)
        .output()
        .expect("Failed to clone");
    // Create segment-requirements.md (what flock mode does)
    let segment_requirements = "# Module A\n\nImplement module A functionality\n";
    fs::write(segment_dir.join("segment-requirements.md"), segment_requirements)
        .expect("Failed to write segment requirements");
    // Verify it was created
    assert!(segment_dir.join("segment-requirements.md").exists());
    let content = fs::read_to_string(segment_dir.join("segment-requirements.md"))
        .expect("Failed to read segment requirements");
    assert!(content.contains("Module A"));
}
/// FlockStatus must round-trip through its save_to_file / load_from_file
/// helpers with its identifying fields intact.
#[test]
fn test_status_file_operations() {
    use g3_ensembles::FlockStatus;
    let temp_dir = TempDir::new().unwrap();
    let status_file = temp_dir.path().join("flock-status.json");
    // Create a status
    let status = FlockStatus::new(
        "test-session".to_string(),
        PathBuf::from("/test/project"),
        PathBuf::from("/test/workspace"),
        2,
    );
    // Save to file
    status
        .save_to_file(&status_file)
        .expect("Failed to save status");
    // Verify file exists
    assert!(status_file.exists());
    // Load from file
    let loaded = FlockStatus::load_from_file(&status_file).expect("Failed to load status");
    assert_eq!(loaded.session_id, "test-session");
    assert_eq!(loaded.num_segments, 2);
}
/// The extract_json_array helper must find a JSON array embedded in plain
/// text or fenced code blocks, and return None when no array is present.
#[test]
fn test_json_extraction() {
    // Test the JSON extraction logic used in partition_requirements
    let test_cases = vec![
        (
            "Here is the result: [{\"module_name\": \"test\"}]",
            Some("[{\"module_name\": \"test\"}]"),
        ),
        (
            "```json\n[{\"module_name\": \"test\"}]\n```",
            Some("[{\"module_name\": \"test\"}]"),
        ),
        (
            "Some text before\n[{\"a\": 1}, {\"b\": 2}]\nSome text after",
            Some("[{\"a\": 1}, {\"b\": 2}]"),
        ),
        ("No JSON here", None),
    ];
    for (input, expected) in test_cases {
        let result = extract_json_array(input);
        match expected {
            Some(exp) => {
                assert!(result.is_some(), "Failed to extract from: {}", input);
                assert_eq!(result.unwrap(), exp);
            }
            None => {
                assert!(result.is_none(), "Should not extract from: {}", input);
            }
        }
    }
}
// Helper function to extract JSON array (mimics the logic in flock.rs)
//
// Returns the substring from the first '[' to the last ']' inclusive, or
// None when either bracket is missing or the ']' precedes the '['.
fn extract_json_array(output: &str) -> Option<String> {
    let start = output.find('[')?;
    let end = output.rfind(']')?;
    if end > start {
        Some(output[start..=end].to_string())
    } else {
        None
    }
}
/// Partition JSON (module_name / requirements / dependencies entries)
/// must parse into the expected shape via serde_json.
#[test]
fn test_partition_json_parsing() {
    // Test parsing of partition JSON
    let json = r#"[
        {
            "module_name": "core-library",
            "requirements": "Build the core library with basic functionality",
            "dependencies": []
        },
        {
            "module_name": "cli-tool",
            "requirements": "Create a CLI tool that uses the core library",
            "dependencies": ["core-library"]
        }
    ]"#;
    let partitions: Vec<serde_json::Value> =
        serde_json::from_str(json).expect("Failed to parse JSON");
    assert_eq!(partitions.len(), 2);
    assert_eq!(partitions[0]["module_name"], "core-library");
    assert_eq!(partitions[1]["module_name"], "cli-tool");
    assert_eq!(partitions[1]["dependencies"][0], "core-library");
}
/// Sanity-check the requirements fixture written by create_test_project.
#[test]
fn test_requirements_file_content() {
    let project_dir = create_test_project("content-test");
    let requirements_path = project_dir.path().join("flock-requirements.md");
    let content = fs::read_to_string(&requirements_path).expect("Failed to read requirements");
    // Verify content structure
    assert!(content.contains("# content-test Test Project"));
    assert!(content.contains("## Module A"));
    assert!(content.contains("## Module B"));
    assert!(content.contains("Hello from Module A"));
    assert!(content.contains("Hello from Module B"));
}
/// Commits made in one segment clone must not leak into another: each
/// clone keeps a fully independent history and working tree.
#[test]
fn test_git_repo_independence() {
    let project_dir = create_test_project("independence-test");
    let workspace_dir = TempDir::new().unwrap();
    // Clone two segments
    let segment1 = workspace_dir.path().join("segment-1");
    let segment2 = workspace_dir.path().join("segment-2");
    Command::new("git")
        .arg("clone")
        .arg(project_dir.path())
        .arg(&segment1)
        .output()
        .expect("Failed to clone segment 1");
    Command::new("git")
        .arg("clone")
        .arg(project_dir.path())
        .arg(&segment2)
        .output()
        .expect("Failed to clone segment 2");
    // Make a commit in segment 1
    fs::write(segment1.join("file1.txt"), "content 1").expect("Failed to write file1");
    Command::new("git")
        .args(["add", "file1.txt"])
        .current_dir(&segment1)
        .output()
        .expect("Failed to git add");
    Command::new("git")
        .args(["commit", "-m", "Add file1"])
        .current_dir(&segment1)
        .output()
        .expect("Failed to commit in segment 1");
    // Make a different commit in segment 2
    fs::write(segment2.join("file2.txt"), "content 2").expect("Failed to write file2");
    Command::new("git")
        .args(["add", "file2.txt"])
        .current_dir(&segment2)
        .output()
        .expect("Failed to git add");
    Command::new("git")
        .args(["commit", "-m", "Add file2"])
        .current_dir(&segment2)
        .output()
        .expect("Failed to commit in segment 2");
    // Verify they have different commits
    let log1 = Command::new("git")
        .args(["log", "--oneline"])
        .current_dir(&segment1)
        .output()
        .expect("Failed to get log 1");
    let log2 = Command::new("git")
        .args(["log", "--oneline"])
        .current_dir(&segment2)
        .output()
        .expect("Failed to get log 2");
    let log1_str = String::from_utf8_lossy(&log1.stdout);
    let log2_str = String::from_utf8_lossy(&log2.stdout);
    assert!(log1_str.contains("Add file1"));
    assert!(!log1_str.contains("Add file2"));
    assert!(log2_str.contains("Add file2"));
    assert!(!log2_str.contains("Add file1"));
    // Verify files exist only in their respective segments
    assert!(segment1.join("file1.txt").exists());
    assert!(!segment1.join("file2.txt").exists());
    assert!(segment2.join("file2.txt").exists());
    assert!(!segment2.join("file1.txt").exists());
}

View File

@@ -5,6 +5,17 @@ use tempfile::NamedTempFile;
use std::io::Write;
use tracing::{info, debug, error};
/// Expand tilde (~) in a path to the user's home directory
///
/// Only a bare `~` or a leading `~/` is expanded; `~user` forms are left
/// untouched because another user's home directory cannot be resolved here
/// (the previous `starts_with("~")` check wrongly turned `~user/x` into
/// `<HOME>user/x`). If `HOME` is unset, the path is returned unchanged.
fn expand_tilde(path: &str) -> String {
    if path == "~" || path.starts_with("~/") {
        if let Some(home) = std::env::var_os("HOME") {
            let home_str = home.to_string_lossy();
            // Replace only the leading "~" with the home directory.
            return path.replacen("~", &home_str, 1);
        }
    }
    path.to_string()
}
/// Executes code snippets (e.g. bash commands) on behalf of the agent.
pub struct CodeExecutor {
    // Future: add configuration for execution limits, sandboxing, etc.
}
@@ -241,11 +252,33 @@ impl CodeExecutor {
&self,
code: &str,
receiver: &R
) -> Result<ExecutionResult> {
self.execute_bash_streaming_in_dir(code, receiver, None).await
}
/// Execute bash command with streaming output in a specific directory
pub async fn execute_bash_streaming_in_dir<R: OutputReceiver>(
&self,
code: &str,
receiver: &R,
working_dir: Option<&str>,
) -> Result<ExecutionResult> {
use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command as TokioCommand;
// CRITICAL DEBUG: Print to stderr so it's always visible
debug!("========== execute_bash_streaming_in_dir START ==========");
debug!("Code to execute: {}", code);
debug!("Working directory parameter: {:?}", working_dir);
debug!("FULL DIAGNOSTIC: code='{}', working_dir={:?}", code, working_dir);
if let Some(dir) = working_dir {
debug!("Working dir exists check: {}", std::path::Path::new(dir).exists());
debug!("Working dir is_dir check: {}", std::path::Path::new(dir).is_dir());
}
debug!("Current process working directory: {:?}", std::env::current_dir());
// Check if this is a detached/daemon command that should run independently
// Look for patterns like: setsid, nohup with &, or explicit backgrounding with disown
let is_detached = code.trim_start().starts_with("setsid ")
@@ -255,10 +288,17 @@ impl CodeExecutor {
if is_detached {
// For detached commands, just spawn and return immediately
TokioCommand::new("bash")
.arg("-c")
.arg(code)
.spawn()?;
let mut cmd = TokioCommand::new("bash");
cmd.arg("-c")
.arg(code);
// Set working directory if provided
if let Some(dir) = working_dir {
let expanded_dir = expand_tilde(dir);
cmd.current_dir(&expanded_dir);
}
cmd.spawn()?;
// Don't wait for the process - it's meant to run independently
return Ok(ExecutionResult {
@@ -269,12 +309,33 @@ impl CodeExecutor {
});
}
let mut child = TokioCommand::new("bash")
.arg("-c")
let mut cmd = TokioCommand::new("bash");
cmd.arg("-c")
.arg(code)
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()?;
.stderr(Stdio::piped());
// Set working directory if provided
if let Some(dir) = working_dir {
debug!("Setting current_dir on command to: {}", dir);
let expanded_dir = expand_tilde(dir);
debug!("Expanded working dir: {}", expanded_dir);
debug!("Expanded dir exists: {}", std::path::Path::new(&expanded_dir).exists());
debug!("Expanded dir is_dir: {}", std::path::Path::new(&expanded_dir).is_dir());
cmd.current_dir(&expanded_dir);
}
debug!("About to spawn command...");
let spawn_result = cmd.spawn();
debug!("Spawn result: {:?}", spawn_result.is_ok());
let mut child = match spawn_result {
Ok(c) => c,
Err(e) => {
debug!("SPAWN ERROR: {:?}", e);
return Err(e.into());
}
};
debug!("Command spawned successfully");
let stdout = child.stdout.take().unwrap();
let stderr = child.stderr.take().unwrap();
@@ -322,12 +383,23 @@ impl CodeExecutor {
let status = child.wait().await?;
Ok(ExecutionResult {
let result = ExecutionResult {
stdout: stdout_output.join("\n"),
stderr: stderr_output.join("\n"),
exit_code: status.code().unwrap_or(-1),
success: status.success(),
})
};
debug!("========== execute_bash_streaming_in_dir END ==========");
debug!("Exit code: {}", result.exit_code);
debug!("Success: {}", result.success);
debug!("Stdout length: {}", result.stdout.len());
debug!("Stderr length: {}", result.stderr.len());
if !result.stderr.is_empty() {
debug!("Stderr content: {}", result.stderr);
}
Ok(result)
}
}

View File

@@ -0,0 +1,14 @@
[package]
name = "g3-planner"
version = "0.1.0"
edition = "2021"
description = "Fast-discovery planner for G3 AI coding agent"
[dependencies]
g3-providers = { path = "../g3-providers" }
serde = { workspace = true }
serde_json = { workspace = true }
const_format = "0.2"
anyhow = { workspace = true }
tokio = { workspace = true }
chrono = { version = "0.4", features = ["serde"] }

View File

@@ -0,0 +1,724 @@
//! Code exploration module for analyzing codebases
//!
//! This module provides functions to explore and analyze codebases
//! for various programming languages, returning structured reports
//! about the code structure.
use std::path::Path;
use std::process::Command;
/// Main entry point for exploring a codebase at the given path.
/// Detects which languages are present and generates a comprehensive report.
pub fn explore_codebase(path: &str) -> String {
let path = expand_tilde(path);
let mut report = String::new();
let mut languages_found = Vec::new();
// Check for each language and add to report if found
if has_rust_files(&path) {
languages_found.push("Rust".to_string());
report.push_str(&explore_rust(&path));
}
if has_java_files(&path) {
languages_found.push("Java".to_string());
report.push_str(&explore_java(&path));
}
if has_kotlin_files(&path) {
languages_found.push("Kotlin".to_string());
report.push_str(&explore_kotlin(&path));
}
if has_swift_files(&path) {
languages_found.push("Swift".to_string());
report.push_str(&explore_swift(&path));
}
if has_go_files(&path) {
languages_found.push("Go".to_string());
report.push_str(&explore_go(&path));
}
if has_python_files(&path) {
languages_found.push("Python".to_string());
report.push_str(&explore_python(&path));
}
if has_typescript_files(&path) {
languages_found.push("TypeScript".to_string());
report.push_str(&explore_typescript(&path));
}
if has_javascript_files(&path) {
languages_found.push("JavaScript".to_string());
report.push_str(&explore_javascript(&path));
}
if has_cpp_files(&path) {
languages_found.push("C/C++".to_string());
report.push_str(&explore_cpp(&path));
}
if has_markdown_files(&path) {
languages_found.push("Markdown".to_string());
report.push_str(&explore_markdown(&path));
}
if has_yaml_files(&path) {
languages_found.push("YAML".to_string());
report.push_str(&explore_yaml(&path));
}
if has_sql_files(&path) {
languages_found.push("SQL".to_string());
report.push_str(&explore_sql(&path));
}
if has_ruby_files(&path) {
languages_found.push("Ruby".to_string());
report.push_str(&explore_ruby(&path));
}
if languages_found.is_empty() {
report.push_str("No recognized programming languages found in the codebase.\n");
} else {
let header = format!(
"=== CODEBASE ANALYSIS ===\nLanguages detected: {}\n\n",
languages_found.join(", ")
);
report = header + &report;
}
report
}
/// Replace a leading `~/` with the value of `$HOME`, if that variable is set.
/// Any other path (including a bare `~`) is returned unchanged.
fn expand_tilde(path: &str) -> String {
    match std::env::var_os("HOME") {
        Some(home) if path.starts_with("~/") => {
            path.replacen("~", &home.to_string_lossy(), 1)
        }
        _ => path.to_string(),
    }
}
/// Run `cmd` through `sh -c` in `working_dir` and return its textual output.
///
/// Preference order: stdout if non-empty, otherwise stderr (prefixed with
/// "(stderr): "), otherwise an empty string. A failure to launch the shell
/// is reported inline in the returned string rather than propagated.
fn run_command(cmd: &str, working_dir: &str) -> String {
    let spawned = Command::new("sh")
        .arg("-c")
        .arg(cmd)
        .current_dir(working_dir)
        .output();

    let out = match spawned {
        Ok(out) => out,
        Err(e) => return format!("Error running command: {}", e),
    };

    let stdout = String::from_utf8_lossy(&out.stdout);
    if !stdout.is_empty() {
        return stdout.to_string();
    }
    let stderr = String::from_utf8_lossy(&out.stderr);
    if stderr.is_empty() {
        String::new()
    } else {
        format!("(stderr): {}", stderr)
    }
}
/// True when at least one file with the given extension exists under `path`
/// (the `.git` directory is pruned from the search; `head -1` stops early).
fn has_files_with_extension(path: &str, extension: &str) -> bool {
    let probe = format!(
        "find . -name '.git' -prune -o -type f -name '*.{}' -print | head -1",
        extension
    );
    let first_hit = run_command(&probe, path);
    !first_hit.trim().is_empty()
}
// Language detection functions.
// Each returns true when at least one file of that language is present,
// via extension probes (`has_files_with_extension`). Rust additionally
// treats a top-level Cargo.toml as proof of a Rust project.
fn has_rust_files(path: &str) -> bool {
    has_files_with_extension(path, "rs") || Path::new(path).join("Cargo.toml").exists()
}

fn has_java_files(path: &str) -> bool {
    has_files_with_extension(path, "java")
}

fn has_kotlin_files(path: &str) -> bool {
    has_files_with_extension(path, "kt") || has_files_with_extension(path, "kts")
}

fn has_swift_files(path: &str) -> bool {
    has_files_with_extension(path, "swift")
}

fn has_go_files(path: &str) -> bool {
    has_files_with_extension(path, "go")
}

fn has_python_files(path: &str) -> bool {
    has_files_with_extension(path, "py")
}

fn has_typescript_files(path: &str) -> bool {
    has_files_with_extension(path, "ts") || has_files_with_extension(path, "tsx")
}

fn has_javascript_files(path: &str) -> bool {
    has_files_with_extension(path, "js") || has_files_with_extension(path, "jsx")
}

// C and C++ share header extensions, so they are detected (and reported) together.
fn has_cpp_files(path: &str) -> bool {
    has_files_with_extension(path, "cpp")
        || has_files_with_extension(path, "cc")
        || has_files_with_extension(path, "c")
        || has_files_with_extension(path, "h")
        || has_files_with_extension(path, "hpp")
}

fn has_markdown_files(path: &str) -> bool {
    has_files_with_extension(path, "md")
}

fn has_yaml_files(path: &str) -> bool {
    has_files_with_extension(path, "yaml") || has_files_with_extension(path, "yml")
}

fn has_sql_files(path: &str) -> bool {
    has_files_with_extension(path, "sql")
}

fn has_ruby_files(path: &str) -> bool {
    has_files_with_extension(path, "rb")
}
/// Explore Rust codebase
pub fn explore_rust(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== RUST ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.rs' . 2>/dev/null | grep -v '/target/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Dependencies (Cargo.toml)
report.push_str("--- Dependencies (Cargo.toml) ---\n");
let cargo = run_command("cat Cargo.toml 2>/dev/null | head -50", path);
report.push_str(&cargo);
report.push('\n');
// Data structures
report.push_str("--- Data Structures (Structs, Enums, Types) ---\n");
let structs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?(struct|enum|type|union) ' . 2>/dev/null | grep -v '/target/' | head -100"#,
path,
);
report.push_str(&structs);
report.push('\n');
// Traits and implementations
report.push_str("--- Traits & Implementations ---\n");
let traits = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?trait |^impl ' . 2>/dev/null | grep -v '/target/' | head -100"#,
path,
);
report.push_str(&traits);
report.push('\n');
// Public functions
report.push_str("--- Public Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^pub (async )?fn ' . 2>/dev/null | grep -v '/target/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Java codebase
pub fn explore_java(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== JAVA ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.java' . 2>/dev/null | grep -v '/build/' | grep -v '/target/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Build files
report.push_str("--- Build Configuration ---\n");
let build = run_command(
"cat pom.xml 2>/dev/null | head -50 || cat build.gradle 2>/dev/null | head -50",
path,
);
report.push_str(&build);
report.push('\n');
// Classes and interfaces
report.push_str("--- Classes & Interfaces ---\n");
let classes = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.java' '^(public |private |protected )?(abstract )?(class|interface|enum|record) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&classes);
report.push('\n');
// Public methods
report.push_str("--- Public Methods ---\n");
let methods = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.java' '^\s+public .+\(' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&methods);
report.push('\n');
report
}
/// Explore Kotlin codebase
pub fn explore_kotlin(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== KOTLIN ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.kt' -g '*.kts' . 2>/dev/null | grep -v '/build/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Build files
report.push_str("--- Build Configuration ---\n");
let build = run_command("cat build.gradle.kts 2>/dev/null | head -50 || cat build.gradle 2>/dev/null | head -50", path);
report.push_str(&build);
report.push('\n');
// Classes, objects, interfaces
report.push_str("--- Classes, Objects & Interfaces ---\n");
let classes = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.kt' '^(data |sealed |open |abstract )?(class|interface|object|enum class) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&classes);
report.push('\n');
// Functions
report.push_str("--- Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.kt' '^(suspend |private |internal |public )?fun ' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Swift codebase
pub fn explore_swift(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== SWIFT ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.swift' . 2>/dev/null | grep -v '/.build/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Package.swift
report.push_str("--- Package Configuration ---\n");
let pkg = run_command("cat Package.swift 2>/dev/null | head -50", path);
report.push_str(&pkg);
report.push('\n');
// Classes, structs, protocols
report.push_str("--- Types (Classes, Structs, Protocols, Enums) ---\n");
let types = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.swift' '^(public |private |internal |open |final )?(class|struct|protocol|enum|actor) ' . 2>/dev/null | grep -v '/.build/' | head -100"#,
path,
);
report.push_str(&types);
report.push('\n');
// Functions
report.push_str("--- Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.swift' '^\s*(public |private |internal |open )?func ' . 2>/dev/null | grep -v '/.build/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Go codebase
pub fn explore_go(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== GO ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.go' . 2>/dev/null | grep -v '/vendor/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// go.mod
report.push_str("--- Module Configuration ---\n");
let gomod = run_command("cat go.mod 2>/dev/null | head -50", path);
report.push_str(&gomod);
report.push('\n');
// Types (structs, interfaces)
report.push_str("--- Types (Structs & Interfaces) ---\n");
let types = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.go' '^type .+ (struct|interface)' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
path,
);
report.push_str(&types);
report.push('\n');
// Functions
report.push_str("--- Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.go' '^func ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Python codebase
pub fn explore_python(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== PYTHON ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.py' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | grep -v '/.venv/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Requirements/setup
report.push_str("--- Dependencies ---\n");
let deps = run_command(
"cat requirements.txt 2>/dev/null | head -30 || cat pyproject.toml 2>/dev/null | head -50 || cat setup.py 2>/dev/null | head -30",
path,
);
report.push_str(&deps);
report.push('\n');
// Classes
report.push_str("--- Classes ---\n");
let classes = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.py' '^class ' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | head -100"#,
path,
);
report.push_str(&classes);
report.push('\n');
// Functions
report.push_str("--- Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.py' '^def |^async def ' . 2>/dev/null | grep -v '/__pycache__/' | grep -v '/venv/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore TypeScript codebase
pub fn explore_typescript(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== TYPESCRIPT ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.ts' -g '*.tsx' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/dist/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// package.json
report.push_str("--- Package Configuration ---\n");
let pkg = run_command("cat package.json 2>/dev/null | head -50", path);
report.push_str(&pkg);
report.push('\n');
// Types, interfaces, classes
report.push_str("--- Types, Interfaces & Classes ---\n");
let types = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.ts' -g '*.tsx' '^export (type|interface|class|enum|abstract class) ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
path,
);
report.push_str(&types);
report.push('\n');
// Functions
report.push_str("--- Exported Functions ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.ts' -g '*.tsx' '^export (async )?function |^export const .+ = (async )?\(' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore JavaScript codebase: file layout, package.json, class
/// declarations, and exported functions.
///
/// Fix: the class pattern used to be `'^(export )?(default )?(class ) '` —
/// the `(class )` group plus the trailing literal space required TWO spaces
/// after `class`, so ordinary `class Foo` declarations never matched.
pub fn explore_javascript(path: &str) -> String {
    let mut report = String::new();
    report.push_str("\n=== JAVASCRIPT ===\n\n");

    // File structure
    report.push_str("--- File Structure ---\n");
    let files = run_command(
        "rg --files -g '*.js' -g '*.jsx' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/dist/' | sort | head -100",
        path,
    );
    report.push_str(&files);
    report.push('\n');

    // package.json
    report.push_str("--- Package Configuration ---\n");
    let pkg = run_command("cat package.json 2>/dev/null | head -50", path);
    report.push_str(&pkg);
    report.push('\n');

    // Classes — pattern matches `class Foo`, `export class`, `export default class`
    report.push_str("--- Classes ---\n");
    let classes = run_command(
        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.js' -g '*.jsx' '^(export )?(default )?class ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
        path,
    );
    report.push_str(&classes);
    report.push('\n');

    // Functions
    report.push_str("--- Exported Functions ---\n");
    let funcs = run_command(
        r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.js' -g '*.jsx' '^(export )?(async )?function |^module\.exports' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
        path,
    );
    report.push_str(&funcs);
    report.push('\n');

    report
}
/// Explore C/C++ codebase
pub fn explore_cpp(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== C/C++ ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.c' -g '*.cpp' -g '*.cc' -g '*.h' -g '*.hpp' . 2>/dev/null | grep -v '/build/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Build files
report.push_str("--- Build Configuration ---\n");
let build = run_command(
"cat CMakeLists.txt 2>/dev/null | head -50 || cat Makefile 2>/dev/null | head -50",
path,
);
report.push_str(&build);
report.push('\n');
// Classes and structs
report.push_str("--- Classes & Structs ---\n");
let classes = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.cpp' -g '*.cc' -g '*.h' -g '*.hpp' '^(class|struct|enum|union|typedef) ' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&classes);
report.push('\n');
// Functions (simplified pattern)
report.push_str("--- Function Declarations ---\n");
let funcs = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.h' -g '*.hpp' '^[a-zA-Z_][a-zA-Z0-9_<>: ]*\s+[a-zA-Z_][a-zA-Z0-9_]*\s*\(' . 2>/dev/null | grep -v '/build/' | head -100"#,
path,
);
report.push_str(&funcs);
report.push('\n');
report
}
/// Explore Markdown documentation
pub fn explore_markdown(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== MARKDOWN DOCUMENTATION ===\n\n");
// File structure
report.push_str("--- Documentation Files ---\n");
let files = run_command(
"rg --files -g '*.md' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/vendor/' | sort | head -50",
path,
);
report.push_str(&files);
report.push('\n');
// README content
report.push_str("--- README Overview ---\n");
let readme = run_command(
"cat README.md 2>/dev/null | head -100 || cat readme.md 2>/dev/null | head -100",
path,
);
report.push_str(&readme);
report.push('\n');
// Headers from all markdown files
report.push_str("--- Document Headers ---\n");
let headers = run_command(
r#"rg --no-heading --line-number --with-filename -g '*.md' '^#{1,3} ' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
path,
);
report.push_str(&headers);
report.push('\n');
report
}
/// Explore YAML configuration files
pub fn explore_yaml(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== YAML CONFIGURATION ===\n\n");
// File structure
report.push_str("--- YAML Files ---\n");
let files = run_command(
"rg --files -g '*.yaml' -g '*.yml' . 2>/dev/null | grep -v '/node_modules/' | grep -v '/vendor/' | sort | head -50",
path,
);
report.push_str(&files);
report.push('\n');
// Top-level keys from YAML files
report.push_str("--- Top-Level Keys ---\n");
let keys = run_command(
r#"rg --no-heading --line-number --with-filename -g '*.yaml' -g '*.yml' '^[a-zA-Z_][a-zA-Z0-9_-]*:' . 2>/dev/null | grep -v '/node_modules/' | head -100"#,
path,
);
report.push_str(&keys);
report.push('\n');
report
}
/// Explore SQL files
pub fn explore_sql(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== SQL ===\n\n");
// File structure
report.push_str("--- SQL Files ---\n");
let files = run_command(
"rg --files -g '*.sql' . 2>/dev/null | sort | head -50",
path,
);
report.push_str(&files);
report.push('\n');
// Tables
report.push_str("--- Table Definitions ---\n");
let tables = run_command(
r#"rg --no-heading --line-number --with-filename -i -g '*.sql' 'CREATE TABLE' . 2>/dev/null | head -100"#,
path,
);
report.push_str(&tables);
report.push('\n');
// Views and procedures
report.push_str("--- Views & Procedures ---\n");
let views = run_command(
r#"rg --no-heading --line-number --with-filename -i -g '*.sql' 'CREATE (VIEW|PROCEDURE|FUNCTION)' . 2>/dev/null | head -100"#,
path,
);
report.push_str(&views);
report.push('\n');
report
}
/// Explore Ruby codebase
pub fn explore_ruby(path: &str) -> String {
let mut report = String::new();
report.push_str("\n=== RUBY ===\n\n");
// File structure
report.push_str("--- File Structure ---\n");
let files = run_command(
"rg --files -g '*.rb' . 2>/dev/null | grep -v '/vendor/' | sort | head -100",
path,
);
report.push_str(&files);
report.push('\n');
// Gemfile
report.push_str("--- Dependencies (Gemfile) ---\n");
let gemfile = run_command("cat Gemfile 2>/dev/null | head -50", path);
report.push_str(&gemfile);
report.push('\n');
// Classes and modules
report.push_str("--- Classes & Modules ---\n");
let classes = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rb' '^(class|module) ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
path,
);
report.push_str(&classes);
report.push('\n');
// Methods
report.push_str("--- Methods ---\n");
let methods = run_command(
r#"rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rb' '^\s*def ' . 2>/dev/null | grep -v '/vendor/' | head -100"#,
path,
);
report.push_str(&methods);
report.push('\n');
report
}
#[cfg(test)]
mod tests {
    use super::*;

    // `expand_tilde` strips the leading `~/` — NOTE(review): this assumes
    // $HOME is set in the test environment; with HOME unset the path is
    // returned unchanged and this test would fail.
    #[test]
    fn test_expand_tilde() {
        let path = expand_tilde("~/test");
        assert!(!path.starts_with("~"));
    }

    // Smoke test: exploring the crate's own directory yields a non-empty report.
    #[test]
    fn test_explore_codebase_returns_string() {
        // Test with current directory
        let result = explore_codebase(".");
        assert!(!result.is_empty());
    }
}

View File

@@ -0,0 +1,325 @@
//! g3-planner: Fast-discovery planner for G3 AI coding agent
//!
//! This crate provides functionality to generate initial discovery tool calls
//! that are injected into the conversation before the first LLM turn.
mod code_explore;
pub mod prompts;
pub use code_explore::explore_codebase;
use anyhow::Result;
use g3_providers::{CompletionRequest, LLMProvider, Message, MessageRole};
use chrono::Local;
use std::fs::{self, OpenOptions};
use std::io::Write;
use prompts::{DISCOVERY_REQUIREMENTS_PROMPT, DISCOVERY_SYSTEM_PROMPT};
/// Type alias for a status callback function.
/// Invoked with short, human-readable progress messages during discovery.
pub type StatusCallback = Box<dyn Fn(&str) + Send + Sync>;
/// Generates initial discovery messages for fast codebase exploration.
///
/// This function:
/// 1. Runs `explore_codebase` to get a codebase report
/// 2. Sends the report to the LLM with `DISCOVERY_SYSTEM_PROMPT`
/// 3. Extracts shell commands from the LLM response
/// 4. Returns Assistant messages with tool calls for each command
///
/// # Arguments
///
/// * `codebase_path` - The path to the codebase to explore
/// * `requirements_text` - Optional requirements text to include in the discovery prompt
/// * `provider` - An LLM provider to query for exploration commands
/// * `status_callback` - Optional callback for status updates
///
/// # Errors
///
/// Fails if a log file cannot be written or the provider call fails.
pub async fn get_initial_discovery_messages(
    codebase_path: &str,
    requirements_text: Option<&str>,
    provider: &dyn LLMProvider,
    status_callback: Option<&StatusCallback>,
) -> Result<Vec<Message>> {
    // Helper to call status callback if provided
    let status = |msg: &str| {
        if let Some(cb) = status_callback {
            cb(msg);
        }
    };

    status("🔍 Starting code discovery...");

    // Step 1: Run explore_codebase to get the codebase report
    let codebase_report = explore_codebase(codebase_path);

    // Write the codebase report to logs directory for offline inspection
    write_code_report(&codebase_report)?;

    // Step 2: Build the prompt. The sections are joined in single-line format
    // strings so that no source-code indentation leaks into the prompt text
    // (the previous multi-line literal embedded stray newlines and leading
    // spaces before "=== REQUIREMENTS ===").
    let user_prompt = match requirements_text {
        Some(requirements) => format!(
            "{}\n\n=== REQUIREMENTS ===\n\n{}\n\n=== CODEBASE REPORT ===\n\n{}",
            DISCOVERY_REQUIREMENTS_PROMPT, requirements, codebase_report
        ),
        None => format!(
            "{}\n\n=== CODEBASE REPORT ===\n\n{}",
            DISCOVERY_REQUIREMENTS_PROMPT, codebase_report
        ),
    };

    // Step 3: Create messages for the LLM
    let messages = vec![
        Message::new(MessageRole::System, DISCOVERY_SYSTEM_PROMPT.to_string()),
        Message::new(MessageRole::User, user_prompt),
    ];

    // Step 4: Send to LLM (non-streaming; no tools exposed at this stage)
    let request = CompletionRequest {
        messages,
        max_tokens: Some(provider.max_tokens()),
        temperature: Some(provider.temperature()),
        stream: false,
        tools: None,
    };

    status("🤖 Calling LLM for discovery commands...");
    let response = provider.complete(request).await?;

    // Step 5: Extract shell commands from the response
    let shell_commands = extract_shell_commands(&response.content);
    status(&format!("📋 Extracted {} discovery commands", shell_commands.len()));

    // Write the discovery commands to logs directory
    write_discovery_commands(&shell_commands)?;

    // Step 6: Format each command as an Assistant tool-call message
    let tool_messages = shell_commands
        .into_iter()
        .map(|cmd| create_tool_message("shell", &cmd))
        .collect();

    Ok(tool_messages)
}
/// Creates an Assistant message with a tool call in g3's JSON format.
/// Payload shape: `{"tool": <name>, "args": {"command": <command>}}`.
pub fn create_tool_message(tool: &str, command: &str) -> Message {
    let args = serde_json::json!({ "command": command });
    let payload = serde_json::json!({ "tool": tool, "args": args });
    Message::new(MessageRole::Assistant, payload.to_string())
}
/// Extract shell commands from the LLM response.
///
/// Looks for the `{{CODE EXPLORATION COMMANDS}}` section and collects the
/// non-empty, non-comment (`#`) lines of every fenced code block after it.
/// Returns an empty vector when the section marker is absent.
///
/// Robustness fix: a code block left unterminated at the end of the response
/// (e.g. truncated LLM output) is now flushed instead of silently dropped.
pub fn extract_shell_commands(response: &str) -> Vec<String> {
    let section_marker = "{{CODE EXPLORATION COMMANDS}}";
    let section_start = match response.find(section_marker) {
        Some(pos) => pos + section_marker.len(),
        None => return Vec::new(),
    };

    // Moves the non-empty, non-comment lines of one code block into `commands`.
    fn flush(block: &mut String, commands: &mut Vec<String>) {
        for cmd_line in block.lines() {
            let cmd = cmd_line.trim();
            if !cmd.is_empty() && !cmd.starts_with('#') {
                commands.push(cmd.to_string());
            }
        }
        block.clear();
    }

    let mut commands = Vec::new();
    let mut in_code_block = false;
    let mut current_block = String::new();

    for line in response[section_start..].lines() {
        if line.trim().starts_with("```") {
            // A fence either opens or closes a block; closing flushes it.
            if in_code_block {
                flush(&mut current_block, &mut commands);
            }
            in_code_block = !in_code_block;
        } else if in_code_block {
            current_block.push_str(line);
            current_block.push('\n');
        }
    }

    // Salvage a block that was never closed.
    if in_code_block {
        flush(&mut current_block, &mut commands);
    }

    commands
}
/// Extract the summary section from the LLM response.
/// Returns `None` when the marker is absent or the section is blank.
pub fn extract_summary(response: &str) -> Option<String> {
    const MARKER: &str = "{{SUMMARY BASED ON INITIAL INFO}}";

    // Text after the marker, or bail out if the marker is missing.
    let after = response
        .find(MARKER)
        .map(|pos| &response[pos + MARKER.len()..])?;

    // The summary runs until the next `{{...}}` section (or end of input).
    let end = after.find("{{").unwrap_or(after.len());
    let summary = after[..end].trim();

    (!summary.is_empty()).then(|| summary.to_string())
}
/// Write the codebase report into the `logs/` directory,
/// using the same timestamped naming scheme as the tool_calls log.
fn write_code_report(report: &str) -> Result<()> {
    // Ensure logs directory exists
    fs::create_dir_all("logs")?;

    // Generate timestamp in same format as tool_calls log
    let timestamp = Local::now().format("%Y%m%d_%H%M%S");
    let filename = format!("logs/code_report_{}.log", timestamp);

    // Create (or truncate) and write the whole report in one call.
    fs::write(&filename, report.as_bytes())?;
    Ok(())
}
/// Write the discovery commands into the `logs/` directory,
/// one per line, under a short descriptive header.
fn write_discovery_commands(commands: &[String]) -> Result<()> {
    // Ensure logs directory exists
    fs::create_dir_all("logs")?;

    // Generate timestamp in same format as tool_calls log
    let timestamp = Local::now().format("%Y%m%d_%H%M%S");
    let filename = format!("logs/discovery_commands_{}.log", timestamp);

    // Build the whole file in memory, then write it out in one call.
    let mut contents = String::from("# Discovery Commands\n# Generated by g3-planner\n\n");
    for cmd in commands {
        contents.push_str(cmd);
        contents.push('\n');
    }
    fs::write(&filename, contents.as_bytes())?;
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    // The tool-call payload must round-trip as JSON with the expected shape:
    // {"tool": ..., "args": {"command": ...}} inside an Assistant message.
    #[test]
    fn test_create_tool_message_format() {
        let msg = create_tool_message("shell", "ls -la");
        assert!(matches!(msg.role, MessageRole::Assistant));
        let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap();
        assert_eq!(parsed["tool"], "shell");
        assert_eq!(parsed["args"]["command"], "ls -la");
    }

    // Commands inside a fenced block after the section marker are extracted in order.
    #[test]
    fn test_extract_shell_commands_basic() {
        let response = r#"
Some text here.
{{CODE EXPLORATION COMMANDS}}
```bash
ls -la
cat README.md
rg --files -g '*.rs'
```
More text.
"#;
        let commands = extract_shell_commands(response);
        assert_eq!(commands.len(), 3);
        assert_eq!(commands[0], "ls -la");
        assert_eq!(commands[1], "cat README.md");
        assert_eq!(commands[2], "rg --files -g '*.rs'");
    }

    // Lines starting with '#' are treated as comments and skipped.
    #[test]
    fn test_extract_shell_commands_with_comments() {
        let response = r#"
{{CODE EXPLORATION COMMANDS}}
```
# This is a comment
ls -la
# Another comment
cat file.txt
```
"#;
        let commands = extract_shell_commands(response);
        assert_eq!(commands.len(), 2);
        assert_eq!(commands[0], "ls -la");
        assert_eq!(commands[1], "cat file.txt");
    }

    // Missing section marker yields no commands.
    #[test]
    fn test_extract_shell_commands_no_section() {
        let response = "Some response without the expected section.";
        let commands = extract_shell_commands(response);
        assert!(commands.is_empty());
    }

    // The summary section runs from its marker up to the next {{...}} marker.
    #[test]
    fn test_extract_summary() {
        let response = r#"
{{SUMMARY BASED ON INITIAL INFO}}
This is a summary of the codebase.
It has multiple lines.
{{CODE EXPLORATION COMMANDS}}
```
ls -la
```
"#;
        let summary = extract_summary(response);
        assert!(summary.is_some());
        let summary_text = summary.unwrap();
        assert!(summary_text.contains("This is a summary"));
        assert!(summary_text.contains("multiple lines"));
    }

    // No summary marker at all → None.
    #[test]
    fn test_extract_summary_no_section() {
        let response = "Response without summary section.";
        let summary = extract_summary(response);
        assert!(summary.is_none());
    }
}

View File

@@ -0,0 +1,37 @@
//! Prompts used for discovery phase
/// System prompt for discovery mode - instructs the LLM to analyze codebase and generate exploration commands.
///
/// Used together with [`DISCOVERY_REQUIREMENTS_PROMPT`]: this sets the role
/// ("expert code analyst"), while the requirements prompt carries the task
/// details and output format. The prompt explicitly forbids implementation so
/// the model only emits read-only exploration commands.
pub const DISCOVERY_SYSTEM_PROMPT: &str = r#"You are an expert code analyst. Your task is to analyze a codebase structure and generate shell commands to explore it further.
You will receive:
1. User requirements describing what needs to be implemented
2. A codebase report showing the structure and key elements of the codebase
Your job is to:
1. Understand the requirements and identify what parts of the codebase are relevant
2. Generate shell commands to explore those parts in more detail
IMPORTANT: Do NOT attempt to implement anything. Only generate exploration commands."#;
/// Discovery prompt template - used when we have a codebase report.
/// The codebase report should be appended after this prompt.
///
/// Output contract expected from the model (parsed by `extract_summary` and
/// `extract_shell_commands`):
/// - a `{{SUMMARY BASED ON INITIAL INFO}}` section, then
/// - a `{{CODE EXPLORATION COMMANDS}}` section containing fenced shell commands.
///
/// NOTE: the embedded `rg` example must keep its shell quoting balanced;
/// an unterminated quote here teaches the model to emit commands that fail
/// when executed. (Fixed: closing `'` added to the regex argument.)
pub const DISCOVERY_REQUIREMENTS_PROMPT: &str = r#"**CRITICAL**: DO ABSOLUTELY NOT ATTEMPT TO IMPLEMENT THESE REQUIREMENTS AT THIS POINT. ONLY USE THEM TO
UNDERSTAND WHICH PARTS OF THE CODE YOU MIGHT BE INTERESTED IN, AND WHAT SEARCH/GREP EXPRESSIONS YOU MIGHT WANT TO USE
TO GET A BETTER UNDERSTANDING OF THE CODEBASE.
Your task is to analyze the codebase overview provided below and generate shell commands to explore it further - in particular, those
you deem most relevant to the requirements given below.
Your output MUST include:
1. A summary report. Use the heading {{SUMMARY BASED ON INITIAL INFO}}.
- retain as much information of that as you consider relevant to the requirements, and for making an implementation plan.
- Ideally that should not be more than 10000 tokens.
2. A list of shell commands to explore the code. Use the heading {{CODE EXPLORATION COMMANDS}}.
- Try plan ahead for what you need for a deep dive into the code. Make sure the information is sparing.
- Carefully consider which commands give you the most relevant information, pick the top 25 commands.
- Use tools like `ls`, `rg` (ripgrep), `grep`, `sed`, `cat`, `head`, `tail` etc.
- Focus on commands that will help understand the code STRUCTURE without dumping large sections of file.
- e.g. for Rust you might try `rg --no-heading --line-number --with-filename --max-filesize 500K -g '*.rs' '^(pub )?(struct|enum|type|union)'`
- Mark the beginning and end of the commands with "```".
DO NOT ADD ANY COMMENTS OR OTHER EXPLANATION IN THE COMMANDS SECTION, JUST INCLUDE THE SHELL COMMANDS."#;

View File

@@ -0,0 +1,60 @@
//! Integration tests for logging functionality
use std::fs;
use std::path::Path;
/// Round-trips the planner's two log artifacts (code report and discovery
/// commands) through the `logs/` directory, mirroring runtime behavior.
#[test]
fn test_log_files_created() {
    // Start from a clean slate.
    let _ = fs::remove_dir_all("logs");
    fs::create_dir_all("logs").expect("Failed to create logs directory");
    assert!(Path::new("logs").exists());
    assert!(Path::new("logs").is_dir());

    let stamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();

    // Round-trip a code report.
    let sample_report = "Test codebase report\nLine 2\nLine 3";
    let report_path = format!("logs/code_report_{}.log", stamp);
    fs::write(&report_path, sample_report).expect("Failed to write code report");
    assert!(Path::new(&report_path).exists());
    assert_eq!(
        fs::read_to_string(&report_path).expect("Failed to read code report"),
        sample_report
    );

    // Round-trip the discovery commands log.
    let commands_path = format!("logs/discovery_commands_{}.log", stamp);
    let sample_commands = "# Discovery Commands\n# Generated by g3-planner\n\nls -la\ncat README.md\n";
    fs::write(&commands_path, sample_commands).expect("Failed to write discovery commands");
    assert!(Path::new(&commands_path).exists());
    assert_eq!(
        fs::read_to_string(&commands_path).expect("Failed to read discovery commands"),
        sample_commands
    );

    // Clean up after ourselves.
    let _ = fs::remove_file(&report_path);
    let _ = fs::remove_file(&commands_path);
}
/// The log timestamp must match the tool_calls naming scheme: YYYYMMDD_HHMMSS.
#[test]
fn test_filename_format() {
    let stamp = chrono::Local::now().format("%Y%m%d_%H%M%S").to_string();

    // 8 date digits + '_' + 6 time digits.
    assert_eq!(stamp.len(), 15);
    assert!(stamp.contains('_'));

    let pieces: Vec<&str> = stamp.split('_').collect();
    assert_eq!(pieces.len(), 2);
    assert_eq!(pieces[0].len(), 8); // YYYYMMDD
    assert_eq!(pieces[1].len(), 6); // HHMMSS
}

View File

@@ -0,0 +1,103 @@
//! Integration tests for g3-planner
use g3_planner::{create_tool_message, explore_codebase, extract_shell_commands};
use g3_providers::MessageRole;
/// Tool messages are assistant-role entries whose content is a JSON payload.
#[test]
fn test_create_tool_message_format() {
    let message = create_tool_message("shell", "ls -la");
    assert!(matches!(message.role, MessageRole::Assistant));

    let payload: serde_json::Value = serde_json::from_str(&message.content).unwrap();
    assert_eq!(payload["tool"], "shell");
    assert_eq!(payload["args"]["command"], "ls -la");
}
/// Exploring the current directory (the g3 repo itself) must always yield a report.
#[test]
fn test_explore_codebase_returns_report() {
    let output = explore_codebase(".");

    // A report is produced even when no language is recognized.
    assert!(!output.is_empty(), "Report should not be empty");

    let has_expected_header =
        output.contains("CODEBASE ANALYSIS") || output.contains("No recognized");
    assert!(
        has_expected_header,
        "Report should have analysis header or indicate no languages found"
    );
}
/// Commands inside the fenced block under the marker come back in order.
#[test]
fn test_extract_shell_commands_basic() {
    let input = r#"
Some text here.
{{CODE EXPLORATION COMMANDS}}
```bash
ls -la
cat README.md
rg --files -g '*.rs'
```
More text.
"#;
    let extracted = extract_shell_commands(input);
    assert_eq!(extracted.len(), 3);
    assert_eq!(extracted[0], "ls -la");
    assert_eq!(extracted[1], "cat README.md");
    assert_eq!(extracted[2], "rg --files -g '*.rs'");
}
/// Lines starting with '#' inside the fence are not treated as commands.
#[test]
fn test_extract_shell_commands_with_comments() {
    let input = r#"
{{CODE EXPLORATION COMMANDS}}
```
# This is a comment
ls -la
# Another comment
cat file.txt
```
"#;
    let extracted = extract_shell_commands(input);
    assert_eq!(extracted.len(), 2);
    assert_eq!(extracted[0], "ls -la");
    assert_eq!(extracted[1], "cat file.txt");
}
/// A response without the commands marker yields no commands.
#[test]
fn test_extract_shell_commands_no_section() {
    let input = "Some response without the expected section.";
    assert!(extract_shell_commands(input).is_empty());
}
/// Commands from every fenced block after the marker are collected,
/// skipping any explanatory text between the fences.
#[test]
fn test_extract_shell_commands_multiple_code_blocks() {
    let input = r#"
{{CODE EXPLORATION COMMANDS}}
```bash
ls -la
```
Some explanation text.
```
cat README.md
head -50 src/main.rs
```
"#;
    let extracted = extract_shell_commands(input);
    assert_eq!(extracted.len(), 3);
    assert_eq!(extracted[0], "ls -la");
    assert_eq!(extracted[1], "cat README.md");
    assert_eq!(extracted[2], "head -50 src/main.rs");
}

View File

@@ -678,6 +678,14 @@ impl LLMProvider for AnthropicProvider {
// Anthropic supports cache control
true
}
    /// Maximum number of output tokens configured for this provider instance.
    fn max_tokens(&self) -> u32 {
        self.max_tokens
    }

    /// Sampling temperature configured for this provider instance.
    fn temperature(&self) -> f32 {
        self.temperature
    }
}
// Anthropic API request/response structures

View File

@@ -1055,6 +1055,14 @@ impl LLMProvider for DatabricksProvider {
    /// Cache control is not supported by this provider (always `false`).
    fn supports_cache_control(&self) -> bool {
        false
    }

    /// Maximum number of output tokens configured for this provider instance.
    fn max_tokens(&self) -> u32 {
        self.max_tokens
    }

    /// Sampling temperature configured for this provider instance.
    fn temperature(&self) -> f32 {
        self.temperature
    }
}
// Databricks API request/response structures

View File

@@ -771,4 +771,12 @@ impl LLMProvider for EmbeddedProvider {
    /// Name of the model backing this embedded provider.
    fn model(&self) -> &str {
        &self.model_name
    }

    /// Maximum number of output tokens configured for this provider instance.
    fn max_tokens(&self) -> u32 {
        self.max_tokens
    }

    /// Sampling temperature configured for this provider instance.
    fn temperature(&self) -> f32 {
        self.temperature
    }
}

View File

@@ -26,6 +26,12 @@ pub trait LLMProvider: Send + Sync {
    /// Whether this provider honors prompt cache-control markers.
    /// Defaults to `false`; providers that support caching override this.
    fn supports_cache_control(&self) -> bool {
        false
    }

    /// Get the configured max_tokens for this provider.
    /// Implementations may fall back to a provider-specific default when
    /// no value is configured.
    fn max_tokens(&self) -> u32;

    /// Get the configured temperature for this provider.
    /// Implementations may fall back to a provider-specific default when
    /// no value is configured.
    fn temperature(&self) -> f32;
}
#[derive(Debug, Clone, Serialize, Deserialize)]

View File

@@ -384,6 +384,14 @@ impl LLMProvider for OpenAIProvider {
// OpenAI models support native tool calling
true
}
    /// Configured max output tokens; falls back to 16000 when unset in config.
    fn max_tokens(&self) -> u32 {
        self.max_tokens.unwrap_or(16000)
    }

    /// Configured sampling temperature; falls back to 0.1 when unset in config.
    fn temperature(&self) -> f32 {
        self._temperature.unwrap_or(0.1)
    }
}
fn convert_messages(messages: &[Message]) -> Vec<serde_json::Value> {

View File

@@ -1,39 +0,0 @@
#!/bin/bash
# Test script for AI-enhanced interactive requirements mode
#
# Manual smoke-test helper: it only prepares a scratch workspace and prints
# the exact command and input to run by hand; it does not invoke g3 itself.
echo "Testing AI-enhanced interactive requirements mode..."
echo ""
# Create a test workspace (unique per run via the epoch timestamp)
TEST_WORKSPACE="/tmp/g3-test-interactive-$(date +%s)"
mkdir -p "$TEST_WORKSPACE"
echo "Test workspace: $TEST_WORKSPACE"
echo ""
# Create sample brief input
BRIEF_INPUT="build a calculator cli in rust with basic operations"
echo "Brief input:"
echo "---"
echo "$BRIEF_INPUT"
echo "---"
echo ""
# Describe the expected flow, then print the manual reproduction steps.
echo "This will:"
echo "1. Send brief input to AI"
echo "2. AI generates structured requirements.md"
echo "3. Show enhanced requirements"
echo "4. Prompt for confirmation (y/e/n)"
echo ""
echo "To test manually, run:"
echo "cargo run -- --autonomous --interactive-requirements --workspace $TEST_WORKSPACE"
echo ""
echo "Then type: $BRIEF_INPUT"
echo "Press Ctrl+D"
echo "Review the AI-generated requirements"
echo "Choose 'y' to proceed, 'e' to edit, or 'n' to cancel"
echo ""
# Note: the workspace is intentionally left behind for inspection.
echo "Test workspace will be at: $TEST_WORKSPACE"
View File

@@ -1,70 +0,0 @@
# Anthropic max_tokens Error Fix - Test Plan
## Changes Made
### 1. Fixed Context Window Size Detection
- **Problem**: Code used hardcoded 200k limit for Anthropic instead of configured max_tokens
- **Fix**: Modified `determine_context_length()` to check configured max_tokens first before falling back to defaults
- **Files**: `crates/g3-core/src/lib.rs` lines 923-945, 967-985
### 2. Added Thinning Before Summarization
- **Problem**: Code attempted summarization even when context window was nearly full
- **Fix**: Added logic to try thinning first when context usage is between 80-90%
- **Files**: `crates/g3-core/src/lib.rs` lines 2415-2439
### 3. Added Capacity Checks Before Summarization
- **Problem**: No validation that sufficient tokens remained for summarization
- **Fix**: Added capacity checks for all provider types with helpful error messages
- **Files**: `crates/g3-core/src/lib.rs` lines 2480-2520
### 4. Improved Error Messages
- **Problem**: Generic errors when summarization failed
- **Fix**: Specific error messages suggesting `/thinnify` and `/compact` commands
- **Files**: Multiple locations in summarization logic
### 5. Dynamic Buffer Calculation
- **Problem**: Fixed 5k buffer regardless of model size
- **Fix**: Proportional buffer (2.5% of model limit, min 1k, max 10k)
- **Files**: `crates/g3-core/src/lib.rs` line 2487
## Test Cases
### Test 1: Configured max_tokens Respected
```toml
# In g3.toml
[providers.anthropic]
api_key = "your-key"
model = "claude-3-5-sonnet-20241022"
max_tokens = 50000 # Should use this instead of 200k default
```
### Test 2: Thinning Before Summarization
- Fill context to 85% capacity
- Verify thinning is attempted before summarization
- Check that summarization is skipped if thinning resolves the issue
### Test 3: Capacity Error Handling
- Fill context to 98% capacity
- Verify helpful error message is shown instead of API error
- Check that `/thinnify` and `/compact` commands are suggested
### Test 4: Provider-Specific Handling
- Test with different providers (anthropic, databricks, embedded)
- Verify each uses appropriate capacity checks and buffers
## Expected Behavior
1. **No more max_tokens API errors** from Anthropic when context window is full
2. **Automatic thinning** when approaching capacity (80-90%)
3. **Clear error messages** with actionable suggestions when at capacity
4. **Respect configured limits** instead of hardcoded defaults
5. **Graceful degradation** with helpful user guidance
## Manual Testing Commands
```bash
# Test with small max_tokens to trigger the issue quickly
g3 --chat
# Then paste large amounts of text to fill context window
# Verify thinning and error handling work correctly
```