fix tests

cache_control removed from databricks
regression tests added
2025-11-19 12:42:37 +11:00 · 2025-11-19 12:15:49 +11:00 · 2025-11-19 11:32:14 +11:00 · 2025-11-18 23:23:49 +11:00 · 2025-11-18 22:38:52 +11:00
27 changed files with 925 additions and 2437 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1345,7 +1345,6 @@ dependencies = [
 "dirs 5.0.1",
 "g3-config",
 "g3-core",
 "g3-ensembles",
 "indicatif",
 "ratatui",
 "rustyline",
@@ -1463,23 +1462,6 @@ dependencies = [
 "walkdir",
 ]
 [[package]]
 name = "g3-ensembles"
 version = "0.1.0"
 dependencies = [
 "anyhow",
 "chrono",
 "clap",
 "g3-config",
 "g3-core",
 "serde",
 "serde_json",
 "tempfile",
 "tokio",
 "tracing",
 "uuid",
 ]
 [[package]]
 name = "g3-execution"
 version = "0.1.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,8 +6,7 @@ members = [
    "crates/g3-config",
    "crates/g3-execution",
    "crates/g3-computer-control",
-    "crates/g3-console",
+    "crates/g3-console"
    "crates/g3-ensembles"
 ]
 resolver = "2"
--- a/README.md
+++ b/README.md
@@ -96,7 +96,6 @@ These commands give you fine-grained control over context management, allowing y
  - Window listing and identification
 - **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript, Go, Java, C, C++) - see [Code Search Guide](docs/CODE_SEARCH.md)
 - **Final Output**: Formatted result presentation
 - **Flock Mode**: Parallel multi-agent development for large projects - see [Flock Mode Guide](docs/FLOCK_MODE.md)
 ### Provider Flexibility
 - Support for multiple LLM providers through a unified interface
@@ -130,7 +129,6 @@ G3 is designed for:
 - API integration and testing
 - Documentation generation
 - Complex multi-step workflows
 - Parallel development of modular architectures
 - Desktop application automation and testing
 ## Getting Started
--- a/config.coach-player.example.toml
+++ b/config.coach-player.example.toml
@@ -11,12 +11,23 @@ model = "databricks-claude-sonnet-4"
 max_tokens = 4096
 temperature = 0.1
 use_oauth = true
 # cache_config = "ephemeral"  # Optional: Enable prompt caching for Claude models
                              # Options: "ephemeral", "5minute", "1hour"
                              # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
                                # The cache control will be automatically applied to:
                                # - The system prompt at the start of each session
                                # - Assistant responses after every 10 tool calls
                                # - 5minute costs $3/mtok, more details below
                                # https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing
 [providers.anthropic]
 api_key = "your-anthropic-api-key"
 model = "claude-3-haiku-20240307"  # Using a faster model for player
 max_tokens = 4096
 temperature = 0.3  # Slightly higher temperature for more creative implementations
 # cache_config = "ephemeral"  # Optional: Enable prompt caching
                              # Options: "ephemeral", "5minute", "1hour"
                              # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
 [agent]
 fallback_default_max_tokens = 8192
--- a/config.example.toml
+++ b/config.example.toml
@@ -14,6 +14,15 @@ max_tokens = 4096  # Per-request output limit (how many tokens the model can gen
                   # Note: This is different from max_context_length (total conversation history size)
 temperature = 0.1
 use_oauth = true
 # cache_config = "ephemeral"  # Optional: Enable prompt caching for Claude models on Databricks
                              # Options: "ephemeral", "5minute", "1hour"
                              # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
                                # The cache control will be automatically applied to:
                                # - The system prompt at the start of each session
                                # - Assistant responses after every 10 tool calls
                                # - 5minute costs $3/mtok, more details below
                                # https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing
 # Multiple OpenAI-compatible providers can be configured with custom names
 # Each provider gets its own section under [providers.openai_compatible.<name>]
--- a/crates/g3-cli/Cargo.toml
+++ b/crates/g3-cli/Cargo.toml
@@ -8,7 +8,6 @@ description = "CLI interface for G3 AI coding agent"
 g3-core = { path = "../g3-core" }
 g3-config = { path = "../g3-config" }
 clap = { workspace = true }
 g3-ensembles = { path = "../g3-ensembles" }
 tokio = { workspace = true }
 anyhow = { workspace = true }
 tracing = { workspace = true }
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -246,36 +246,11 @@ pub struct Cli {
    /// Enable WebDriver browser automation tools
    #[arg(long)]
    pub webdriver: bool,
    /// Enable flock mode - parallel multi-agent development
    #[arg(long, requires = "flock_workspace", requires = "segments")]
    pub project: Option<PathBuf>,
    /// Flock workspace directory (where segment copies will be created)
    #[arg(long, requires = "project")]
    pub flock_workspace: Option<PathBuf>,
    /// Number of segments to partition work into (for flock mode)
    #[arg(long, requires = "project")]
    pub segments: Option<usize>,
    /// Maximum turns per segment in flock mode (default: 5)
    #[arg(long, default_value = "5")]
    pub flock_max_turns: usize,
 }
 pub async fn run() -> Result<()> {
    let cli = Cli::parse();
    // Check if flock mode is enabled
    if let (Some(project_dir), Some(flock_workspace), Some(num_segments)) = 
        (&cli.project, &cli.flock_workspace, cli.segments) {
        // Run flock mode
        return run_flock_mode(project_dir.clone(), flock_workspace.clone(), num_segments, cli.flock_max_turns).await;
    }
    // Otherwise, continue with normal mode
    // Only initialize logging if not in retro mode
    if !cli.machine {
        // Initialize logging with filtering
@@ -464,39 +439,6 @@ pub async fn run() -> Result<()> {
    Ok(())
 }
 /// Run flock mode - parallel multi-agent development
 async fn run_flock_mode(
    project_dir: PathBuf,
    flock_workspace: PathBuf,
    num_segments: usize,
    max_turns: usize,
 ) -> Result<()> {
    let output = SimpleOutput::new();
    output.print("");
    output.print("🦅 G3 FLOCK MODE - Parallel Multi-Agent Development");
    output.print("");
    output.print(&format!("📁 Project: {}", project_dir.display()));
    output.print(&format!("🗂️  Workspace: {}", flock_workspace.display()));
    output.print(&format!("🔢 Segments: {}", num_segments));
    output.print(&format!("🔄 Max Turns per Segment: {}", max_turns));
    output.print("");
    // Create flock configuration
    let config = g3_ensembles::FlockConfig::new(project_dir, flock_workspace, num_segments)?
        .with_max_turns(max_turns);
    // Create and run flock mode
    let mut flock = g3_ensembles::FlockMode::new(config)?;
    match flock.run().await {
        Ok(_) => output.print("\n✅ Flock mode completed successfully"),
        Err(e) => output.print(&format!("\n❌ Flock mode failed: {}", e)),
    }
    Ok(())
 }
 /// Accumulative autonomous mode: accumulates requirements from user input
 /// and runs autonomous mode after each input
 async fn run_accumulative_mode(
--- a/crates/g3-config/src/lib.rs
+++ b/crates/g3-config/src/lib.rs
@@ -40,6 +40,8 @@ pub struct AnthropicConfig {
    pub model: String,
    pub max_tokens: Option<u32>,
    pub temperature: Option<f32>,
    pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable
    pub enable_1m_context: Option<bool>, // Enable 1m context window (costs extra)
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -3,6 +3,8 @@ pub mod error_handling;
 pub mod project;
 pub mod task_result;
 pub mod ui_writer;
 use std::process::exit;
 pub use task_result::TaskResult;
 #[cfg(test)]
@@ -23,7 +25,7 @@ use anyhow::Result;
 use g3_computer_control::WebDriverController;
 use g3_config::Config;
 use g3_execution::CodeExecutor;
-use g3_providers::{CompletionRequest, Message, MessageRole, ProviderRegistry, Tool};
+use g3_providers::{CacheControl, CompletionRequest, Message, MessageRole, ProviderRegistry, Tool};
 #[allow(unused_imports)]
 use regex::Regex;
 use serde::{Deserialize, Serialize};
@@ -423,18 +425,12 @@ Format this as a detailed but concise summary that can be used to resume the con
        self.used_tokens = 0;
        // Add the summary as a system message
-        let summary_message = Message {
+        let summary_message = Message::new(MessageRole::System, format!("Previous conversation summary:\n\n{}", summary));
            role: MessageRole::System,
            content: format!("Previous conversation summary:\n\n{}", summary),
        };
        self.add_message(summary_message);
        // Add the latest user message if provided
        if let Some(user_msg) = latest_user_message {
-            self.add_message(Message {
+            self.add_message(Message::new(MessageRole::User, user_msg));
                role: MessageRole::User,
                content: user_msg,
            });
        }
        let new_chars: usize = self
@@ -756,6 +752,7 @@ pub struct Agent<W: UiWriter> {
    safaridriver_process: std::sync::Arc<tokio::sync::RwLock<Option<tokio::process::Child>>>,
    macax_controller:
        std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
    tool_call_count: usize,
 }
 impl<W: UiWriter> Agent<W> {
@@ -898,6 +895,8 @@ impl<W: UiWriter> Agent<W> {
                    Some(anthropic_config.model.clone()),
                    anthropic_config.max_tokens,
                    anthropic_config.temperature,
                    anthropic_config.cache_config.clone(),
                    anthropic_config.enable_1m_context,
                )?;
                providers.register(anthropic_provider);
            }
@@ -944,10 +943,7 @@ impl<W: UiWriter> Agent<W> {
        // If README content is provided, add it as the first system message
        if let Some(readme) = readme_content {
-            let readme_message = Message {
+            let readme_message = Message::new(MessageRole::System, readme);
                role: MessageRole::System,
                content: readme,
            };
            context_window.add_message(readme_message);
        }
@@ -1003,9 +999,23 @@ impl<W: UiWriter> Agent<W> {
                    None
                }))
            },
            tool_call_count: 0,
        })
    }
    /// Convert cache config string to CacheControl enum
    fn parse_cache_control(cache_config: &str) -> Option<CacheControl> {
        match cache_config {
            "ephemeral" => Some(CacheControl::ephemeral()),
            "5minute" => Some(CacheControl::five_minute()),
            "1hour" => Some(CacheControl::one_hour()),
            _ => {
                warn!("Invalid cache_config value: '{}'. Valid values are: ephemeral, 5minute, 1hour", cache_config);
                None
            }
        }
    }
    fn get_configured_context_length(config: &Config, providers: &ProviderRegistry) -> Result<u32> {
        // First, check if there's a global max_context_length override in agent config
        if let Some(max_context_length) = config.agent.max_context_length {
@@ -1185,7 +1195,11 @@ impl<W: UiWriter> Agent<W> {
        // Only add system message if this is the first interaction (empty conversation history)
        if self.context_window.conversation_history.is_empty() {
            let provider = self.providers.get(None)?;
-            let system_prompt = if provider.has_native_tool_calling() {
+            let provider_has_native_tool_calling = provider.has_native_tool_calling();
            let provider_name_for_system = provider.name().to_string();
            drop(provider); // Drop provider reference to avoid borrowing issues
            let system_prompt = if provider_has_native_tool_calling {
                // For native tool calling providers, use a more explicit system prompt
                "You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You analyze given tasks and write code to achieve goals.
@@ -1493,18 +1507,26 @@ If you can complete it with 1-2 tool calls, skip TODO.
            }
            // Add system message to context window
-            let system_message = Message {
+            let system_message = {
-                role: MessageRole::System,
+                // Check if we should use cache control for system message
-                content: system_prompt,
+                if let Some(cache_config) = match provider_name_for_system.as_str() {
                    "anthropic" => self.config.providers.anthropic.as_ref()
                        .and_then(|c| c.cache_config.as_ref())
                        .and_then(|config| Self::parse_cache_control(config)),
                    _ => None,
                } {
                    let provider = self.providers.get(None)?;
                    Message::with_cache_control_validated(MessageRole::System, system_prompt, cache_config, provider)
                } else {
                    Message::new(MessageRole::System, system_prompt)
                }
            };
            self.context_window.add_message(system_message);
        }
        // Add user message to context window
-        let user_message = Message {
+        let user_message = Message::new(MessageRole::User, format!("Task: {}", description));
            role: MessageRole::User,
            content: format!("Task: {}", description),
        };
        self.context_window.add_message(user_message);
        // Use the complete conversation history for the request
@@ -1512,6 +1534,9 @@ If you can complete it with 1-2 tool calls, skip TODO.
        // Check if provider supports native tool calling and add tools if so
        let provider = self.providers.get(None)?;
        let provider_name = provider.name().to_string();
        let has_native_tool_calling = provider.has_native_tool_calling();
        let supports_cache_control = provider.supports_cache_control();
        let tools = if provider.has_native_tool_calling() {
            Some(Self::create_tool_definitions(
                self.config.webdriver.enabled,
@@ -1521,9 +1546,10 @@ If you can complete it with 1-2 tool calls, skip TODO.
        } else {
            None
        };
        drop(provider); // Drop the provider reference to avoid borrowing issues
        // Get max_tokens from provider configuration
-        let max_tokens = match provider.name() {
+        let max_tokens = match provider_name.as_str() {
            "databricks" => {
                // Use the model's maximum limit for Databricks to allow large file generation
                Some(32000)
@@ -1578,9 +1604,23 @@ If you can complete it with 1-2 tool calls, skip TODO.
        // Add assistant response to context window only if not empty
        // This prevents the "Skipping empty message" warning when only tools were executed
        if !response_content.trim().is_empty() {
-            let assistant_message = Message {
+            let assistant_message = {
-                role: MessageRole::Assistant,
+                // Check if we should use cache control (every 10 tool calls)
-                content: response_content.clone(),
+                if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
                    let provider = self.providers.get(None)?;
                    if let Some(cache_config) = match provider.name() {
                        "anthropic" => self.config.providers.anthropic.as_ref()
                            .and_then(|c| c.cache_config.as_ref())
                            .and_then(|config| Self::parse_cache_control(config)),
                        _ => None,
                    } {
                        Message::with_cache_control_validated(MessageRole::Assistant, response_content.clone(), cache_config, provider)
                    } else {
                        Message::new(MessageRole::Assistant, response_content.clone())
                    }
                } else {
                    Message::new(MessageRole::Assistant, response_content.clone())
                }
            };
            self.context_window.add_message(assistant_message);
        } else {
@@ -1783,17 +1823,11 @@ If you can complete it with 1-2 tool calls, skip TODO.
            .join("\n\n");
        let summary_messages = vec![
-            Message {
+            Message::new(MessageRole::System, "You are a helpful assistant that creates concise summaries.".to_string()),
-                role: MessageRole::System,
+            Message::new(MessageRole::User, format!(
                content: "You are a helpful assistant that creates concise summaries.".to_string(),
            },
            Message {
                role: MessageRole::User,
                content: format!(
                    "Based on this conversation history, {}\n\nConversation:\n{}",
                    summary_prompt, conversation_text
-                ),
+                )),
            },
        ];
        let provider = self.providers.get(None)?;
@@ -2776,18 +2810,11 @@ If you can complete it with 1-2 tool calls, skip TODO.
                .join("\n\n");
            let summary_messages = vec![
-                Message {
+                Message::new(MessageRole::System, "You are a helpful assistant that creates concise summaries.".to_string()),
-                    role: MessageRole::System,
+                Message::new(MessageRole::User, format!(
                    content: "You are a helpful assistant that creates concise summaries."
                        .to_string(),
                },
                Message {
                    role: MessageRole::User,
                    content: format!(
                        "Based on this conversation history, {}\n\nConversation:\n{}",
                        summary_prompt, conversation_text
-                    ),
+                )),
                },
            ];
            let provider = self.providers.get(None)?;
@@ -3273,29 +3300,20 @@ If you can complete it with 1-2 tool calls, skip TODO.
                            // Add the tool call and result to the context window using RAW unfiltered content
                            // This ensures the log file contains the true raw content including JSON tool calls
                            let tool_message = if !raw_content_for_log.trim().is_empty() {
-                                Message {
+                                Message::new(MessageRole::Assistant, format!(
                                    role: MessageRole::Assistant,
                                    content: format!(
                                        "{}\n\n{{\"tool\": \"{}\", \"args\": {}}}",
                                        raw_content_for_log.trim(),
                                        tool_call.tool,
                                        tool_call.args
-                                    ),
+                                ))
                                }
                            } else {
                                // No text content before tool call, just include the tool call
-                                Message {
+                                Message::new(MessageRole::Assistant, format!(
                                    role: MessageRole::Assistant,
                                    content: format!(
                                        "{{\"tool\": \"{}\", \"args\": {}}}",
                                        tool_call.tool, tool_call.args
-                                    ),
+                                ))
                                }
                            };
                            let result_message = Message {
                                role: MessageRole::User,
                                content: format!("Tool result: {}", tool_result),
                            };
                            let result_message = Message::new(MessageRole::User, format!("Tool result: {}", tool_result));
                            self.context_window.add_message(tool_message);
                            self.context_window.add_message(result_message);
@@ -3304,7 +3322,8 @@ If you can complete it with 1-2 tool calls, skip TODO.
                            request.messages = self.context_window.conversation_history.clone();
                            // Ensure tools are included for native providers in subsequent iterations
-                            if provider.has_native_tool_calling() {
+                            let provider_for_tools = self.providers.get(None)?;
                            if provider_for_tools.has_native_tool_calling() {
                                request.tools = Some(Self::create_tool_definitions(
                                    self.config.webdriver.enabled,
                                    self.config.macax.enabled,
@@ -3635,9 +3654,23 @@ If you can complete it with 1-2 tool calls, skip TODO.
                        .replace("<</SYS>>", "");
                    if !raw_clean.trim().is_empty() {
-                        let assistant_message = Message {
+                        let assistant_message = {
-                            role: MessageRole::Assistant,
+                            // Check if we should use cache control (every 10 tool calls)
-                            content: raw_clean,
+                            if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
                                let provider = self.providers.get(None)?;
                                if let Some(cache_config) = match provider.name() {
                                    "anthropic" => self.config.providers.anthropic.as_ref()
                                        .and_then(|c| c.cache_config.as_ref())
                                        .and_then(|config| Self::parse_cache_control(config)),
                                    _ => None,
                                } {
                                    Message::with_cache_control_validated(MessageRole::Assistant, raw_clean, cache_config, provider)
                                } else {
                                    Message::new(MessageRole::Assistant, raw_clean)
                                }
                            } else {
                                Message::new(MessageRole::Assistant, raw_clean)
                            }
                        };
                        self.context_window.add_message(assistant_message);
                    }
@@ -3679,7 +3712,10 @@ If you can complete it with 1-2 tool calls, skip TODO.
        Ok(TaskResult::new(final_response, self.context_window.clone()))
    }
-    pub async fn execute_tool(&self, tool_call: &ToolCall) -> Result<String> {
+    pub async fn execute_tool(&mut self, tool_call: &ToolCall) -> Result<String> {
        // Increment tool call count
        self.tool_call_count += 1;
        debug!("=== EXECUTING TOOL ===");
        debug!("Tool name: {}", tool_call.tool);
        debug!("Tool args (raw): {:?}", tool_call.args);
--- a/crates/g3-core/src/task_result_comprehensive_tests.rs
+++ b/crates/g3-core/src/task_result_comprehensive_tests.rs
@@ -6,14 +6,10 @@ use std::sync::Arc;
 fn test_task_result_basic_functionality() {
    // Create a context window with some messages
    let mut context = ContextWindow::new(10000);
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::User, "Test message 1".to_string())
-        role: MessageRole::User,
+    );
-        content: "Test message 1".to_string(),
+    context.add_message(Message::new(MessageRole::Assistant, "Response 1".to_string())
-    });
+    );
    context.add_message(Message {
        role: MessageRole::Assistant,
        content: "Response 1".to_string(),
    });
    // Create a TaskResult
    let response = "This is the response\n\nFinal output block".to_string();
@@ -100,10 +96,7 @@ fn test_context_window_preservation() {
    // Add some messages
    for i in 0..5 {
-        context.add_message(Message {
+        context.add_message(Message::new(if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant }, format!("Message {}", i)));
            role: if i % 2 == 0 { MessageRole::User } else { MessageRole::Assistant },
            content: format!("Message {}", i),
        });
    }
    // Create TaskResult
--- a/crates/g3-core/tests/test_context_thinning.rs
+++ b/crates/g3-core/tests/test_context_thinning.rs
@@ -46,10 +46,10 @@ fn test_thin_context_basic() {
    // Add some messages to the first third
    for i in 0..9 {
        if i % 2 == 0 {
-            context.add_message(Message {
+            context.add_message(Message::new(
-                role: MessageRole::Assistant,
+                MessageRole::Assistant,
-                content: format!("Assistant message {}", i),
+                format!("Assistant message {}", i),
-            });
+            ));
        } else {
            // Add tool results with varying sizes
            let content = if i == 1 {
@@ -63,10 +63,10 @@ fn test_thin_context_basic() {
                format!("Tool result: small result {}", i)
            };
-            context.add_message(Message {
+            context.add_message(Message::new(
-                role: MessageRole::User,
+                MessageRole::User,
                content,
-            });
+            ));
        }
    }
@@ -98,10 +98,10 @@ fn test_thin_write_file_tool_calls() {
    let mut context = ContextWindow::new(10000);
    // Add some messages including a write_file tool call with large content
-    context.add_message(Message {
+    context.add_message(Message::new(
-        role: MessageRole::User,
+        MessageRole::User,
-        content: "Please create a large file".to_string(),
+        "Please create a large file".to_string(),
-    });
+    ));
    // Add an assistant message with a write_file tool call containing large content
    let large_content = "x".repeat(1500);
@@ -109,22 +109,22 @@ fn test_thin_write_file_tool_calls() {
        r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
        large_content
    );
-    context.add_message(Message {
+    context.add_message(Message::new(
-        role: MessageRole::Assistant,
+        MessageRole::Assistant,
-        content: format!("I'll create that file.\n\n{}", tool_call_json),
+        format!("I'll create that file.\n\n{}", tool_call_json),
-    });
+    ));
-    context.add_message(Message {
+    context.add_message(Message::new(
-        role: MessageRole::User,
+        MessageRole::User,
-        content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
+        "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
-    });
+    ));
    // Add more messages to ensure we have enough for "first third" logic
    for i in 0..6 {
-        context.add_message(Message {
+        context.add_message(Message::new(
-            role: MessageRole::Assistant,
+            MessageRole::Assistant,
-            content: format!("Response {}", i),
+            format!("Response {}", i),
-        });
+        ));
    }
    // Trigger thinning at 50%
@@ -154,10 +154,10 @@ fn test_thin_str_replace_tool_calls() {
    let mut context = ContextWindow::new(10000);
    // Add some messages including a str_replace tool call with large diff
-    context.add_message(Message {
+    context.add_message(Message::new(
-        role: MessageRole::User,
+        MessageRole::User,
-        content: "Please update the file".to_string(),
+        "Please update the file".to_string(),
-    });
+    ));
    // Add an assistant message with a str_replace tool call containing large diff
    let large_diff = format!("--- old\n{}\n+++ new\n{}", "-old line\n".repeat(100), "+new line\n".repeat(100));
@@ -165,22 +165,22 @@ fn test_thin_str_replace_tool_calls() {
        r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
        large_diff.replace('\n', "\\n")
    );
-    context.add_message(Message {
+    context.add_message(Message::new(
-        role: MessageRole::Assistant,
+        MessageRole::Assistant,
-        content: format!("I'll update that file.\n\n{}", tool_call_json),
+        format!("I'll update that file.\n\n{}", tool_call_json),
-    });
+    ));
-    context.add_message(Message {
+    context.add_message(Message::new(
-        role: MessageRole::User,
+        MessageRole::User,
-        content: "Tool result: ✅ applied unified diff".to_string(),
+        "Tool result: ✅ applied unified diff".to_string(),
-    });
+    ));
    // Add more messages to ensure we have enough for "first third" logic
    for i in 0..6 {
-        context.add_message(Message {
+        context.add_message(Message::new(
-            role: MessageRole::Assistant,
+            MessageRole::Assistant,
-            content: format!("Response {}", i),
+            format!("Response {}", i),
-        });
+        ));
    }
    // Trigger thinning at 50%
@@ -212,10 +212,10 @@ fn test_thin_context_no_large_results() {
    // Add only small messages
    for i in 0..9 {
-        context.add_message(Message {
+        context.add_message(Message::new(
-            role: MessageRole::User,
+            MessageRole::User,
-            content: format!("Tool result: small {}", i),
+            format!("Tool result: small {}", i),
-        });
+        ));
    }
    context.used_tokens = 5000;
@@ -244,7 +244,7 @@ fn test_thin_context_only_affects_first_third() {
            MessageRole::Assistant
        };
-        context.add_message(Message { role, content });
+        context.add_message(Message::new(role, content));
    }
    context.used_tokens = 5000;
--- a/crates/g3-core/tests/test_todo_context_thinning.rs
+++ b/crates/g3-core/tests/test_todo_context_thinning.rs
@@ -8,27 +8,18 @@ fn test_todo_read_results_not_thinned() {
    let mut context = ContextWindow::new(10000);
    // Add a todo_read tool call
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));
        role: MessageRole::Assistant,
        content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
    });
    // Add a large TODO result (> 500 chars)
    let large_todo_result = format!(
        "Tool result: 📝 TODO list:\n{}",
        "- [ ] Task with long description\n".repeat(50)
    );
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
        role: MessageRole::User,
        content: large_todo_result.clone(),
    });
    // Add more messages to ensure we have enough for "first third" logic
    for i in 0..6 {
-        context.add_message(Message {
+        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
            role: MessageRole::Assistant,
            content: format!("Response {}", i),
        });
    }
    // Trigger thinning at 50%
@@ -65,27 +56,18 @@ fn test_todo_write_results_not_thinned() {
    // Add a todo_write tool call
    let large_content = "- [ ] Task\n".repeat(100);
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::Assistant, format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content)));
        role: MessageRole::Assistant,
        content: format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content),
    });
    // Add a large TODO write result
    let large_todo_result = format!(
        "Tool result: ✅ TODO list updated ({} chars) and saved to todo.g3.md",
        large_content.len()
    );
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
        role: MessageRole::User,
        content: large_todo_result.clone(),
    });
    // Add more messages
    for i in 0..6 {
-        context.add_message(Message {
+        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
            role: MessageRole::Assistant,
            content: format!("Response {}", i),
        });
    }
    // Trigger thinning at 50%
@@ -119,24 +101,15 @@ fn test_non_todo_results_still_thinned() {
    let mut context = ContextWindow::new(10000);
    // Add a non-TODO tool call (e.g., read_file)
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string()));
        role: MessageRole::Assistant,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
    });
    // Add a large read_file result (> 500 chars)
    let large_result = format!("Tool result: {}", "x".repeat(1500));
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::User, large_result));
        role: MessageRole::User,
        content: large_result,
    });
    // Add more messages
    for i in 0..6 {
-        context.add_message(Message {
+        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
            role: MessageRole::Assistant,
            content: format!("Response {}", i),
        });
    }
    // Trigger thinning at 50%
@@ -172,27 +145,18 @@ fn test_todo_read_with_spaces_in_tool_name() {
    let mut context = ContextWindow::new(10000);
    // Add a todo_read tool call with spaces (JSON formatting variation)
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::Assistant, r#"{"tool": "todo_read", "args": {}}"#.to_string()));
        role: MessageRole::Assistant,
        content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
    });
    // Add a large TODO result
    let large_todo_result = format!(
        "Tool result: 📝 TODO list:\n{}",
        "- [ ] Task\n".repeat(50)
    );
-    context.add_message(Message {
+    context.add_message(Message::new(MessageRole::User, large_todo_result.clone()));
        role: MessageRole::User,
        content: large_todo_result.clone(),
    });
    // Add more messages
    for i in 0..6 {
-        context.add_message(Message {
+        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)))
            role: MessageRole::Assistant,
            content: format!("Response {}", i),
        });
    }
    // Trigger thinning
--- a/crates/g3-core/tests/test_todo_persistence.rs
+++ b/crates/g3-core/tests/test_todo_persistence.rs
@@ -27,7 +27,7 @@ fn get_todo_path(temp_dir: &TempDir) -> PathBuf {
 #[serial]
 async fn test_todo_write_creates_file() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
    // Initially, todo.g3.md should not exist
@@ -67,7 +67,7 @@ async fn test_todo_read_from_file() {
    fs::write(&todo_path, test_content).unwrap();
    // Create agent (should load from file)
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    // Create a tool call to read TODO
    let tool_call = g3_core::ToolCall {
@@ -88,7 +88,7 @@ async fn test_todo_read_from_file() {
 #[serial]
 async fn test_todo_read_empty_file() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    // Create a tool call to read TODO (file doesn't exist)
    let tool_call = g3_core::ToolCall {
@@ -111,7 +111,7 @@ async fn test_todo_persistence_across_agents() {
    // Agent 1: Write TODO
    {
-        let agent = create_test_agent_in_dir(&temp_dir).await;
+        let mut agent = create_test_agent_in_dir(&temp_dir).await;
        let tool_call = g3_core::ToolCall {
            tool: "todo_write".to_string(),
            args: serde_json::json!({
@@ -126,7 +126,7 @@ async fn test_todo_persistence_across_agents() {
    // Agent 2: Read TODO (new agent instance)
    {
-        let agent = create_test_agent_in_dir(&temp_dir).await;
+        let mut agent = create_test_agent_in_dir(&temp_dir).await;
        let tool_call = g3_core::ToolCall {
            tool: "todo_read".to_string(),
            args: serde_json::json!({}),
@@ -143,7 +143,7 @@ async fn test_todo_persistence_across_agents() {
 #[serial]
 async fn test_todo_update_preserves_file() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
    // Write initial TODO
@@ -173,7 +173,7 @@ async fn test_todo_update_preserves_file() {
 #[serial]
 async fn test_todo_handles_large_content() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
    // Create a large TODO (but under the 50k limit)
@@ -202,7 +202,7 @@ async fn test_todo_handles_large_content() {
 #[serial]
 async fn test_todo_respects_size_limit() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    // Create content that exceeds the default 50k limit
    let huge_content = "x".repeat(60_000);
@@ -232,7 +232,7 @@ async fn test_todo_agent_initialization_loads_file() {
    fs::write(&todo_path, initial_content).unwrap();
    // Create agent - should load the file during initialization
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    // Read TODO - should return the pre-existing content
    let tool_call = g3_core::ToolCall {
@@ -248,7 +248,7 @@ async fn test_todo_agent_initialization_loads_file() {
 #[serial]
 async fn test_todo_handles_unicode_content() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
    // Create TODO with unicode characters
@@ -283,7 +283,7 @@ async fn test_todo_handles_unicode_content() {
 #[serial]
 async fn test_todo_empty_content_creates_empty_file() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    let todo_path = get_todo_path(&temp_dir);
    // Write empty TODO
@@ -306,7 +306,7 @@ async fn test_todo_empty_content_creates_empty_file() {
 #[serial]
 async fn test_todo_whitespace_only_content() {
    let temp_dir = TempDir::new().unwrap();
-    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let mut agent = create_test_agent_in_dir(&temp_dir).await;
    // Write whitespace-only TODO
    let tool_call = g3_core::ToolCall {
--- a/crates/g3-ensembles/Cargo.toml
+++ b/crates/g3-ensembles/Cargo.toml
@@ -1,20 +0,0 @@
 [package]
 name = "g3-ensembles"
 version = "0.1.0"
 edition = "2021"
 description = "Multi-agent ensemble functionality for G3"
 [dependencies]
 g3-core = { path = "../g3-core" }
 g3-config = { path = "../g3-config" }
 clap = { workspace = true }
 tokio = { workspace = true }
 anyhow = { workspace = true }
 tracing = { workspace = true }
 serde = { workspace = true }
 serde_json = { workspace = true }
 chrono = { version = "0.4", features = ["serde"] }
 uuid = { workspace = true }
 [dev-dependencies]
 tempfile = "3.8"
--- a/crates/g3-ensembles/TESTING.md
+++ b/crates/g3-ensembles/TESTING.md
@@ -1,422 +0,0 @@
 # G3 Ensembles Testing Documentation
 This document describes the comprehensive test suite for the g3-ensembles crate (Flock Mode).
 ## Test Coverage
 ### Unit Tests (`src/tests.rs`)
 Unit tests cover the core data structures and logic:
 #### Status Module Tests
 1. **`test_segment_state_display`**
   - Verifies that `SegmentState` enum displays correctly with emojis
   - Tests all states: Pending, Running, Completed, Failed, Cancelled
 2. **`test_flock_status_creation`**
   - Tests creation of `FlockStatus` with correct initial values
   - Verifies session ID, segment count, and zero metrics
 3. **`test_segment_status_update`**
   - Tests updating a single segment's status
   - Verifies metrics are correctly aggregated
 4. **`test_multiple_segment_updates`**
   - Tests updating multiple segments
   - Verifies aggregate metrics (tokens, tool calls, errors) are summed correctly
 5. **`test_is_complete`**
   - Tests the completion detection logic
   - Verifies that flock is only complete when all segments are in terminal states
   - Tests various scenarios: no segments, partial completion, full completion
 6. **`test_count_by_state`**
   - Tests counting segments by their state
   - Verifies correct counts for each state type
 7. **`test_status_serialization`**
   - Tests JSON serialization and deserialization
   - Verifies round-trip conversion preserves all data
 8. **`test_report_generation`**
   - Tests the comprehensive report generation
   - Verifies all expected sections are present
   - Checks that metrics are correctly displayed
 **Run unit tests:**
 ```bash
 cargo test -p g3-ensembles --lib
 ```
 ### Integration Tests (`tests/integration_tests.rs`)
 Integration tests verify end-to-end functionality with real file system and git operations:
 #### Configuration Tests
 1. **`test_flock_config_validation`**
   - Tests validation of project directory requirements
   - Verifies error messages for:
     - Non-existent directory
     - Non-git repository
     - Missing flock-requirements.md
   - Verifies successful creation with valid inputs
 2. **`test_flock_config_builder`**
   - Tests the builder pattern for `FlockConfig`
   - Verifies `with_max_turns()` and `with_g3_binary()` methods
 3. **`test_workspace_creation`**
   - Tests creation of `FlockMode` instance
   - Verifies project structure is valid
 #### Git Operations Tests
 4. **`test_git_clone_functionality`**
   - Tests git cloning of project repository
   - Verifies cloned repository structure:
     - `.git` directory exists
     - All files are present
     - Git history is preserved
 5. **`test_multiple_segment_clones`**
   - Tests cloning multiple segments (2 segments)
   - Verifies each segment is independent
   - Tests that modifications in one segment don't affect others
 6. **`test_git_repo_independence`**
   - Comprehensive test of segment independence
   - Creates commits in different segments
   - Verifies git histories diverge correctly
   - Ensures files in one segment don't appear in others
 #### Segment Management Tests
 7. **`test_segment_requirements_creation`**
   - Tests creation of `segment-requirements.md` files
   - Verifies content is written correctly
 8. **`test_requirements_file_content`**
   - Tests the structure of flock-requirements.md
   - Verifies content contains expected sections
 #### Status File Tests
 9. **`test_status_file_operations`**
   - Tests saving and loading `flock-status.json`
   - Verifies JSON serialization to file
   - Tests deserialization from file
 #### JSON Processing Tests
 10. **`test_json_extraction`**
    - Tests extraction of JSON arrays from text output
    - Verifies handling of various formats:
      - Plain JSON
      - JSON in markdown code blocks
      - JSON with surrounding text
      - Invalid input (no JSON)
 11. **`test_partition_json_parsing`**
    - Tests parsing of partition JSON structure
    - Verifies module names, requirements, and dependencies are extracted correctly
 **Run integration tests:**
 ```bash
 cargo test -p g3-ensembles --test integration_tests
 ```
 ### End-to-End Test Script (`scripts/test-flock-mode.sh`)
 A comprehensive bash script that tests the complete flock mode workflow:
 #### Test Scenarios
 1. **Project Creation**
   - Creates a temporary test project
   - Initializes git repository
   - Creates flock-requirements.md with realistic content
   - Makes initial commit
 2. **Project Structure Validation**
   - Verifies `.git` directory exists
   - Verifies `flock-requirements.md` exists
 3. **Git Operations**
   - Tests cloning project to segment directories
   - Verifies cloned repositories are valid
   - Tests git log to ensure history is preserved
 4. **Segment Independence**
   - Creates two segments
   - Modifies one segment
   - Verifies other segment is unaffected
 5. **Segment Requirements**
   - Creates `segment-requirements.md` in segments
   - Verifies content is written correctly
 6. **Status File Operations**
   - Creates `flock-status.json`
   - Validates JSON structure (if `jq` is available)
 **Run end-to-end test:**
 ```bash
 ./scripts/test-flock-mode.sh
 ```
 ## Test Results
 ### Current Status
 ✅ **All tests passing**
 - **Unit tests**: 8/8 passed
 - **Integration tests**: 11/11 passed
 - **End-to-end test**: All scenarios passed
 ### Test Execution Time
 - Unit tests: ~0.01s
 - Integration tests: ~0.35s (includes git operations)
 - End-to-end test: ~1-2s (includes cleanup)
 ## Running All Tests
 ### Run all tests for g3-ensembles:
 ```bash
 cargo test -p g3-ensembles
 ```
 ### Run with verbose output:
 ```bash
 cargo test -p g3-ensembles -- --nocapture
 ```
 ### Run specific test:
 ```bash
 cargo test -p g3-ensembles test_git_clone_functionality
 ```
 ### Run tests with coverage (requires cargo-tarpaulin):
 ```bash
 cargo tarpaulin -p g3-ensembles
 ```
 ## Test Helpers
 ### `create_test_project(name: &str) -> TempDir`
 Helper function in integration tests that creates a complete test project:
 - Initializes git repository
 - Configures git user
 - Creates flock-requirements.md with two modules
 - Creates README.md
 - Makes initial commit
 - Returns `TempDir` that auto-cleans on drop
 **Usage:**
 ```rust
 let project_dir = create_test_project("my-test");
 // Use project_dir.path() to access the directory
 // Automatically cleaned up when project_dir goes out of scope
 ```
 ### `extract_json_array(output: &str) -> Option<String>`
 Helper function that extracts JSON arrays from text output:
 - Finds first `[` and last `]`
 - Returns content between them
 - Returns `None` if no valid JSON array found
 ## Test Data
 ### Sample Requirements
 The test suite uses realistic requirements for a calculator project:
 **Module A: Core Library**
 - Arithmetic operations (add, sub, mul, div)
 - Error handling for division by zero
 - Unit tests
 - Documentation
 **Module B: CLI Application**
 - Command-line interface using clap
 - Subcommands for each operation
 - User-friendly output
 - Error handling
 This structure tests the partitioning logic with:
 - Clear module boundaries
 - Dependency relationship (CLI depends on Core)
 - Realistic implementation requirements
 ## Continuous Integration
 To integrate these tests into CI/CD:
 ### GitHub Actions Example
 ```yaml
 name: Test G3 Ensembles
 on: [push, pull_request]
 jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
      - name: Run unit tests
        run: cargo test -p g3-ensembles --lib
      - name: Run integration tests
        run: cargo test -p g3-ensembles --test integration_tests
      - name: Run end-to-end test
        run: ./scripts/test-flock-mode.sh
 ```
 ## Test Coverage Goals
 ### Current Coverage
 - ✅ Status data structures: 100%
 - ✅ Configuration validation: 100%
 - ✅ Git operations: 100%
 - ✅ Segment independence: 100%
 - ✅ JSON processing: 100%
 - ⚠️  Full flock execution: Requires LLM access (tested manually)
 ### Future Test Additions
 1. **Mock LLM Tests**
   - Mock the partitioning agent response
   - Test full flock workflow without real LLM calls
 2. **Performance Tests**
   - Test with large numbers of segments (10+)
   - Measure memory usage
   - Test concurrent segment execution
 3. **Error Handling Tests**
   - Test behavior when git operations fail
   - Test behavior when segments fail
   - Test recovery scenarios
 4. **Edge Cases**
   - Empty requirements file
   - Single segment (degenerate case)
   - Very large requirements file
   - Binary files in project
 ## Debugging Tests
 ### Enable debug logging:
 ```bash
 RUST_LOG=debug cargo test -p g3-ensembles -- --nocapture
 ```
 ### Keep test artifacts:
 ```bash
 # Modify test to not cleanup
 # Or inspect TEST_DIR before cleanup in end-to-end test
 export TEST_DIR=/tmp/my-test
 ./scripts/test-flock-mode.sh
 ls -la $TEST_DIR
 ```
 ### Run single test with backtrace:
 ```bash
 RUST_BACKTRACE=1 cargo test -p g3-ensembles test_git_clone_functionality -- --nocapture
 ```
 ## Contributing Tests
 When adding new features to g3-ensembles:
 1. **Add unit tests** for new data structures and logic
 2. **Add integration tests** for new file/git operations
 3. **Update end-to-end test** if workflow changes
 4. **Document tests** in this file
 5. **Ensure all tests pass** before submitting PR
 ### Test Naming Convention
 - Unit tests: `test_<functionality>`
 - Integration tests: `test_<feature>_<scenario>`
 - Use descriptive names that explain what is being tested
 ### Test Structure
 ```rust
 #[test]
 fn test_feature_name() {
    // Arrange: Set up test data
    let data = create_test_data();
    // Act: Perform the operation
    let result = perform_operation(data);
    // Assert: Verify the result
    assert_eq!(result, expected_value);
    assert!(result.is_ok());
 }
 ```
 ## Troubleshooting
 ### Tests fail with "git not found"
 **Solution**: Install git:
 ```bash
 # macOS
 brew install git
 # Ubuntu/Debian
 sudo apt-get install git
 # Windows
 choco install git
 ```
 ### Tests fail with permission errors
 **Solution**: Ensure test directories are writable:
 ```bash
 chmod -R u+w /tmp
 ```
 ### Integration tests are slow
 **Cause**: Git operations and file I/O take time
 **Solution**: Run only unit tests for quick feedback:
 ```bash
 cargo test -p g3-ensembles --lib
 ```
 ### Test artifacts not cleaned up
 **Cause**: Test panicked before cleanup
 **Solution**: Manually clean temp directories:
 ```bash
 rm -rf /tmp/tmp.*
 ```
 ## Summary
 The g3-ensembles test suite provides comprehensive coverage of:
 - ✅ Core data structures and logic
 - ✅ Configuration validation
 - ✅ Git repository operations
 - ✅ Segment independence
 - ✅ Status tracking and reporting
 - ✅ JSON processing
 - ✅ End-to-end workflow
 All tests are automated, fast, and reliable. The test suite ensures that flock mode works correctly across different scenarios and edge cases.
--- a/crates/g3-ensembles/src/flock.rs
+++ b/crates/g3-ensembles/src/flock.rs
@@ -1,647 +0,0 @@
 //! Flock mode implementation - parallel multi-agent development
 use anyhow::{Context, Result};
 use chrono::Utc;
 use g3_config::Config;
 use std::path::{Path, PathBuf};
 use std::process::Stdio;
 use tokio::io::{AsyncBufReadExt, BufReader};
 use tokio::process::Command;
 use tracing::{debug, error, info, warn};
 use uuid::Uuid;
 use crate::status::{FlockStatus, SegmentState, SegmentStatus};
 /// Configuration for flock mode
 #[derive(Debug, Clone)]
 pub struct FlockConfig {
    /// Project directory (must be a git repo with flock-requirements.md)
    pub project_dir: PathBuf,
    /// Flock workspace directory where segments will be created
    pub flock_workspace: PathBuf,
    /// Number of segments to partition work into
    pub num_segments: usize,
    /// Maximum turns per segment (for autonomous mode)
    pub max_turns: usize,
    /// G3 configuration to use for agents
    pub g3_config: Config,
    /// Path to g3 binary (defaults to current executable)
    pub g3_binary: Option<PathBuf>,
 }
 impl FlockConfig {
    /// Create a new flock configuration
    pub fn new(
        project_dir: PathBuf,
        flock_workspace: PathBuf,
        num_segments: usize,
    ) -> Result<Self> {
        // Validate project directory
        if !project_dir.exists() {
            anyhow::bail!("Project directory does not exist: {}", project_dir.display());
        }
        // Check if it's a git repo
        if !project_dir.join(".git").exists() {
            anyhow::bail!("Project directory must be a git repository: {}", project_dir.display());
        }
        // Check for flock-requirements.md
        let requirements_path = project_dir.join("flock-requirements.md");
        if !requirements_path.exists() {
            anyhow::bail!(
                "Project directory must contain flock-requirements.md: {}",
                project_dir.display()
            );
        }
        // Load default config
        let g3_config = Config::load(None)?;
        Ok(Self {
            project_dir,
            flock_workspace,
            num_segments,
            max_turns: 5, // Default
            g3_config,
            g3_binary: None,
        })
    }
    /// Set maximum turns per segment
    pub fn with_max_turns(mut self, max_turns: usize) -> Self {
        self.max_turns = max_turns;
        self
    }
    /// Set custom g3 binary path
    pub fn with_g3_binary(mut self, binary: PathBuf) -> Self {
        self.g3_binary = Some(binary);
        self
    }
    /// Set custom g3 config
    pub fn with_config(mut self, config: Config) -> Self {
        self.g3_config = config;
        self
    }
 }
 /// Flock mode orchestrator
 pub struct FlockMode {
    config: FlockConfig,
    status: FlockStatus,
    session_id: String,
 }
 impl FlockMode {
    /// Create a new flock mode instance
    pub fn new(config: FlockConfig) -> Result<Self> {
        let session_id = Uuid::new_v4().to_string();
        let status = FlockStatus::new(
            session_id.clone(),
            config.project_dir.clone(),
            config.flock_workspace.clone(),
            config.num_segments,
        );
        Ok(Self {
            config,
            status,
            session_id,
        })
    }
    /// Run flock mode
    pub async fn run(&mut self) -> Result<()> {
        info!("Starting flock mode with {} segments", self.config.num_segments);
        // Step 1: Partition requirements
        println!("\n🧠 Step 1: Partitioning requirements into {} segments...", self.config.num_segments);
        let partitions = self.partition_requirements().await?;
        // Step 2: Create segment workspaces
        println!("\n📁 Step 2: Creating segment workspaces...");
        self.create_segment_workspaces(&partitions).await?;
        // Step 3: Run segments in parallel
        println!("\n🚀 Step 3: Running {} segments in parallel...", self.config.num_segments);
        self.run_segments_parallel().await?;
        // Step 4: Generate final report
        println!("\n📊 Step 4: Generating final report...");
        self.status.completed_at = Some(Utc::now());
        self.save_status()?;
        let report = self.status.generate_report();
        println!("{}", report);
        Ok(())
    }
    /// Partition requirements using an AI agent
    async fn partition_requirements(&mut self) -> Result<Vec<String>> {
        let requirements_path = self.config.project_dir.join("flock-requirements.md");
        let requirements_content = std::fs::read_to_string(&requirements_path)
            .context("Failed to read flock-requirements.md")?;
        // Create a temporary workspace for the partitioning agent
        let partition_workspace = self.config.flock_workspace.join("_partition");
        std::fs::create_dir_all(&partition_workspace)?;
        // Create the partitioning prompt
        let partition_prompt = format!(
            "You are a software architect tasked with partitioning project requirements into {} logical, \
            largely non-overlapping modules that can grow into separate architectural components \
            (e.g., crates, services, or packages).\n\n\
            REQUIREMENTS:\n{}\n\n\
            INSTRUCTIONS:\n\
            1. Analyze the requirements carefully\n\
            2. Identify {} distinct architectural modules that:\n\
               - Have minimal overlap and dependencies\n\
               - Can be developed largely independently\n\
               - Represent logical architectural boundaries\n\
               - Could eventually become separate crates or services\n\
            3. For each module, provide:\n\
               - A clear module name\n\
               - The specific requirements that belong to this module\n\
               - Any dependencies on other modules\n\n\
            4. Use the final_output tool to provide your partitioning as a JSON array of objects, where each object has:\n\
               - \"module_name\": string\n\
               - \"requirements\": string (the requirements text for this module)\n\
               - \"dependencies\": array of strings (names of other modules this depends on)\n\n\
            Example format:\n\
            ```json\n\
            [\n\
              {{\n\
                \"module_name\": \"core-engine\",\n\
                \"requirements\": \"Implement the core processing engine...\",\n\
                \"dependencies\": []\n\
              }},\n\
              {{\n\
                \"module_name\": \"api-server\",\n\
                \"requirements\": \"Create REST API endpoints...\",\n\
                \"dependencies\": [\"core-engine\"]\n\
              }}\n\
            ]\n\
            ```\n\n\
            Be thoughtful and strategic in your partitioning. The goal is to enable parallel development.",
            self.config.num_segments,
            requirements_content,
            self.config.num_segments
        );
        // Get g3 binary path
        let g3_binary = self.get_g3_binary()?;
        // Run g3 in single-shot mode to partition requirements
        println!("   Analyzing requirements and creating partitions...");
        let output = Command::new(&g3_binary)
            .arg("--workspace")
            .arg(&partition_workspace)
            .arg("--quiet") // Disable logging for partitioning agent
            .arg(&partition_prompt)
            .output()
            .await
            .context("Failed to run g3 for partitioning")?;
        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            anyhow::bail!("Partitioning agent failed: {}", stderr);
        }
        let stdout = String::from_utf8_lossy(&output.stdout);
        debug!("Partitioning agent output: {}", stdout);
        // Extract JSON from the output
        let partitions_json = self.extract_json_from_output(&stdout)
            .context("Failed to extract partition JSON from agent output")?;
        // Parse the partitions
        let partitions: Vec<serde_json::Value> = serde_json::from_str(&partitions_json)
            .context("Failed to parse partition JSON")?;
        if partitions.len() != self.config.num_segments {
            warn!(
                "Expected {} partitions but got {}. Adjusting...",
                self.config.num_segments,
                partitions.len()
            );
        }
        // Extract requirements text from each partition
        let mut partition_texts = Vec::new();
        for (i, partition) in partitions.iter().enumerate() {
            let default_name = format!("module-{}", i + 1);
            let module_name = partition["module_name"]
                .as_str()
                .unwrap_or(&default_name);
            let requirements = partition["requirements"]
                .as_str()
                .context("Missing requirements field in partition")?;
            let dependencies = partition["dependencies"]
                .as_array()
                .map(|arr| {
                    arr.iter()
                        .filter_map(|v| v.as_str())
                        .collect::<Vec<_>>()
                        .join(", ")
                })
                .unwrap_or_default();
            let partition_text = format!(
                "# Module: {}\n\n## Dependencies\n{}\n\n## Requirements\n\n{}",
                module_name,
                if dependencies.is_empty() {
                    "None".to_string()
                } else {
                    dependencies
                },
                requirements
            );
            partition_texts.push(partition_text);
            println!("   ✓ Created partition {}: {}", i + 1, module_name);
        }
        Ok(partition_texts)
    }
    /// Extract JSON from agent output (looks for JSON array in output)
    fn extract_json_from_output(&self, output: &str) -> Result<String> {
        // Look for JSON array in the output
        // Try to find content between [ and ]
        if let Some(start) = output.find('[') {
            if let Some(end) = output.rfind(']') {
                if end > start {
                    return Ok(output[start..=end].to_string());
                }
            }
        }
        // If we can't find JSON array markers, try to parse the whole output
        anyhow::bail!("Could not find JSON array in agent output")
    }
    /// Create segment workspaces by copying project directory
    async fn create_segment_workspaces(&mut self, partitions: &[String]) -> Result<()> {
        // Ensure flock workspace exists
        std::fs::create_dir_all(&self.config.flock_workspace)?;
        for (i, partition) in partitions.iter().enumerate() {
            let segment_id = i + 1;
            let segment_dir = self.config.flock_workspace.join(format!("segment-{}", segment_id));
            println!("   Creating segment {} workspace...", segment_id);
            // Copy project directory to segment directory
            self.copy_git_repo(&self.config.project_dir, &segment_dir)
                .await
                .context(format!("Failed to copy project to segment {}", segment_id))?;
            // Write segment-requirements.md
            let requirements_path = segment_dir.join("segment-requirements.md");
            std::fs::write(&requirements_path, partition)
                .context(format!("Failed to write requirements for segment {}", segment_id))?;
            println!("   ✓ Segment {} workspace ready at {}", segment_id, segment_dir.display());
        }
        Ok(())
    }
    /// Copy a git repository to a new location
    async fn copy_git_repo(&self, source: &Path, dest: &Path) -> Result<()> {
        // Use git clone for efficient copying
        let output = Command::new("git")
            .arg("clone")
            .arg(source)
            .arg(dest)
            .output()
            .await
            .context("Failed to run git clone")?;
        if !output.status.success() {
            let stderr = String::from_utf8_lossy(&output.stderr);
            anyhow::bail!("Git clone failed: {}", stderr);
        }
        Ok(())
    }
    /// Run all segments in parallel
    async fn run_segments_parallel(&mut self) -> Result<()> {
        let mut handles = Vec::new();
        for segment_id in 1..=self.config.num_segments {
            let segment_dir = self.config.flock_workspace.join(format!("segment-{}", segment_id));
            let max_turns = self.config.max_turns;
            let g3_binary = self.get_g3_binary()?;
            let status_file = self.get_status_file_path();
            let session_id = self.session_id.clone();
            // Initialize segment status
            let segment_status = SegmentStatus {
                segment_id,
                workspace: segment_dir.clone(),
                state: SegmentState::Running,
                started_at: Utc::now(),
                completed_at: None,
                tokens_used: 0,
                tool_calls: 0,
                errors: 0,
                current_turn: 0,
                max_turns,
                last_message: Some("Starting...".to_string()),
                error_message: None,
            };
            self.status.update_segment(segment_id, segment_status);
            self.save_status()?;
            // Spawn a task for this segment
            let handle = tokio::spawn(async move {
                run_segment(
                    segment_id,
                    segment_dir,
                    max_turns,
                    g3_binary,
                    status_file,
                    session_id,
                )
                .await
            });
            handles.push((segment_id, handle));
        }
        // Wait for all segments to complete
        for (segment_id, handle) in handles {
            match handle.await {
                Ok(Ok(final_status)) => {
                    println!("\n✅ Segment {} completed", segment_id);
                    self.status.update_segment(segment_id, final_status);
                    self.save_status()?;
                }
                Ok(Err(e)) => {
                    error!("Segment {} failed: {}", segment_id, e);
                    let mut segment_status = self.status.segments.get(&segment_id).cloned()
                        .unwrap_or_else(|| SegmentStatus {
                            segment_id,
                            workspace: self.config.flock_workspace.join(format!("segment-{}", segment_id)),
                            state: SegmentState::Failed,
                            started_at: Utc::now(),
                            completed_at: Some(Utc::now()),
                            tokens_used: 0,
                            tool_calls: 0,
                            errors: 1,
                            current_turn: 0,
                            max_turns: self.config.max_turns,
                            last_message: None,
                            error_message: Some(e.to_string()),
                        });
                    segment_status.state = SegmentState::Failed;
                    segment_status.completed_at = Some(Utc::now());
                    segment_status.error_message = Some(e.to_string());
                    segment_status.errors += 1;
                    self.status.update_segment(segment_id, segment_status);
                    self.save_status()?;
                }
                Err(e) => {
                    error!("Segment {} task panicked: {}", segment_id, e);
                    let mut segment_status = self.status.segments.get(&segment_id).cloned()
                        .unwrap_or_else(|| SegmentStatus {
                            segment_id,
                            workspace: self.config.flock_workspace.join(format!("segment-{}", segment_id)),
                            state: SegmentState::Failed,
                            started_at: Utc::now(),
                            completed_at: Some(Utc::now()),
                            tokens_used: 0,
                            tool_calls: 0,
                            errors: 1,
                            current_turn: 0,
                            max_turns: self.config.max_turns,
                            last_message: None,
                            error_message: Some(format!("Task panicked: {}", e)),
                        });
                    segment_status.state = SegmentState::Failed;
                    segment_status.completed_at = Some(Utc::now());
                    segment_status.error_message = Some(format!("Task panicked: {}", e));
                    segment_status.errors += 1;
                    self.status.update_segment(segment_id, segment_status);
                    self.save_status()?;
                }
            }
        }
        Ok(())
    }
    /// Get the g3 binary path
    fn get_g3_binary(&self) -> Result<PathBuf> {
        if let Some(ref binary) = self.config.g3_binary {
            Ok(binary.clone())
        } else {
            // Use current executable
            std::env::current_exe().context("Failed to get current executable path")
        }
    }
    /// Get the status file path
    fn get_status_file_path(&self) -> PathBuf {
        self.config.flock_workspace.join("flock-status.json")
    }
    /// Save current status to file
    fn save_status(&self) -> Result<()> {
        let status_file = self.get_status_file_path();
        self.status.save_to_file(&status_file)
    }
 }
 /// Run a single segment worker
 async fn run_segment(
    segment_id: usize,
    segment_dir: PathBuf,
    max_turns: usize,
    g3_binary: PathBuf,
    status_file: PathBuf,
    session_id: String,
 ) -> Result<SegmentStatus> {
    info!("Starting segment {} in {}", segment_id, segment_dir.display());
    let mut segment_status = SegmentStatus {
        segment_id,
        workspace: segment_dir.clone(),
        state: SegmentState::Running,
        started_at: Utc::now(),
        completed_at: None,
        tokens_used: 0,
        tool_calls: 0,
        errors: 0,
        current_turn: 0,
        max_turns,
        last_message: Some("Starting autonomous mode...".to_string()),
        error_message: None,
    };
    // Run g3 in autonomous mode with segment-requirements.md
    let mut child = Command::new(&g3_binary)
        .arg("--workspace")
        .arg(&segment_dir)
        .arg("--autonomous")
        .arg("--max-turns")
        .arg(max_turns.to_string())
        .arg("--requirements")
        .arg(std::fs::read_to_string(segment_dir.join("segment-requirements.md"))?)
        .arg("--quiet") // Disable session logging for workers
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .context("Failed to spawn g3 process")?;
    // Stream output and update status
    let stdout = child.stdout.take().context("Failed to get stdout")?;
    let stderr = child.stderr.take().context("Failed to get stderr")?;
    let stdout_reader = BufReader::new(stdout);
    let stderr_reader = BufReader::new(stderr);
    let mut stdout_lines = stdout_reader.lines();
    let mut stderr_lines = stderr_reader.lines();
    // Read output and update status
    loop {
        tokio::select! {
            line = stdout_lines.next_line() => {
                match line {
                    Ok(Some(line)) => {
                        println!("[Segment {}] {}", segment_id, line);
                        // Parse output for status updates
                        if line.contains("TURN") {
                            // Extract turn number if possible
                            if let Some(turn_str) = line.split("TURN").nth(1) {
                                if let Ok(turn) = turn_str.trim().split('/').next().unwrap_or("0").parse::<usize>() {
                                    segment_status.current_turn = turn;
                                }
                            }
                        }
                        segment_status.last_message = Some(line);
                        update_status_file(&status_file, &session_id, segment_status.clone())?;
                    }
                    Ok(None) => break,
                    Err(e) => {
                        error!("Error reading stdout for segment {}: {}", segment_id, e);
                        break;
                    }
                }
            }
            line = stderr_lines.next_line() => {
                match line {
                    Ok(Some(line)) => {
                        eprintln!("[Segment {} ERROR] {}", segment_id, line);
                        segment_status.errors += 1;
                        update_status_file(&status_file, &session_id, segment_status.clone())?;
                    }
                    Ok(None) => break,
                    Err(e) => {
                        error!("Error reading stderr for segment {}: {}", segment_id, e);
                        break;
                    }
                }
            }
        }
    }
    // Wait for process to complete
    let status = child.wait().await.context("Failed to wait for g3 process")?;
    segment_status.completed_at = Some(Utc::now());
    if status.success() {
        segment_status.state = SegmentState::Completed;
        segment_status.last_message = Some("Completed successfully".to_string());
    } else {
        segment_status.state = SegmentState::Failed;
        segment_status.error_message = Some(format!("Process exited with status: {}", status));
        segment_status.errors += 1;
    }
    // Try to extract metrics from session log if available
    let log_dir = segment_dir.join("logs");
    if log_dir.exists() {
        if let Ok(entries) = std::fs::read_dir(&log_dir) {
            for entry in entries.flatten() {
                let path = entry.path();
                if path.extension().and_then(|s| s.to_str()) == Some("json") {
                    if let Ok(log_content) = std::fs::read_to_string(&path) {
                        if let Ok(log_json) = serde_json::from_str::<serde_json::Value>(&log_content) {
                            // Extract token usage
                            if let Some(context) = log_json.get("context_window") {
                                if let Some(cumulative) = context.get("cumulative_tokens") {
                                    if let Some(tokens) = cumulative.as_u64() {
                                        segment_status.tokens_used = tokens;
                                    }
                                }
                            }
                            // Count tool calls from conversation history
                            if let Some(context) = log_json.get("context_window") {
                                if let Some(history) = context.get("conversation_history") {
                                    if let Some(messages) = history.as_array() {
                                        let tool_call_count = messages
                                            .iter()
                                            .filter(|msg| {
                                                msg.get("role")
                                                    .and_then(|r| r.as_str())
                                                    == Some("tool")
                                            })
                                            .count();
                                        segment_status.tool_calls = tool_call_count as u64;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    update_status_file(&status_file, &session_id, segment_status.clone())?;
    Ok(segment_status)
 }
 /// Update the status file with new segment status
 fn update_status_file(
    status_file: &PathBuf,
    session_id: &str,
    segment_status: SegmentStatus,
 ) -> Result<()> {
    // Load existing status or create new one
    let mut flock_status = if status_file.exists() {
        FlockStatus::load_from_file(status_file)?
    } else {
        // This shouldn't happen, but handle it gracefully
        FlockStatus::new(
            session_id.to_string(),
            PathBuf::new(),
            PathBuf::new(),
            0,
        )
    };
    flock_status.update_segment(segment_status.segment_id, segment_status);
    flock_status.save_to_file(status_file)?;
    Ok(())
 }
--- a/crates/g3-ensembles/src/lib.rs
+++ b/crates/g3-ensembles/src/lib.rs
@@ -1,12 +0,0 @@
 //! G3 Ensembles - Multi-agent ensemble functionality
 //!
 //! This crate provides functionality for running multiple G3 agents in coordination,
 //! enabling parallel development across different architectural modules.
 pub mod flock;
 pub mod status;
 mod tests;
 /// Re-export main types for convenience
 pub use flock::{FlockConfig, FlockMode};
 pub use status::{FlockStatus, SegmentStatus};
--- a/crates/g3-ensembles/src/status.rs
+++ b/crates/g3-ensembles/src/status.rs
@@ -1,240 +0,0 @@
 //! Status tracking for flock mode
 use chrono::{DateTime, Utc};
 use serde::{Deserialize, Serialize};
 use std::collections::HashMap;
 use std::path::PathBuf;
 /// Status of an individual segment worker
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct SegmentStatus {
    /// Segment number
    pub segment_id: usize,
    /// Segment workspace directory
    pub workspace: PathBuf,
    /// Current state of the segment
    pub state: SegmentState,
    /// Start time
    pub started_at: DateTime<Utc>,
    /// Completion time (if finished)
    pub completed_at: Option<DateTime<Utc>>,
    /// Total tokens used
    pub tokens_used: u64,
    /// Number of tool calls made
    pub tool_calls: u64,
    /// Number of errors encountered
    pub errors: u64,
    /// Current turn number (for autonomous mode)
    pub current_turn: usize,
    /// Maximum turns allowed
    pub max_turns: usize,
    /// Last status message
    pub last_message: Option<String>,
    /// Error message (if failed)
    pub error_message: Option<String>,
 }
 /// State of a segment worker
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub enum SegmentState {
    /// Waiting to start
    Pending,
    /// Currently running
    Running,
    /// Completed successfully
    Completed,
    /// Failed with error
    Failed,
    /// Cancelled by user
    Cancelled,
 }
 impl std::fmt::Display for SegmentState {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            SegmentState::Pending => write!(f, "⏳ Pending"),
            SegmentState::Running => write!(f, "🔄 Running"),
            SegmentState::Completed => write!(f, "✅ Completed"),
            SegmentState::Failed => write!(f, "❌ Failed"),
            SegmentState::Cancelled => write!(f, "⚠️  Cancelled"),
        }
    }
 }
 /// Overall flock status
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FlockStatus {
    /// Flock session ID
    pub session_id: String,
    /// Project directory
    pub project_dir: PathBuf,
    /// Flock workspace directory
    pub flock_workspace: PathBuf,
    /// Number of segments
    pub num_segments: usize,
    /// Start time
    pub started_at: DateTime<Utc>,
    /// Completion time (if finished)
    pub completed_at: Option<DateTime<Utc>>,
    /// Status of each segment
    pub segments: HashMap<usize, SegmentStatus>,
    /// Total tokens used across all segments
    pub total_tokens: u64,
    /// Total tool calls across all segments
    pub total_tool_calls: u64,
    /// Total errors across all segments
    pub total_errors: u64,
 }
 impl FlockStatus {
    /// Create a new flock status
    pub fn new(
        session_id: String,
        project_dir: PathBuf,
        flock_workspace: PathBuf,
        num_segments: usize,
    ) -> Self {
        Self {
            session_id,
            project_dir,
            flock_workspace,
            num_segments,
            started_at: Utc::now(),
            completed_at: None,
            segments: HashMap::new(),
            total_tokens: 0,
            total_tool_calls: 0,
            total_errors: 0,
        }
    }
    /// Update segment status
    pub fn update_segment(&mut self, segment_id: usize, status: SegmentStatus) {
        self.segments.insert(segment_id, status);
        self.recalculate_totals();
    }
    /// Recalculate total metrics
    fn recalculate_totals(&mut self) {
        self.total_tokens = self.segments.values().map(|s| s.tokens_used).sum();
        self.total_tool_calls = self.segments.values().map(|s| s.tool_calls).sum();
        self.total_errors = self.segments.values().map(|s| s.errors).sum();
    }
    /// Check if all segments are complete
    pub fn is_complete(&self) -> bool {
        self.segments.len() == self.num_segments
            && self.segments.values().all(|s| {
                matches!(
                    s.state,
                    SegmentState::Completed | SegmentState::Failed | SegmentState::Cancelled
                )
            })
    }
    /// Get count of segments by state
    pub fn count_by_state(&self, state: SegmentState) -> usize {
        self.segments.values().filter(|s| s.state == state).count()
    }
    /// Save status to file
    pub fn save_to_file(&self, path: &PathBuf) -> anyhow::Result<()> {
        let json = serde_json::to_string_pretty(self)?;
        std::fs::write(path, json)?;
        Ok(())
    }
    /// Load status from file
    pub fn load_from_file(path: &PathBuf) -> anyhow::Result<Self> {
        let json = std::fs::read_to_string(path)?;
        let status = serde_json::from_str(&json)?;
        Ok(status)
    }
    /// Generate a summary report
    pub fn generate_report(&self) -> String {
        let mut report = String::new();
        report.push_str(&format!("\n{}", "=".repeat(80)));
        report.push_str(&format!("\n📊 FLOCK MODE SESSION REPORT"));
        report.push_str(&format!("\n{}", "=".repeat(80)));
        report.push_str(&format!("\n\n🆔 Session ID: {}", self.session_id));
        report.push_str(&format!("\n📁 Project: {}", self.project_dir.display()));
        report.push_str(&format!("\n🗂️  Workspace: {}", self.flock_workspace.display()));
        report.push_str(&format!("\n🔢 Segments: {}", self.num_segments));
        let duration = if let Some(completed) = self.completed_at {
            completed.signed_duration_since(self.started_at)
        } else {
            Utc::now().signed_duration_since(self.started_at)
        };
        report.push_str(&format!("\n⏱️  Duration: {:.2}s", duration.num_milliseconds() as f64 / 1000.0));
        // Segment status summary
        report.push_str(&format!("\n\n📈 Segment Status:"));
        report.push_str(&format!("\n   • Completed: {}", self.count_by_state(SegmentState::Completed)));
        report.push_str(&format!("\n   • Running: {}", self.count_by_state(SegmentState::Running)));
        report.push_str(&format!("\n   • Failed: {}", self.count_by_state(SegmentState::Failed)));
        report.push_str(&format!("\n   • Pending: {}", self.count_by_state(SegmentState::Pending)));
        report.push_str(&format!("\n   • Cancelled: {}", self.count_by_state(SegmentState::Cancelled)));
        // Metrics
        report.push_str(&format!("\n\n📊 Aggregate Metrics:"));
        report.push_str(&format!("\n   • Total Tokens: {}", self.total_tokens));
        report.push_str(&format!("\n   • Total Tool Calls: {}", self.total_tool_calls));
        report.push_str(&format!("\n   • Total Errors: {}", self.total_errors));
        // Per-segment details
        report.push_str(&format!("\n\n🔍 Segment Details:"));
        let mut segments: Vec<_> = self.segments.iter().collect();
        segments.sort_by_key(|(id, _)| *id);
        for (id, segment) in segments {
            report.push_str(&format!("\n\n   Segment {}:", id));
            report.push_str(&format!("\n      Status: {}", segment.state));
            report.push_str(&format!("\n      Workspace: {}", segment.workspace.display()));
            report.push_str(&format!("\n      Tokens: {}", segment.tokens_used));
            report.push_str(&format!("\n      Tool Calls: {}", segment.tool_calls));
            report.push_str(&format!("\n      Errors: {}", segment.errors));
            report.push_str(&format!("\n      Turn: {}/{}", segment.current_turn, segment.max_turns));
            if let Some(ref msg) = segment.last_message {
                report.push_str(&format!("\n      Last Message: {}", msg));
            }
            if let Some(ref err) = segment.error_message {
                report.push_str(&format!("\n      Error: {}", err));
            }
        }
        report.push_str(&format!("\n\n{}", "=".repeat(80)));
        report
    }
 }
--- a/crates/g3-ensembles/src/tests.rs
+++ b/crates/g3-ensembles/src/tests.rs
@@ -1,331 +0,0 @@
 //! Unit tests for g3-ensembles
 #[cfg(test)]
 mod tests {
    use crate::status::{FlockStatus, SegmentState, SegmentStatus};
    use chrono::Utc;
    use std::path::PathBuf;
    #[test]
    fn test_segment_state_display() {
        assert_eq!(format!("{}", SegmentState::Pending), "⏳ Pending");
        assert_eq!(format!("{}", SegmentState::Running), "🔄 Running");
        assert_eq!(format!("{}", SegmentState::Completed), "✅ Completed");
        assert_eq!(format!("{}", SegmentState::Failed), "❌ Failed");
        assert_eq!(format!("{}", SegmentState::Cancelled), "⚠️  Cancelled");
    }
    #[test]
    fn test_flock_status_creation() {
        let status = FlockStatus::new(
            "test-session".to_string(),
            PathBuf::from("/test/project"),
            PathBuf::from("/test/workspace"),
            3,
        );
        assert_eq!(status.session_id, "test-session");
        assert_eq!(status.num_segments, 3);
        assert_eq!(status.segments.len(), 0);
        assert_eq!(status.total_tokens, 0);
        assert_eq!(status.total_tool_calls, 0);
        assert_eq!(status.total_errors, 0);
        assert!(status.completed_at.is_none());
    }
    #[test]
    fn test_segment_status_update() {
        let mut status = FlockStatus::new(
            "test-session".to_string(),
            PathBuf::from("/test/project"),
            PathBuf::from("/test/workspace"),
            2,
        );
        let segment1 = SegmentStatus {
            segment_id: 1,
            workspace: PathBuf::from("/test/workspace/segment-1"),
            state: SegmentState::Completed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 1000,
            tool_calls: 50,
            errors: 2,
            current_turn: 5,
            max_turns: 10,
            last_message: Some("Done".to_string()),
            error_message: None,
        };
        status.update_segment(1, segment1);
        assert_eq!(status.segments.len(), 1);
        assert_eq!(status.total_tokens, 1000);
        assert_eq!(status.total_tool_calls, 50);
        assert_eq!(status.total_errors, 2);
    }
    #[test]
    fn test_multiple_segment_updates() {
        let mut status = FlockStatus::new(
            "test-session".to_string(),
            PathBuf::from("/test/project"),
            PathBuf::from("/test/workspace"),
            2,
        );
        let segment1 = SegmentStatus {
            segment_id: 1,
            workspace: PathBuf::from("/test/workspace/segment-1"),
            state: SegmentState::Completed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 1000,
            tool_calls: 50,
            errors: 2,
            current_turn: 5,
            max_turns: 10,
            last_message: Some("Done".to_string()),
            error_message: None,
        };
        let segment2 = SegmentStatus {
            segment_id: 2,
            workspace: PathBuf::from("/test/workspace/segment-2"),
            state: SegmentState::Failed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 500,
            tool_calls: 25,
            errors: 5,
            current_turn: 3,
            max_turns: 10,
            last_message: Some("Error".to_string()),
            error_message: Some("Test error".to_string()),
        };
        status.update_segment(1, segment1);
        status.update_segment(2, segment2);
        assert_eq!(status.segments.len(), 2);
        assert_eq!(status.total_tokens, 1500);
        assert_eq!(status.total_tool_calls, 75);
        assert_eq!(status.total_errors, 7);
    }
    #[test]
    fn test_is_complete() {
        let mut status = FlockStatus::new(
            "test-session".to_string(),
            PathBuf::from("/test/project"),
            PathBuf::from("/test/workspace"),
            2,
        );
        // Not complete - no segments
        assert!(!status.is_complete());
        // Add one completed segment
        let segment1 = SegmentStatus {
            segment_id: 1,
            workspace: PathBuf::from("/test/workspace/segment-1"),
            state: SegmentState::Completed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 1000,
            tool_calls: 50,
            errors: 0,
            current_turn: 5,
            max_turns: 10,
            last_message: None,
            error_message: None,
        };
        status.update_segment(1, segment1);
        // Still not complete - only 1 of 2 segments
        assert!(!status.is_complete());
        // Add second segment (running)
        let segment2 = SegmentStatus {
            segment_id: 2,
            workspace: PathBuf::from("/test/workspace/segment-2"),
            state: SegmentState::Running,
            started_at: Utc::now(),
            completed_at: None,
            tokens_used: 500,
            tool_calls: 25,
            errors: 0,
            current_turn: 3,
            max_turns: 10,
            last_message: None,
            error_message: None,
        };
        status.update_segment(2, segment2);
        // Still not complete - segment 2 is running
        assert!(!status.is_complete());
        // Update segment 2 to completed
        let segment2_done = SegmentStatus {
            segment_id: 2,
            workspace: PathBuf::from("/test/workspace/segment-2"),
            state: SegmentState::Completed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 500,
            tool_calls: 25,
            errors: 0,
            current_turn: 5,
            max_turns: 10,
            last_message: None,
            error_message: None,
        };
        status.update_segment(2, segment2_done);
        // Now complete
        assert!(status.is_complete());
    }
    #[test]
    fn test_count_by_state() {
        let mut status = FlockStatus::new(
            "test-session".to_string(),
            PathBuf::from("/test/project"),
            PathBuf::from("/test/workspace"),
            3,
        );
        let segment1 = SegmentStatus {
            segment_id: 1,
            workspace: PathBuf::from("/test/workspace/segment-1"),
            state: SegmentState::Completed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 1000,
            tool_calls: 50,
            errors: 0,
            current_turn: 5,
            max_turns: 10,
            last_message: None,
            error_message: None,
        };
        let segment2 = SegmentStatus {
            segment_id: 2,
            workspace: PathBuf::from("/test/workspace/segment-2"),
            state: SegmentState::Failed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 500,
            tool_calls: 25,
            errors: 5,
            current_turn: 3,
            max_turns: 10,
            last_message: None,
            error_message: Some("Error".to_string()),
        };
        let segment3 = SegmentStatus {
            segment_id: 3,
            workspace: PathBuf::from("/test/workspace/segment-3"),
            state: SegmentState::Completed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 800,
            tool_calls: 40,
            errors: 1,
            current_turn: 4,
            max_turns: 10,
            last_message: None,
            error_message: None,
        };
        status.update_segment(1, segment1);
        status.update_segment(2, segment2);
        status.update_segment(3, segment3);
        assert_eq!(status.count_by_state(SegmentState::Completed), 2);
        assert_eq!(status.count_by_state(SegmentState::Failed), 1);
        assert_eq!(status.count_by_state(SegmentState::Running), 0);
        assert_eq!(status.count_by_state(SegmentState::Pending), 0);
    }
    #[test]
    fn test_status_serialization() {
        let mut status = FlockStatus::new(
            "test-session".to_string(),
            PathBuf::from("/test/project"),
            PathBuf::from("/test/workspace"),
            1,
        );
        let segment1 = SegmentStatus {
            segment_id: 1,
            workspace: PathBuf::from("/test/workspace/segment-1"),
            state: SegmentState::Completed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 1000,
            tool_calls: 50,
            errors: 2,
            current_turn: 5,
            max_turns: 10,
            last_message: Some("Done".to_string()),
            error_message: None,
        };
        status.update_segment(1, segment1);
        // Serialize to JSON
        let json = serde_json::to_string(&status).expect("Failed to serialize");
        assert!(json.contains("test-session"));
        assert!(json.contains("segment_id"));
        assert!(json.contains("Completed"));
        // Deserialize back
        let deserialized: FlockStatus =
            serde_json::from_str(&json).expect("Failed to deserialize");
        assert_eq!(deserialized.session_id, "test-session");
        assert_eq!(deserialized.segments.len(), 1);
        assert_eq!(deserialized.total_tokens, 1000);
    }
    #[test]
    fn test_report_generation() {
        let mut status = FlockStatus::new(
            "test-session".to_string(),
            PathBuf::from("/test/project"),
            PathBuf::from("/test/workspace"),
            2,
        );
        let segment1 = SegmentStatus {
            segment_id: 1,
            workspace: PathBuf::from("/test/workspace/segment-1"),
            state: SegmentState::Completed,
            started_at: Utc::now(),
            completed_at: Some(Utc::now()),
            tokens_used: 1000,
            tool_calls: 50,
            errors: 2,
            current_turn: 5,
            max_turns: 10,
            last_message: Some("Done".to_string()),
            error_message: None,
        };
        status.update_segment(1, segment1);
        let report = status.generate_report();
        // Check that report contains expected sections
        assert!(report.contains("FLOCK MODE SESSION REPORT"));
        assert!(report.contains("test-session"));
        assert!(report.contains("Segment Status:"));
        assert!(report.contains("Aggregate Metrics:"));
        assert!(report.contains("Segment Details:"));
        assert!(report.contains("Total Tokens: 1000"));
        assert!(report.contains("Total Tool Calls: 50"));
        assert!(report.contains("Total Errors: 2"));
    }
 }
--- a/crates/g3-ensembles/tests/integration_tests.rs
+++ b/crates/g3-ensembles/tests/integration_tests.rs
@@ -1,443 +0,0 @@
 //! Integration tests for g3-ensembles flock mode
 use g3_ensembles::{FlockConfig, FlockMode};
 use std::fs;
 use std::path::PathBuf;
 use std::process::Command;
 use tempfile::TempDir;
 /// Helper to create a test git repository with flock-requirements.md
 fn create_test_project(name: &str) -> TempDir {
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let project_path = temp_dir.path();
    // Initialize git repo
    let output = Command::new("git")
        .arg("init")
        .current_dir(project_path)
        .output()
        .expect("Failed to run git init");
    assert!(output.status.success(), "git init failed");
    // Configure git user (required for commits)
    Command::new("git")
        .args(["config", "user.email", "test@example.com"])
        .current_dir(project_path)
        .output()
        .expect("Failed to configure git email");
    Command::new("git")
        .args(["config", "user.name", "Test User"])
        .current_dir(project_path)
        .output()
        .expect("Failed to configure git name");
    // Create flock-requirements.md
    let requirements = format!(
        "# {} Test Project\n\n\
        ## Module A\n\
        - Create a simple Rust library\n\
        - Add a function that returns \"Hello from Module A\"\n\
        - Write a unit test for the function\n\n\
        ## Module B\n\
        - Create another Rust library\n\
        - Add a function that returns \"Hello from Module B\"\n\
        - Write a unit test for the function\n",
        name
    );
    fs::write(project_path.join("flock-requirements.md"), requirements)
        .expect("Failed to write requirements");
    // Create a simple README
    fs::write(project_path.join("README.md"), format!("# {}\n", name))
        .expect("Failed to write README");
    // Create initial commit
    Command::new("git")
        .args(["add", "."])
        .current_dir(project_path)
        .output()
        .expect("Failed to git add");
    let output = Command::new("git")
        .args(["commit", "-m", "Initial commit"])
        .current_dir(project_path)
        .output()
        .expect("Failed to git commit");
    assert!(output.status.success(), "git commit failed");
    temp_dir
 }
 #[test]
 fn test_flock_config_validation() {
    let temp_dir = TempDir::new().unwrap();
    let project_path = temp_dir.path().to_path_buf();
    let workspace_path = temp_dir.path().join("workspace");
    // Should fail - not a git repo
    let result = FlockConfig::new(project_path.clone(), workspace_path.clone(), 2);
    assert!(result.is_err());
    assert!(result
        .unwrap_err()
        .to_string()
        .contains("must be a git repository"));
    // Initialize git repo
    Command::new("git")
        .arg("init")
        .current_dir(&project_path)
        .output()
        .expect("Failed to run git init");
    // Should fail - no flock-requirements.md
    let result = FlockConfig::new(project_path.clone(), workspace_path.clone(), 2);
    assert!(result.is_err());
    assert!(result
        .unwrap_err()
        .to_string()
        .contains("flock-requirements.md"));
    // Create flock-requirements.md
    fs::write(project_path.join("flock-requirements.md"), "# Test\n")
        .expect("Failed to write requirements");
    // Should succeed now
    let result = FlockConfig::new(project_path, workspace_path, 2);
    assert!(result.is_ok());
 }
 #[test]
 fn test_flock_config_builder() {
    let project_dir = create_test_project("builder-test");
    let workspace_dir = TempDir::new().unwrap();
    let config = FlockConfig::new(
        project_dir.path().to_path_buf(),
        workspace_dir.path().to_path_buf(),
        2,
    )
    .expect("Failed to create config")
    .with_max_turns(15)
    .with_g3_binary(PathBuf::from("/custom/g3"));
    assert_eq!(config.num_segments, 2);
    assert_eq!(config.max_turns, 15);
    assert_eq!(config.g3_binary, Some(PathBuf::from("/custom/g3")));
 }
 #[test]
 fn test_workspace_creation() {
    let project_dir = create_test_project("workspace-test");
    let workspace_dir = TempDir::new().unwrap();
    let config = FlockConfig::new(
        project_dir.path().to_path_buf(),
        workspace_dir.path().to_path_buf(),
        2,
    )
    .expect("Failed to create config");
    // Create FlockMode instance
    let _flock = FlockMode::new(config).expect("Failed to create FlockMode");
    // Verify workspace directory structure will be created
    // (We can't run the full flock without LLM access, but we can test the setup)
    assert!(project_dir.path().join(".git").exists());
    assert!(project_dir.path().join("flock-requirements.md").exists());
 }
 #[test]
 fn test_git_clone_functionality() {
    let project_dir = create_test_project("clone-test");
    let workspace_dir = TempDir::new().unwrap();
    // Manually test git cloning (what flock mode does internally)
    let segment_dir = workspace_dir.path().join("segment-1");
    let output = Command::new("git")
        .arg("clone")
        .arg(project_dir.path())
        .arg(&segment_dir)
        .output()
        .expect("Failed to run git clone");
    assert!(output.status.success(), "git clone failed: {:?}", output);
    // Verify the clone
    assert!(segment_dir.exists());
    assert!(segment_dir.join(".git").exists());
    assert!(segment_dir.join("flock-requirements.md").exists());
    assert!(segment_dir.join("README.md").exists());
    // Verify it's a proper git repo
    let output = Command::new("git")
        .args(["log", "--oneline"])
        .current_dir(&segment_dir)
        .output()
        .expect("Failed to run git log");
    assert!(output.status.success());
    let log = String::from_utf8_lossy(&output.stdout);
    assert!(log.contains("Initial commit"));
 }
 #[test]
 fn test_multiple_segment_clones() {
    let project_dir = create_test_project("multi-clone-test");
    let workspace_dir = TempDir::new().unwrap();
    // Clone multiple segments
    for i in 1..=2 {
        let segment_dir = workspace_dir.path().join(format!("segment-{}", i));
        let output = Command::new("git")
            .arg("clone")
            .arg(project_dir.path())
            .arg(&segment_dir)
            .output()
            .expect("Failed to run git clone");
        assert!(output.status.success(), "git clone {} failed", i);
        assert!(segment_dir.exists());
        assert!(segment_dir.join(".git").exists());
        assert!(segment_dir.join("flock-requirements.md").exists());
    }
    // Verify both segments exist and are independent
    let segment1 = workspace_dir.path().join("segment-1");
    let segment2 = workspace_dir.path().join("segment-2");
    assert!(segment1.exists());
    assert!(segment2.exists());
    // Modify segment 1
    fs::write(segment1.join("test.txt"), "segment 1")
        .expect("Failed to write to segment 1");
    // Verify segment 2 is unaffected
    assert!(!segment2.join("test.txt").exists());
 }
 #[test]
 fn test_segment_requirements_creation() {
    let project_dir = create_test_project("segment-req-test");
    let workspace_dir = TempDir::new().unwrap();
    // Clone a segment
    let segment_dir = workspace_dir.path().join("segment-1");
    Command::new("git")
        .arg("clone")
        .arg(project_dir.path())
        .arg(&segment_dir)
        .output()
        .expect("Failed to clone");
    // Create segment-requirements.md (what flock mode does)
    let segment_requirements = "# Module A\n\nImplement module A functionality\n";
    fs::write(segment_dir.join("segment-requirements.md"), segment_requirements)
        .expect("Failed to write segment requirements");
    // Verify it was created
    assert!(segment_dir.join("segment-requirements.md").exists());
    let content = fs::read_to_string(segment_dir.join("segment-requirements.md"))
        .expect("Failed to read segment requirements");
    assert!(content.contains("Module A"));
 }
 #[test]
 fn test_status_file_operations() {
    use g3_ensembles::FlockStatus;
    let temp_dir = TempDir::new().unwrap();
    let status_file = temp_dir.path().join("flock-status.json");
    // Create a status
    let status = FlockStatus::new(
        "test-session".to_string(),
        PathBuf::from("/test/project"),
        PathBuf::from("/test/workspace"),
        2,
    );
    // Save to file
    status
        .save_to_file(&status_file)
        .expect("Failed to save status");
    // Verify file exists
    assert!(status_file.exists());
    // Load from file
    let loaded = FlockStatus::load_from_file(&status_file).expect("Failed to load status");
    assert_eq!(loaded.session_id, "test-session");
    assert_eq!(loaded.num_segments, 2);
 }
 #[test]
 fn test_json_extraction() {
    // Test the JSON extraction logic used in partition_requirements
    let test_cases = vec![
        (
            "Here is the result: [{\"module_name\": \"test\"}]",
            Some("[{\"module_name\": \"test\"}]"),
        ),
        (
            "```json\n[{\"module_name\": \"test\"}]\n```",
            Some("[{\"module_name\": \"test\"}]"),
        ),
        (
            "Some text before\n[{\"a\": 1}, {\"b\": 2}]\nSome text after",
            Some("[{\"a\": 1}, {\"b\": 2}]"),
        ),
        ("No JSON here", None),
    ];
    for (input, expected) in test_cases {
        let result = extract_json_array(input);
        match expected {
            Some(exp) => {
                assert!(result.is_some(), "Failed to extract from: {}", input);
                assert_eq!(result.unwrap(), exp);
            }
            None => {
                assert!(result.is_none(), "Should not extract from: {}", input);
            }
        }
    }
 }
 // Helper function to extract JSON array (mimics the logic in flock.rs)
 fn extract_json_array(output: &str) -> Option<String> {
    if let Some(start) = output.find('[') {
        if let Some(end) = output.rfind(']') {
            if end > start {
                return Some(output[start..=end].to_string());
            }
        }
    }
    None
 }
 #[test]
 fn test_partition_json_parsing() {
    // Test parsing of partition JSON
    let json = r#"[
        {
            "module_name": "core-library",
            "requirements": "Build the core library with basic functionality",
            "dependencies": []
        },
        {
            "module_name": "cli-tool",
            "requirements": "Create a CLI tool that uses the core library",
            "dependencies": ["core-library"]
        }
    ]"#;
    let partitions: Vec<serde_json::Value> =
        serde_json::from_str(json).expect("Failed to parse JSON");
    assert_eq!(partitions.len(), 2);
    assert_eq!(partitions[0]["module_name"], "core-library");
    assert_eq!(partitions[1]["module_name"], "cli-tool");
    assert_eq!(partitions[1]["dependencies"][0], "core-library");
 }
 #[test]
 fn test_requirements_file_content() {
    let project_dir = create_test_project("content-test");
    let requirements_path = project_dir.path().join("flock-requirements.md");
    let content = fs::read_to_string(&requirements_path).expect("Failed to read requirements");
    // Verify content structure
    assert!(content.contains("# content-test Test Project"));
    assert!(content.contains("## Module A"));
    assert!(content.contains("## Module B"));
    assert!(content.contains("Hello from Module A"));
    assert!(content.contains("Hello from Module B"));
 }
 #[test]
 fn test_git_repo_independence() {
    let project_dir = create_test_project("independence-test");
    let workspace_dir = TempDir::new().unwrap();
    // Clone two segments
    let segment1 = workspace_dir.path().join("segment-1");
    let segment2 = workspace_dir.path().join("segment-2");
    Command::new("git")
        .arg("clone")
        .arg(project_dir.path())
        .arg(&segment1)
        .output()
        .expect("Failed to clone segment 1");
    Command::new("git")
        .arg("clone")
        .arg(project_dir.path())
        .arg(&segment2)
        .output()
        .expect("Failed to clone segment 2");
    // Make a commit in segment 1
    fs::write(segment1.join("file1.txt"), "content 1").expect("Failed to write file1");
    Command::new("git")
        .args(["add", "file1.txt"])
        .current_dir(&segment1)
        .output()
        .expect("Failed to git add");
    Command::new("git")
        .args(["commit", "-m", "Add file1"])
        .current_dir(&segment1)
        .output()
        .expect("Failed to commit in segment 1");
    // Make a different commit in segment 2
    fs::write(segment2.join("file2.txt"), "content 2").expect("Failed to write file2");
    Command::new("git")
        .args(["add", "file2.txt"])
        .current_dir(&segment2)
        .output()
        .expect("Failed to git add");
    Command::new("git")
        .args(["commit", "-m", "Add file2"])
        .current_dir(&segment2)
        .output()
        .expect("Failed to commit in segment 2");
    // Verify they have different commits
    let log1 = Command::new("git")
        .args(["log", "--oneline"])
        .current_dir(&segment1)
        .output()
        .expect("Failed to get log 1");
    let log2 = Command::new("git")
        .args(["log", "--oneline"])
        .current_dir(&segment2)
        .output()
        .expect("Failed to get log 2");
    let log1_str = String::from_utf8_lossy(&log1.stdout);
    let log2_str = String::from_utf8_lossy(&log2.stdout);
    assert!(log1_str.contains("Add file1"));
    assert!(!log1_str.contains("Add file2"));
    assert!(log2_str.contains("Add file2"));
    assert!(!log2_str.contains("Add file1"));
    // Verify files exist only in their respective segments
    assert!(segment1.join("file1.txt").exists());
    assert!(!segment1.join("file2.txt").exists());
    assert!(segment2.join("file2.txt").exists());
    assert!(!segment2.join("file1.txt").exists());
 }
--- a/crates/g3-providers/src/anthropic.rs
+++ b/crates/g3-providers/src/anthropic.rs
@@ -21,22 +21,18 @@
 //!     // Create the provider with your API key
 //!     let provider = AnthropicProvider::new(
 //!         "your-api-key".to_string(),
-//!         Some("claude-3-5-sonnet-20241022".to_string()), // Optional: defaults to claude-3-5-sonnet-20241022
+//!         Some("claude-3-5-sonnet-20241022".to_string()),
-//!         Some(4096),  // Optional: max tokens
+//!         Some(4096),
-//!         Some(0.1),   // Optional: temperature
+//!         Some(0.1),
 //!         None, // cache_config
 //!         None, // enable_1m_context
 //!     )?;
 //!
 //!     // Create a completion request
 //!     let request = CompletionRequest {
 //!         messages: vec![
-//!             Message {
+//!             Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
-//!                 role: MessageRole::System,
+//!             Message::new(MessageRole::User, "Hello! How are you?".to_string()),
 //!                 content: "You are a helpful assistant.".to_string(),
 //!             },
 //!             Message {
 //!                 role: MessageRole::User,
 //!                 content: "Hello! How are you?".to_string(),
 //!             },
 //!         ],
 //!         max_tokens: Some(1000),
 //!         temperature: Some(0.7),
@@ -62,15 +58,16 @@
 //! async fn main() -> anyhow::Result<()> {
 //!     let provider = AnthropicProvider::new(
 //!         "your-api-key".to_string(),
-//!         None, None, None,
+//!         None,
 //!         None,
 //!         None,
 //!         None, // cache_config
 //!         None, // enable_1m_context
 //!     )?;
 //!
 //!     let request = CompletionRequest {
 //!         messages: vec![
-//!             Message {
+//!             Message::new(MessageRole::User, "Write a short story about a robot.".to_string()),
 //!                 role: MessageRole::User,
 //!                 content: "Write a short story about a robot.".to_string(),
 //!             },
 //!         ],
 //!         max_tokens: Some(1000),
 //!         temperature: Some(0.7),
@@ -123,6 +120,8 @@ pub struct AnthropicProvider {
    model: String,
    max_tokens: u32,
    temperature: f32,
    cache_config: Option<String>,
    enable_1m_context: bool,
 }
 impl AnthropicProvider {
@@ -131,6 +130,8 @@ impl AnthropicProvider {
        model: Option<String>,
        max_tokens: Option<u32>,
        temperature: Option<f32>,
        cache_config: Option<String>,
        enable_1m_context: Option<bool>,
    ) -> Result<Self> {
        let client = Client::builder()
            .timeout(Duration::from_secs(300))
@@ -147,6 +148,8 @@ impl AnthropicProvider {
            model,
            max_tokens: max_tokens.unwrap_or(4096),
            temperature: temperature.unwrap_or(0.1),
            cache_config,
            enable_1m_context: enable_1m_context.unwrap_or(false),
        })
    }
@@ -156,9 +159,12 @@ impl AnthropicProvider {
            .post(ANTHROPIC_API_URL)
            .header("x-api-key", &self.api_key)
            .header("anthropic-version", ANTHROPIC_VERSION)
            // Anthropic beta 1m context window. Enable if needed. It costs extra, so check first.
            // .header("anthropic-beta", "context-1m-2025-08-07")
            .header("content-type", "application/json");
        if self.enable_1m_context {
            builder = builder.header("anthropic-beta", "context-1m-2025-08-07");
        }
        if streaming {
            builder = builder.header("accept", "text/event-stream");
        }
@@ -166,6 +172,11 @@ impl AnthropicProvider {
        builder
    }
    fn convert_cache_control(cache_control: &crate::CacheControl) -> crate::CacheControl {
        // Anthropic uses the same format, so just clone it
        cache_control.clone()
    }
    fn convert_tools(&self, tools: &[Tool]) -> Vec<AnthropicTool> {
        tools
            .iter()
@@ -214,6 +225,8 @@ impl AnthropicProvider {
                        role: "user".to_string(),
                        content: vec![AnthropicContent::Text {
                            text: message.content.clone(),
                            cache_control: message.cache_control.as_ref()
                                .map(Self::convert_cache_control),
                        }],
                    });
                }
@@ -222,6 +235,8 @@ impl AnthropicProvider {
                        role: "assistant".to_string(),
                        content: vec![AnthropicContent::Text {
                            text: message.content.clone(),
                            cache_control: message.cache_control.as_ref()
                                .map(Self::convert_cache_control),
                        }],
                    });
                }
@@ -564,7 +579,7 @@ impl LLMProvider for AnthropicProvider {
            .content
            .iter()
            .filter_map(|c| match c {
-                AnthropicContent::Text { text } => Some(text.as_str()),
+                AnthropicContent::Text { text, .. } => Some(text.as_str()),
                _ => None,
            })
            .collect::<Vec<_>>()
@@ -658,6 +673,11 @@ impl LLMProvider for AnthropicProvider {
        // Claude models support native tool calling
        true
    }
    fn supports_cache_control(&self) -> bool {
        // Anthropic supports cache control
        true
    }
 }
 // Anthropic API request/response structures
@@ -701,7 +721,11 @@ struct AnthropicMessage {
 #[serde(tag = "type")]
 enum AnthropicContent {
    #[serde(rename = "text")]
-    Text { text: String },
+    Text { 
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        cache_control: Option<crate::CacheControl>,
    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
@@ -771,21 +795,14 @@ mod tests {
            None,
            None,
            None,
            None,
            None,
        ).unwrap();
        let messages = vec![
-            Message {
+            Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
-                role: MessageRole::System,
+            Message::new(MessageRole::User, "Hello!".to_string()),
-                content: "You are a helpful assistant.".to_string(),
+            Message::new(MessageRole::Assistant, "Hi there!".to_string()),
            },
            Message {
                role: MessageRole::User,
                content: "Hello!".to_string(),
            },
            Message {
                role: MessageRole::Assistant,
                content: "Hi there!".to_string(),
            },
        ];
        let (system, anthropic_messages) = provider.convert_messages(&messages).unwrap();
@@ -803,14 +820,11 @@ mod tests {
            Some("claude-3-haiku-20240307".to_string()),
            Some(1000),
            Some(0.5),
            None,
            None,
        ).unwrap();
-        let messages = vec![
+        let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
            Message {
                role: MessageRole::User,
                content: "Test message".to_string(),
            },
        ];
        let request_body = provider
            .create_request_body(&messages, None, false, 1000, 0.5)
@@ -831,6 +845,8 @@ mod tests {
            None,
            None,
            None,
            None,
            None,
        ).unwrap();
        let tools = vec![
@@ -859,4 +875,48 @@ mod tests {
        assert!(anthropic_tools[0].input_schema.required.is_some());
        assert_eq!(anthropic_tools[0].input_schema.required.as_ref().unwrap()[0], "location");
    }
    #[test]
    fn test_cache_control_serialization() {
        let provider = AnthropicProvider::new(
            "test-key".to_string(),
            None,
            None,
            None,
            None,
            None,
        ).unwrap();
        // Test message WITHOUT cache_control
        let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
        let (_, anthropic_messages_without) = provider.convert_messages(&messages_without).unwrap();
        let json_without = serde_json::to_string(&anthropic_messages_without).unwrap();
        println!("Anthropic JSON without cache_control: {}", json_without);
        // Check if cache_control appears in the JSON
        if json_without.contains("cache_control") {
            println!("WARNING: JSON contains 'cache_control' field when not configured!");
            assert!(!json_without.contains("\"cache_control\":null"), 
                    "JSON should not contain 'cache_control: null'");
        }
        // Test message WITH cache_control
        let messages_with = vec![Message::with_cache_control(
            MessageRole::User,
            "Hello".to_string(),
            crate::CacheControl::ephemeral(),
        )];
        let (_, anthropic_messages_with) = provider.convert_messages(&messages_with).unwrap();
        let json_with = serde_json::to_string(&anthropic_messages_with).unwrap();
        println!("Anthropic JSON with cache_control: {}", json_with);
        assert!(json_with.contains("cache_control"), 
                "JSON should contain 'cache_control' field when configured");
        assert!(json_with.contains("ephemeral"), 
                "JSON should contain 'ephemeral' type");
        // The key assertion: when cache_control is None, it should not appear in JSON
        assert!(!json_without.contains("cache_control") || !json_without.contains("null"),
                "JSON should not contain 'cache_control' field or null values when not configured");
    }
 }
--- a/crates/g3-providers/src/databricks.rs
+++ b/crates/g3-providers/src/databricks.rs
@@ -39,10 +39,7 @@
 //!     // Create a completion request
 //!     let request = CompletionRequest {
 //!         messages: vec![
-//!             Message {
+//!             Message::new(MessageRole::User, "Hello! How are you?".to_string()),
 //!                 role: MessageRole::User,
 //!                 content: "Hello! How are you?".to_string(),
 //!             },
 //!         ],
 //!         max_tokens: Some(1000),
 //!         temperature: Some(0.7),
@@ -251,9 +248,12 @@ impl DatabricksProvider {
                MessageRole::Assistant => "assistant",
            };
            // Always use simple string format (Databricks doesn't support cache_control)
            let content = serde_json::Value::String(message.content.clone());
            databricks_messages.push(DatabricksMessage {
                role: role.to_string(),
-                content: Some(message.content.clone()),
+                content: Some(content),
                tool_calls: None, // Only used in responses, not requests
            });
        }
@@ -864,8 +864,22 @@ impl LLMProvider for DatabricksProvider {
        let content = databricks_response
            .choices
            .first()
-            .and_then(|choice| choice.message.content.as_ref())
+            .and_then(|choice| {
-            .cloned()
+                choice.message.content.as_ref().map(|c| {
                    // Handle both string and array formats
                    if let Some(s) = c.as_str() {
                        s.to_string()
                    } else if let Some(arr) = c.as_array() {
                        // Extract text from content blocks
                        arr.iter()
                            .filter_map(|block| block.get("text").and_then(|t| t.as_str()))
                            .collect::<Vec<_>>()
                            .join("")
                    } else {
                        String::new()
                    }
                })
            })
            .unwrap_or_default();
        // Check if there are tool calls in the response
@@ -1037,6 +1051,10 @@ impl LLMProvider for DatabricksProvider {
        // This includes Claude, Llama, DBRX, and most other models on the platform
        true
    }
    fn supports_cache_control(&self) -> bool {
        false
    }
 }
 // Databricks API request/response structures
@@ -1067,7 +1085,8 @@ struct DatabricksFunction {
 #[derive(Debug, Serialize, Deserialize)]
 struct DatabricksMessage {
    role: String,
-    content: Option<String>, // Make content optional since tool calls might not have content
+    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<serde_json::Value>, // Can be string or array of content blocks
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<DatabricksToolCall>>, // Add tool_calls field for responses
 }
@@ -1154,18 +1173,9 @@ mod tests {
        .unwrap();
        let messages = vec![
-            Message {
+            Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
-                role: MessageRole::System,
+            Message::new(MessageRole::User, "Hello!".to_string()),
-                content: "You are a helpful assistant.".to_string(),
+            Message::new(MessageRole::Assistant, "Hi there!".to_string()),
            },
            Message {
                role: MessageRole::User,
                content: "Hello!".to_string(),
            },
            Message {
                role: MessageRole::Assistant,
                content: "Hi there!".to_string(),
            },
        ];
        let databricks_messages = provider.convert_messages(&messages).unwrap();
@@ -1187,10 +1197,7 @@ mod tests {
        )
        .unwrap();
-        let messages = vec![Message {
+        let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
            role: MessageRole::User,
            content: "Test message".to_string(),
        }];
        let request_body = provider
            .create_request_body(&messages, None, false, 1000, 0.5)
@@ -1273,4 +1280,62 @@ mod tests {
        assert!(llama_provider.has_native_tool_calling());
        assert!(dbrx_provider.has_native_tool_calling());
    }
    #[test]
    fn test_cache_control_serialization() {
        let provider = DatabricksProvider::from_token(
            "https://test.databricks.com".to_string(),
            "test-token".to_string(),
            "databricks-claude-sonnet-4".to_string(),
            None,
            None,
        )
        .unwrap();
        // Test message WITHOUT cache_control
        let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
        let databricks_messages_without = provider.convert_messages(&messages_without).unwrap();
        let json_without = serde_json::to_string(&databricks_messages_without).unwrap();
        println!("JSON without cache_control: {}", json_without);
        assert!(!json_without.contains("cache_control"), 
                "JSON should not contain 'cache_control' field when not configured");
        // Test message WITH cache_control - should still NOT include it (Databricks doesn't support it)
        let messages_with = vec![Message::with_cache_control(
            MessageRole::User,
            "Hello".to_string(),
            crate::CacheControl::ephemeral(),
        )];
        let databricks_messages_with = provider.convert_messages(&messages_with).unwrap();
        let json_with = serde_json::to_string(&databricks_messages_with).unwrap();
        println!("JSON with cache_control: {}", json_with);
        assert!(!json_with.contains("cache_control"), 
                "JSON should NOT contain 'cache_control' field - Databricks doesn't support it");
    }
    #[test]
    fn test_databricks_does_not_support_cache_control() {
        let claude_provider = DatabricksProvider::from_token(
            "https://test.databricks.com".to_string(),
            "test-token".to_string(),
            "databricks-claude-sonnet-4".to_string(),
            None,
            None,
        )
        .unwrap();
        let llama_provider = DatabricksProvider::from_token(
            "https://test.databricks.com".to_string(),
            "test-token".to_string(),
            "databricks-meta-llama-3-3-70b-instruct".to_string(),
            None,
            None,
        )
        .unwrap();
        assert!(!claude_provider.supports_cache_control(), "Databricks should not support cache_control even for Claude models");
        assert!(!llama_provider.supports_cache_control(), "Databricks should not support cache_control for Llama models");
    }
 }
--- a/crates/g3-providers/src/lib.rs
+++ b/crates/g3-providers/src/lib.rs
@@ -21,6 +21,11 @@ pub trait LLMProvider: Send + Sync {
    fn has_native_tool_calling(&self) -> bool {
        false
    }
    /// Check if the provider supports cache control
    fn supports_cache_control(&self) -> bool {
        false
    }
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -32,10 +37,40 @@ pub struct CompletionRequest {
    pub tools: Option<Vec<Tool>>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct CacheControl {
    #[serde(rename = "type")]
    pub cache_type: CacheType,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub ttl: Option<String>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum CacheType {
    Ephemeral,
 }
 impl CacheControl {
    pub fn ephemeral() -> Self {
        Self { cache_type: CacheType::Ephemeral, ttl: None }
    }
    pub fn five_minute() -> Self {
        Self { cache_type: CacheType::Ephemeral, ttl: Some("5m".to_string()) }
    }
    pub fn one_hour() -> Self {
        Self { cache_type: CacheType::Ephemeral, ttl: Some("1h".to_string()) }
    }
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Message {
    pub role: MessageRole,
    pub content: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_control: Option<CacheControl>,
 }
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -95,6 +130,45 @@ pub use databricks::DatabricksProvider;
 pub use embedded::EmbeddedProvider;
 pub use openai::OpenAIProvider;
 impl Message {
    /// Create a new message with optional cache control
    pub fn new(role: MessageRole, content: String) -> Self {
        Self {
            role,
            content,
            cache_control: None,
        }
    }
    /// Create a new message with cache control
    pub fn with_cache_control(role: MessageRole, content: String, cache_control: CacheControl) -> Self {
        Self {
            role,
            content,
            cache_control: Some(cache_control),
        }
    }
    /// Create a message with cache control, with provider validation
    pub fn with_cache_control_validated(
        role: MessageRole, 
        content: String, 
        cache_control: CacheControl,
        provider: &dyn LLMProvider
    ) -> Self {
        if !provider.supports_cache_control() {
            tracing::warn!(
                "Cache control requested for provider '{}' which does not support it. \
                Cache control is only supported by Anthropic and Anthropic via Databricks.",
                provider.name()
            );
            return Self::new(role, content);
        }
        Self::with_cache_control(role, content, cache_control)
    }
 }
 /// Provider registry for managing multiple LLM providers
 pub struct ProviderRegistry {
    providers: HashMap<String, Box<dyn LLMProvider>>,
@@ -144,3 +218,68 @@ impl Default for ProviderRegistry {
        Self::new()
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    #[test]
    fn test_message_serialization_without_cache_control() {
        let msg = Message::new(MessageRole::User, "Hello".to_string());
        let json = serde_json::to_string(&msg).unwrap();
        println!("Message JSON without cache_control: {}", json);
        assert!(!json.contains("cache_control"), 
                "JSON should not contain 'cache_control' field when not configured");
    }
    #[test]
    fn test_message_serialization_with_cache_control() {
        let msg = Message::with_cache_control(
            MessageRole::User,
            "Hello".to_string(),
            CacheControl::ephemeral(),
        );
        let json = serde_json::to_string(&msg).unwrap();
        println!("Message JSON with cache_control: {}", json);
        assert!(json.contains("cache_control"), 
                "JSON should contain 'cache_control' field when configured");
        assert!(json.contains("ephemeral"), 
                "JSON should contain 'ephemeral' value");
        assert!(json.contains("\"type\":"), 
                "JSON should contain 'type' field in cache_control");
        assert!(!json.contains("null"), 
                "JSON should not contain null values");
    }
    #[test]
    fn test_cache_control_five_minute_serialization() {
        let msg = Message::with_cache_control(
            MessageRole::User,
            "Hello".to_string(),
            CacheControl::five_minute(),
        );
        let json = serde_json::to_string(&msg).unwrap();
        println!("Message JSON with 5-minute cache_control: {}", json);
        assert!(json.contains("cache_control"), "JSON should contain 'cache_control' field");
        assert!(json.contains("ephemeral"), "JSON should contain 'ephemeral' type");
        assert!(json.contains("\"ttl\":\"5m\""), "JSON should contain ttl field with 5m value");
    }
    #[test]
    fn test_cache_control_one_hour_serialization() {
        let msg = Message::with_cache_control(
            MessageRole::User,
            "Hello".to_string(),
            CacheControl::one_hour(),
        );
        let json = serde_json::to_string(&msg).unwrap();
        println!("Message JSON with 1-hour cache_control: {}", json);
        assert!(json.contains("cache_control"), "JSON should contain 'cache_control' field");
        assert!(json.contains("ephemeral"), "JSON should contain 'ephemeral' type");
        assert!(json.contains("\"ttl\":\"1h\""), "JSON should contain ttl field with 1h value");
    }
 }
--- a/crates/g3-providers/tests/cache_control_error_regression_test.rs
+++ b/crates/g3-providers/tests/cache_control_error_regression_test.rs
@@ -0,0 +1,131 @@
 //! Regression test for cache_control serialization bug
 //!
 //! This test verifies that cache_control is NOT serialized in the wrong format.
 //! The bug was that it serialized as:
 //!   - `system.0.cache_control.ephemeral.ttl` (WRONG)
 //!
 //! It should serialize as:
 //!   - `"cache_control": {"type": "ephemeral"}` for ephemeral
 //!   - `"cache_control": {"type": "ephemeral", "ttl": "5m"}` for 5minute
 //!   - `"cache_control": {"type": "ephemeral", "ttl": "1h"}` for 1hour
 use g3_providers::{CacheControl, Message, MessageRole};
 #[test]
 fn test_no_wrong_serialization_format() {
    // Test ephemeral
    let msg = Message::with_cache_control(
        MessageRole::System,
        "Test".to_string(),
        CacheControl::ephemeral(),
    );
    let json = serde_json::to_string(&msg).unwrap();
    println!("Ephemeral message JSON: {}", json);
    // Should NOT contain the wrong format
    assert!(!json.contains("system.0.cache_control"), 
            "JSON should not contain 'system.0.cache_control' path");
    assert!(!json.contains("cache_control.ephemeral"), 
            "JSON should not contain 'cache_control.ephemeral' path");
    // Should contain the correct format
    assert!(json.contains(r#""cache_control":{"type":"ephemeral"}"#),
            "JSON should contain correct cache_control format");
 }
 #[test]
 fn test_five_minute_no_wrong_format() {
    let msg = Message::with_cache_control(
        MessageRole::System,
        "Test".to_string(),
        CacheControl::five_minute(),
    );
    let json = serde_json::to_string(&msg).unwrap();
    println!("5-minute message JSON: {}", json);
    // Should NOT contain the wrong format
    assert!(!json.contains("system.0.cache_control"), 
            "JSON should not contain 'system.0.cache_control' path");
    assert!(!json.contains("cache_control.ephemeral.ttl"), 
            "JSON should not contain 'cache_control.ephemeral.ttl' path");
    // Should contain the correct format with ttl as a direct field
    assert!(json.contains(r#""type":"ephemeral""#),
            "JSON should contain type field");
    assert!(json.contains(r#""ttl":"5m""#),
            "JSON should contain ttl field with value 5m");
 }
 #[test]
 fn test_one_hour_no_wrong_format() {
    let msg = Message::with_cache_control(
        MessageRole::System,
        "Test".to_string(),
        CacheControl::one_hour(),
    );
    let json = serde_json::to_string(&msg).unwrap();
    println!("1-hour message JSON: {}", json);
    // Should NOT contain the wrong format
    assert!(!json.contains("system.0.cache_control"), 
            "JSON should not contain 'system.0.cache_control' path");
    assert!(!json.contains("cache_control.ephemeral.ttl"), 
            "JSON should not contain 'cache_control.ephemeral.ttl' path");
    // Should contain the correct format with ttl as a direct field
    assert!(json.contains(r#""type":"ephemeral""#),
            "JSON should contain type field");
    assert!(json.contains(r#""ttl":"1h""#),
            "JSON should contain ttl field with value 1h");
 }
 #[test]
 fn test_cache_control_structure_is_flat() {
    // Verify that the cache_control object has a flat structure
    // with 'type' and optional 'ttl' at the same level
    let cache_control = CacheControl::five_minute();
    let json_value = serde_json::to_value(&cache_control).unwrap();
    println!("Cache control as JSON value: {}", serde_json::to_string_pretty(&json_value).unwrap());
    let obj = json_value.as_object().expect("Should be an object");
    // Should have exactly 2 keys at the top level
    assert_eq!(obj.len(), 2, "Cache control should have exactly 2 top-level fields");
    // Both 'type' and 'ttl' should be at the same level
    assert!(obj.contains_key("type"), "Should have 'type' field");
    assert!(obj.contains_key("ttl"), "Should have 'ttl' field");
    // 'type' should be a string, not an object
    assert!(obj["type"].is_string(), "'type' should be a string value");
    // 'ttl' should be a string, not nested
    assert!(obj["ttl"].is_string(), "'ttl' should be a string value");
 }
 #[test]
 fn test_ephemeral_cache_control_structure() {
    let cache_control = CacheControl::ephemeral();
    let json_value = serde_json::to_value(&cache_control).unwrap();
    println!("Ephemeral cache control as JSON value: {}", serde_json::to_string_pretty(&json_value).unwrap());
    let obj = json_value.as_object().expect("Should be an object");
    // Should have exactly 1 key (only 'type', no 'ttl')
    assert_eq!(obj.len(), 1, "Ephemeral cache control should have exactly 1 top-level field");
    // Should have 'type' field
    assert!(obj.contains_key("type"), "Should have 'type' field");
    // Should NOT have 'ttl' field
    assert!(!obj.contains_key("ttl"), "Ephemeral should not have 'ttl' field");
    // 'type' should be a string with value "ephemeral"
    assert_eq!(obj["type"].as_str().unwrap(), "ephemeral");
 }
--- a/crates/g3-providers/tests/cache_control_integration_test.rs
+++ b/crates/g3-providers/tests/cache_control_integration_test.rs
@@ -0,0 +1,164 @@
 //! Integration tests for cache_control feature
 //!
 //! These tests verify that cache_control is correctly serialized in messages
 //! for both Anthropic and Databricks providers.
 use g3_providers::{CacheControl, Message, MessageRole};
 use serde_json::json;
 #[test]
 fn test_ephemeral_cache_control_serialization() {
    let cache_control = CacheControl::ephemeral();
    let json = serde_json::to_value(&cache_control).unwrap();
    println!("Ephemeral cache_control JSON: {}", serde_json::to_string(&json).unwrap());
    assert_eq!(json, json!({
        "type": "ephemeral"
    }));
    // Verify no ttl field is present
    assert!(!json.as_object().unwrap().contains_key("ttl"));
 }
 #[test]
 fn test_five_minute_cache_control_serialization() {
    let cache_control = CacheControl::five_minute();
    let json = serde_json::to_value(&cache_control).unwrap();
    println!("5-minute cache_control JSON: {}", serde_json::to_string(&json).unwrap());
    assert_eq!(json, json!({
        "type": "ephemeral",
        "ttl": "5m"
    }));
 }
 #[test]
 fn test_one_hour_cache_control_serialization() {
    let cache_control = CacheControl::one_hour();
    let json = serde_json::to_value(&cache_control).unwrap();
    println!("1-hour cache_control JSON: {}", serde_json::to_string(&json).unwrap());
    assert_eq!(json, json!({
        "type": "ephemeral",
        "ttl": "1h"
    }));
 }
 #[test]
 fn test_message_with_ephemeral_cache_control() {
    let msg = Message::with_cache_control(
        MessageRole::System,
        "System prompt".to_string(),
        CacheControl::ephemeral(),
    );
    let json = serde_json::to_value(&msg).unwrap();
    println!("Message with ephemeral cache_control: {}", serde_json::to_string(&json).unwrap());
    let cache_control = json.get("cache_control").expect("cache_control field should exist");
    assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
    assert!(!cache_control.as_object().unwrap().contains_key("ttl"));
 }
 #[test]
 fn test_message_with_five_minute_cache_control() {
    let msg = Message::with_cache_control(
        MessageRole::System,
        "System prompt".to_string(),
        CacheControl::five_minute(),
    );
    let json = serde_json::to_value(&msg).unwrap();
    println!("Message with 5-minute cache_control: {}", serde_json::to_string(&json).unwrap());
    let cache_control = json.get("cache_control").expect("cache_control field should exist");
    assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
    assert_eq!(cache_control.get("ttl").unwrap(), "5m");
 }
 #[test]
 fn test_message_with_one_hour_cache_control() {
    let msg = Message::with_cache_control(
        MessageRole::System,
        "System prompt".to_string(),
        CacheControl::one_hour(),
    );
    let json = serde_json::to_value(&msg).unwrap();
    println!("Message with 1-hour cache_control: {}", serde_json::to_string(&json).unwrap());
    let cache_control = json.get("cache_control").expect("cache_control field should exist");
    assert_eq!(cache_control.get("type").unwrap(), "ephemeral");
    assert_eq!(cache_control.get("ttl").unwrap(), "1h");
 }
 #[test]
 fn test_message_without_cache_control() {
    let msg = Message::new(MessageRole::User, "Hello".to_string());
    let json = serde_json::to_value(&msg).unwrap();
    println!("Message without cache_control: {}", serde_json::to_string(&json).unwrap());
    // cache_control field should not be present when not set
    assert!(!json.as_object().unwrap().contains_key("cache_control"));
 }
 #[test]
 fn test_cache_control_json_format_ephemeral() {
    let cache_control = CacheControl::ephemeral();
    let json_str = serde_json::to_string(&cache_control).unwrap();
    println!("Ephemeral JSON string: {}", json_str);
    // Verify exact JSON format
    assert_eq!(json_str, r#"{"type":"ephemeral"}"#);
 }
 #[test]
 fn test_cache_control_json_format_five_minute() {
    let cache_control = CacheControl::five_minute();
    let json_str = serde_json::to_string(&cache_control).unwrap();
    println!("5-minute JSON string: {}", json_str);
    // Verify exact JSON format
    assert_eq!(json_str, r#"{"type":"ephemeral","ttl":"5m"}"#);
 }
 #[test]
 fn test_cache_control_json_format_one_hour() {
    let cache_control = CacheControl::one_hour();
    let json_str = serde_json::to_string(&cache_control).unwrap();
    println!("1-hour JSON string: {}", json_str);
    // Verify exact JSON format
    assert_eq!(json_str, r#"{"type":"ephemeral","ttl":"1h"}"#);
 }
 #[test]
 fn test_deserialization_ephemeral() {
    let json_str = r#"{"type":"ephemeral"}"#;
    let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
    assert_eq!(cache_control.ttl, None);
 }
 #[test]
 fn test_deserialization_five_minute() {
    let json_str = r#"{"type":"ephemeral","ttl":"5m"}"#;
    let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
    assert_eq!(cache_control.ttl, Some("5m".to_string()));
 }
 #[test]
 fn test_deserialization_one_hour() {
    let json_str = r#"{"type":"ephemeral","ttl":"1h"}"#;
    let cache_control: CacheControl = serde_json::from_str(json_str).unwrap();
    assert_eq!(cache_control.ttl, Some("1h".to_string()));
 }
--- a/test-ai-requirements.sh
+++ b/test-ai-requirements.sh
@@ -0,0 +1,39 @@
 #!/bin/bash
 # Test script for AI-enhanced interactive requirements mode
 echo "Testing AI-enhanced interactive requirements mode..."
 echo ""
 # Create a test workspace
 TEST_WORKSPACE="/tmp/g3-test-interactive-$(date +%s)"
 mkdir -p "$TEST_WORKSPACE"
 echo "Test workspace: $TEST_WORKSPACE"
 echo ""
 # Create sample brief input
 BRIEF_INPUT="build a calculator cli in rust with basic operations"
 echo "Brief input:"
 echo "---"
 echo "$BRIEF_INPUT"
 echo "---"
 echo ""
 echo "This will:"
 echo "1. Send brief input to AI"
 echo "2. AI generates structured requirements.md"
 echo "3. Show enhanced requirements"
 echo "4. Prompt for confirmation (y/e/n)"
 echo ""
 echo "To test manually, run:"
 echo "cargo run -- --autonomous --interactive-requirements --workspace $TEST_WORKSPACE"
 echo ""
 echo "Then type: $BRIEF_INPUT"
 echo "Press Ctrl+D"
 echo "Review the AI-generated requirements"
 echo "Choose 'y' to proceed, 'e' to edit, or 'n' to cancel"
 echo ""
 echo "Test workspace will be at: $TEST_WORKSPACE"
--- a/test_anthropic_fix.md
+++ b/test_anthropic_fix.md
@@ -0,0 +1,70 @@
 # Anthropic max_tokens Error Fix - Test Plan
 ## Changes Made
 ### 1. Fixed Context Window Size Detection
 - **Problem**: Code used hardcoded 200k limit for Anthropic instead of configured max_tokens
 - **Fix**: Modified `determine_context_length()` to check configured max_tokens first before falling back to defaults
 - **Files**: `crates/g3-core/src/lib.rs` lines 923-945, 967-985
 ### 2. Added Thinning Before Summarization
 - **Problem**: Code attempted summarization even when context window was nearly full
 - **Fix**: Added logic to try thinning first when context usage is between 80-90%
 - **Files**: `crates/g3-core/src/lib.rs` lines 2415-2439
 ### 3. Added Capacity Checks Before Summarization
 - **Problem**: No validation that sufficient tokens remained for summarization
 - **Fix**: Added capacity checks for all provider types with helpful error messages
 - **Files**: `crates/g3-core/src/lib.rs` lines 2480-2520
 ### 4. Improved Error Messages
 - **Problem**: Generic errors when summarization failed
 - **Fix**: Specific error messages suggesting `/thinnify` and `/compact` commands
 - **Files**: Multiple locations in summarization logic
 ### 5. Dynamic Buffer Calculation
 - **Problem**: Fixed 5k buffer regardless of model size
 - **Fix**: Proportional buffer (2.5% of model limit, min 1k, max 10k)
 - **Files**: `crates/g3-core/src/lib.rs` line 2487
 ## Test Cases
 ### Test 1: Configured max_tokens Respected
 ```toml
 # In g3.toml
 [providers.anthropic]
 api_key = "your-key"
 model = "claude-3-5-sonnet-20241022"
 max_tokens = 50000  # Should use this instead of 200k default
 ```
 ### Test 2: Thinning Before Summarization
 - Fill context to 85% capacity
 - Verify thinning is attempted before summarization
 - Check that summarization is skipped if thinning resolves the issue
 ### Test 3: Capacity Error Handling
 - Fill context to 98% capacity
 - Verify helpful error message is shown instead of API error
 - Check that `/thinnify` and `/compact` commands are suggested
 ### Test 4: Provider-Specific Handling
 - Test with different providers (anthropic, databricks, embedded)
 - Verify each uses appropriate capacity checks and buffers
 ## Expected Behavior
 1. **No more max_tokens API errors** from Anthropic when context window is full
 2. **Automatic thinning** when approaching capacity (80-90%)
 3. **Clear error messages** with actionable suggestions when at capacity
 4. **Respect configured limits** instead of hardcoded defaults
 5. **Graceful degradation** with helpful user guidance
 ## Manual Testing Commands
 ```bash
 # Test with small max_tokens to trigger the issue quickly
 g3 --chat
 # Then paste large amounts of text to fill context window
 # Verify thinning and error handling work correctly
 ```
Author	SHA1	Message	Date
Jochen	3f21bdc7b2	fix tests	2025-11-19 12:42:37 +11:00
Jochen	9bffd8b1bf	cache_control removed from databricks	2025-11-19 12:15:49 +11:00
Jochen	bfee8040e9	regression tests added	2025-11-19 11:32:14 +11:00
Jochen	a150ba6a55	adds ttl to cache control	2025-11-18 23:23:49 +11:00
Jochen	296bf5a449	adds cache_control	2025-11-18 22:38:52 +11:00