Remove flock mode (superseded by studio)

Flock mode has been superseded by the studio multi-agent workspace manager.

Changes:
- Remove g3-ensembles crate entirely
- Remove --project, --flock-workspace, --segments, --flock-max-turns CLI flags
- Remove run_flock_mode() from autonomous.rs
- Remove flock-related tests from cli_integration_test.rs
- Update README.md, docs/architecture.md, analysis/memory.md
- Delete docs/FLOCK_MODE.md
2026-01-13 15:01:12 +05:30
parent 82c0165765
commit 9a3b03a41f
18 changed files with 9 additions and 3093 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1356,7 +1356,6 @@ dependencies = [
 "g3-computer-control",
 "g3-config",
 "g3-core",
- "g3-ensembles",
 "g3-planner",
 "g3-providers",
 "hex",
@@ -1461,23 +1460,6 @@ dependencies = [
 "walkdir",
 ]

-[[package]]
-name = "g3-ensembles"
-version = "0.1.0"
-dependencies = [
- "anyhow",
- "chrono",
- "clap",
- "g3-config",
- "g3-core",
- "serde",
- "serde_json",
- "tempfile",
- "tokio",
- "tracing",
- "uuid",
-]
-
 [[package]]
 name = "g3-execution"
 version = "0.1.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,7 +7,6 @@ members = [
    "crates/g3-config",
    "crates/g3-execution",
    "crates/g3-computer-control",
-    "crates/g3-ensembles",
    "crates/studio"
 ]
 resolver = "2"
--- a/README.md
+++ b/README.md
@@ -108,7 +108,6 @@ These commands give you fine-grained control over context management, allowing y
  - Window listing and identification
 - **Code Search**: Embedded tree-sitter for syntax-aware code search (Rust, Python, JavaScript, TypeScript, Go, Java, C, C++) - see [Code Search Guide](docs/CODE_SEARCH.md)
 - **Final Output**: Formatted result presentation
- **Flock Mode**: Parallel multi-agent development for large projects - see [Flock Mode Guide](docs/FLOCK_MODE.md)

 ### Provider Flexibility
 - Support for multiple LLM providers through a unified interface
@@ -335,7 +334,6 @@ Detailed documentation is available in the `docs/` directory:
 | [Providers Guide](docs/providers.md) | LLM provider setup and selection guide |
 | [Control Commands](docs/CONTROL_COMMANDS.md) | Interactive `/` commands for context management |
 | [Code Search](docs/CODE_SEARCH.md) | Tree-sitter code search query patterns |
-| [Flock Mode](docs/FLOCK_MODE.md) | Parallel multi-agent development |

 For AI agents working with this codebase, see [AGENTS.md](AGENTS.md).

--- a/analysis/memory.md
+++ b/analysis/memory.md
@@ -47,7 +47,7 @@
 - `crates/g3-cli/src/lib.rs` [837] - `agent.set_agent_mode()` enables agent-specific session tracking

 ### CLI Entry Points and Modes
- `crates/g3-cli/src/lib.rs` [0..140000] - `run()` main entry, `run_agent_mode()`, `run_flock_mode()`, `run_accumulative_mode()`, `run_autonomous()`, `run_interactive()`, `run_interactive_machine()`
+- `crates/g3-cli/src/lib.rs` [0..140000] - `run()` main entry, `run_agent_mode()`, `run_accumulative_mode()`, `run_autonomous()`, `run_interactive()`, `run_interactive_machine()`
 - `crates/g3-cli/src/lib.rs` - `execute_task()` (~line 1990), `execute_task_machine()` (~line 2262) - duplicated retry logic

 ### Retry Infrastructure
@@ -142,7 +142,7 @@ if s.chars().count() <= max_len { ... }
 ### CLI Module Structure (Post-Refactor)
 - `crates/g3-cli/src/lib.rs` [0..415] - Entry point, `run()`, mode dispatch, config loading
 - `crates/g3-cli/src/cli_args.rs` [0..133] - `Cli` struct with clap derive macros, argument parsing
- `crates/g3-cli/src/autonomous.rs` [0..785] - `run_autonomous()`, `run_flock_mode()`, coach-player feedback loop
+- `crates/g3-cli/src/autonomous.rs` [0..785] - `run_autonomous()`, coach-player feedback loop
 - `crates/g3-cli/src/agent_mode.rs` [0..284] - `run_agent_mode()` specialized agent execution
 - `crates/g3-cli/src/accumulative.rs` [0..343] - `run_accumulative_mode()` iterative requirements
 - `crates/g3-cli/src/interactive.rs` [0..851] - `run_interactive()`, `run_interactive_machine()`, REPL with `/` commands
--- a/crates/g3-cli/Cargo.toml
+++ b/crates/g3-cli/Cargo.toml
@@ -11,7 +11,6 @@ g3-planner = { path = "../g3-planner" }
 g3-computer-control = { path = "../g3-computer-control" }
 g3-providers = { path = "../g3-providers" }
 clap = { workspace = true }
-g3-ensembles = { path = "../g3-ensembles" }
 tokio = { workspace = true }
 anyhow = { workspace = true }
 tracing = { workspace = true }
--- a/crates/g3-cli/src/autonomous.rs
+++ b/crates/g3-cli/src/autonomous.rs
@@ -1,4 +1,4 @@
-//! Autonomous mode for G3 CLI - coach-player feedback loop and flock mode.
+//! Autonomous mode for G3 CLI - coach-player feedback loop.

 use anyhow::Result;
 use sha2::{Digest, Sha256};
@@ -694,37 +694,6 @@ fn print_panic_report(
    output.print(&"=".repeat(60));
 }

-/// Run flock mode - parallel multi-agent development
-pub async fn run_flock_mode(
-    project_dir: PathBuf,
-    flock_workspace: PathBuf,
-    num_segments: usize,
-    max_turns: usize,
-) -> Result<()> {
-    let output = SimpleOutput::new();
-
-    output.print("");
-    output.print("🦅 G3 FLOCK MODE - Parallel Multi-Agent Development");
-    output.print("");
-    output.print(&format!("📁 Project: {}", project_dir.display()));
-    output.print(&format!("🗂️  Workspace: {}", flock_workspace.display()));
-    output.print(&format!("🔢 Segments: {}", num_segments));
-    output.print(&format!("🔄 Max Turns per Segment: {}", max_turns));
-    output.print("");
-
-    let config = g3_ensembles::FlockConfig::new(project_dir, flock_workspace, num_segments)?
-        .with_max_turns(max_turns);
-
-    let mut flock = g3_ensembles::FlockMode::new(config)?;
-
-    match flock.run().await {
-        Ok(_) => output.print("\n✅ Flock mode completed successfully"),
-        Err(e) => output.print(&format!("\n❌ Flock mode failed: {}", e)),
-    }
-
-    Ok(())
-}
-
 fn print_final_report(
    output: &SimpleOutput,
    agent: &Agent<ConsoleUiWriter>,
--- a/crates/g3-cli/src/cli_args.rs
+++ b/crates/g3-cli/src/cli_args.rs
@@ -79,22 +79,6 @@ pub struct Cli {
    #[arg(long)]
    pub safari: bool,

-    /// Enable flock mode - parallel multi-agent development
-    #[arg(long, requires = "flock_workspace", requires = "segments")]
-    pub project: Option<PathBuf>,
-
-    /// Flock workspace directory (where segment copies will be created)
-    #[arg(long, requires = "project")]
-    pub flock_workspace: Option<PathBuf>,
-
-    /// Number of segments to partition work into (for flock mode)
-    #[arg(long, requires = "project")]
-    pub segments: Option<usize>,
-
-    /// Maximum turns per segment in flock mode (default: 5)
-    #[arg(long, default_value = "5")]
-    pub flock_max_turns: usize,
-
    /// Enable planning mode for requirements-driven development
    #[arg(long, conflicts_with_all = ["autonomous", "auto", "chat"])]
    pub planning: bool,
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -29,7 +29,7 @@ use clap::Parser;

 use accumulative::run_accumulative_mode;
 use agent_mode::run_agent_mode;
-use autonomous::{run_autonomous, run_flock_mode};
+use autonomous::run_autonomous;
 use interactive::run_interactive;
 use project_files::{combine_project_content, read_agents_config, read_project_memory, read_project_readme};
 use simple_output::SimpleOutput;
@@ -39,19 +39,6 @@ use utils::{initialize_logging, load_config_with_cli_overrides, setup_workspace_
 pub async fn run() -> Result<()> {
    let cli = Cli::parse();

-    // Check if flock mode is enabled
-    if let (Some(project_dir), Some(flock_workspace), Some(num_segments)) =
-        (&cli.project, &cli.flock_workspace, cli.segments)
-    {
-        return run_flock_mode(
-            project_dir.clone(),
-            flock_workspace.clone(),
-            num_segments,
-            cli.flock_max_turns,
-        )
-        .await;
-    }
-
    if cli.codebase_fast_start.is_some() {
        print!("codebase_fast_start is temporarily disabled.");
        std::process::exit(1);
--- a/crates/g3-cli/tests/cli_integration_test.rs
+++ b/crates/g3-cli/tests/cli_integration_test.rs
@@ -173,38 +173,6 @@ fn test_planning_conflicts_with_autonomous() {
    );
 }

-// =============================================================================
-// Test: Flock mode requires all related flags
-// =============================================================================
-
-#[test]
-fn test_flock_mode_requires_workspace() {
-    let output = Command::new(get_g3_binary())
-        .args(["--project", "/tmp/test"])
-        .output()
-        .expect("Failed to execute g3 with incomplete flock args");
-
-    // Should fail because --flock-workspace and --segments are required
-    assert!(
-        !output.status.success(),
-        "--project without --flock-workspace should fail"
-    );
-}
-
-#[test]
-fn test_flock_mode_requires_segments() {
-    let output = Command::new(get_g3_binary())
-        .args(["--project", "/tmp/test", "--flock-workspace", "/tmp/ws"])
-        .output()
-        .expect("Failed to execute g3 with incomplete flock args");
-
-    // Should fail because --segments is required
-    assert!(
-        !output.status.success(),
-        "--project without --segments should fail"
-    );
-}
-
 // =============================================================================
 // Test: Workspace directory option is accepted
 // =============================================================================
--- a/crates/g3-ensembles/Cargo.toml
+++ b/crates/g3-ensembles/Cargo.toml
@@ -1,20 +0,0 @@
-[package]
-name = "g3-ensembles"
-version = "0.1.0"
-edition = "2021"
-description = "Multi-agent ensemble functionality for G3"
-
-[dependencies]
-g3-core = { path = "../g3-core" }
-g3-config = { path = "../g3-config" }
-clap = { workspace = true }
-tokio = { workspace = true }
-anyhow = { workspace = true }
-tracing = { workspace = true }
-serde = { workspace = true }
-serde_json = { workspace = true }
-chrono = { version = "0.4", features = ["serde"] }
-uuid = { workspace = true }
-
-[dev-dependencies]
-tempfile = "3.8"
--- a/crates/g3-ensembles/TESTING.md
+++ b/crates/g3-ensembles/TESTING.md
@@ -1,422 +0,0 @@
-# G3 Ensembles Testing Documentation
-
-This document describes the comprehensive test suite for the g3-ensembles crate (Flock Mode).
-
-## Test Coverage
-
-### Unit Tests (`src/tests.rs`)
-
-Unit tests cover the core data structures and logic:
-
-#### Status Module Tests
-
-1. **`test_segment_state_display`**
-   - Verifies that `SegmentState` enum displays correctly with emojis
-   - Tests all states: Pending, Running, Completed, Failed, Cancelled
-
-2. **`test_flock_status_creation`**
-   - Tests creation of `FlockStatus` with correct initial values
-   - Verifies session ID, segment count, and zero metrics
-
-3. **`test_segment_status_update`**
-   - Tests updating a single segment's status
-   - Verifies metrics are correctly aggregated
-
-4. **`test_multiple_segment_updates`**
-   - Tests updating multiple segments
-   - Verifies aggregate metrics (tokens, tool calls, errors) are summed correctly
-
-5. **`test_is_complete`**
-   - Tests the completion detection logic
-   - Verifies that flock is only complete when all segments are in terminal states
-   - Tests various scenarios: no segments, partial completion, full completion
-
-6. **`test_count_by_state`**
-   - Tests counting segments by their state
-   - Verifies correct counts for each state type
-
-7. **`test_status_serialization`**
-   - Tests JSON serialization and deserialization
-   - Verifies round-trip conversion preserves all data
-
-8. **`test_report_generation`**
-   - Tests the comprehensive report generation
-   - Verifies all expected sections are present
-   - Checks that metrics are correctly displayed
-
-**Run unit tests:**
-```bash
-cargo test -p g3-ensembles --lib
-```
-
-### Integration Tests (`tests/integration_tests.rs`)
-
-Integration tests verify end-to-end functionality with real file system and git operations:
-
-#### Configuration Tests
-
-1. **`test_flock_config_validation`**
-   - Tests validation of project directory requirements
-   - Verifies error messages for:
-     - Non-existent directory
-     - Non-git repository
-     - Missing flock-requirements.md
-   - Verifies successful creation with valid inputs
-
-2. **`test_flock_config_builder`**
-   - Tests the builder pattern for `FlockConfig`
-   - Verifies `with_max_turns()` and `with_g3_binary()` methods
-
-3. **`test_workspace_creation`**
-   - Tests creation of `FlockMode` instance
-   - Verifies project structure is valid
-
-#### Git Operations Tests
-
-4. **`test_git_clone_functionality`**
-   - Tests git cloning of project repository
-   - Verifies cloned repository structure:
-     - `.git` directory exists
-     - All files are present
-     - Git history is preserved
-
-5. **`test_multiple_segment_clones`**
-   - Tests cloning multiple segments (2 segments)
-   - Verifies each segment is independent
-   - Tests that modifications in one segment don't affect others
-
-6. **`test_git_repo_independence`**
-   - Comprehensive test of segment independence
-   - Creates commits in different segments
-   - Verifies git histories diverge correctly
-   - Ensures files in one segment don't appear in others
-
-#### Segment Management Tests
-
-7. **`test_segment_requirements_creation`**
-   - Tests creation of `segment-requirements.md` files
-   - Verifies content is written correctly
-
-8. **`test_requirements_file_content`**
-   - Tests the structure of flock-requirements.md
-   - Verifies content contains expected sections
-
-#### Status File Tests
-
-9. **`test_status_file_operations`**
-   - Tests saving and loading `flock-status.json`
-   - Verifies JSON serialization to file
-   - Tests deserialization from file
-
-#### JSON Processing Tests
-
-10. **`test_json_extraction`**
-    - Tests extraction of JSON arrays from text output
-    - Verifies handling of various formats:
-      - Plain JSON
-      - JSON in markdown code blocks
-      - JSON with surrounding text
-      - Invalid input (no JSON)
-
-11. **`test_partition_json_parsing`**
-    - Tests parsing of partition JSON structure
-    - Verifies module names, requirements, and dependencies are extracted correctly
-
-**Run integration tests:**
-```bash
-cargo test -p g3-ensembles --test integration_tests
-```
-
-### End-to-End Test Script (`scripts/test-flock-mode.sh`)
-
-A comprehensive bash script that tests the complete flock mode workflow:
-
-#### Test Scenarios
-
-1. **Project Creation**
-   - Creates a temporary test project
-   - Initializes git repository
-   - Creates flock-requirements.md with realistic content
-   - Makes initial commit
-
-2. **Project Structure Validation**
-   - Verifies `.git` directory exists
-   - Verifies `flock-requirements.md` exists
-
-3. **Git Operations**
-   - Tests cloning project to segment directories
-   - Verifies cloned repositories are valid
-   - Tests git log to ensure history is preserved
-
-4. **Segment Independence**
-   - Creates two segments
-   - Modifies one segment
-   - Verifies other segment is unaffected
-
-5. **Segment Requirements**
-   - Creates `segment-requirements.md` in segments
-   - Verifies content is written correctly
-
-6. **Status File Operations**
-   - Creates `flock-status.json`
-   - Validates JSON structure (if `jq` is available)
-
-**Run end-to-end test:**
-```bash
-./scripts/test-flock-mode.sh
-```
-
-## Test Results
-
-### Current Status
-
-✅ **All tests passing**
-
- **Unit tests**: 8/8 passed
- **Integration tests**: 11/11 passed
- **End-to-end test**: All scenarios passed
-
-### Test Execution Time
-
- Unit tests: ~0.01s
- Integration tests: ~0.35s (includes git operations)
- End-to-end test: ~1-2s (includes cleanup)
-
-## Running All Tests
-
-### Run all tests for g3-ensembles:
-```bash
-cargo test -p g3-ensembles
-```
-
-### Run with verbose output:
-```bash
-cargo test -p g3-ensembles -- --nocapture
-```
-
-### Run specific test:
-```bash
-cargo test -p g3-ensembles test_git_clone_functionality
-```
-
-### Run tests with coverage (requires cargo-tarpaulin):
-```bash
-cargo tarpaulin -p g3-ensembles
-```
-
-## Test Helpers
-
-### `create_test_project(name: &str) -> TempDir`
-
-Helper function in integration tests that creates a complete test project:
- Initializes git repository
- Configures git user
- Creates flock-requirements.md with two modules
- Creates README.md
- Makes initial commit
- Returns `TempDir` that auto-cleans on drop
-
-**Usage:**
-```rust
-let project_dir = create_test_project("my-test");
-// Use project_dir.path() to access the directory
-// Automatically cleaned up when project_dir goes out of scope
-```
-
-### `extract_json_array(output: &str) -> Option<String>`
-
-Helper function that extracts JSON arrays from text output:
- Finds first `[` and last `]`
- Returns content between them
- Returns `None` if no valid JSON array found
-
-## Test Data
-
-### Sample Requirements
-
-The test suite uses realistic requirements for a calculator project:
-
-**Module A: Core Library**
- Arithmetic operations (add, sub, mul, div)
- Error handling for division by zero
- Unit tests
- Documentation
-
-**Module B: CLI Application**
- Command-line interface using clap
- Subcommands for each operation
- User-friendly output
- Error handling
-
-This structure tests the partitioning logic with:
- Clear module boundaries
- Dependency relationship (CLI depends on Core)
- Realistic implementation requirements
-
-## Continuous Integration
-
-To integrate these tests into CI/CD:
-
-### GitHub Actions Example
-
-```yaml
-name: Test G3 Ensembles
-
-on: [push, pull_request]
-
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-      - name: Run unit tests
-        run: cargo test -p g3-ensembles --lib
-      - name: Run integration tests
-        run: cargo test -p g3-ensembles --test integration_tests
-      - name: Run end-to-end test
-        run: ./scripts/test-flock-mode.sh
-```
-
-## Test Coverage Goals
-
-### Current Coverage
-
- ✅ Status data structures: 100%
- ✅ Configuration validation: 100%
- ✅ Git operations: 100%
- ✅ Segment independence: 100%
- ✅ JSON processing: 100%
- ⚠️  Full flock execution: Requires LLM access (tested manually)
-
-### Future Test Additions
-
-1. **Mock LLM Tests**
-   - Mock the partitioning agent response
-   - Test full flock workflow without real LLM calls
-
-2. **Performance Tests**
-   - Test with large numbers of segments (10+)
-   - Measure memory usage
-   - Test concurrent segment execution
-
-3. **Error Handling Tests**
-   - Test behavior when git operations fail
-   - Test behavior when segments fail
-   - Test recovery scenarios
-
-4. **Edge Cases**
-   - Empty requirements file
-   - Single segment (degenerate case)
-   - Very large requirements file
-   - Binary files in project
-
-## Debugging Tests
-
-### Enable debug logging:
-```bash
-RUST_LOG=debug cargo test -p g3-ensembles -- --nocapture
-```
-
-### Keep test artifacts:
-```bash
-# Modify test to not cleanup
-# Or inspect TEST_DIR before cleanup in end-to-end test
-export TEST_DIR=/tmp/my-test
-./scripts/test-flock-mode.sh
-ls -la $TEST_DIR
-```
-
-### Run single test with backtrace:
-```bash
-RUST_BACKTRACE=1 cargo test -p g3-ensembles test_git_clone_functionality -- --nocapture
-```
-
-## Contributing Tests
-
-When adding new features to g3-ensembles:
-
-1. **Add unit tests** for new data structures and logic
-2. **Add integration tests** for new file/git operations
-3. **Update end-to-end test** if workflow changes
-4. **Document tests** in this file
-5. **Ensure all tests pass** before submitting PR
-
-### Test Naming Convention
-
- Unit tests: `test_<functionality>`
- Integration tests: `test_<feature>_<scenario>`
- Use descriptive names that explain what is being tested
-
-### Test Structure
-
-```rust
-#[test]
-fn test_feature_name() {
-    // Arrange: Set up test data
-    let data = create_test_data();
-    
-    // Act: Perform the operation
-    let result = perform_operation(data);
-    
-    // Assert: Verify the result
-    assert_eq!(result, expected_value);
-    assert!(result.is_ok());
-}
-```
-
-## Troubleshooting
-
-### Tests fail with "git not found"
-
-**Solution**: Install git:
-```bash
-# macOS
-brew install git
-
-# Ubuntu/Debian
-sudo apt-get install git
-
-# Windows
-choco install git
-```
-
-### Tests fail with permission errors
-
-**Solution**: Ensure test directories are writable:
-```bash
-chmod -R u+w /tmp
-```
-
-### Integration tests are slow
-
-**Cause**: Git operations and file I/O take time
-
-**Solution**: Run only unit tests for quick feedback:
-```bash
-cargo test -p g3-ensembles --lib
-```
-
-### Test artifacts not cleaned up
-
-**Cause**: Test panicked before cleanup
-
-**Solution**: Manually clean temp directories:
-```bash
-rm -rf /tmp/tmp.*
-```
-
-## Summary
-
-The g3-ensembles test suite provides comprehensive coverage of:
- ✅ Core data structures and logic
- ✅ Configuration validation
- ✅ Git repository operations
- ✅ Segment independence
- ✅ Status tracking and reporting
- ✅ JSON processing
- ✅ End-to-end workflow
-
-All tests are automated, fast, and reliable. The test suite ensures that flock mode works correctly across different scenarios and edge cases.
--- a/crates/g3-ensembles/src/flock.rs
+++ b/crates/g3-ensembles/src/flock.rs
--- a/crates/g3-ensembles/src/lib.rs
+++ b/crates/g3-ensembles/src/lib.rs
@@ -1,12 +0,0 @@
-//! G3 Ensembles - Multi-agent ensemble functionality
-//!
-//! This crate provides functionality for running multiple G3 agents in coordination,
-//! enabling parallel development across different architectural modules.
-
-pub mod flock;
-pub mod status;
-mod tests;
-
-/// Re-export main types for convenience
-pub use flock::{FlockConfig, FlockMode};
-pub use status::{FlockStatus, SegmentStatus};
--- a/crates/g3-ensembles/src/status.rs
+++ b/crates/g3-ensembles/src/status.rs
@@ -1,270 +0,0 @@
-//! Status tracking for flock mode
-
-use chrono::{DateTime, Utc};
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use std::path::PathBuf;
-
-/// Status of an individual segment worker
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SegmentStatus {
-    /// Segment number
-    pub segment_id: usize,
-
-    /// Segment workspace directory
-    pub workspace: PathBuf,
-
-    /// Current state of the segment
-    pub state: SegmentState,
-
-    /// Start time
-    pub started_at: DateTime<Utc>,
-
-    /// Completion time (if finished)
-    pub completed_at: Option<DateTime<Utc>>,
-
-    /// Total tokens used
-    pub tokens_used: u64,
-
-    /// Number of tool calls made
-    pub tool_calls: u64,
-
-    /// Number of errors encountered
-    pub errors: u64,
-
-    /// Current turn number (for autonomous mode)
-    pub current_turn: usize,
-
-    /// Maximum turns allowed
-    pub max_turns: usize,
-
-    /// Last status message
-    pub last_message: Option<String>,
-
-    /// Error message (if failed)
-    pub error_message: Option<String>,
-}
-
-/// State of a segment worker
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub enum SegmentState {
-    /// Waiting to start
-    Pending,
-
-    /// Currently running
-    Running,
-
-    /// Completed successfully
-    Completed,
-
-    /// Failed with error
-    Failed,
-
-    /// Cancelled by user
-    Cancelled,
-}
-
-impl std::fmt::Display for SegmentState {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            SegmentState::Pending => write!(f, "⏳ Pending"),
-            SegmentState::Running => write!(f, "🔄 Running"),
-            SegmentState::Completed => write!(f, "✅ Completed"),
-            SegmentState::Failed => write!(f, "❌ Failed"),
-            SegmentState::Cancelled => write!(f, "⚠️  Cancelled"),
-        }
-    }
-}
-
-/// Overall flock status
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct FlockStatus {
-    /// Flock session ID
-    pub session_id: String,
-
-    /// Project directory
-    pub project_dir: PathBuf,
-
-    /// Flock workspace directory
-    pub flock_workspace: PathBuf,
-
-    /// Number of segments
-    pub num_segments: usize,
-
-    /// Start time
-    pub started_at: DateTime<Utc>,
-
-    /// Completion time (if finished)
-    pub completed_at: Option<DateTime<Utc>>,
-
-    /// Status of each segment
-    pub segments: HashMap<usize, SegmentStatus>,
-
-    /// Total tokens used across all segments
-    pub total_tokens: u64,
-
-    /// Total tool calls across all segments
-    pub total_tool_calls: u64,
-
-    /// Total errors across all segments
-    pub total_errors: u64,
-}
-
-impl FlockStatus {
-    /// Create a new flock status
-    pub fn new(
-        session_id: String,
-        project_dir: PathBuf,
-        flock_workspace: PathBuf,
-        num_segments: usize,
-    ) -> Self {
-        Self {
-            session_id,
-            project_dir,
-            flock_workspace,
-            num_segments,
-            started_at: Utc::now(),
-            completed_at: None,
-            segments: HashMap::new(),
-            total_tokens: 0,
-            total_tool_calls: 0,
-            total_errors: 0,
-        }
-    }
-
-    /// Update segment status
-    pub fn update_segment(&mut self, segment_id: usize, status: SegmentStatus) {
-        self.segments.insert(segment_id, status);
-        self.recalculate_totals();
-    }
-
-    /// Recalculate total metrics
-    fn recalculate_totals(&mut self) {
-        self.total_tokens = self.segments.values().map(|s| s.tokens_used).sum();
-        self.total_tool_calls = self.segments.values().map(|s| s.tool_calls).sum();
-        self.total_errors = self.segments.values().map(|s| s.errors).sum();
-    }
-
-    /// Check if all segments are complete
-    pub fn is_complete(&self) -> bool {
-        self.segments.len() == self.num_segments
-            && self.segments.values().all(|s| {
-                matches!(
-                    s.state,
-                    SegmentState::Completed | SegmentState::Failed | SegmentState::Cancelled
-                )
-            })
-    }
-
-    /// Get count of segments by state
-    pub fn count_by_state(&self, state: SegmentState) -> usize {
-        self.segments.values().filter(|s| s.state == state).count()
-    }
-
-    /// Save status to file
-    pub fn save_to_file(&self, path: &PathBuf) -> anyhow::Result<()> {
-        let json = serde_json::to_string_pretty(self)?;
-        std::fs::write(path, json)?;
-        Ok(())
-    }
-
-    /// Load status from file
-    pub fn load_from_file(path: &PathBuf) -> anyhow::Result<Self> {
-        let json = std::fs::read_to_string(path)?;
-        let status = serde_json::from_str(&json)?;
-        Ok(status)
-    }
-
-    /// Generate a summary report
-    pub fn generate_report(&self) -> String {
-        let mut report = String::new();
-
-        report.push_str(&format!("\n{}", "=".repeat(80)));
-        report.push_str(&format!("\n📊 FLOCK MODE SESSION REPORT"));
-        report.push_str(&format!("\n{}", "=".repeat(80)));
-
-        report.push_str(&format!("\n\n🆔 Session ID: {}", self.session_id));
-        report.push_str(&format!("\n📁 Project: {}", self.project_dir.display()));
-        report.push_str(&format!(
-            "\n🗂️  Workspace: {}",
-            self.flock_workspace.display()
-        ));
-        report.push_str(&format!("\n🔢 Segments: {}", self.num_segments));
-
-        let duration = if let Some(completed) = self.completed_at {
-            completed.signed_duration_since(self.started_at)
-        } else {
-            Utc::now().signed_duration_since(self.started_at)
-        };
-
-        report.push_str(&format!(
-            "\n⏱️  Duration: {:.2}s",
-            duration.num_milliseconds() as f64 / 1000.0
-        ));
-
-        // Segment status summary
-        report.push_str(&format!("\n\n📈 Segment Status:"));
-        report.push_str(&format!(
-            "\n   • Completed: {}",
-            self.count_by_state(SegmentState::Completed)
-        ));
-        report.push_str(&format!(
-            "\n   • Running: {}",
-            self.count_by_state(SegmentState::Running)
-        ));
-        report.push_str(&format!(
-            "\n   • Failed: {}",
-            self.count_by_state(SegmentState::Failed)
-        ));
-        report.push_str(&format!(
-            "\n   • Pending: {}",
-            self.count_by_state(SegmentState::Pending)
-        ));
-        report.push_str(&format!(
-            "\n   • Cancelled: {}",
-            self.count_by_state(SegmentState::Cancelled)
-        ));
-
-        // Metrics
-        report.push_str(&format!("\n\n📊 Aggregate Metrics:"));
-        report.push_str(&format!("\n   • Total Tokens: {}", self.total_tokens));
-        report.push_str(&format!(
-            "\n   • Total Tool Calls: {}",
-            self.total_tool_calls
-        ));
-        report.push_str(&format!("\n   • Total Errors: {}", self.total_errors));
-
-        // Per-segment details
-        report.push_str(&format!("\n\n🔍 Segment Details:"));
-        let mut segments: Vec<_> = self.segments.iter().collect();
-        segments.sort_by_key(|(id, _)| *id);
-
-        for (id, segment) in segments {
-            report.push_str(&format!("\n\n   Segment {}:", id));
-            report.push_str(&format!("\n      Status: {}", segment.state));
-            report.push_str(&format!(
-                "\n      Workspace: {}",
-                segment.workspace.display()
-            ));
-            report.push_str(&format!("\n      Tokens: {}", segment.tokens_used));
-            report.push_str(&format!("\n      Tool Calls: {}", segment.tool_calls));
-            report.push_str(&format!("\n      Errors: {}", segment.errors));
-            report.push_str(&format!(
-                "\n      Turn: {}/{}",
-                segment.current_turn, segment.max_turns
-            ));
-
-            if let Some(ref msg) = segment.last_message {
-                report.push_str(&format!("\n      Last Message: {}", msg));
-            }
-
-            if let Some(ref err) = segment.error_message {
-                report.push_str(&format!("\n      Error: {}", err));
-            }
-        }
-
-        report.push_str(&format!("\n\n{}", "=".repeat(80)));
-
-        report
-    }
-}
--- a/crates/g3-ensembles/src/tests.rs
+++ b/crates/g3-ensembles/src/tests.rs
@@ -1,330 +0,0 @@
-//! Unit tests for g3-ensembles
-
-#[cfg(test)]
-mod tests {
-    use crate::status::{FlockStatus, SegmentState, SegmentStatus};
-    use chrono::Utc;
-    use std::path::PathBuf;
-
-    #[test]
-    fn test_segment_state_display() {
-        assert_eq!(format!("{}", SegmentState::Pending), "⏳ Pending");
-        assert_eq!(format!("{}", SegmentState::Running), "🔄 Running");
-        assert_eq!(format!("{}", SegmentState::Completed), "✅ Completed");
-        assert_eq!(format!("{}", SegmentState::Failed), "❌ Failed");
-        assert_eq!(format!("{}", SegmentState::Cancelled), "⚠️  Cancelled");
-    }
-
-    #[test]
-    fn test_flock_status_creation() {
-        let status = FlockStatus::new(
-            "test-session".to_string(),
-            PathBuf::from("/test/project"),
-            PathBuf::from("/test/workspace"),
-            3,
-        );
-
-        assert_eq!(status.session_id, "test-session");
-        assert_eq!(status.num_segments, 3);
-        assert_eq!(status.segments.len(), 0);
-        assert_eq!(status.total_tokens, 0);
-        assert_eq!(status.total_tool_calls, 0);
-        assert_eq!(status.total_errors, 0);
-        assert!(status.completed_at.is_none());
-    }
-
-    #[test]
-    fn test_segment_status_update() {
-        let mut status = FlockStatus::new(
-            "test-session".to_string(),
-            PathBuf::from("/test/project"),
-            PathBuf::from("/test/workspace"),
-            2,
-        );
-
-        let segment1 = SegmentStatus {
-            segment_id: 1,
-            workspace: PathBuf::from("/test/workspace/segment-1"),
-            state: SegmentState::Completed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 1000,
-            tool_calls: 50,
-            errors: 2,
-            current_turn: 5,
-            max_turns: 10,
-            last_message: Some("Done".to_string()),
-            error_message: None,
-        };
-
-        status.update_segment(1, segment1);
-
-        assert_eq!(status.segments.len(), 1);
-        assert_eq!(status.total_tokens, 1000);
-        assert_eq!(status.total_tool_calls, 50);
-        assert_eq!(status.total_errors, 2);
-    }
-
-    #[test]
-    fn test_multiple_segment_updates() {
-        let mut status = FlockStatus::new(
-            "test-session".to_string(),
-            PathBuf::from("/test/project"),
-            PathBuf::from("/test/workspace"),
-            2,
-        );
-
-        let segment1 = SegmentStatus {
-            segment_id: 1,
-            workspace: PathBuf::from("/test/workspace/segment-1"),
-            state: SegmentState::Completed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 1000,
-            tool_calls: 50,
-            errors: 2,
-            current_turn: 5,
-            max_turns: 10,
-            last_message: Some("Done".to_string()),
-            error_message: None,
-        };
-
-        let segment2 = SegmentStatus {
-            segment_id: 2,
-            workspace: PathBuf::from("/test/workspace/segment-2"),
-            state: SegmentState::Failed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 500,
-            tool_calls: 25,
-            errors: 5,
-            current_turn: 3,
-            max_turns: 10,
-            last_message: Some("Error".to_string()),
-            error_message: Some("Test error".to_string()),
-        };
-
-        status.update_segment(1, segment1);
-        status.update_segment(2, segment2);
-
-        assert_eq!(status.segments.len(), 2);
-        assert_eq!(status.total_tokens, 1500);
-        assert_eq!(status.total_tool_calls, 75);
-        assert_eq!(status.total_errors, 7);
-    }
-
-    #[test]
-    fn test_is_complete() {
-        let mut status = FlockStatus::new(
-            "test-session".to_string(),
-            PathBuf::from("/test/project"),
-            PathBuf::from("/test/workspace"),
-            2,
-        );
-
-        // Not complete - no segments
-        assert!(!status.is_complete());
-
-        // Add one completed segment
-        let segment1 = SegmentStatus {
-            segment_id: 1,
-            workspace: PathBuf::from("/test/workspace/segment-1"),
-            state: SegmentState::Completed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 1000,
-            tool_calls: 50,
-            errors: 0,
-            current_turn: 5,
-            max_turns: 10,
-            last_message: None,
-            error_message: None,
-        };
-        status.update_segment(1, segment1);
-
-        // Still not complete - only 1 of 2 segments
-        assert!(!status.is_complete());
-
-        // Add second segment (running)
-        let segment2 = SegmentStatus {
-            segment_id: 2,
-            workspace: PathBuf::from("/test/workspace/segment-2"),
-            state: SegmentState::Running,
-            started_at: Utc::now(),
-            completed_at: None,
-            tokens_used: 500,
-            tool_calls: 25,
-            errors: 0,
-            current_turn: 3,
-            max_turns: 10,
-            last_message: None,
-            error_message: None,
-        };
-        status.update_segment(2, segment2);
-
-        // Still not complete - segment 2 is running
-        assert!(!status.is_complete());
-
-        // Update segment 2 to completed
-        let segment2_done = SegmentStatus {
-            segment_id: 2,
-            workspace: PathBuf::from("/test/workspace/segment-2"),
-            state: SegmentState::Completed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 500,
-            tool_calls: 25,
-            errors: 0,
-            current_turn: 5,
-            max_turns: 10,
-            last_message: None,
-            error_message: None,
-        };
-        status.update_segment(2, segment2_done);
-
-        // Now complete
-        assert!(status.is_complete());
-    }
-
-    #[test]
-    fn test_count_by_state() {
-        let mut status = FlockStatus::new(
-            "test-session".to_string(),
-            PathBuf::from("/test/project"),
-            PathBuf::from("/test/workspace"),
-            3,
-        );
-
-        let segment1 = SegmentStatus {
-            segment_id: 1,
-            workspace: PathBuf::from("/test/workspace/segment-1"),
-            state: SegmentState::Completed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 1000,
-            tool_calls: 50,
-            errors: 0,
-            current_turn: 5,
-            max_turns: 10,
-            last_message: None,
-            error_message: None,
-        };
-
-        let segment2 = SegmentStatus {
-            segment_id: 2,
-            workspace: PathBuf::from("/test/workspace/segment-2"),
-            state: SegmentState::Failed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 500,
-            tool_calls: 25,
-            errors: 5,
-            current_turn: 3,
-            max_turns: 10,
-            last_message: None,
-            error_message: Some("Error".to_string()),
-        };
-
-        let segment3 = SegmentStatus {
-            segment_id: 3,
-            workspace: PathBuf::from("/test/workspace/segment-3"),
-            state: SegmentState::Completed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 800,
-            tool_calls: 40,
-            errors: 1,
-            current_turn: 4,
-            max_turns: 10,
-            last_message: None,
-            error_message: None,
-        };
-
-        status.update_segment(1, segment1);
-        status.update_segment(2, segment2);
-        status.update_segment(3, segment3);
-
-        assert_eq!(status.count_by_state(SegmentState::Completed), 2);
-        assert_eq!(status.count_by_state(SegmentState::Failed), 1);
-        assert_eq!(status.count_by_state(SegmentState::Running), 0);
-        assert_eq!(status.count_by_state(SegmentState::Pending), 0);
-    }
-
-    #[test]
-    fn test_status_serialization() {
-        let mut status = FlockStatus::new(
-            "test-session".to_string(),
-            PathBuf::from("/test/project"),
-            PathBuf::from("/test/workspace"),
-            1,
-        );
-
-        let segment1 = SegmentStatus {
-            segment_id: 1,
-            workspace: PathBuf::from("/test/workspace/segment-1"),
-            state: SegmentState::Completed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 1000,
-            tool_calls: 50,
-            errors: 2,
-            current_turn: 5,
-            max_turns: 10,
-            last_message: Some("Done".to_string()),
-            error_message: None,
-        };
-
-        status.update_segment(1, segment1);
-
-        // Serialize to JSON
-        let json = serde_json::to_string(&status).expect("Failed to serialize");
-        assert!(json.contains("test-session"));
-        assert!(json.contains("segment_id"));
-        assert!(json.contains("Completed"));
-
-        // Deserialize back
-        let deserialized: FlockStatus = serde_json::from_str(&json).expect("Failed to deserialize");
-        assert_eq!(deserialized.session_id, "test-session");
-        assert_eq!(deserialized.segments.len(), 1);
-        assert_eq!(deserialized.total_tokens, 1000);
-    }
-
-    #[test]
-    fn test_report_generation() {
-        let mut status = FlockStatus::new(
-            "test-session".to_string(),
-            PathBuf::from("/test/project"),
-            PathBuf::from("/test/workspace"),
-            2,
-        );
-
-        let segment1 = SegmentStatus {
-            segment_id: 1,
-            workspace: PathBuf::from("/test/workspace/segment-1"),
-            state: SegmentState::Completed,
-            started_at: Utc::now(),
-            completed_at: Some(Utc::now()),
-            tokens_used: 1000,
-            tool_calls: 50,
-            errors: 2,
-            current_turn: 5,
-            max_turns: 10,
-            last_message: Some("Done".to_string()),
-            error_message: None,
-        };
-
-        status.update_segment(1, segment1);
-
-        let report = status.generate_report();
-
-        // Check that report contains expected sections
-        assert!(report.contains("FLOCK MODE SESSION REPORT"));
-        assert!(report.contains("test-session"));
-        assert!(report.contains("Segment Status:"));
-        assert!(report.contains("Aggregate Metrics:"));
-        assert!(report.contains("Segment Details:"));
-        assert!(report.contains("Total Tokens: 1000"));
-        assert!(report.contains("Total Tool Calls: 50"));
-        assert!(report.contains("Total Errors: 2"));
-    }
-}
--- a/crates/g3-ensembles/tests/integration_tests.rs
+++ b/crates/g3-ensembles/tests/integration_tests.rs
@@ -1,492 +0,0 @@
-//! Integration tests for g3-ensembles flock mode
-
-use g3_ensembles::{FlockConfig, FlockMode};
-use std::fs;
-use std::path::PathBuf;
-use std::process::Command;
-use tempfile::TempDir;
-
-/// Create a test config file with the new format
-fn create_test_config(temp_dir: &TempDir) -> PathBuf {
-    let config_path = temp_dir.path().join(".g3.toml");
-    let config_content = r#"
-[providers]
-default_provider = "databricks.default"
-
-[providers.databricks.default]
-host = "https://test.databricks.com"
-token = "test-token"
-model = "test-model"
-
-[agent]
-fallback_default_max_tokens = 8192
-enable_streaming = true
-timeout_seconds = 60
-auto_compact = true
-max_retry_attempts = 3
-autonomous_max_retry_attempts = 6
-
-[computer_control]
-enabled = false
-require_confirmation = true
-max_actions_per_second = 10
-
-[webdriver]
-enabled = false
-safari_port = 4444
-
-[macax]
-enabled = false
-"#;
-    fs::write(&config_path, config_content).expect("Failed to write config");
-    config_path
-}
-
-/// Helper to create a test git repository with flock-requirements.md
-fn create_test_project(name: &str) -> TempDir {
-    let temp_dir = TempDir::new().expect("Failed to create temp dir");
-    let project_path = temp_dir.path();
-
-    // Initialize git repo
-    let output = Command::new("git")
-        .arg("init")
-        .current_dir(project_path)
-        .output()
-        .expect("Failed to run git init");
-    assert!(output.status.success(), "git init failed");
-
-    // Configure git user (required for commits)
-    Command::new("git")
-        .args(["config", "user.email", "test@example.com"])
-        .current_dir(project_path)
-        .output()
-        .expect("Failed to configure git email");
-
-    Command::new("git")
-        .args(["config", "user.name", "Test User"])
-        .current_dir(project_path)
-        .output()
-        .expect("Failed to configure git name");
-
-    // Create flock-requirements.md
-    let requirements = format!(
-        "# {} Test Project\n\n\
-        ## Module A\n\
-        - Create a simple Rust library\n\
-        - Add a function that returns \"Hello from Module A\"\n\
-        - Write a unit test for the function\n\n\
-        ## Module B\n\
-        - Create another Rust library\n\
-        - Add a function that returns \"Hello from Module B\"\n\
-        - Write a unit test for the function\n",
-        name
-    );
-
-    fs::write(project_path.join("flock-requirements.md"), requirements)
-        .expect("Failed to write requirements");
-
-    // Create a simple README
-    fs::write(project_path.join("README.md"), format!("# {}\n", name))
-        .expect("Failed to write README");
-
-    // Create initial commit
-    Command::new("git")
-        .args(["add", "."])
-        .current_dir(project_path)
-        .output()
-        .expect("Failed to git add");
-
-    let output = Command::new("git")
-        .args(["commit", "-m", "Initial commit"])
-        .current_dir(project_path)
-        .output()
-        .expect("Failed to git commit");
-    assert!(output.status.success(), "git commit failed");
-
-    temp_dir
-}
-
-#[test]
-fn test_flock_config_validation() {
-    let temp_dir = TempDir::new().unwrap();
-    let config_path = create_test_config(&temp_dir);
-    let project_path = temp_dir.path().to_path_buf();
-    let workspace_path = temp_dir.path().join("workspace");
-
-    // Should fail - not a git repo
-    let result = FlockConfig::new_with_config(
-        project_path.clone(), workspace_path.clone(), 2,
-        Some(config_path.to_str().unwrap()));
-    assert!(result.is_err());
-    assert!(result
-        .unwrap_err()
-        .to_string()
-        .contains("must be a git repository"));
-
-    // Initialize git repo
-    Command::new("git")
-        .arg("init")
-        .current_dir(&project_path)
-        .output()
-        .expect("Failed to run git init");
-
-    // Should fail - no flock-requirements.md
-    let result = FlockConfig::new_with_config(
-        project_path.clone(), workspace_path.clone(), 2,
-        Some(config_path.to_str().unwrap()));
-    assert!(result.is_err());
-    assert!(result
-        .unwrap_err()
-        .to_string()
-        .contains("flock-requirements.md"));
-
-    // Create flock-requirements.md
-    fs::write(project_path.join("flock-requirements.md"), "# Test\n")
-        .expect("Failed to write requirements");
-
-    // Should succeed now
-    let result = FlockConfig::new_with_config(
-        project_path, workspace_path, 2,
-        Some(config_path.to_str().unwrap()));
-    assert!(result.is_ok());
-}
-
-#[test]
-fn test_flock_config_builder() {
-    let project_dir = create_test_project("builder-test");
-    let workspace_dir = TempDir::new().unwrap();
-    let config_path = create_test_config(&workspace_dir);
-
-    let config = FlockConfig::new_with_config(
-        project_dir.path().to_path_buf(),
-        workspace_dir.path().to_path_buf(),
-        2,
-        Some(config_path.to_str().unwrap()),
-    )
-    .expect("Failed to create config")
-    .with_max_turns(15)
-    .with_g3_binary(PathBuf::from("/custom/g3"));
-
-    assert_eq!(config.num_segments, 2);
-    assert_eq!(config.max_turns, 15);
-    assert_eq!(config.g3_binary, Some(PathBuf::from("/custom/g3")));
-}
-
-#[test]
-fn test_workspace_creation() {
-    let project_dir = create_test_project("workspace-test");
-    let workspace_dir = TempDir::new().unwrap();
-    let config_path = create_test_config(&workspace_dir);
-
-    let config = FlockConfig::new_with_config(
-        project_dir.path().to_path_buf(),
-        workspace_dir.path().to_path_buf(),
-        2,
-        Some(config_path.to_str().unwrap()),
-    )
-    .expect("Failed to create config");
-
-    // Create FlockMode instance
-    let _flock = FlockMode::new(config).expect("Failed to create FlockMode");
-
-    // Verify workspace directory structure will be created
-    // (We can't run the full flock without LLM access, but we can test the setup)
-    assert!(project_dir.path().join(".git").exists());
-    assert!(project_dir.path().join("flock-requirements.md").exists());
-}
-
-#[test]
-fn test_git_clone_functionality() {
-    let project_dir = create_test_project("clone-test");
-    let workspace_dir = TempDir::new().unwrap();
-
-    // Manually test git cloning (what flock mode does internally)
-    let segment_dir = workspace_dir.path().join("segment-1");
-
-    let output = Command::new("git")
-        .arg("clone")
-        .arg(project_dir.path())
-        .arg(&segment_dir)
-        .output()
-        .expect("Failed to run git clone");
-
-    assert!(output.status.success(), "git clone failed: {:?}", output);
-
-    // Verify the clone
-    assert!(segment_dir.exists());
-    assert!(segment_dir.join(".git").exists());
-    assert!(segment_dir.join("flock-requirements.md").exists());
-    assert!(segment_dir.join("README.md").exists());
-
-    // Verify it's a proper git repo
-    let output = Command::new("git")
-        .args(["log", "--oneline"])
-        .current_dir(&segment_dir)
-        .output()
-        .expect("Failed to run git log");
-
-    assert!(output.status.success());
-    let log = String::from_utf8_lossy(&output.stdout);
-    assert!(log.contains("Initial commit"));
-}
-
-#[test]
-fn test_multiple_segment_clones() {
-    let project_dir = create_test_project("multi-clone-test");
-    let workspace_dir = TempDir::new().unwrap();
-
-    // Clone multiple segments
-    for i in 1..=2 {
-        let segment_dir = workspace_dir.path().join(format!("segment-{}", i));
-
-        let output = Command::new("git")
-            .arg("clone")
-            .arg(project_dir.path())
-            .arg(&segment_dir)
-            .output()
-            .expect("Failed to run git clone");
-
-        assert!(output.status.success(), "git clone {} failed", i);
-        assert!(segment_dir.exists());
-        assert!(segment_dir.join(".git").exists());
-        assert!(segment_dir.join("flock-requirements.md").exists());
-    }
-
-    // Verify both segments exist and are independent
-    let segment1 = workspace_dir.path().join("segment-1");
-    let segment2 = workspace_dir.path().join("segment-2");
-
-    assert!(segment1.exists());
-    assert!(segment2.exists());
-
-    // Modify segment 1
-    fs::write(segment1.join("test.txt"), "segment 1").expect("Failed to write to segment 1");
-
-    // Verify segment 2 is unaffected
-    assert!(!segment2.join("test.txt").exists());
-}
-
-#[test]
-fn test_segment_requirements_creation() {
-    let project_dir = create_test_project("segment-req-test");
-    let workspace_dir = TempDir::new().unwrap();
-
-    // Clone a segment
-    let segment_dir = workspace_dir.path().join("segment-1");
-    Command::new("git")
-        .arg("clone")
-        .arg(project_dir.path())
-        .arg(&segment_dir)
-        .output()
-        .expect("Failed to clone");
-
-    // Create segment-requirements.md (what flock mode does)
-    let segment_requirements = "# Module A\n\nImplement module A functionality\n";
-    fs::write(
-        segment_dir.join("segment-requirements.md"),
-        segment_requirements,
-    )
-    .expect("Failed to write segment requirements");
-
-    // Verify it was created
-    assert!(segment_dir.join("segment-requirements.md").exists());
-    let content = fs::read_to_string(segment_dir.join("segment-requirements.md"))
-        .expect("Failed to read segment requirements");
-    assert!(content.contains("Module A"));
-}
-
-#[test]
-fn test_status_file_operations() {
-    use g3_ensembles::FlockStatus;
-
-    let temp_dir = TempDir::new().unwrap();
-    let status_file = temp_dir.path().join("flock-status.json");
-
-    // Create a status
-    let status = FlockStatus::new(
-        "test-session".to_string(),
-        PathBuf::from("/test/project"),
-        PathBuf::from("/test/workspace"),
-        2,
-    );
-
-    // Save to file
-    status
-        .save_to_file(&status_file)
-        .expect("Failed to save status");
-
-    // Verify file exists
-    assert!(status_file.exists());
-
-    // Load from file
-    let loaded = FlockStatus::load_from_file(&status_file).expect("Failed to load status");
-
-    assert_eq!(loaded.session_id, "test-session");
-    assert_eq!(loaded.num_segments, 2);
-}
-
-#[test]
-fn test_json_extraction() {
-    // Test the JSON extraction logic used in partition_requirements
-    let test_cases = vec![
-        (
-            "Here is the result: [{\"module_name\": \"test\"}]",
-            Some("[{\"module_name\": \"test\"}]"),
-        ),
-        (
-            "```json\n[{\"module_name\": \"test\"}]\n```",
-            Some("[{\"module_name\": \"test\"}]"),
-        ),
-        (
-            "Some text before\n[{\"a\": 1}, {\"b\": 2}]\nSome text after",
-            Some("[{\"a\": 1}, {\"b\": 2}]"),
-        ),
-        ("No JSON here", None),
-    ];
-
-    for (input, expected) in test_cases {
-        let result = extract_json_array(input);
-        match expected {
-            Some(exp) => {
-                assert!(result.is_some(), "Failed to extract from: {}", input);
-                assert_eq!(result.unwrap(), exp);
-            }
-            None => {
-                assert!(result.is_none(), "Should not extract from: {}", input);
-            }
-        }
-    }
-}
-
-// Helper function to extract JSON array (mimics the logic in flock.rs)
-fn extract_json_array(output: &str) -> Option<String> {
-    if let Some(start) = output.find('[') {
-        if let Some(end) = output.rfind(']') {
-            if end > start {
-                return Some(output[start..=end].to_string());
-            }
-        }
-    }
-    None
-}
-
-#[test]
-fn test_partition_json_parsing() {
-    // Test parsing of partition JSON
-    let json = r#"[
-        {
-            "module_name": "core-library",
-            "requirements": "Build the core library with basic functionality",
-            "dependencies": []
-        },
-        {
-            "module_name": "cli-tool",
-            "requirements": "Create a CLI tool that uses the core library",
-            "dependencies": ["core-library"]
-        }
-    ]"#;
-
-    let partitions: Vec<serde_json::Value> =
-        serde_json::from_str(json).expect("Failed to parse JSON");
-
-    assert_eq!(partitions.len(), 2);
-    assert_eq!(partitions[0]["module_name"], "core-library");
-    assert_eq!(partitions[1]["module_name"], "cli-tool");
-    assert_eq!(partitions[1]["dependencies"][0], "core-library");
-}
-
-#[test]
-fn test_requirements_file_content() {
-    let project_dir = create_test_project("content-test");
-
-    let requirements_path = project_dir.path().join("flock-requirements.md");
-    let content = fs::read_to_string(&requirements_path).expect("Failed to read requirements");
-
-    // Verify content structure
-    assert!(content.contains("# content-test Test Project"));
-    assert!(content.contains("## Module A"));
-    assert!(content.contains("## Module B"));
-    assert!(content.contains("Hello from Module A"));
-    assert!(content.contains("Hello from Module B"));
-}
-
-#[test]
-fn test_git_repo_independence() {
-    let project_dir = create_test_project("independence-test");
-    let workspace_dir = TempDir::new().unwrap();
-
-    // Clone two segments
-    let segment1 = workspace_dir.path().join("segment-1");
-    let segment2 = workspace_dir.path().join("segment-2");
-
-    Command::new("git")
-        .arg("clone")
-        .arg(project_dir.path())
-        .arg(&segment1)
-        .output()
-        .expect("Failed to clone segment 1");
-
-    Command::new("git")
-        .arg("clone")
-        .arg(project_dir.path())
-        .arg(&segment2)
-        .output()
-        .expect("Failed to clone segment 2");
-
-    // Make a commit in segment 1
-    fs::write(segment1.join("file1.txt"), "content 1").expect("Failed to write file1");
-
-    Command::new("git")
-        .args(["add", "file1.txt"])
-        .current_dir(&segment1)
-        .output()
-        .expect("Failed to git add");
-
-    Command::new("git")
-        .args(["commit", "-m", "Add file1"])
-        .current_dir(&segment1)
-        .output()
-        .expect("Failed to commit in segment 1");
-
-    // Make a different commit in segment 2
-    fs::write(segment2.join("file2.txt"), "content 2").expect("Failed to write file2");
-
-    Command::new("git")
-        .args(["add", "file2.txt"])
-        .current_dir(&segment2)
-        .output()
-        .expect("Failed to git add");
-
-    Command::new("git")
-        .args(["commit", "-m", "Add file2"])
-        .current_dir(&segment2)
-        .output()
-        .expect("Failed to commit in segment 2");
-
-    // Verify they have different commits
-    let log1 = Command::new("git")
-        .args(["log", "--oneline"])
-        .current_dir(&segment1)
-        .output()
-        .expect("Failed to get log 1");
-
-    let log2 = Command::new("git")
-        .args(["log", "--oneline"])
-        .current_dir(&segment2)
-        .output()
-        .expect("Failed to get log 2");
-
-    let log1_str = String::from_utf8_lossy(&log1.stdout);
-    let log2_str = String::from_utf8_lossy(&log2.stdout);
-
-    assert!(log1_str.contains("Add file1"));
-    assert!(!log1_str.contains("Add file2"));
-    assert!(log2_str.contains("Add file2"));
-    assert!(!log2_str.contains("Add file1"));
-
-    // Verify files exist only in their respective segments
-    assert!(segment1.join("file1.txt").exists());
-    assert!(!segment1.join("file2.txt").exists());
-    assert!(segment2.join("file2.txt").exists());
-    assert!(!segment2.join("file1.txt").exists());
-}
--- a/docs/FLOCK_MODE.md
+++ b/docs/FLOCK_MODE.md
@@ -1,397 +0,0 @@
-# g3 Flock Mode Guide
-
-**Last updated**: January 2025  
-**Source of truth**: `crates/g3-ensembles/src/flock.rs`
-
-## Purpose
-
-Flock mode enables parallel multi-agent development by spawning multiple g3 agent instances that work on different parts of a project simultaneously. This is useful for large projects with modular architectures where independent components can be developed in parallel.
-
-## Overview
-
-In Flock mode:
-- Multiple agent instances run concurrently
-- Each agent works on a specific module or component
-- Agents operate independently but share the same codebase
-- Progress is tracked and coordinated centrally
-
-```
-┌─────────────────────────────────────────────────────────┐
-│                    Flock Coordinator                     │
-│                                                         │
-│  ┌─────────┐  ┌─────────┐  ┌─────────┐  ┌─────────┐   │
-│  │ Agent 1 │  │ Agent 2 │  │ Agent 3 │  │ Agent N │   │
-│  │ Module A│  │ Module B│  │ Module C│  │ Module N│   │
-│  └─────────┘  └─────────┘  └─────────┘  └─────────┘   │
-│       │            │            │            │         │
-│       ▼            ▼            ▼            ▼         │
-│  ┌─────────────────────────────────────────────────┐   │
-│  │              Shared Codebase                     │   │
-│  └─────────────────────────────────────────────────┘   │
-└─────────────────────────────────────────────────────────┘
-```
-
-## When to Use Flock Mode
-
-**Good candidates**:
-- Microservices architectures
-- Projects with independent modules
-- Large refactoring across multiple files
-- Parallel feature development
-- Test suite expansion
-
-**Not recommended for**:
-- Tightly coupled code
-- Sequential dependencies
-- Small projects
-- Single-file changes
-
-## Configuration
-
-Flock mode is configured through a YAML manifest file:
-
-```yaml
-# flock.yaml
-name: "my-project-flock"
-description: "Parallel development of project modules"
-
-# Global settings
-settings:
-  max_agents: 4
-  timeout_minutes: 60
-  provider: "anthropic.default"
-
-# Agent definitions
-agents:
-  - name: "api-agent"
-    description: "Develops the REST API layer"
-    working_dir: "src/api"
-    requirements: |
-      Implement REST endpoints for user management:
-      - GET /users
-      - POST /users
-      - GET /users/{id}
-      - PUT /users/{id}
-      - DELETE /users/{id}
-
-  - name: "db-agent"
-    description: "Develops the database layer"
-    working_dir: "src/db"
-    requirements: |
-      Implement database models and queries:
-      - User model with CRUD operations
-      - Connection pooling
-      - Migration support
-
-  - name: "test-agent"
-    description: "Writes integration tests"
-    working_dir: "tests"
-    requirements: |
-      Write integration tests for:
-      - API endpoints
-      - Database operations
-      - Error handling
-```
-
-## Usage
-
-### Starting a Flock
-
-```bash
-# Start flock with manifest
-g3 --flock flock.yaml
-
-# Start with specific agents only
-g3 --flock flock.yaml --agents api-agent,db-agent
-
-# Start with custom timeout
-g3 --flock flock.yaml --timeout 120
-```
-
-### Monitoring Progress
-
-Flock mode provides real-time status updates:
-
-```
-🐦 Flock Status: my-project-flock
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-
-  api-agent     [████████░░] 80%  Implementing DELETE endpoint
-  db-agent      [██████████] 100% ✅ Complete
-  test-agent    [██████░░░░] 60%  Writing error handling tests
-
-Elapsed: 15m 32s | Tokens: 45,230 | Errors: 0
-```
-
-### Stopping a Flock
-
-```bash
-# Graceful stop (wait for current tasks)
-Ctrl+C
-
-# Force stop all agents
-Ctrl+C Ctrl+C
-```
-
-## Agent Communication
-
-Agents in a flock operate independently but can:
-
-1. **Read shared files**: All agents can read the entire codebase
-2. **Write to their area**: Each agent writes to its designated working directory
-3. **Signal completion**: Agents report when their tasks are done
-4. **Report errors**: Failures are logged and can trigger coordinator action
-
-### Conflict Prevention
-
-To prevent conflicts:
-- Assign non-overlapping working directories
-- Use clear module boundaries
-- Define explicit interfaces between modules
-- Run integration after all agents complete
-
-## Status Tracking
-
-Flock status is tracked in `.g3/flock/`:
-
-```
-.g3/flock/
-├── status.json           # Overall flock status
-├── api-agent/
-│   ├── session.json      # Agent session log
-│   └── todo.g3.md        # Agent's TODO list
-├── db-agent/
-│   ├── session.json
-│   └── todo.g3.md
-└── test-agent/
-    ├── session.json
-    └── todo.g3.md
-```
-
-### Status File Format
-
-```json
-{
-  "flock_name": "my-project-flock",
-  "started_at": "2025-01-03T10:00:00Z",
-  "status": "running",
-  "agents": [
-    {
-      "name": "api-agent",
-      "status": "running",
-      "progress": 80,
-      "current_task": "Implementing DELETE endpoint",
-      "tokens_used": 15000,
-      "errors": 0
-    }
-  ]
-}
-```
-
-## Best Practices
-
-### 1. Define Clear Boundaries
-
-```yaml
-# Good: Clear module separation
-agents:
-  - name: "frontend"
-    working_dir: "src/frontend"
-  - name: "backend"
-    working_dir: "src/backend"
-
-# Bad: Overlapping directories
-agents:
-  - name: "agent1"
-    working_dir: "src"
-  - name: "agent2"
-    working_dir: "src/utils"  # Overlaps with agent1!
-```
-
-### 2. Specify Interfaces First
-
-Define shared interfaces before parallel development:
-
-```yaml
-agents:
-  - name: "interface-agent"
-    priority: 1  # Runs first
-    requirements: |
-      Define shared interfaces in src/interfaces/:
-      - UserService trait
-      - DatabaseConnection trait
-      - Error types
-
-  - name: "impl-agent"
-    priority: 2  # Runs after interfaces
-    depends_on: ["interface-agent"]
-    requirements: |
-      Implement UserService trait...
-```
-
-### 3. Use Appropriate Granularity
-
-- **Too few agents**: Doesn't leverage parallelism
-- **Too many agents**: Coordination overhead, potential conflicts
-- **Sweet spot**: 2-6 agents for most projects
-
-### 4. Include a Test Agent
-
-Always include an agent for testing:
-
-```yaml
-agents:
-  - name: "test-agent"
-    working_dir: "tests"
-    requirements: |
-      Write tests for all new functionality.
-      Run tests after other agents complete.
-```
-
-### 5. Plan for Integration
-
-After flock completion:
-
-```bash
-# Run all tests
-cargo test
-
-# Check for conflicts
-git status
-
-# Review changes
-git diff
-```
-
-## Error Handling
-
-### Agent Failures
-
-If an agent fails:
-1. The error is logged to the agent's session log
-2. The coordinator is notified
-3. Other agents continue (by default)
-4. The failed agent can be restarted
-
-### Restart Failed Agent
-
-```bash
-# Restart specific agent
-g3 --flock flock.yaml --restart api-agent
-
-# Restart all failed agents
-g3 --flock flock.yaml --restart-failed
-```
-
-### Conflict Resolution
-
-If agents modify the same file:
-1. Last write wins (by default)
-2. Conflicts are logged
-3. Manual resolution may be needed
-
-## Resource Management
-
-### Token Usage
-
-Each agent has its own token budget:
-
-```yaml
-settings:
-  max_tokens_per_agent: 100000
-  total_token_budget: 500000
-```
-
-### Concurrency
-
-Limit concurrent agents based on:
-- API rate limits
-- System resources
-- Provider capacity
-
-```yaml
-settings:
-  max_concurrent_agents: 3  # Run at most 3 at once
-```
-
-## Example: Microservices Project
-
-```yaml
-name: "microservices-flock"
-
-settings:
-  max_agents: 5
-  provider: "anthropic.default"
-
-agents:
-  - name: "user-service"
-    working_dir: "services/user"
-    requirements: |
-      Implement user service:
-      - User registration
-      - Authentication
-      - Profile management
-
-  - name: "order-service"
-    working_dir: "services/order"
-    requirements: |
-      Implement order service:
-      - Order creation
-      - Order status tracking
-      - Payment integration
-
-  - name: "inventory-service"
-    working_dir: "services/inventory"
-    requirements: |
-      Implement inventory service:
-      - Stock management
-      - Availability checking
-      - Reorder alerts
-
-  - name: "gateway"
-    working_dir: "services/gateway"
-    requirements: |
-      Implement API gateway:
-      - Request routing
-      - Authentication middleware
-      - Rate limiting
-
-  - name: "integration-tests"
-    working_dir: "tests/integration"
-    depends_on: ["user-service", "order-service", "inventory-service", "gateway"]
-    requirements: |
-      Write integration tests for:
-      - End-to-end order flow
-      - Service communication
-      - Error scenarios
-```
-
-## Limitations
-
-- **No real-time coordination**: Agents don't communicate with each other during execution
-- **File conflicts**: Possible if boundaries aren't clear
-- **Resource intensive**: Multiple LLM calls in parallel
-- **Debugging complexity**: Multiple logs to review
-
-## Troubleshooting
-
-### Agents Not Starting
-
-1. Check manifest syntax (YAML)
-2. Verify working directories exist
-3. Check provider configuration
-4. Review logs in `.g3/flock/`
-
-### Slow Progress
-
-1. Reduce number of concurrent agents
-2. Check for rate limiting
-3. Simplify requirements
-4. Use a faster provider
-
-### Inconsistent Results
-
-1. Define clearer interfaces
-2. Add more specific requirements
-3. Use a lower temperature
-4. Add validation steps
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -48,12 +48,11 @@ g3 follows a **tool-first philosophy**: instead of just providing advice, it act
                                 │
         ┌───────────────────────┼───────────────────────┐
         │                       │                       │
-┌─────────────────┐    ┌─────────────────┐
-│ g3-ensembles    │    │     studio      │
-│ • Flock mode    │    │                 │
-│ • Multi-agent   │    │ • Worktree mgmt │
-│ • Parallel dev  │    │ • Session mgmt  │
-└─────────────────┘    └─────────────────┘
+         ┌─────────────────┐
+         │     studio      │
+         │ • Worktree mgmt │
+         │ • Session mgmt  │
+         └─────────────────┘
 ```

 ## Workspace Structure
@@ -71,7 +70,6 @@ g3/
 │   ├── g3-execution/             # Code execution engine
 │   ├── g3-computer-control/      # Computer automation
 │   ├── g3-planner/               # Planning mode workflow
-│   ├── g3-ensembles/             # Multi-agent (flock) mode
 │   └── studio/                   # Multi-agent workspace manager
 ├── agents/                       # Agent persona definitions
 ├── logs/                         # Session logs (auto-created)
@@ -220,17 +218,6 @@ Key modules:
 5. Files archived with timestamps
 6. Git commit with LLM-generated message

-### g3-ensembles (Multi-Agent)
-
-**Location**: `crates/g3-ensembles/`  
-**Purpose**: Parallel multi-agent development (Flock mode)
-
-Key modules:
-- `flock.rs` - Flock orchestration (~43k chars)
-- `status.rs` - Agent status tracking
-
-Flock mode enables parallel development by spawning multiple agent instances working on different parts of a project.
-
 ### studio (Multi-Agent Workspace Manager)

 **Location**: `crates/studio/`