Compare commits: `ripgrep` ... `micn/ollam` (24 commits)
| SHA1 |
|---|
| 79b375519b |
| 88c3cc23fe |
| 4622507f37 |
| 217df2f2af |
| 22a0090cdc |
| 631f3c16ca |
| 1f9fef5f18 |
| 57d473c19d |
| e59ce2f93f |
| a1ad94ed75 |
| 982c0bbfb3 |
| ad9ba5e5d8 |
| f89bbfc89a |
| 11eb01e04d |
| bdaacfd051 |
| 92ae776510 |
| c42e0bce54 |
| b529d7f814 |
| 9752e81489 |
| 63c2aff7ba |
| aa4a0267ea |
| 6cfa1e225c |
| f53cd8e8f3 |
| 45bffc40da |
**.gitignore** (vendored, 1 change)

```diff
@@ -3,6 +3,7 @@
 debug
 target
 .build
+appy/
 
 # These are backup files generated by rustfmt
 **/*.rs.bk
```
**Cargo.lock** (generated, 32 changes)

```diff
@@ -990,7 +990,7 @@ dependencies = [
  "libc",
  "option-ext",
  "redox_users 0.5.2",
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -1062,7 +1062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.52.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -1391,6 +1391,7 @@ dependencies = [
  "reqwest",
  "serde",
  "serde_json",
+ "serde_yaml",
  "shellexpand",
  "thiserror 1.0.69",
  "tokio",
@@ -2333,7 +2334,7 @@ version = "0.50.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "windows-sys 0.59.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -2904,7 +2905,7 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys 0.11.0",
- "windows-sys 0.52.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -3078,6 +3079,19 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "serde_yaml"
+version = "0.9.34+deprecated"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47"
+dependencies = [
+ "indexmap",
+ "itoa",
+ "ryu",
+ "serde",
+ "unsafe-libyaml",
+]
+
 [[package]]
 name = "sha2"
 version = "0.10.9"
@@ -3292,7 +3306,7 @@ dependencies = [
  "getrandom 0.3.4",
  "once_cell",
  "rustix 1.1.2",
- "windows-sys 0.52.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -3667,6 +3681,12 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
 
+[[package]]
+name = "unsafe-libyaml"
+version = "0.2.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861"
+
 [[package]]
 name = "url"
 version = "2.5.7"
@@ -3935,7 +3955,7 @@ version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.48.0",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
```
**OLLAMA_CONFIG.md** (new file, 456 lines)

# Configuring Ollama Provider in G3

This guide shows you how to configure G3 to use Ollama as your LLM provider.

## Quick Start

### 1. Install Ollama

```bash
# Visit https://ollama.ai to download and install
# Or use curl:
curl https://ollama.ai/install.sh | sh
```

### 2. Pull a Model

```bash
ollama pull llama3.2
# or any other model you prefer
```

### 3. Create Configuration File

Copy the example configuration:

```bash
cp config.ollama.example.toml ~/.config/g3/config.toml
```

Or create it manually:

```toml
[providers]
default_provider = "ollama"

[providers.ollama]
model = "llama3.2"
```

### 4. Run G3

```bash
g3
# G3 will now use Ollama with llama3.2!
```

## Configuration Options

### Basic Configuration

```toml
[providers]
default_provider = "ollama"

[providers.ollama]
model = "llama3.2"
```

This is the minimal configuration needed. It uses all defaults:

- Base URL: `http://localhost:11434`
- Temperature: `0.7`
- Max tokens: not limited (uses the model default)

### Full Configuration

```toml
[providers]
default_provider = "ollama"

[providers.ollama]
model = "llama3.2"
base_url = "http://localhost:11434"
max_tokens = 2048
temperature = 0.7
```

### Custom Ollama Host

If you're running Ollama on a different machine or port:

```toml
[providers.ollama]
model = "llama3.2"
base_url = "http://192.168.1.100:11434"
```

### Different Models

You can use any Ollama model:

```toml
[providers.ollama]
model = "qwen2.5:7b"  # Alibaba's Qwen model
```

```toml
[providers.ollama]
model = "mistral"  # Mistral AI
```

```toml
[providers.ollama]
model = "llama3.1:70b"  # Larger Llama model
```

## Multiple Provider Configuration

You can configure multiple providers and switch between them:

```toml
[providers]
default_provider = "ollama"  # Default for most operations

# Ollama for local, fast responses
[providers.ollama]
model = "llama3.2:3b"
temperature = 0.7

# Databricks for more complex tasks
[providers.databricks]
host = "https://your-workspace.cloud.databricks.com"
model = "databricks-claude-sonnet-4"
max_tokens = 4096
temperature = 0.1
use_oauth = true
```

Then switch providers with:

```bash
g3 --provider databricks
```

## Autonomous Mode (Coach-Player)

Use different providers for code review (coach) and implementation (player):

```toml
[providers]
default_provider = "ollama"
coach = "databricks"  # Use powerful cloud model for review
player = "ollama"     # Use local model for implementation

[providers.ollama]
model = "qwen2.5:14b"  # Larger local model for coding

[providers.databricks]
host = "https://your-workspace.cloud.databricks.com"
model = "databricks-claude-sonnet-4"
use_oauth = true
```

This gives you the best of both worlds:

- Fast local execution for coding tasks
- Powerful cloud review for quality assurance

## Recommended Models

### For Coding Tasks

| Model | Size | Speed | Quality | Notes |
|-------|------|-------|---------|-------|
| **qwen2.5:7b** | 7B | Fast | Excellent | Best balance for coding |
| **llama3.2:3b** | 3B | Very fast | Good | Great for quick tasks |
| **llama3.1:8b** | 8B | Medium | Very good | Solid all-rounder |
| **mistral** | 7B | Fast | Good | Good for general use |

### For Complex Tasks

| Model | Size | Speed | Quality | Notes |
|-------|------|-------|---------|-------|
| **qwen2.5:14b** | 14B | Medium | Excellent | Best local model for coding |
| **qwen2.5:32b** | 32B | Slow | Outstanding | If you have the resources |
| **llama3.1:70b** | 70B | Very slow | Outstanding | Requires significant RAM/GPU |

## Temperature Settings

Temperature controls randomness in responses:

- **0.1-0.3**: deterministic, good for code generation
- **0.5-0.7**: balanced, good for most tasks
- **0.8-1.0**: creative, good for brainstorming

```toml
[providers.ollama]
model = "qwen2.5:7b"
temperature = 0.2  # Focused code generation
```

## Max Tokens

Control response length:

```toml
[providers.ollama]
model = "llama3.2"
max_tokens = 1024  # Shorter responses
```

```toml
[providers.ollama]
model = "qwen2.5:7b"
max_tokens = 4096  # Longer, detailed responses
```

Leave it unset to use the model default (recommended).

## Performance Tuning

### GPU Acceleration

Ollama automatically uses a GPU if one is available. To check:

```bash
ollama ps
```

### Quantized Models

For faster responses with less RAM:

```toml
[providers.ollama]
model = "llama3.2:3b-q4_0"  # 4-bit quantization
```

Quantization options:

- `q4_0`: 4-bit, fastest, lowest quality
- `q5_0`: 5-bit, balanced
- `q8_0`: 8-bit, slower, better quality

### Multiple Models

You can pull multiple models and switch easily:

```bash
ollama pull llama3.2:3b  # Fast for chat
ollama pull qwen2.5:7b   # Better for code
ollama pull mistral      # General purpose
```

Then change your config:

```toml
[providers.ollama]
model = "qwen2.5:7b"  # Just change this line
```

## Troubleshooting

### Ollama Not Running

```bash
# Check if Ollama is running
curl http://localhost:11434/api/version

# Start Ollama (macOS/Linux)
ollama serve

# Or just run a model (auto-starts)
ollama run llama3.2
```

### Model Not Found

```bash
# List available models
ollama list

# Pull the model
ollama pull llama3.2
```

### Slow Responses

1. Use a smaller model:
   ```toml
   model = "llama3.2:1b"  # Smallest, fastest
   ```
2. Use a quantized version:
   ```toml
   model = "llama3.2:3b-q4_0"
   ```
3. Reduce `max_tokens`:
   ```toml
   max_tokens = 512
   ```

### Out of Memory

1. Switch to a smaller model
2. Use a quantized version
3. Close other applications
4. Check GPU memory: `ollama ps`

### Connection Refused

Check that `base_url` is correct:

```toml
[providers.ollama]
model = "llama3.2"
base_url = "http://localhost:11434"  # Default
```

For remote Ollama:

```toml
base_url = "http://your-server:11434"
```

## Complete Example Configs

### Minimal Local Setup

```toml
[providers]
default_provider = "ollama"

[providers.ollama]
model = "llama3.2"

[agent]
max_context_length = 8192
enable_streaming = true
timeout_seconds = 60
```

### Optimized for Coding

```toml
[providers]
default_provider = "ollama"

[providers.ollama]
model = "qwen2.5:7b"
temperature = 0.2
max_tokens = 2048

[agent]
max_context_length = 16384
enable_streaming = true
timeout_seconds = 120
```

### Fast Responses

```toml
[providers]
default_provider = "ollama"

[providers.ollama]
model = "llama3.2:3b-q4_0"
temperature = 0.7
max_tokens = 1024

[agent]
max_context_length = 4096
enable_streaming = true
timeout_seconds = 30
```

### High Quality (Requires Good Hardware)

```toml
[providers]
default_provider = "ollama"

[providers.ollama]
model = "qwen2.5:32b"
temperature = 0.3
max_tokens = 4096

[agent]
max_context_length = 32768
enable_streaming = true
timeout_seconds = 300
```

### Hybrid (Local + Cloud)

```toml
[providers]
default_provider = "ollama"
coach = "databricks"
player = "ollama"

[providers.ollama]
model = "qwen2.5:14b"
temperature = 0.2

[providers.databricks]
host = "https://your-workspace.cloud.databricks.com"
model = "databricks-claude-sonnet-4"
use_oauth = true

[agent]
max_context_length = 16384
enable_streaming = true
timeout_seconds = 120
```

## Environment Variables

You can override the config with environment variables:

```bash
# Override model
G3_PROVIDERS_OLLAMA_MODEL=qwen2.5:7b g3

# Override base URL
G3_PROVIDERS_OLLAMA_BASE_URL=http://192.168.1.100:11434 g3

# Override default provider
G3_PROVIDERS_DEFAULT_PROVIDER=ollama g3
```

## Best Practices

1. **Start small**: begin with `llama3.2:3b` and scale up if needed
2. **Use quantization**: `q4_0` or `q5_0` gives the best speed/quality balance
3. **Match task to model**:
   - Quick edits: 1B-3B models
   - Code generation: 7B-14B models
   - Complex refactoring: 14B-32B models
4. **Temperature for code**: use 0.1-0.3 for deterministic output
5. **Enable streaming**: always enable it for better UX
6. **Local first**: use Ollama by default and the cloud for special cases

## Comparison with Other Providers

| Feature | Ollama | Databricks | OpenAI | Anthropic |
|---------|--------|------------|--------|-----------|
| Cost | Free | Paid | Paid | Paid |
| Privacy | Full | Medium | Low | Low |
| Speed (small models) | Fast | Fast | Medium | Medium |
| Speed (large models) | Slow | Fast | Fast | Fast |
| Setup complexity | Low | Medium | Low | Low |
| Authentication | None | OAuth/Token | API key | API key |
| Offline support | Yes | No | No | No |
| Tool calling | Yes | Yes | Yes | Yes |

## Next Steps

1. Try different models: `ollama pull mistral`, `ollama pull qwen2.5`
2. Experiment with temperature settings
3. Set up a hybrid config with a cloud provider for complex tasks
4. Share your config with the community!

## Getting Help

- Ollama docs: https://ollama.ai/docs
- G3 issues: https://github.com/your-repo/issues
- Test your config: `g3 --help`
**OLLAMA_EXAMPLE.md** (new file, 315 lines)

# Ollama Provider for g3

A simple, local LLM provider implementation for g3 that connects to Ollama.

## Features

- ✅ **Simple Setup**: No API keys or authentication required
- ✅ **Local Execution**: Runs entirely on your machine
- ✅ **Tool Calling Support**: Native tool calling for compatible models
- ✅ **Streaming**: Full streaming support with real-time responses
- ✅ **Flexible Configuration**: Custom base URL, temperature, and max tokens
- ✅ **Model Discovery**: Automatic detection of available models

## Quick Start

### Prerequisites

1. Install and start Ollama: https://ollama.ai
2. Pull a model: `ollama pull llama3.2`

### Basic Usage

```rust
use g3_providers::{OllamaProvider, LLMProvider, CompletionRequest, Message, MessageRole};

#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Create provider with default settings (localhost:11434)
    let provider = OllamaProvider::new(
        "llama3.2".to_string(),
        None, // base_url: defaults to http://localhost:11434
        None, // max_tokens: optional
        None, // temperature: defaults to 0.7
    )?;

    // Create a simple request
    let request = CompletionRequest {
        messages: vec![
            Message {
                role: MessageRole::User,
                content: "What is the capital of France?".to_string(),
            },
        ],
        max_tokens: Some(1000),
        temperature: Some(0.7),
        stream: false,
        tools: None,
    };

    // Get completion
    let response = provider.complete(request).await?;
    println!("Response: {}", response.content);
    println!("Tokens: {}", response.usage.total_tokens);

    Ok(())
}
```

### Streaming Example

```rust
use futures_util::StreamExt;

let request = CompletionRequest {
    messages: vec![
        Message {
            role: MessageRole::User,
            content: "Write a short poem about coding".to_string(),
        },
    ],
    max_tokens: Some(500),
    temperature: Some(0.8),
    stream: true,
    tools: None,
};

let mut stream = provider.stream(request).await?;

while let Some(chunk_result) = stream.next().await {
    match chunk_result {
        Ok(chunk) => {
            print!("{}", chunk.content);
            if chunk.finished {
                println!("\n\nDone!");
                if let Some(usage) = chunk.usage {
                    println!("Total tokens: {}", usage.total_tokens);
                }
            }
        }
        Err(e) => eprintln!("Error: {}", e),
    }
}
```

### Tool Calling Example

```rust
use serde_json::json;

let tools = vec![Tool {
    name: "get_weather".to_string(),
    description: "Get current weather for a location".to_string(),
    input_schema: json!({
        "type": "object",
        "properties": {
            "location": {
                "type": "string",
                "description": "City name"
            },
            "unit": {
                "type": "string",
                "enum": ["celsius", "fahrenheit"],
                "description": "Temperature unit"
            }
        },
        "required": ["location"]
    }),
}];

let request = CompletionRequest {
    messages: vec![
        Message {
            role: MessageRole::User,
            content: "What's the weather in Paris?".to_string(),
        },
    ],
    max_tokens: Some(500),
    temperature: Some(0.5),
    stream: false,
    tools: Some(tools),
};

let response = provider.complete(request).await?;
println!("Response: {}", response.content);
```

### Custom Ollama Host

```rust
// Connect to a remote Ollama instance
let provider = OllamaProvider::new(
    "llama3.2".to_string(),
    Some("http://192.168.1.100:11434".to_string()),
    None,
    None,
)?;
```

### Fetch Available Models

```rust
// Discover which models are available
let models = provider.fetch_available_models().await?;
println!("Available models:");
for model in models {
    println!("  - {}", model);
}
```

## Supported Models

The provider works with any Ollama model, including:

- **llama3.2** (1B, 3B) - Meta's latest Llama models
- **llama3.1** (8B, 70B, 405B) - Previous generation
- **qwen2.5** (7B, 14B, 32B) - Alibaba's Qwen models
- **mistral** - Mistral AI models
- **mixtral** - Mixture-of-experts model
- **phi3** - Microsoft's Phi-3
- **gemma2** - Google's Gemma 2

## Configuration

### Constructor Parameters

```rust
OllamaProvider::new(
    model: String,            // Model name (e.g., "llama3.2")
    base_url: Option<String>, // Ollama API URL (default: http://localhost:11434)
    max_tokens: Option<u32>,  // Maximum tokens to generate (optional)
    temperature: Option<f32>, // Sampling temperature (default: 0.7)
)
```

### Request Options

```rust
CompletionRequest {
    messages: Vec<Message>,   // Conversation history
    max_tokens: Option<u32>,  // Override the provider's max_tokens
    temperature: Option<f32>, // Override the provider's temperature
    stream: bool,             // Enable streaming responses
    tools: Option<Vec<Tool>>, // Tools for function calling
}
```

## Comparison with Other Providers

| Feature | Ollama | OpenAI | Anthropic | Databricks |
|---------|--------|--------|-----------|------------|
| Local Execution | ✅ | ❌ | ❌ | ❌ |
| Authentication | None | API Key | API Key | OAuth/Token |
| Tool Calling | ✅ | ✅ | ✅ | ✅ |
| Streaming | ✅ | ✅ | ✅ | ✅ |
| Cost | Free | Paid | Paid | Paid |
| Privacy | High | Low | Low | Medium |

## Implementation Details

### API Endpoints

- **Chat Completion**: `POST /api/chat`
- **Model List**: `GET /api/tags`

### Response Format

Ollama uses a simple JSON-per-line streaming format:

```json
{"message":{"role":"assistant","content":"Hello"},"done":false}
{"message":{"role":"assistant","content":" there"},"done":false}
{"done":true,"prompt_eval_count":10,"eval_count":20}
```
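The accumulation logic for that format can be sketched outside Rust as well. A minimal Python sketch, using the three sample lines above; the field names (`message.content`, `done`, `prompt_eval_count`, `eval_count`) are exactly the ones shown in the example:

```python
import json

# The three sample chunks from the JSON-per-line format above
stream = [
    '{"message":{"role":"assistant","content":"Hello"},"done":false}',
    '{"message":{"role":"assistant","content":" there"},"done":false}',
    '{"done":true,"prompt_eval_count":10,"eval_count":20}',
]

content = ""
total_tokens = 0
for line in stream:
    chunk = json.loads(line)
    if chunk.get("done"):
        # Token counts arrive only on the final chunk
        total_tokens = chunk["prompt_eval_count"] + chunk["eval_count"]
    else:
        content += chunk["message"]["content"]

print(content)       # Hello there
print(total_tokens)  # 30
```

This mirrors what the provider's streaming parser does internally: append `message.content` until a chunk with `done: true` arrives, then read the usage counters from that final chunk.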
### Tool Call Format

Tool calls are returned in the message structure:

```json
{
  "message": {
    "role": "assistant",
    "content": "",
    "tool_calls": [
      {
        "function": {
          "name": "get_weather",
          "arguments": {"location": "Paris", "unit": "celsius"}
        }
      }
    ]
  },
  "done": true
}
```
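Dispatching on that structure is straightforward. A hedged Python sketch follows; only the JSON shape comes from the example above, while the `handlers` table and the `get_weather` stub are hypothetical stand-ins for real tool implementations:

```python
import json

# The tool-call response shape from the example above
raw = '''{
  "message": {
    "role": "assistant",
    "content": "",
    "tool_calls": [
      {"function": {"name": "get_weather",
                    "arguments": {"location": "Paris", "unit": "celsius"}}}
    ]
  },
  "done": true
}'''

# Hypothetical local tool implementations, keyed by tool name
handlers = {
    "get_weather": lambda location, unit="celsius": f"Sunny in {location} ({unit})",
}

response = json.loads(raw)
results = []
for call in response["message"].get("tool_calls", []):
    fn = call["function"]
    # Ollama returns `arguments` as a JSON object (not a string),
    # so it can be splatted directly into the handler
    results.append(handlers[fn["name"]](**fn["arguments"]))

print(results)  # ['Sunny in Paris (celsius)']
```

Note the contrast with providers that encode `arguments` as a JSON string: here no second `json.loads` is needed before calling the handler.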
## Troubleshooting

### Connection Errors

If you see connection errors, ensure Ollama is running:

```bash
# Check if Ollama is running
curl http://localhost:11434/api/version

# Start Ollama (if needed)
ollama serve
```

### Model Not Found

Pull the model first:

```bash
ollama pull llama3.2
ollama list  # Check available models
```

### Performance Issues

- Use smaller models (1B, 3B) for faster responses
- Reduce `max_tokens` to limit generation length
- Enable GPU acceleration if available
- Consider quantized models (e.g., `llama3.2:3b-q4_0`)

## Testing

Run the included tests:

```bash
cargo test --package g3-providers ollama
```

All tests should pass:

```
running 4 tests
test ollama::tests::test_custom_base_url ... ok
test ollama::tests::test_message_conversion ... ok
test ollama::tests::test_provider_creation ... ok
test ollama::tests::test_tool_conversion ... ok
```

## Architecture

The provider follows the same architecture as the other g3 providers:

1. **OllamaProvider**: Main struct implementing the `LLMProvider` trait
2. **Request/Response Structures**: Internal types for the Ollama API
3. **Streaming Parser**: Handles line-by-line JSON parsing
4. **Tool Call Handling**: Accumulates and converts tool calls
5. **Error Handling**: Robust error handling with retries

## Contributing

The provider is part of the g3-providers crate. To contribute:

1. Add features to `ollama.rs`
2. Update the tests
3. Run `cargo test --package g3-providers`
4. Update this documentation

## License

Same as the g3 project.
40
README.md
40
README.md
@@ -14,7 +14,6 @@ The heart of the agent system, containing:
|
|||||||
- **Context Window Management**: Intelligent tracking of token usage with context thinning (50-80%) and auto-summarization at 80% capacity
|
- **Context Window Management**: Intelligent tracking of token usage with context thinning (50-80%) and auto-summarization at 80% capacity
|
||||||
- **Tool System**: Built-in tools for file operations, shell commands, computer control, TODO management, and structured output
|
- **Tool System**: Built-in tools for file operations, shell commands, computer control, TODO management, and structured output
|
||||||
- **Streaming Response Parser**: Real-time parsing of LLM responses with tool call detection and execution
|
- **Streaming Response Parser**: Real-time parsing of LLM responses with tool call detection and execution
|
||||||
- **Smart Project Awareness**: Automatically detects and respects `.gitignore` patterns, informing the agent about ignored files
|
|
||||||
- **Task Execution**: Support for single and iterative task execution with automatic retry logic
|
- **Task Execution**: Support for single and iterative task execution with automatic retry logic
|
||||||
|
|
||||||
#### **g3-providers**
|
#### **g3-providers**
|
||||||
@@ -98,10 +97,7 @@ These commands give you fine-grained control over context management, allowing y
|
|||||||
- **Final Output**: Formatted result presentation
|
- **Final Output**: Formatted result presentation
|
||||||
|
|
||||||
### Provider Flexibility
|
### Provider Flexibility
|
||||||
|
- Support for multiple LLM providers through a unified interface
|
||||||
### Smart Project Awareness
|
|
||||||
|
|
||||||
- Automatically detects and respects `.gitignore` when present
|
|
||||||
- Hot-swappable providers without code changes
|
- Hot-swappable providers without code changes
|
||||||
- Provider-specific optimizations and feature support
|
- Provider-specific optimizations and feature support
|
||||||
- Local model support for offline operation
|
- Local model support for offline operation
|
||||||
@@ -136,6 +132,40 @@ G3 is designed for:
 
 ## Getting Started
 
+### Default Mode: Accumulative Autonomous
+
+The default interactive mode now uses **accumulative autonomous mode**, which combines the best of interactive and autonomous workflows:
+
+```bash
+# Simply run g3 in any directory
+g3
+
+# You'll be prompted to describe what you want to build
+# Each input you provide:
+# 1. Gets added to accumulated requirements
+# 2. Automatically triggers autonomous mode (coach-player loop)
+# 3. Implements your requirements iteratively
+
+# Example session:
+requirement> create a simple web server in Python with Flask
+# ... autonomous mode runs and implements it ...
+requirement> add a /health endpoint that returns JSON
+# ... autonomous mode runs again with both requirements ...
+```
+
+### Other Modes
+
+```bash
+# Single-shot mode (one task, then exit)
+g3 "implement a function to calculate fibonacci numbers"
+
+# Traditional autonomous mode (reads requirements.md)
+g3 --autonomous
+
+# Traditional chat mode (simple interactive chat without autonomous runs)
+g3 --chat
+```
+
 ```bash
 # Build the project
 cargo build --release
26  config.ollama.example.toml  Normal file
@@ -0,0 +1,26 @@
+# Example G3 configuration using Ollama provider
+# Copy this to ~/.config/g3/config.toml or ./g3.toml to use it
+
+[providers]
+default_provider = "ollama"
+
+# Ollama configuration (local LLM)
+[providers.ollama]
+model = "llama3.2"  # or qwen2.5, mistral, etc.
+# base_url = "http://localhost:11434"  # Optional, defaults to localhost
+# max_tokens = 2048  # Optional
+# temperature = 0.7  # Optional
+
+# Optional: Specify different providers for coach and player in autonomous mode
+# coach = "ollama"   # Provider for coach (code reviewer)
+# player = "ollama"  # Provider for player (code implementer)
+
+[agent]
+max_context_length = 8192
+enable_streaming = true
+timeout_seconds = 60
+
+[computer_control]
+enabled = false  # Set to true to enable computer control (requires OS permissions)
+require_confirmation = true
+max_actions_per_second = 5
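The optional keys in this example file mirror the `OllamaConfig` struct added elsewhere in this change (`model`, `base_url`, `max_tokens`, `temperature`). A minimal sketch of how the documented `base_url` default could be resolved; the struct fields are taken from the diff, but the helper method is hypothetical:

```rust
// Hypothetical standalone mirror of the OllamaConfig fields from this change.
pub struct OllamaConfig {
    pub model: String,
    pub base_url: Option<String>, // Default: http://localhost:11434
    pub max_tokens: Option<u32>,
    pub temperature: Option<f32>,
}

impl OllamaConfig {
    // Resolve the optional base_url to the default documented in the example file.
    pub fn resolved_base_url(&self) -> String {
        self.base_url
            .clone()
            .unwrap_or_else(|| "http://localhost:11434".to_string())
    }
}

fn main() {
    let cfg = OllamaConfig {
        model: "llama3.2".to_string(),
        base_url: None,
        max_tokens: None,
        temperature: None,
    };
    // With base_url omitted, the documented default applies.
    assert_eq!(cfg.resolved_base_url(), "http://localhost:11434");
    println!("{} -> {}", cfg.model, cfg.resolved_base_url());
}
```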
@@ -1,4 +1,5 @@
 use anyhow::Result;
+use crossterm::style::{Color, SetForegroundColor, ResetColor};
 use std::time::{Duration, Instant};
 
 #[derive(Debug, Clone)]
@@ -174,7 +175,7 @@ mod machine_ui_writer;
 use machine_ui_writer::MachineUiWriter;
 use ui_writer_impl::ConsoleUiWriter;
 
-#[derive(Parser)]
+#[derive(Parser, Clone)]
 #[command(name = "g3")]
 #[command(about = "A modular, composable AI coding agent")]
 #[command(version)]
@@ -183,6 +184,10 @@ pub struct Cli {
     #[arg(short, long)]
     pub verbose: bool,
 
+    /// Enable manual control of context compaction (disables auto-compact at 90%)
+    #[arg(long = "manual-compact")]
+    pub manual_compact: bool,
+
     /// Show the system prompt being sent to the LLM
     #[arg(long)]
     pub show_prompt: bool,
@@ -214,9 +219,9 @@ pub struct Cli {
     #[arg(long, value_name = "TEXT")]
     pub requirements: Option<String>,
 
-    /// Interactive mode: prompt for requirements and save to requirements.md before starting autonomous mode
+    /// Enable accumulative autonomous mode (default is chat mode)
     #[arg(long)]
-    pub interactive_requirements: bool,
+    pub auto: bool,
 
     /// Enable machine-friendly output mode with JSON markers and stats
     #[arg(long)]
@@ -285,10 +290,6 @@ pub async fn run() -> Result<()> {
         tracing_subscriber::registry().with(filter).init();
     }
 
-    if !cli.machine {
-        info!("Starting G3 AI Coding Agent");
-    }
-
     // Set up workspace directory
     let workspace_dir = if let Some(ws) = &cli.workspace {
         ws.clone()
@@ -309,112 +310,6 @@ pub async fn run() -> Result<()> {
 
     // Create project model
     let project = if cli.autonomous {
-        // Handle interactive requirements mode with AI enhancement
-        if cli.interactive_requirements {
-            println!("\n📝 Interactive Requirements Mode");
-            println!("================================\n");
-            println!("Describe what you want to build (can be brief):");
-            println!("Press Ctrl+D (Unix) or Ctrl+Z (Windows) when done.\n");
-
-            use std::io::{self, Read, Write};
-            let mut requirements_input = String::new();
-            io::stdin().read_to_string(&mut requirements_input)?;
-
-            if requirements_input.trim().is_empty() {
-                anyhow::bail!("No requirements provided. Exiting.");
-            }
-
-            println!("\n🤖 Enhancing your requirements with AI...\n");
-
-            // Create a temporary agent to enhance the requirements
-            let temp_config = Config::load_with_overrides(
-                cli.config.as_deref(),
-                cli.provider.clone(),
-                cli.model.clone(),
-            )?;
-
-            let ui_writer = ConsoleUiWriter::new();
-            let mut temp_agent = Agent::new_with_readme_and_quiet(
-                temp_config,
-                ui_writer,
-                None,
-                true, // quiet mode
-            ).await?;
-
-            // Craft the enhancement prompt
-            let enhancement_prompt = format!(
-                r#"You are a requirements analyst. Take this brief user input and expand it into a structured requirements document.
-
-USER INPUT:
-{}
-
-Create a professional requirements document with:
-1. A clear project title (# heading)
-2. An overview section explaining what will be built
-3. Organized requirements (functional, technical, quality)
-4. Acceptance criteria
-5. Any technical constraints or preferences mentioned
-
-Format as proper markdown. Be specific and actionable. If the user's input is vague, make reasonable assumptions but keep it focused on what they described.
-
-Output ONLY the markdown content, no explanations or meta-commentary."#,
-                requirements_input.trim()
-            );
-
-            // Execute enhancement task
-            let result = temp_agent
-                .execute_task_with_timing(&enhancement_prompt, None, false, false, false, false)
-                .await?;
-
-            let enhanced_requirements = result.response.trim().to_string();
-
-            // Show the enhanced requirements
-            println!("\n📋 Enhanced Requirements Document:");
-            println!("{}\n", "=".repeat(60));
-            println!("{}", enhanced_requirements);
-            println!("{}\n", "=".repeat(60));
-
-            // Ask for confirmation
-            println!("\n❓ Is this requirements document acceptable?");
-            println!(" [y] Yes, proceed with autonomous mode");
-            println!(" [e] Edit and save manually");
-            println!(" [n] No, cancel\n");
-
-            print!("Your choice (y/e/n): ");
-            io::stdout().flush()?;
-
-            let mut choice = String::new();
-            io::stdin().read_line(&mut choice)?;
-            let choice = choice.trim().to_lowercase();
-
-            let requirements_path = workspace_dir.join("requirements.md");
-
-            match choice.as_str() {
-                "y" | "yes" => {
-                    // Save enhanced requirements
-                    std::fs::write(&requirements_path, &enhanced_requirements)?;
-                    println!("\n✅ Requirements saved to: {}", requirements_path.display());
-                    println!("🚀 Starting autonomous mode...\n");
-                }
-                "e" | "edit" => {
-                    // Save enhanced requirements for manual editing
-                    std::fs::write(&requirements_path, &enhanced_requirements)?;
-                    println!("\n✅ Requirements saved to: {}", requirements_path.display());
-                    println!("📝 Please edit the file and run: g3 --autonomous");
-                    println!(" Exiting for now.\n");
-                    return Ok(());
-                }
-                "n" | "no" => {
-                    println!("\n❌ Cancelled. No files were saved.\n");
-                    return Ok(());
-                }
-                _ => {
-                    println!("\n❌ Invalid choice. Cancelled.\n");
-                    return Ok(());
-                }
-            }
-        }
-
         if let Some(requirements_text) = &cli.requirements {
             // Use requirements text override
             Project::new_autonomous_with_requirements(workspace_dir.clone(), requirements_text.clone())?
@@ -430,10 +325,6 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
     project.ensure_workspace_exists()?;
     project.enter_workspace()?;
 
-    if !cli.machine {
-        info!("Using workspace: {}", project.workspace().display());
-    }
-
     // Load configuration with CLI overrides
     let mut config = Config::load_with_overrides(
         cli.config.as_deref(),
@@ -444,9 +335,6 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
     // Apply macax flag override
     if cli.macax {
         config.macax.enabled = true;
-        if !cli.machine {
-            info!("macOS Accessibility API tools enabled");
-        }
     }
 
     // Apply webdriver flag override
@@ -454,6 +342,11 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
         config.webdriver.enabled = true;
     }
 
+    // Apply no-auto-compact flag override
+    if cli.manual_compact {
+        config.agent.auto_compact = false;
+    }
+
     // Validate provider if specified
     if let Some(ref provider) = cli.provider {
         let valid_providers = ["anthropic", "databricks", "embedded", "openai"];
@@ -482,6 +375,7 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
     // Execute task, autonomous mode, or start interactive mode based on machine mode
     if cli.machine {
         // Machine mode - use MachineUiWriter
+
         let ui_writer = MachineUiWriter::new();
 
         let agent = if cli.autonomous {
@@ -505,6 +399,20 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
         run_with_machine_mode(agent, cli, project).await?;
     } else {
         // Normal mode - use ConsoleUiWriter
 
+        // DEFAULT is chat mode; accumulative autonomous mode runs only when:
+        // 1. No task is provided (not single-shot)
+        // 2. Not in autonomous mode
+        // 3. The --auto flag is explicitly set
+        let use_accumulative = cli.task.is_none() && !cli.autonomous && cli.auto;
+
+        if use_accumulative {
+            // Run accumulative mode and return early
+            run_accumulative_mode(workspace_dir.clone(), cli.clone(), combined_content.clone()).await?;
+            return Ok(());
+        }
+
         let ui_writer = ConsoleUiWriter::new();
 
         let agent = if cli.autonomous {
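The mode-selection branch added above reduces to a single predicate over three CLI inputs. A sketch under the assumption that the conditions match the diff; the free function and its name are illustrative (the real check is inline in `run()`):

```rust
// Accumulative autonomous mode runs only when: no single-shot task was given,
// --autonomous is not set, and --auto is explicitly set. Otherwise the
// existing paths (single-shot, autonomous, or chat) apply.
fn use_accumulative(task: Option<&str>, autonomous: bool, auto: bool) -> bool {
    task.is_none() && !autonomous && auto
}

fn main() {
    assert!(use_accumulative(None, false, true));              // `g3 --auto`
    assert!(!use_accumulative(None, false, false));            // plain `g3` -> chat mode
    assert!(!use_accumulative(Some("fix bug"), false, true));  // single-shot task wins
    assert!(!use_accumulative(None, true, true));              // --autonomous wins
    println!("ok");
}
```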
@@ -527,7 +435,284 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
 
         run_with_console_mode(agent, cli, project, combined_content).await?;
     }
 
+    Ok(())
+}
+
+/// Accumulative autonomous mode: accumulates requirements from user input
+/// and runs autonomous mode after each input
+async fn run_accumulative_mode(
+    workspace_dir: PathBuf,
+    cli: Cli,
+    combined_content: Option<String>,
+) -> Result<()> {
+    let output = SimpleOutput::new();
+
+    output.print("");
+    output.print("🪿 G3 AI Coding Agent - Autonomous Mode");
+    output.print(" >> describe what you want, I'll build it iteratively");
+    output.print("");
+    output.print(&format!("📁 Workspace: {}", workspace_dir.display()));
+    output.print("");
+    output.print("💡 Each input you provide will be added to requirements");
+    output.print(" and I'll automatically work on implementing them. You can");
+    output.print(" interrupt at any time (Ctrl+C) to add clarifications or more requirements.");
+    output.print("");
+    output.print(" Type '/help' for commands, 'exit' or 'quit' to stop, Ctrl+D to finish");
+    output.print("");
+
+    // Initialize rustyline editor with history
+    let mut rl = DefaultEditor::new()?;
+    let history_file = dirs::home_dir().map(|mut path| {
+        path.push(".g3_accumulative_history");
+        path
+    });
+
+    if let Some(ref history_path) = history_file {
+        let _ = rl.load_history(history_path);
+    }
+
+    // Accumulated requirements stored in memory
+    let mut accumulated_requirements = Vec::new();
+    let mut turn_number = 0;
+
+    loop {
+        output.print(&format!("\n{}", "=".repeat(60)));
+        if accumulated_requirements.is_empty() {
+            output.print("📝 What would you like me to build? (describe your requirements)");
+        } else {
+            output.print(&format!("📝 Turn {} - What's next? (add more requirements or refinements)", turn_number + 1));
+        }
+        output.print(&format!("{}", "=".repeat(60)));
+
+        let readline = rl.readline("requirement> ");
+        match readline {
+            Ok(line) => {
+                let input = line.trim().to_string();
+
+                if input.is_empty() {
+                    continue;
+                }
+
+                if input == "exit" || input == "quit" {
+                    output.print("\n👋 Goodbye!");
+                    break;
+                }
+
+                // Check for slash commands
+                if input.starts_with('/') {
+                    match input.as_str() {
+                        "/help" => {
+                            output.print("");
+                            output.print("📖 Available Commands:");
+                            output.print(" /requirements - Show all accumulated requirements");
+                            output.print(" /chat - Switch to interactive chat mode");
+                            output.print(" /help - Show this help message");
+                            output.print(" exit/quit - Exit the session");
+                            output.print("");
+                            continue;
+                        }
+                        "/requirements" => {
+                            output.print("");
+                            if accumulated_requirements.is_empty() {
+                                output.print("📋 No requirements accumulated yet");
+                            } else {
+                                output.print("📋 Accumulated Requirements:");
+                                output.print("");
+                                for req in &accumulated_requirements {
+                                    output.print(&format!(" {}", req));
+                                }
+                            }
+                            output.print("");
+                            continue;
+                        }
+                        "/chat" => {
+                            output.print("");
+                            output.print("🔄 Switching to interactive chat mode...");
+                            output.print("");
+
+                            // Build context message with accumulated requirements
+                            let requirements_context = if accumulated_requirements.is_empty() {
+                                None
+                            } else {
+                                Some(format!(
+                                    "📋 Context from Accumulative Mode:\n\n\
+                                    We were working on these requirements. There may be unstaged or in-progress changes or recent changes to this branch. This is for your information.\n\n\
+                                    Requirements:\n{}\n",
+                                    accumulated_requirements.join("\n")
+                                ))
+                            };
+
+                            // Combine with existing content (README/AGENTS.md)
+                            let chat_combined_content = match (requirements_context, combined_content.clone()) {
+                                (Some(req_ctx), Some(existing)) => Some(format!("{}\n\n{}", req_ctx, existing)),
+                                (Some(req_ctx), None) => Some(req_ctx),
+                                (None, existing) => existing,
+                            };
+
+                            // Load configuration
+                            let mut config = Config::load_with_overrides(
+                                cli.config.as_deref(),
+                                cli.provider.clone(),
+                                cli.model.clone(),
+                            )?;
+
+                            // Apply macax flag override
+                            if cli.macax {
+                                config.macax.enabled = true;
+                            }
+
+                            // Apply webdriver flag override
+                            if cli.webdriver {
+                                config.webdriver.enabled = true;
+                            }
+
+                            // Apply no-auto-compact flag override
+                            if cli.manual_compact {
+                                config.agent.auto_compact = false;
+                            }
+
+                            // Create agent for interactive mode with requirements context
+                            let ui_writer = ConsoleUiWriter::new();
+                            let agent = Agent::new_with_readme_and_quiet(
+                                config,
+                                ui_writer,
+                                chat_combined_content.clone(),
+                                cli.quiet,
+                            )
+                            .await?;
+
+                            // Run interactive mode
+                            run_interactive(agent, cli.show_prompt, cli.show_code, chat_combined_content).await?;
+
+                            // After returning from interactive mode, exit
+                            output.print("\n👋 Goodbye!");
+                            break;
+                        }
+                        _ => {
+                            output.print(&format!("❌ Unknown command: {}. Type /help for available commands.", input));
+                            continue;
+                        }
+                    }
+                }
+
+                // Add to history
+                rl.add_history_entry(&input)?;
+
+                // Add this requirement to accumulated list
+                turn_number += 1;
+                accumulated_requirements.push(format!("{}. {}", turn_number, input));
+
+                // Build the complete requirements document
+                let requirements_doc = format!(
+                    "# Project Requirements\n\n\
+                    ## Current Instructions and Requirements:\n\n\
+                    {}\n\n\
+                    ## Latest Requirement (Turn {}):\n\n\
+                    {}",
+                    accumulated_requirements.join("\n"),
+                    turn_number,
+                    input
+                );
+
+                output.print("");
+                output.print(&format!("📋 Current instructions and requirements (Turn {}):", turn_number));
+                output.print(&format!(" {}", input));
+                output.print("");
+                output.print("🚀 Starting autonomous implementation...");
+                output.print("");
+
+                // Create a project with the accumulated requirements
+                let project = Project::new_autonomous_with_requirements(
+                    workspace_dir.clone(),
+                    requirements_doc.clone()
+                )?;
+
+                // Ensure workspace exists and enter it
+                project.ensure_workspace_exists()?;
+                project.enter_workspace()?;
+
+                // Load configuration with CLI overrides
+                let mut config = Config::load_with_overrides(
+                    cli.config.as_deref(),
+                    cli.provider.clone(),
+                    cli.model.clone(),
+                )?;
+
+                // Apply macax flag override
+                if cli.macax {
+                    config.macax.enabled = true;
+                }
+
+                // Apply webdriver flag override
+                if cli.webdriver {
+                    config.webdriver.enabled = true;
+                }
+
+                // Apply no-auto-compact flag override
+                if cli.manual_compact {
+                    config.agent.auto_compact = false;
+                }
+
+                // Create agent for this autonomous run
+                let ui_writer = ConsoleUiWriter::new();
+                let agent = Agent::new_autonomous_with_readme_and_quiet(
+                    config.clone(),
+                    ui_writer,
+                    combined_content.clone(),
+                    cli.quiet,
+                )
+                .await?;
+
+                // Run autonomous mode with the accumulated requirements
+                let autonomous_result = tokio::select! {
+                    result = run_autonomous(
+                        agent,
+                        project,
+                        cli.show_prompt,
+                        cli.show_code,
+                        cli.max_turns,
+                        cli.quiet,
+                    ) => result,
+                    _ = tokio::signal::ctrl_c() => {
+                        output.print("\n⚠️ Autonomous run cancelled by user (Ctrl+C)");
+                        Ok(())
+                    }
+                };
+
+                match autonomous_result {
+                    Ok(_) => {
+                        output.print("");
+                        output.print("✅ Autonomous run completed");
+                    }
+                    Err(e) => {
+                        output.print("");
+                        output.print(&format!("❌ Autonomous run failed: {}", e));
+                        output.print(" You can provide more requirements to continue.");
+                    }
+                }
+            }
+            Err(ReadlineError::Interrupted) => {
+                output.print("\n👋 Interrupted. Goodbye!");
+                break;
+            }
+            Err(ReadlineError::Eof) => {
+                output.print("\n👋 Goodbye!");
+                break;
+            }
+            Err(err) => {
+                error!("Error: {:?}", err);
+                break;
+            }
+        }
+    }
+
+    // Save history before exiting
+    if let Some(ref history_path) = history_file {
+        let _ = rl.save_history(history_path);
+    }
+
     Ok(())
 }
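The per-turn requirements document assembled inside the loop above can be factored into a small pure helper, which makes the format easy to verify. A sketch under the assumption that the format string matches the diff; the helper function itself is hypothetical:

```rust
// Builds the markdown document handed to each autonomous run: the full
// numbered list of accumulated requirements, then the latest one highlighted.
fn build_requirements_doc(accumulated: &[String], turn: usize, input: &str) -> String {
    format!(
        "# Project Requirements\n\n\
         ## Current Instructions and Requirements:\n\n\
         {}\n\n\
         ## Latest Requirement (Turn {}):\n\n\
         {}",
        accumulated.join("\n"),
        turn,
        input
    )
}

fn main() {
    let acc = vec![
        "1. create a simple web server in Python with Flask".to_string(),
        "2. add a /health endpoint that returns JSON".to_string(),
    ];
    let doc = build_requirements_doc(&acc, 2, "add a /health endpoint that returns JSON");
    assert!(doc.starts_with("# Project Requirements"));
    assert!(doc.contains("## Latest Requirement (Turn 2):"));
    println!("{doc}");
}
```

Because the whole accumulated list is resent every turn, later runs always see earlier requirements, which is what lets the coach-player loop refine prior work instead of starting over.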
@@ -589,9 +774,6 @@ async fn run_with_console_mode(
     // Execute task, autonomous mode, or start interactive mode
     if cli.autonomous {
         // Autonomous mode with coach-player feedback loop
-        if !cli.machine {
-            info!("Starting autonomous mode");
-        }
         run_autonomous(
             agent,
             project,
@@ -603,9 +785,6 @@ async fn run_with_console_mode(
         .await?;
     } else if let Some(task) = cli.task {
         // Single-shot mode
-        if !cli.machine {
-            info!("Executing task: {}", task);
-        }
         let output = SimpleOutput::new();
         let result = agent
             .execute_task_with_timing(&task, None, false, cli.show_prompt, cli.show_code, true)
@@ -613,9 +792,6 @@ async fn run_with_console_mode(
         output.print_smart(&result.response);
     } else {
         // Interactive mode (default)
-        if !cli.machine {
-            info!("Starting interactive mode");
-        }
         println!("📁 Workspace: {}", project.workspace().display());
         run_interactive(agent, cli.show_prompt, cli.show_code, combined_content).await?;
     }
@@ -664,7 +840,6 @@ fn read_agents_config(workspace_dir: &Path) -> Option<String> {
     match std::fs::read_to_string(&agents_path) {
         Ok(content) => {
             // Return the content with a note about which file was read
-            info!("Loaded AGENTS.md from {}", agents_path.display());
            Some(format!(
                "🤖 Agent Configuration (from AGENTS.md):\n\n{}",
                content
@@ -682,7 +857,6 @@ fn read_agents_config(workspace_dir: &Path) -> Option<String> {
     if alt_path.exists() {
         match std::fs::read_to_string(&alt_path) {
             Ok(content) => {
-                info!("Loaded agents.md from {}", alt_path.display());
                 Some(format!("🤖 Agent Configuration (from agents.md):\n\n{}", content))
             }
             Err(e) => {
@@ -1303,10 +1477,32 @@ fn handle_execution_error(e: &anyhow::Error, input: &str, output: &SimpleOutput,
     }
 }
 
-fn display_context_progress<W: UiWriter>(agent: &Agent<W>, output: &SimpleOutput) {
+fn display_context_progress<W: UiWriter>(agent: &Agent<W>, _output: &SimpleOutput) {
     let context = agent.get_context_window();
-    output.print(&format!("Context: {}/{} tokens ({:.1}%)",
-        context.used_tokens, context.total_tokens, context.percentage_used()));
+    let percentage = context.percentage_used();
+
+    // Create 10 dots representing context fullness
+    let total_dots: usize = 10;
+    let filled_dots = ((percentage / 100.0) * total_dots as f32).round() as usize;
+    let empty_dots = total_dots.saturating_sub(filled_dots);
+
+    let filled_str = "●".repeat(filled_dots);
+    let empty_str = "○".repeat(empty_dots);
+
+    // Determine color based on percentage
+    let color = if percentage < 40.0 {
+        Color::Green
+    } else if percentage < 60.0 {
+        Color::Yellow
+    } else if percentage < 80.0 {
+        Color::Rgb { r: 255, g: 165, b: 0 } // Orange
+    } else {
+        Color::Red
+    };
+
+    // Print with colored dots (using print! directly to handle color codes)
+    print!("Context: {}{}{}{} {:.0}% ({}/{} tokens)\n",
+        SetForegroundColor(color), filled_str, empty_str, ResetColor, percentage, context.used_tokens, context.total_tokens);
 }
 
 /// Set up the workspace directory for autonomous mode
@@ -71,18 +71,20 @@ impl SimpleOutput {
     }
 
     pub fn print_context(&self, used: u32, total: u32, percentage: f32) {
-        let bar_width: usize = 10;
-        let filled_width = ((percentage / 100.0) * bar_width as f32) as usize;
-        let empty_width = bar_width.saturating_sub(filled_width);
+        let total_dots = 10;
+        let filled_dots = ((percentage / 100.0) * total_dots as f32) as usize;
+        let empty_dots = total_dots.saturating_sub(filled_dots);
 
-        let filled_chars = "●".repeat(filled_width);
-        let empty_chars = "○".repeat(empty_width);
+        let filled_str = "●".repeat(filled_dots);
+        let empty_str = "○".repeat(empty_dots);
 
         // Determine color based on percentage
-        let color = if percentage < 60.0 {
+        let color = if percentage < 40.0 {
             crossterm::style::Color::Green
-        } else if percentage < 80.0 {
+        } else if percentage < 60.0 {
             crossterm::style::Color::Yellow
+        } else if percentage < 80.0 {
+            crossterm::style::Color::Rgb { r: 255, g: 165, b: 0 } // Orange
         } else {
             crossterm::style::Color::Red
         };
@@ -90,9 +92,9 @@ impl SimpleOutput {
         // Print with colored progress bar
         print!("Context: ");
         print!("{}", SetForegroundColor(color));
-        print!("{}{}", filled_chars, empty_chars);
+        print!("{}{}", filled_str, empty_str);
         print!("{}", ResetColor);
-        println!(" {:.1}% | {}/{} tokens", percentage, used, total);
+        println!(" {:.0}% ({}/{} tokens)", percentage, used, total);
     }
 
     pub fn print_context_thinning(&self, message: &str) {
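The new context gauge in both `print_context` and `display_context_progress` boils down to the same computation: scale the percentage onto ten dots and pick a color at the 40/60/80 thresholds (`print_context` truncates the dot count while `display_context_progress` rounds). A terminal-free sketch, with color names standing in for the crossterm values:

```rust
// Render the 10-dot gauge: filled dots scale linearly with percentage.
// This variant truncates, matching print_context.
fn gauge(percentage: f32) -> String {
    let total_dots: usize = 10;
    let filled = ((percentage / 100.0) * total_dots as f32) as usize;
    let empty = total_dots.saturating_sub(filled);
    format!("{}{}", "●".repeat(filled), "○".repeat(empty))
}

// Color thresholds from the diff: green < 40%, yellow < 60%, orange < 80%, else red.
fn color_name(percentage: f32) -> &'static str {
    if percentage < 40.0 {
        "green"
    } else if percentage < 60.0 {
        "yellow"
    } else if percentage < 80.0 {
        "orange"
    } else {
        "red"
    }
}

fn main() {
    assert_eq!(gauge(0.0), "○○○○○○○○○○");
    assert_eq!(gauge(55.0), "●●●●●○○○○○"); // 5.5 truncates to 5 filled dots
    assert_eq!(color_name(55.0), "yellow");
    assert_eq!(color_name(85.0), "red");
    println!("{} {}", gauge(85.0), color_name(85.0));
}
```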
@@ -17,6 +17,7 @@ pub struct ProvidersConfig {
|
|||||||
pub anthropic: Option<AnthropicConfig>,
|
pub anthropic: Option<AnthropicConfig>,
|
||||||
pub databricks: Option<DatabricksConfig>,
|
pub databricks: Option<DatabricksConfig>,
|
||||||
pub embedded: Option<EmbeddedConfig>,
|
pub embedded: Option<EmbeddedConfig>,
|
||||||
|
pub ollama: Option<OllamaConfig>,
|
||||||
pub default_provider: String,
|
pub default_provider: String,
|
||||||
pub coach: Option<String>, // Provider to use for coach in autonomous mode
|
pub coach: Option<String>, // Provider to use for coach in autonomous mode
|
||||||
pub player: Option<String>, // Provider to use for player in autonomous mode
|
pub player: Option<String>, // Provider to use for player in autonomous mode
|
||||||
@@ -60,11 +61,20 @@ pub struct EmbeddedConfig {
     pub threads: Option<u32>, // Number of CPU threads to use
 }
 
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct OllamaConfig {
+    pub model: String,
+    pub base_url: Option<String>, // Default: http://localhost:11434
+    pub max_tokens: Option<u32>,
+    pub temperature: Option<f32>,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct AgentConfig {
     pub max_context_length: usize,
     pub enable_streaming: bool,
     pub timeout_seconds: u64,
+    pub auto_compact: bool,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
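For orientation, the new provider config can be exercised on its own. The sketch below is illustrative, not the crate's code: it re-declares the `OllamaConfig` shape (without the serde derives) and shows the default noted in the `base_url` comment; the model name and helper function are made up for the example.

```rust
// Standalone sketch of the OllamaConfig shape (hypothetical helper, not crate code).
#[derive(Debug, Clone)]
struct OllamaConfig {
    model: String,            // required
    base_url: Option<String>, // falls back to http://localhost:11434 when None
    max_tokens: Option<u32>,
    temperature: Option<f32>,
}

// Resolve the endpoint, applying the documented default for a missing base_url.
fn effective_base_url(cfg: &OllamaConfig) -> String {
    cfg.base_url
        .clone()
        .unwrap_or_else(|| "http://localhost:11434".to_string())
}

fn main() {
    let cfg = OllamaConfig {
        model: "llama3.1".to_string(), // arbitrary example model name
        base_url: None,
        max_tokens: Some(2048),
        temperature: Some(0.7),
    };
    println!("{} -> {}", cfg.model, effective_base_url(&cfg));
}
```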
@@ -127,6 +137,7 @@ impl Default for Config {
                 use_oauth: Some(true),
             }),
             embedded: None,
+            ollama: None,
             default_provider: "databricks".to_string(),
             coach: None, // Will use default_provider if not specified
             player: None, // Will use default_provider if not specified
@@ -135,6 +146,7 @@ impl Default for Config {
                 max_context_length: 8192,
                 enable_streaming: true,
                 timeout_seconds: 60,
+                auto_compact: true,
             },
             computer_control: ComputerControlConfig::default(),
             webdriver: WebDriverConfig::default(),
@@ -242,6 +254,7 @@ impl Config {
                 gpu_layers: Some(32),
                 threads: Some(8),
             }),
+            ollama: None,
             default_provider: "embedded".to_string(),
             coach: None, // Will use default_provider if not specified
             player: None, // Will use default_provider if not specified
@@ -250,6 +263,7 @@ impl Config {
                 max_context_length: 8192,
                 enable_streaming: true,
                 timeout_seconds: 60,
+                auto_compact: true,
             },
             computer_control: ComputerControlConfig::default(),
             webdriver: WebDriverConfig::default(),
@@ -25,3 +25,4 @@ chrono = { version = "0.4", features = ["serde"] }
 rand = "0.8"
 regex = "1.0"
 shellexpand = "3.1"
+serde_yaml = "0.9"
crates/g3-core/src/code_search.rs (new file, 787 lines)
@@ -0,0 +1,787 @@
//! Code search functionality using ast-grep for syntax-aware semantic searches

use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use std::process::Stdio;
use std::time::{Duration, Instant};
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tokio::sync::Semaphore;
use tracing::{debug, error, info, warn};

/// Maximum number of searches allowed per request
const MAX_SEARCHES: usize = 20;

/// Default timeout for individual searches in seconds
const DEFAULT_TIMEOUT_SECS: u64 = 60;

/// Default maximum concurrency
const DEFAULT_MAX_CONCURRENCY: usize = 4;

/// Default maximum matches per search
const DEFAULT_MAX_MATCHES: usize = 500;

/// Search specification for a single ast-grep search
#[derive(Debug, Clone, Deserialize)]
pub struct SearchSpec {
    pub name: String,
    pub mode: SearchMode,

    // Pattern mode fields
    pub pattern: Option<String>,
    pub language: Option<String>,

    // YAML mode fields
    pub rule_yaml: Option<String>,

    // Common fields
    pub paths: Option<Vec<String>>,
    pub globs: Option<Vec<String>>,
    pub json_style: Option<JsonStyle>,
    pub context: Option<u32>,
    pub threads: Option<u32>,
    pub include_metadata: Option<bool>,
    pub no_ignore: Option<Vec<NoIgnoreType>>,
    pub severity: Option<HashMap<String, SeverityLevel>>,
    pub timeout_secs: Option<u64>,
}

/// Search mode: pattern or yaml
#[derive(Debug, Clone, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum SearchMode {
    Pattern,
    Yaml,
}

/// JSON output style
#[derive(Debug, Clone, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum JsonStyle {
    Pretty,
    Stream,
    Compact,
}

impl Default for JsonStyle {
    fn default() -> Self {
        JsonStyle::Stream
    }
}

/// No-ignore types
#[derive(Debug, Clone, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum NoIgnoreType {
    Hidden,
    Dot,
    Exclude,
    Global,
    Parent,
    Vcs,
}

/// Severity levels for YAML rules
#[derive(Debug, Clone, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum SeverityLevel {
    Error,
    Warning,
    Info,
    Hint,
    Off,
}

/// Request structure for code search
#[derive(Debug, Deserialize)]
pub struct CodeSearchRequest {
    pub searches: Vec<SearchSpec>,
    pub max_concurrency: Option<usize>,
    pub max_matches_per_search: Option<usize>,
}

/// Result of a single search
#[derive(Debug, Serialize)]
pub struct SearchResult {
    pub name: String,
    pub mode: String,
    pub status: String,
    pub cmd: Vec<String>,
    pub match_count: Option<usize>,
    pub truncated: Option<bool>,
    pub matches: Option<Vec<Value>>,
    pub stderr: Option<String>,
    pub exit_code: Option<i32>,
    pub duration_ms: u64,
}

/// Summary of all searches
#[derive(Debug, Serialize)]
pub struct SearchSummary {
    pub completed: usize,
    pub total: usize,
    pub total_matches: usize,
    pub duration_ms: u64,
}

/// Complete response structure
#[derive(Debug, Serialize)]
pub struct CodeSearchResponse {
    pub summary: SearchSummary,
    pub searches: Vec<SearchResult>,
}

/// YAML rule structure for validation
#[derive(Debug, Deserialize)]
struct YamlRule {
    pub id: String,
    pub language: String,
    pub rule: Value,
}

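As an aside, a request that deserializes into the `CodeSearchRequest` above might look like the following JSON. The values are invented for illustration; only the field names come from the structs, and `mode` is lowercase because of the `#[serde(rename_all = "lowercase")]` attribute on `SearchMode`:

```json
{
  "searches": [
    {
      "name": "find-unwraps",
      "mode": "pattern",
      "pattern": "$EXPR.unwrap()",
      "language": "rust",
      "paths": ["crates/"],
      "context": 2
    },
    {
      "name": "todo-rule",
      "mode": "yaml",
      "rule_yaml": "id: todo\nlanguage: Rust\nrule:\n  pattern: \"todo!()\""
    }
  ],
  "max_concurrency": 2,
  "max_matches_per_search": 100
}
```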
/// Execute a batch of code searches using ast-grep
pub async fn execute_code_search(request: CodeSearchRequest) -> Result<CodeSearchResponse> {
    let start_time = Instant::now();

    // Validate request
    if request.searches.is_empty() {
        return Err(anyhow!("No searches specified"));
    }

    if request.searches.len() > MAX_SEARCHES {
        return Err(anyhow!(
            "Too many searches: {} (max: {})",
            request.searches.len(),
            MAX_SEARCHES
        ));
    }

    // Check if ast-grep is available
    check_ast_grep_available().await?;

    let max_concurrency = request.max_concurrency.unwrap_or(DEFAULT_MAX_CONCURRENCY);
    let max_matches = request.max_matches_per_search.unwrap_or(DEFAULT_MAX_MATCHES);

    // Create semaphore for concurrency control
    let semaphore = std::sync::Arc::new(Semaphore::new(max_concurrency));

    // Execute searches concurrently
    let mut tasks = Vec::new();

    for search in request.searches {
        let sem = semaphore.clone();
        let task = tokio::spawn(async move {
            let _permit = sem.acquire().await.unwrap();
            execute_single_search(search, max_matches).await
        });
        tasks.push(task);
    }

    // Wait for all searches to complete
    let mut results = Vec::new();
    let mut total_matches = 0;
    let mut completed = 0;

    for task in tasks {
        match task.await {
            Ok(result) => {
                if result.status == "ok" {
                    completed += 1;
                    if let Some(count) = result.match_count {
                        total_matches += count;
                    }
                }
                results.push(result);
            }
            Err(e) => {
                error!("Task join error: {}", e);
                // Create an error result
                results.push(SearchResult {
                    name: "unknown".to_string(),
                    mode: "unknown".to_string(),
                    status: "error".to_string(),
                    cmd: vec![],
                    match_count: None,
                    truncated: None,
                    matches: None,
                    stderr: Some(format!("Task execution error: {}", e)),
                    exit_code: None,
                    duration_ms: 0,
                });
            }
        }
    }

    let total_duration = start_time.elapsed();

    Ok(CodeSearchResponse {
        summary: SearchSummary {
            completed,
            total: results.len(),
            total_matches,
            duration_ms: total_duration.as_millis() as u64,
        },
        searches: results,
    })
}

/// Execute a single search
async fn execute_single_search(search: SearchSpec, max_matches: usize) -> SearchResult {
    let start_time = Instant::now();
    let timeout_secs = search.timeout_secs.unwrap_or(DEFAULT_TIMEOUT_SECS);

    // Validate the search specification
    if let Err(e) = validate_search_spec(&search) {
        return SearchResult {
            name: search.name,
            mode: format!("{:?}", search.mode).to_lowercase(),
            status: "error".to_string(),
            cmd: vec![],
            match_count: None,
            truncated: None,
            matches: None,
            stderr: Some(format!("Validation error: {}", e)),
            exit_code: None,
            duration_ms: start_time.elapsed().as_millis() as u64,
        };
    }

    // Build command
    let cmd_args = match build_ast_grep_command(&search) {
        Ok(args) => args,
        Err(e) => {
            return SearchResult {
                name: search.name,
                mode: format!("{:?}", search.mode).to_lowercase(),
                status: "error".to_string(),
                cmd: vec![],
                match_count: None,
                truncated: None,
                matches: None,
                stderr: Some(format!("Command build error: {}", e)),
                exit_code: None,
                duration_ms: start_time.elapsed().as_millis() as u64,
            };
        }
    };

    debug!("Executing ast-grep command: {:?}", cmd_args);

    // Execute with timeout
    let timeout_duration = Duration::from_secs(timeout_secs);

    match tokio::time::timeout(timeout_duration, run_ast_grep_command(&cmd_args)).await {
        Ok(Ok((stdout, stderr, exit_code))) => {
            let duration_ms = start_time.elapsed().as_millis() as u64;

            if exit_code == 0 {
                // Parse JSON output
                match parse_ast_grep_output(&stdout, max_matches) {
                    Ok((matches, truncated)) => {
                        SearchResult {
                            name: search.name,
                            mode: format!("{:?}", search.mode).to_lowercase(),
                            status: "ok".to_string(),
                            cmd: cmd_args,
                            match_count: Some(matches.len()),
                            truncated: Some(truncated),
                            matches: Some(matches),
                            stderr: if stderr.is_empty() { None } else { Some(stderr) },
                            exit_code: None,
                            duration_ms,
                        }
                    }
                    Err(e) => {
                        SearchResult {
                            name: search.name,
                            mode: format!("{:?}", search.mode).to_lowercase(),
                            status: "error".to_string(),
                            cmd: cmd_args,
                            match_count: None,
                            truncated: None,
                            matches: None,
                            stderr: Some(format!("JSON parse error: {}\nRaw output: {}", e, stdout)),
                            exit_code: Some(exit_code),
                            duration_ms,
                        }
                    }
                }
            } else {
                SearchResult {
                    name: search.name,
                    mode: format!("{:?}", search.mode).to_lowercase(),
                    status: "error".to_string(),
                    cmd: cmd_args,
                    match_count: None,
                    truncated: None,
                    matches: None,
                    stderr: Some(stderr),
                    exit_code: Some(exit_code),
                    duration_ms,
                }
            }
        }
        Ok(Err(e)) => {
            SearchResult {
                name: search.name,
                mode: format!("{:?}", search.mode).to_lowercase(),
                status: "error".to_string(),
                cmd: cmd_args,
                match_count: None,
                truncated: None,
                matches: None,
                stderr: Some(format!("Execution error: {}", e)),
                exit_code: None,
                duration_ms: start_time.elapsed().as_millis() as u64,
            }
        }
        Err(_) => {
            SearchResult {
                name: search.name,
                mode: format!("{:?}", search.mode).to_lowercase(),
                status: "timeout".to_string(),
                cmd: cmd_args,
                match_count: None,
                truncated: None,
                matches: None,
                stderr: Some(format!("Search timed out after {} seconds", timeout_secs)),
                exit_code: None,
                duration_ms: start_time.elapsed().as_millis() as u64,
            }
        }
    }
}

/// Validate a search specification
fn validate_search_spec(search: &SearchSpec) -> Result<()> {
    match search.mode {
        SearchMode::Pattern => {
            if search.pattern.is_none() || search.pattern.as_ref().unwrap().is_empty() {
                return Err(anyhow!("Pattern mode requires non-empty 'pattern' field"));
            }
        }
        SearchMode::Yaml => {
            let rule_yaml = search.rule_yaml.as_ref()
                .ok_or_else(|| anyhow!("YAML mode requires 'rule_yaml' field"))?;

            if rule_yaml.is_empty() {
                return Err(anyhow!("YAML mode requires non-empty 'rule_yaml' field"));
            }

            // Parse and validate YAML structure
            let parsed: YamlRule = serde_yaml::from_str(rule_yaml)
                .map_err(|e| anyhow!("Invalid YAML rule: {}", e))?;

            if parsed.id.is_empty() {
                return Err(anyhow!("YAML rule must have non-empty 'id' field"));
            }

            if parsed.language.is_empty() {
                return Err(anyhow!("YAML rule must have non-empty 'language' field"));
            }

            // Validate language is supported (basic check)
            validate_language(&parsed.language)?;
        }
    }

    // Validate context range
    if let Some(context) = search.context {
        if context > 20 {
            return Err(anyhow!("Context lines cannot exceed 20"));
        }
    }

    Ok(())
}

/// Validate that a language is supported by ast-grep
fn validate_language(language: &str) -> Result<()> {
    let supported_languages = [
        "rust", "javascript", "typescript", "python", "java", "c", "cpp", "csharp",
        "go", "html", "css", "json", "yaml", "xml", "bash", "kotlin", "swift",
        "php", "ruby", "scala", "dart", "lua", "r", "sql", "dockerfile",
        "Rust", "JavaScript", "TypeScript", "Python", "Java", "C", "Cpp", "CSharp",
        "Go", "Html", "Css", "Json", "Yaml", "Xml", "Bash", "Kotlin", "Swift",
        "Php", "Ruby", "Scala", "Dart", "Lua", "R", "Sql", "Dockerfile"
    ];

    if !supported_languages.contains(&language) {
        warn!("Language '{}' may not be supported by ast-grep", language);
    }

    Ok(())
}

/// Build ast-grep command arguments
fn build_ast_grep_command(search: &SearchSpec) -> Result<Vec<String>> {
    let mut args = vec!["ast-grep".to_string()];

    match search.mode {
        SearchMode::Pattern => {
            args.push("run".to_string());

            // Add pattern
            args.push("-p".to_string());
            args.push(search.pattern.as_ref().unwrap().clone());

            // Add language if specified
            if let Some(ref lang) = search.language {
                args.push("-l".to_string());
                args.push(lang.clone());
            }
        }
        SearchMode::Yaml => {
            args.push("scan".to_string());

            // Add inline rules
            args.push("--inline-rules".to_string());
            args.push(search.rule_yaml.as_ref().unwrap().clone());

            // Add include-metadata if requested
            if search.include_metadata.unwrap_or(false) {
                args.push("--include-metadata".to_string());
            }

            // Add severity overrides
            if let Some(ref severity_map) = search.severity {
                for (rule_id, severity) in severity_map {
                    match severity {
                        SeverityLevel::Error => {
                            args.push("--error".to_string());
                            args.push(rule_id.clone());
                        }
                        SeverityLevel::Warning => {
                            args.push("--warning".to_string());
                            args.push(rule_id.clone());
                        }
                        SeverityLevel::Info => {
                            args.push("--info".to_string());
                            args.push(rule_id.clone());
                        }
                        SeverityLevel::Hint => {
                            args.push("--hint".to_string());
                            args.push(rule_id.clone());
                        }
                        SeverityLevel::Off => {
                            args.push("--off".to_string());
                            args.push(rule_id.clone());
                        }
                    }
                }
            }
        }
    }

    // Add common arguments

    // Add globs if specified
    if let Some(ref globs) = search.globs {
        if !globs.is_empty() {
            args.push("--globs".to_string());
            args.push(globs.join(","));
        }
    }

    // Add context
    if let Some(context) = search.context {
        args.push("-C".to_string());
        args.push(context.to_string());
    }

    // Add threads
    if let Some(threads) = search.threads {
        args.push("-j".to_string());
        args.push(threads.to_string());
    }

    // Add JSON output style
    let json_style = search.json_style.as_ref().unwrap_or(&JsonStyle::Stream);
    let json_arg = match json_style {
        JsonStyle::Pretty => "--json=pretty",
        JsonStyle::Stream => "--json=stream",
        JsonStyle::Compact => "--json=compact",
    };
    args.push(json_arg.to_string());

    // Add no-ignore options
    if let Some(ref no_ignore_list) = search.no_ignore {
        for no_ignore_type in no_ignore_list {
            let flag = match no_ignore_type {
                NoIgnoreType::Hidden => "--no-ignore=hidden",
                NoIgnoreType::Dot => "--no-ignore=dot",
                NoIgnoreType::Exclude => "--no-ignore=exclude",
                NoIgnoreType::Global => "--no-ignore=global",
                NoIgnoreType::Parent => "--no-ignore=parent",
                NoIgnoreType::Vcs => "--no-ignore=vcs",
            };
            args.push(flag.to_string());
        }
    }

    // Add paths (default to current directory if none specified)
    if let Some(ref paths) = search.paths {
        if !paths.is_empty() {
            args.extend(paths.clone());
        } else {
            args.push(".".to_string());
        }
    } else {
        args.push(".".to_string());
    }

    Ok(args)
}

/// Run ast-grep command and capture output
async fn run_ast_grep_command(args: &[String]) -> Result<(String, String, i32)> {
    let mut cmd = Command::new(&args[0]);
    cmd.args(&args[1..]);
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());

    debug!("Running command: {:?}", args);

    let mut child = cmd.spawn()
        .map_err(|e| anyhow!("Failed to spawn ast-grep process: {}", e))?;

    let stdout = child.stdout.take().unwrap();
    let stderr = child.stderr.take().unwrap();

    let stdout_reader = BufReader::new(stdout);
    let stderr_reader = BufReader::new(stderr);

    let stdout_task = tokio::spawn(async move {
        let mut lines = stdout_reader.lines();
        let mut output = String::new();
        while let Ok(Some(line)) = lines.next_line().await {
            if !output.is_empty() {
                output.push('\n');
            }
            output.push_str(&line);
        }
        output
    });

    let stderr_task = tokio::spawn(async move {
        let mut lines = stderr_reader.lines();
        let mut output = String::new();
        while let Ok(Some(line)) = lines.next_line().await {
            if !output.is_empty() {
                output.push('\n');
            }
            output.push_str(&line);
        }
        output
    });

    let status = child.wait().await
        .map_err(|e| anyhow!("Failed to wait for ast-grep process: {}", e))?;

    let stdout_output = stdout_task.await
        .map_err(|e| anyhow!("Failed to read stdout: {}", e))?;
    let stderr_output = stderr_task.await
        .map_err(|e| anyhow!("Failed to read stderr: {}", e))?;

    let exit_code = status.code().unwrap_or(-1);

    Ok((stdout_output, stderr_output, exit_code))
}

/// Parse ast-grep JSON output
fn parse_ast_grep_output(output: &str, max_matches: usize) -> Result<(Vec<Value>, bool)> {
    if output.trim().is_empty() {
        return Ok((vec![], false));
    }

    let mut matches = Vec::new();
    let mut truncated = false;

    // Handle stream format (line-delimited JSON)
    for line in output.lines() {
        let line = line.trim();
        if line.is_empty() {
            continue;
        }

        match serde_json::from_str::<Value>(line) {
            Ok(match_obj) => {
                if matches.len() >= max_matches {
                    truncated = true;
                    break;
                }
                matches.push(match_obj);
            }
            Err(e) => {
                debug!("Failed to parse JSON line '{}': {}", line, e);
                // Try to parse the entire output as a single JSON array
                match serde_json::from_str::<Vec<Value>>(output) {
                    Ok(array_matches) => {
                        let take_count = array_matches.len().min(max_matches);
                        let total_count = array_matches.len();
                        matches = array_matches.into_iter().take(take_count).collect();
                        truncated = take_count < total_count;
                        break;
                    }
                    Err(e2) => {
                        return Err(anyhow!(
                            "Failed to parse ast-grep output as line-delimited JSON or JSON array. Line error: {}, Array error: {}",
                            e, e2
                        ));
                    }
                }
            }
        }
    }

    Ok((matches, truncated))
}

/// Check if ast-grep is available and provide installation hints if not
async fn check_ast_grep_available() -> Result<()> {
    match Command::new("ast-grep")
        .arg("--version")
        .output()
        .await
    {
        Ok(output) => {
            if output.status.success() {
                let version = String::from_utf8_lossy(&output.stdout);
                info!("Found ast-grep: {}", version.trim());
                Ok(())
            } else {
                Err(anyhow!("ast-grep command failed: {}", String::from_utf8_lossy(&output.stderr)))
            }
        }
        Err(_) => {
            Err(anyhow!(
                "ast-grep not found. Please install it using one of these methods:\n\n\
                • Homebrew (macOS): brew install ast-grep\n\
                • MacPorts (macOS): sudo port install ast-grep\n\
                • Nix: nix-env -iA nixpkgs.ast-grep\n\
                • Cargo: cargo install ast-grep\n\
                • npm: npm install -g @ast-grep/cli\n\
                • pip: pip install ast-grep\n\n\
                For more installation options, visit: https://ast-grep.github.io/guide/quick-start.html"
            ))
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_validate_pattern_search() {
        let search = SearchSpec {
            name: "test".to_string(),
            mode: SearchMode::Pattern,
            pattern: Some("fn $NAME() {}".to_string()),
            language: Some("rust".to_string()),
            rule_yaml: None,
            paths: None,
            globs: None,
            json_style: None,
            context: None,
            threads: None,
            include_metadata: None,
            no_ignore: None,
            severity: None,
            timeout_secs: None,
        };

        assert!(validate_search_spec(&search).is_ok());
    }

    #[test]
    fn test_validate_yaml_search() {
        let yaml_rule = r#"
id: test-rule
language: Rust
rule:
  pattern: "fn $NAME() {}"
"#;

        let search = SearchSpec {
            name: "test".to_string(),
            mode: SearchMode::Yaml,
            pattern: None,
            language: None,
            rule_yaml: Some(yaml_rule.to_string()),
            paths: None,
            globs: None,
            json_style: None,
            context: None,
            threads: None,
            include_metadata: None,
            no_ignore: None,
            severity: None,
            timeout_secs: None,
        };

        assert!(validate_search_spec(&search).is_ok());
    }

    #[test]
    fn test_build_pattern_command() {
        let search = SearchSpec {
            name: "test".to_string(),
            mode: SearchMode::Pattern,
            pattern: Some("fn $NAME() {}".to_string()),
            language: Some("rust".to_string()),
            rule_yaml: None,
            paths: Some(vec!["src/".to_string()]),
            globs: None,
            json_style: Some(JsonStyle::Stream),
            context: Some(2),
            threads: Some(4),
            include_metadata: None,
            no_ignore: None,
            severity: None,
            timeout_secs: None,
        };

        let cmd = build_ast_grep_command(&search).unwrap();

        assert_eq!(cmd[0], "ast-grep");
        assert_eq!(cmd[1], "run");
        assert!(cmd.contains(&"-p".to_string()));
        assert!(cmd.contains(&"fn $NAME() {}".to_string()));
        assert!(cmd.contains(&"-l".to_string()));
        assert!(cmd.contains(&"rust".to_string()));
        assert!(cmd.contains(&"--json=stream".to_string()));
        assert!(cmd.contains(&"-C".to_string()));
        assert!(cmd.contains(&"2".to_string()));
        assert!(cmd.contains(&"-j".to_string()));
        assert!(cmd.contains(&"4".to_string()));
        assert!(cmd.contains(&"src/".to_string()));
    }

    #[test]
    fn test_parse_stream_json() {
        let output = r#"{"file":"test.rs","text":"fn hello() {}"}
{"file":"test2.rs","text":"fn world() {}"}"#;

        let (matches, truncated) = parse_ast_grep_output(output, 10).unwrap();

        assert_eq!(matches.len(), 2);
        assert!(!truncated);
        assert_eq!(matches[0]["file"], "test.rs");
        assert_eq!(matches[1]["file"], "test2.rs");
    }

    #[test]
    fn test_parse_truncated_output() {
        let output = r#"{"file":"test1.rs","text":"fn a() {}"}
{"file":"test2.rs","text":"fn b() {}"}
{"file":"test3.rs","text":"fn c() {}"}"#;

        let (matches, truncated) = parse_ast_grep_output(output, 2).unwrap();

        assert_eq!(matches.len(), 2);
        assert!(truncated);
    }
}
@@ -4,6 +4,11 @@
 // 3. Only elide JSON content between first '{' and last '}' (inclusive)
 // 4. Return everything else as the final filtered string
+
+//! JSON tool call filtering for streaming LLM responses.
+//!
+//! This module filters out JSON tool calls from LLM output streams while preserving
+//! regular text content. It uses a state machine to handle streaming chunks.
 
 use regex::Regex;
 use std::cell::RefCell;
 use tracing::debug;
@@ -13,37 +18,51 @@ thread_local! {
     static FIXED_JSON_TOOL_STATE: RefCell<FixedJsonToolState> = RefCell::new(FixedJsonToolState::new());
 }
 
+/// Internal state for tracking JSON tool call filtering across streaming chunks.
 #[derive(Debug, Clone)]
 struct FixedJsonToolState {
+    /// True when actively suppressing a confirmed tool call
     suppression_mode: bool,
+    /// True when buffering potential JSON (saw { but not yet confirmed as tool call)
+    potential_json_mode: bool,
+    /// Tracks nesting depth of braces within JSON
     brace_depth: i32,
     buffer: String,
-    json_start_in_buffer: Option<usize>,
+    json_start_in_buffer: Option<usize>, // Position where confirmed JSON tool call starts
     content_returned_up_to: usize, // Track how much content we've already returned
+    potential_json_start: Option<usize>, // Where the potential JSON started
 }
 
 impl FixedJsonToolState {
     fn new() -> Self {
         Self {
             suppression_mode: false,
+            potential_json_mode: false,
             brace_depth: 0,
             buffer: String::new(),
             json_start_in_buffer: None,
             content_returned_up_to: 0,
+            potential_json_start: None,
         }
     }
 
     fn reset(&mut self) {
        self.suppression_mode = false;
+        self.potential_json_mode = false;
         self.brace_depth = 0;
         self.buffer.clear();
         self.json_start_in_buffer = None;
         self.content_returned_up_to = 0;
+        self.potential_json_start = None;
     }
 }
 
 // FINAL CORRECTED implementation according to specification
 
+/// Filters JSON tool calls from streaming LLM content.
+///
+/// Processes content chunks and removes JSON tool calls while preserving regular text.
+/// Maintains state across calls to handle tool calls spanning multiple chunks.
 pub fn fixed_filter_json_tool_calls(content: &str) -> String {
     if content.is_empty() {
         return String::new();
@@ -87,13 +106,225 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
             _ => {}
         }
     }
+
+    // CRITICAL FIX: After counting braces, if still in suppression mode,
+    // check if a new tool call pattern appears. This handles truncated JSON
+    // followed by complete JSON.
+    if state.suppression_mode {
+        let current_json_start = state.json_start_in_buffer.unwrap();
+        // Don't require newline - the new JSON might be concatenated directly
|
||||||
|
let tool_call_regex = Regex::new(r#"\{\s*"tool"\s*:\s*""#).unwrap();
|
||||||
|
|
||||||
|
// Look for new tool call patterns after the current one
|
||||||
|
if let Some(captures) = tool_call_regex.find(&state.buffer[current_json_start + 1..]) {
|
||||||
|
let new_json_start = current_json_start + 1 + captures.start() + captures.as_str().find('{').unwrap();
|
||||||
|
|
||||||
|
debug!("Detected new tool call at position {} while processing incomplete one at {} - discarding old", new_json_start, current_json_start);
|
||||||
|
|
||||||
|
// The previous JSON was incomplete/malformed
|
||||||
|
// Return content before the old JSON (if any)
|
||||||
|
let content_before_old_json = if current_json_start > state.content_returned_up_to {
|
||||||
|
state.buffer[state.content_returned_up_to..current_json_start].to_string()
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
// Update state to skip the incomplete JSON and position at the new one
|
||||||
|
// We'll process the new JSON on the next call
|
||||||
|
state.content_returned_up_to = new_json_start;
|
||||||
|
state.suppression_mode = false;
|
||||||
|
state.json_start_in_buffer = None;
|
||||||
|
state.brace_depth = 0;
|
||||||
|
|
||||||
|
return content_before_old_json;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Still in suppression mode, return empty string (content is being accumulated)
|
// Still in suppression mode, return empty string (content is being accumulated)
|
||||||
return String::new();
|
return String::new();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if we're in potential JSON mode (saw { but waiting to confirm it's a tool call)
|
||||||
|
if state.potential_json_mode {
|
||||||
|
// Check if the buffer contains a confirmed tool call pattern
|
||||||
|
let tool_call_regex = Regex::new(r#"(?m)^\s*\{\s*"tool"\s*:\s*""#).unwrap();
|
||||||
|
|
||||||
|
if let Some(captures) = tool_call_regex.find(&state.buffer) {
|
||||||
|
// Confirmed! This is a tool call - enter suppression mode
|
||||||
|
let match_text = captures.as_str();
|
||||||
|
if let Some(brace_offset) = match_text.find('{') {
|
||||||
|
let json_start = captures.start() + brace_offset;
|
||||||
|
|
||||||
|
debug!("Confirmed JSON tool call at position {} - entering suppression mode", json_start);
|
||||||
|
|
||||||
|
state.potential_json_mode = false;
|
||||||
|
state.suppression_mode = true;
|
||||||
|
state.brace_depth = 0;
|
||||||
|
state.json_start_in_buffer = Some(json_start);
|
||||||
|
|
||||||
|
// Count braces from json_start to see if JSON is complete
|
||||||
|
let buffer_slice = state.buffer[json_start..].to_string();
|
||||||
|
for ch in buffer_slice.chars() {
|
||||||
|
match ch {
|
||||||
|
'{' => state.brace_depth += 1,
|
||||||
|
'}' => {
|
||||||
|
state.brace_depth -= 1;
|
||||||
|
if state.brace_depth <= 0 {
|
||||||
|
debug!("JSON tool call completed immediately");
|
||||||
|
let result = extract_fixed_content(&state.buffer, json_start);
|
||||||
|
let new_content = if result.len() > state.content_returned_up_to {
|
||||||
|
result[state.content_returned_up_to..].to_string()
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
state.reset();
|
||||||
|
return new_content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// JSON incomplete, stay in suppression mode, return nothing
|
||||||
|
return String::new();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we can rule out this being a tool call
|
||||||
|
// If we have enough content after the { and it doesn't match the pattern, release it
|
||||||
|
if let Some(potential_start) = state.potential_json_start {
|
||||||
|
let content_after_brace = &state.buffer[potential_start..];
|
||||||
|
|
||||||
|
// Rule out as a tool call if:
|
||||||
|
// 1. Closing } appears before we see the full pattern
|
||||||
|
// 2. Content clearly doesn't match the tool call pattern
|
||||||
|
// 3. Newline appears after the opening brace (tool calls should be compact)
|
||||||
|
|
||||||
|
let has_closing_brace = content_after_brace.contains('}');
|
||||||
|
let has_newline = content_after_brace[1..].contains('\n'); // Skip first char which is {
|
||||||
|
let long_enough = content_after_brace.len() >= 10;
|
||||||
|
|
||||||
|
// Detect non-tool JSON patterns:
|
||||||
|
// - { followed by " and a key that doesn't start with "tool"
|
||||||
|
// - { followed by "t" but not "to"
|
||||||
|
// - { followed by "to" but not "too", etc.
|
||||||
|
let not_tool_pattern = Regex::new(r#"^\{\s*"(?:[^t]|t(?:[^o]|o(?:[^o]|o(?:[^l]|l[^"\s:]))))"#).unwrap();
|
||||||
|
let definitely_not_tool = not_tool_pattern.is_match(content_after_brace);
|
||||||
|
|
||||||
|
if has_closing_brace || has_newline || (long_enough && definitely_not_tool) {
|
||||||
|
debug!("Potential JSON ruled out - not a tool call");
|
||||||
|
state.potential_json_mode = false;
|
||||||
|
state.potential_json_start = None;
|
||||||
|
|
||||||
|
// Return the buffered content we've been holding
|
||||||
|
let new_content = if state.buffer.len() > state.content_returned_up_to {
|
||||||
|
state.buffer[state.content_returned_up_to..].to_string()
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
state.content_returned_up_to = state.buffer.len();
|
||||||
|
return new_content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Still in potential mode, keep buffering
|
||||||
|
return String::new();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect potential JSON start: { at the beginning of a line
|
||||||
|
let potential_json_regex = Regex::new(r"(?m)^\s*\{\s*").unwrap();
|
||||||
|
|
||||||
|
if let Some(captures) = potential_json_regex.find(&state.buffer[state.content_returned_up_to..]) {
|
||||||
|
let match_start = state.content_returned_up_to + captures.start();
|
||||||
|
let brace_pos = match_start + captures.as_str().find('{').unwrap();
|
||||||
|
|
||||||
|
debug!("Potential JSON detected at position {} - entering buffering mode", brace_pos);
|
||||||
|
|
||||||
|
// Fast path: check if this is already a confirmed tool call
|
||||||
|
let tool_call_regex = Regex::new(r#"(?m)^\s*\{\s*"tool"\s*:\s*""#).unwrap();
|
||||||
|
if tool_call_regex.is_match(&state.buffer[brace_pos..]) {
|
||||||
|
// This is a confirmed tool call! Process it immediately
|
||||||
|
let json_start = brace_pos;
|
||||||
|
debug!("Immediately confirmed tool call at position {}", json_start);
|
||||||
|
|
||||||
|
// Return content before JSON
|
||||||
|
let content_before = if json_start > state.content_returned_up_to {
|
||||||
|
state.buffer[state.content_returned_up_to..json_start].to_string()
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
state.content_returned_up_to = json_start;
|
||||||
|
state.suppression_mode = true;
|
||||||
|
state.brace_depth = 0;
|
||||||
|
state.json_start_in_buffer = Some(json_start);
|
||||||
|
|
||||||
|
// Count braces to see if JSON is complete
|
||||||
|
let buffer_slice = state.buffer[json_start..].to_string();
|
||||||
|
for ch in buffer_slice.chars() {
|
||||||
|
match ch {
|
||||||
|
'{' => state.brace_depth += 1,
|
||||||
|
'}' => {
|
||||||
|
state.brace_depth -= 1;
|
||||||
|
if state.brace_depth <= 0 {
|
||||||
|
debug!("JSON tool call completed in same chunk");
|
||||||
|
let result = extract_fixed_content(&state.buffer, json_start);
|
||||||
|
let content_after = if result.len() > json_start {
|
||||||
|
&result[json_start..]
|
||||||
|
} else {
|
||||||
|
""
|
||||||
|
};
|
||||||
|
let final_result = format!("{}{}", content_before, content_after);
|
||||||
|
state.reset();
|
||||||
|
return final_result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// JSON incomplete, return content before and stay in suppression mode
|
||||||
|
return content_before;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return content before the potential JSON
|
||||||
|
let content_before = if brace_pos > state.content_returned_up_to {
|
||||||
|
state.buffer[state.content_returned_up_to..brace_pos].to_string()
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
|
||||||
|
state.content_returned_up_to = brace_pos;
|
||||||
|
state.potential_json_mode = true;
|
||||||
|
state.potential_json_start = Some(brace_pos);
|
||||||
|
|
||||||
|
// Optimization: immediately check if we can rule this out for single-chunk processing
|
||||||
|
let content_after_brace = &state.buffer[brace_pos..];
|
||||||
|
let has_closing_brace = content_after_brace.contains('}');
|
||||||
|
let has_newline = content_after_brace.len() > 1 && content_after_brace[1..].contains('\n');
|
||||||
|
let long_enough = content_after_brace.len() >= 10;
|
||||||
|
|
||||||
|
let not_tool_pattern = Regex::new(r#"^\{\s*"(?:[^t]|t(?:[^o]|o(?:[^o]|o(?:[^l]|l[^"\s:]))))"#).unwrap();
|
||||||
|
let definitely_not_tool = not_tool_pattern.is_match(content_after_brace);
|
||||||
|
|
||||||
|
if has_closing_brace || has_newline || (long_enough && definitely_not_tool) {
|
||||||
|
debug!("Immediately ruled out as not a tool call");
|
||||||
|
state.potential_json_mode = false;
|
||||||
|
state.potential_json_start = None;
|
||||||
|
|
||||||
|
// Return all the buffered content
|
||||||
|
let new_content = if state.buffer.len() > state.content_returned_up_to {
|
||||||
|
state.buffer[state.content_returned_up_to..].to_string()
|
||||||
|
} else {
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
state.content_returned_up_to = state.buffer.len();
|
||||||
|
return format!("{}{}", content_before, new_content);
|
||||||
|
}
|
||||||
|
|
||||||
|
return content_before;
|
||||||
|
}
|
||||||
|
|
||||||
// Check for tool call pattern using corrected regex
|
// Check for tool call pattern using corrected regex
|
||||||
// More flexible than the strict specification to handle real-world JSON
|
let tool_call_regex = Regex::new(r#"(?m)^\s*\{\s*"tool"\s*:\s*"[^"]*""#).unwrap();
|
||||||
let tool_call_regex = Regex::new(r#"(?m)^\s*\{\s*"tool"\s*:\s*""#).unwrap();
|
|
||||||
|
|
||||||
if let Some(captures) = tool_call_regex.find(&state.buffer) {
|
if let Some(captures) = tool_call_regex.find(&state.buffer) {
|
||||||
let match_text = captures.as_str();
|
let match_text = captures.as_str();
|
||||||
@@ -168,9 +399,17 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper function to extract content with JSON tool call filtered out
|
/// Extracts content from buffer, removing the JSON tool call.
|
||||||
// Returns everything except the JSON between the first '{' and last '}' (inclusive)
|
///
|
||||||
|
/// Given a buffer and the start position of a JSON tool call, this function:
|
||||||
|
/// 1. Extracts all content before the JSON
|
||||||
|
/// 2. Finds the end of the JSON (matching closing brace)
|
||||||
|
/// 3. Extracts all content after the JSON
|
||||||
|
/// 4. Returns the concatenation of before + after (JSON removed)
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `full_content` - The full content buffer
|
||||||
|
/// * `json_start` - Position where the JSON tool call begins
|
||||||
fn extract_fixed_content(full_content: &str, json_start: usize) -> String {
|
fn extract_fixed_content(full_content: &str, json_start: usize) -> String {
|
||||||
// Find the end of the JSON using proper brace counting with string handling
|
// Find the end of the JSON using proper brace counting with string handling
|
||||||
let mut brace_depth = 0;
|
let mut brace_depth = 0;
|
||||||
@@ -212,8 +451,10 @@ fn extract_fixed_content(full_content: &str, json_start: usize) -> String {
|
|||||||
format!("{}{}", before, after)
|
format!("{}{}", before, after)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset function for testing
|
/// Resets the global JSON filtering state.
|
||||||
|
///
|
||||||
|
/// Call this between independent filtering sessions to ensure clean state.
|
||||||
|
/// This is particularly important in tests and when starting new conversations.
|
||||||
pub fn reset_fixed_json_tool_state() {
|
pub fn reset_fixed_json_tool_state() {
|
||||||
FIXED_JSON_TOOL_STATE.with(|state| {
|
FIXED_JSON_TOOL_STATE.with(|state| {
|
||||||
let mut state = state.borrow_mut();
|
let mut state = state.borrow_mut();
|
||||||
|
|||||||
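The filtering code in this diff finds the end of a tool-call object by counting brace depth from the confirmed `{`. A minimal, self-contained sketch of that core idea (this is an illustration, not the crate's actual API; `strip_tool_call` is a hypothetical name, and string contents/escapes are ignored here for brevity):

```rust
/// Hypothetical sketch: remove a `{"tool": ...}` object that starts at
/// `json_start`, using the same brace-depth counting as the diff above.
fn strip_tool_call(full: &str, json_start: usize) -> String {
    let mut depth = 0i32;
    let mut end = full.len(); // if braces never balance, drop to end of buffer
    for (i, ch) in full[json_start..].char_indices() {
        match ch {
            '{' => depth += 1,
            '}' => {
                depth -= 1;
                if depth == 0 {
                    // index just past the matching closing brace
                    end = json_start + i + ch.len_utf8();
                    break;
                }
            }
            _ => {}
        }
    }
    // everything before the JSON plus everything after it
    format!("{}{}", &full[..json_start], &full[end..])
}

fn main() {
    let input = "Before\n{\"tool\": \"shell\", \"args\": {\"cmd\": \"ls\"}}\nAfter";
    let out = strip_tool_call(input, 7);
    assert_eq!(out, "Before\n\nAfter");
    println!("{}", out);
}
```

Nested braces inside the tool call (as in the `args` object above) are handled because depth only returns to zero at the outermost closing brace.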
@@ -1,8 +1,14 @@
+//! Tests for JSON tool call filtering.
+//!
+//! These tests verify that the filter correctly identifies and removes JSON tool calls
+//! from LLM output streams while preserving all other content.
+
 #[cfg(test)]
 mod fixed_filter_tests {
     use crate::fixed_filter_json::{fixed_filter_json_tool_calls, reset_fixed_json_tool_state};
     use regex::Regex;
 
+    /// Test that regular text without tool calls passes through unchanged.
     #[test]
     fn test_no_tool_call_passthrough() {
         reset_fixed_json_tool_state();
@@ -11,6 +17,7 @@ mod fixed_filter_tests {
         assert_eq!(result, input);
     }
 
+    /// Test detection and removal of a complete tool call in a single chunk.
     #[test]
     fn test_simple_tool_call_detection() {
         reset_fixed_json_tool_state();
@@ -23,6 +30,7 @@ Some text after"#;
         assert_eq!(result, expected);
     }
 
+    /// Test handling of tool calls that arrive across multiple streaming chunks.
     #[test]
     fn test_streaming_chunks() {
         reset_fixed_json_tool_state();
@@ -48,6 +56,7 @@ Some text after"#;
         assert_eq!(final_result, expected);
     }
 
+    /// Test correct handling of nested braces within JSON strings.
     #[test]
     fn test_nested_braces_in_tool_call() {
         reset_fixed_json_tool_state();
@@ -61,6 +70,7 @@ Text after"#;
         assert_eq!(result, expected);
     }
 
+    /// Verify the regex pattern matches the specification with flexible whitespace.
     #[test]
     fn test_regex_pattern_specification() {
         // Test the corrected regex pattern that's more flexible with whitespace
@@ -84,11 +94,6 @@ Text after"#;
         ), // Space after { DOES match with \s*
         (
             r#"line
-abc{"tool":"#,
-            true,
-        ),
-        (
-            r#"line
{"tool123":"#,
             false,
         ), // "tool123" is not exactly "tool"
@@ -109,6 +114,7 @@ abc{"tool":"#,
         }
     }
 
+    /// Test that tool calls must appear at the start of a line (after newline).
     #[test]
     fn test_newline_requirement() {
         reset_fixed_json_tool_state();
@@ -122,13 +128,14 @@ abc{"tool":"#,
         reset_fixed_json_tool_state();
         let result2 = fixed_filter_json_tool_calls(input_without_newline);
 
-        // Both cases currently trigger suppression due to regex pattern
-        // TODO: Fix regex to only match after actual newlines
+        // With the new aggressive filtering, only the newline case should trigger suppression
+        // The pattern requires { to be at the start of a line (after ^)
         assert_eq!(result1, "Text\n");
-        // This currently fails because our regex matches both cases
-        assert_eq!(result2, "Text ");
+        // Without newline before {, it should pass through unchanged
+        assert_eq!(result2, input_without_newline);
     }
 
+    /// Test handling of escaped quotes within JSON strings.
     #[test]
     fn test_json_with_escaped_quotes() {
         reset_fixed_json_tool_state();
@@ -142,6 +149,7 @@ More text"#;
         assert_eq!(result, expected);
     }
 
+    /// Test graceful handling of incomplete/malformed JSON.
     #[test]
     fn test_edge_case_malformed_json() {
         reset_fixed_json_tool_state();
@@ -157,6 +165,7 @@ More text"#;
         assert_eq!(result, expected);
     }
 
+    /// Test processing multiple independent tool calls sequentially.
     #[test]
     fn test_multiple_tool_calls_sequential() {
         reset_fixed_json_tool_state();
@@ -179,6 +188,7 @@ Final text"#;
         assert_eq!(result2, expected2);
     }
 
+    /// Test tool calls with complex multi-line arguments.
     #[test]
     fn test_tool_call_with_complex_args() {
         reset_fixed_json_tool_state();
@@ -192,6 +202,7 @@ After"#;
         assert_eq!(result, expected);
     }
 
+    /// Test input containing only a tool call with no surrounding text.
     #[test]
     fn test_tool_call_only() {
         reset_fixed_json_tool_state();
@@ -204,6 +215,7 @@ After"#;
         assert_eq!(result, expected);
     }
 
+    /// Test accurate brace counting with deeply nested structures.
     #[test]
     fn test_brace_counting_accuracy() {
         reset_fixed_json_tool_state();
@@ -218,6 +230,7 @@ End"#;
         assert_eq!(result, expected);
     }
 
+    /// Test that braces within strings don't affect brace counting.
     #[test]
     fn test_string_escaping_in_json() {
         reset_fixed_json_tool_state();
@@ -232,6 +245,7 @@ More"#;
         assert_eq!(result, expected);
     }
 
+    /// Verify compliance with the exact specification requirements.
     #[test]
     fn test_specification_compliance() {
         reset_fixed_json_tool_state();
@@ -248,6 +262,7 @@ More"#;
         assert_eq!(result, expected);
     }
 
+    /// Test that non-tool JSON objects are not filtered.
     #[test]
     fn test_no_false_positives() {
         reset_fixed_json_tool_state();
@@ -261,6 +276,7 @@ More text"#;
         assert_eq!(result, input);
     }
 
+    /// Test patterns that look similar to tool calls but aren't exact matches.
     #[test]
     fn test_partial_tool_patterns() {
         reset_fixed_json_tool_state();
@@ -280,6 +296,7 @@ More text"#;
         }
     }
 
+    /// Test streaming with very small chunks (character-by-character).
     #[test]
     fn test_streaming_edge_cases() {
         reset_fixed_json_tool_state();
@@ -296,12 +313,13 @@ More text"#;
         }
 
         let final_result: String = results.join("");
-        // This test currently fails because the JSON is incomplete across chunks
-        // The function doesn't handle this edge case properly yet
-        let expected = "Text\n{\"tool\": \nAfter";
+        // With the new aggressive filtering, the JSON should be completely filtered out
+        // even when it arrives in very small chunks
+        let expected = "Text\n\nAfter";
         assert_eq!(final_result, expected);
     }
 
+    /// Debug test with detailed logging for streaming behavior.
     #[test]
     fn test_streaming_debug() {
         reset_fixed_json_tool_state();
@@ -329,4 +347,38 @@ More text"#;
         let expected = "Some text before\n\nText after";
         assert_eq!(final_result, expected);
     }
+
+    /// Test handling of truncated JSON followed by complete JSON (the json_err pattern)
+    #[test]
+    fn test_truncated_then_complete_json() {
+        reset_fixed_json_tool_state();
+
+        // Simulate the pattern from json_err trace:
+        // 1. Incomplete/truncated JSON appears
+        // 2. Then the same complete JSON appears
+        let chunks = vec![
+            "Some text\n",
+            r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli"#, // Truncated
+            r#"{"tool": "str_replace", "args": {"diff":"...","file_path":"./crates/g3-cli/src/lib.rs"}}"#, // Complete
+            "\nMore text",
+        ];
+
+        let mut results = Vec::new();
+        for (i, chunk) in chunks.iter().enumerate() {
+            let result = fixed_filter_json_tool_calls(chunk);
+            println!("Chunk {}: {:?} -> {:?}", i, chunk, result);
+            results.push(result);
+        }
+
+        let final_result: String = results.join("");
+        println!("Final result: {:?}", final_result);
+
+        // The truncated JSON should be discarded when the complete one appears
+        // Both JSONs should be filtered out, leaving only the text
+        let expected = "Some text\n\nMore text";
+        assert_eq!(
+            final_result, expected,
+            "Failed to handle truncated JSON followed by complete JSON"
+        );
+    }
 }
@@ -1,76 +0,0 @@
-#[cfg(test)]
-mod gitignore_prompt_tests {
-    use crate::Agent;
-    use crate::ui_writer::UiWriter;
-
-    // Mock UI writer for testing
-    struct MockUiWriter;
-
-    impl UiWriter for MockUiWriter {
-        fn print_agent_prompt(&self) {}
-        fn print_agent_response(&self, _text: &str) {}
-        fn print(&self, _message: &str) {}
-        fn print_inline(&self, _message: &str) {}
-        fn print_tool_output_line(&self, _line: &str) {}
-        fn print_system_prompt(&self, _text: &str) {}
-        fn print_tool_header(&self, _tool_name: &str) {}
-        fn print_tool_arg(&self, _key: &str, _value: &str) {}
-        fn print_tool_output_header(&self) {}
-        fn update_tool_output_line(&self, _line: &str) {}
-        fn print_tool_output_summary(&self, _total_lines: usize) {}
-        fn print_tool_timing(&self, _duration: &str) {}
-        fn print_context_status(&self, _message: &str) {}
-        fn print_context_thinning(&self, _message: &str) {}
-        fn println(&self, _text: &str) {}
-        fn flush(&self) {}
-        fn notify_sse_received(&self) {}
-        fn wants_full_output(&self) -> bool { false }
-    }
-
-    #[test]
-    fn test_gitignore_prompt_snippet_with_file() {
-        // Create a temporary .gitignore file
-        let test_gitignore = "# Test comment\ntarget/\n*.log\n\n# Another comment\nlogs/\n";
-        std::fs::write(".gitignore.test", test_gitignore).unwrap();
-
-        // Temporarily rename actual .gitignore if it exists
-        let has_real_gitignore = std::path::Path::new(".gitignore").exists();
-        if has_real_gitignore {
-            std::fs::rename(".gitignore", ".gitignore.backup").unwrap();
-        }
-
-        // Rename test file to .gitignore
-        std::fs::rename(".gitignore.test", ".gitignore").unwrap();
-
-        let snippet = Agent::<MockUiWriter>::get_gitignore_prompt_snippet();
-
-        // Restore original .gitignore
-        std::fs::remove_file(".gitignore").unwrap();
-        if has_real_gitignore {
-            std::fs::rename(".gitignore.backup", ".gitignore").unwrap();
-        }
-
-        assert!(snippet.contains("IMPORTANT"));
-        assert!(snippet.contains(".gitignore"));
-        assert!(snippet.contains("target/"));
-        assert!(snippet.contains("*.log"));
-    }
-
-    #[test]
-    fn test_gitignore_prompt_snippet_without_file() {
-        // Temporarily rename .gitignore if it exists
-        let has_gitignore = std::path::Path::new(".gitignore").exists();
-        if has_gitignore {
-            std::fs::rename(".gitignore", ".gitignore.backup").unwrap();
-        }
-
-        let snippet = Agent::<MockUiWriter>::get_gitignore_prompt_snippet();
-
-        // Restore .gitignore
-        if has_gitignore {
-            std::fs::rename(".gitignore.backup", ".gitignore").unwrap();
-        }
-
-        assert_eq!(snippet, "");
-    }
-}
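The deleted tests above expected `get_gitignore_prompt_snippet` to return an empty string when no `.gitignore` exists, and otherwise a snippet containing "IMPORTANT" plus the ignore patterns (comments stripped). A hedged, self-contained sketch of that behavior — `gitignore_snippet` and its message text are illustrative guesses, not the removed `Agent` method:

```rust
use std::fs;

// Hypothetical sketch of what the removed tests exercised: build a prompt
// snippet from .gitignore, skipping comments and blank lines. The real
// method lived on `Agent` and may have differed in wording.
fn gitignore_snippet(path: &str) -> String {
    let Ok(contents) = fs::read_to_string(path) else {
        return String::new(); // no .gitignore: empty snippet, as the tests expect
    };
    let patterns: Vec<&str> = contents
        .lines()
        .map(str::trim)
        .filter(|l| !l.is_empty() && !l.starts_with('#'))
        .collect();
    format!(
        "IMPORTANT: respect .gitignore; do not read ignored paths:\n{}",
        patterns.join("\n")
    )
}

fn main() {
    println!("{}", gitignore_snippet(".gitignore"));
}
```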
File diff suppressed because it is too large
@@ -276,6 +276,7 @@ impl AnthropicProvider {
         let mut partial_tool_json = String::new(); // Accumulate partial JSON for tool calls
         let mut accumulated_usage: Option<Usage> = None;
         let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
+        let mut message_stopped = false; // Track if we've received message_stop
 
         while let Some(chunk_result) = stream.next().await {
             match chunk_result {
@@ -316,6 +317,12 @@ impl AnthropicProvider {
                     continue;
                 }
 
+                // If we've already sent the final chunk, skip processing more events
+                if message_stopped {
+                    debug!("Skipping event after message_stop: {}", line);
+                    continue;
+                }
+
                 // Parse Server-Sent Events format
                 if let Some(data) = line.strip_prefix("data: ") {
                     if data == "[DONE]" {
@@ -451,6 +458,7 @@ impl AnthropicProvider {
                     }
                     "message_stop" => {
                         debug!("Received message stop event");
+                        message_stopped = true;
                         let final_chunk = CompletionChunk {
                             content: String::new(),
                             finished: true,
@@ -460,7 +468,8 @@ impl AnthropicProvider {
                         if tx.send(Ok(final_chunk)).await.is_err() {
                             debug!("Receiver dropped, stopping stream");
                         }
-                        return accumulated_usage;
+                        // Don't return here - let the stream naturally exhaust
+                        // This prevents dropping the sender prematurely
                     }
                     "error" => {
                         if let Some(error) = event.error {
@@ -468,7 +477,7 @@ impl AnthropicProvider {
                             let _ = tx
                                 .send(Err(anyhow!("Anthropic API error: {:?}", error)))
                                 .await;
-                            return accumulated_usage;
+                            break; // Break to let stream exhaust naturally
                         }
                     }
                     _ => {
@@ -487,7 +496,10 @@ impl AnthropicProvider {
             Err(e) => {
                 error!("Stream error: {}", e);
                 let _ = tx.send(Err(anyhow!("Stream error: {}", e))).await;
-                return accumulated_usage;
+                // Don't return here either - let the stream exhaust naturally
+                // The error has been sent to the receiver, so it will handle it
+                // Breaking here ensures we clean up properly
+                break;
             }
         }
     }
|||||||
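The hunks above swap early `return`s for `break`s so the channel sender stays alive until the stream is fully drained and any final chunk can still be sent. A minimal sketch of why sender lifetime matters, using `std::sync::mpsc` instead of tokio's async channel (the `run` helper is illustrative only, not code from this PR):

```rust
use std::sync::mpsc;
use std::thread;

// Collects everything the receiver saw, including the final marker.
fn run() -> Vec<String> {
    let (tx, rx) = mpsc::channel();

    let producer = thread::spawn(move || {
        for i in 0..3 {
            if tx.send(format!("chunk-{}", i)).is_err() {
                // Receiver gone: break (not return) so the code after the
                // loop still runs while `tx` is alive.
                break;
            }
        }
        // An early `return` inside the loop would have dropped `tx` already,
        // and this final marker could never be sent.
        let _ = tx.send("final".to_string());
    });

    // The receiving side only sees the end of the channel once every
    // sender has been dropped, i.e. after the closure above finishes.
    let received: Vec<String> = rx.iter().collect();
    producer.join().unwrap();
    received
}

fn main() {
    let received = run();
    assert_eq!(received, vec!["chunk-0", "chunk-1", "chunk-2", "final"]);
    println!("{:?}", received);
}
```

The same reasoning applies to the tokio `mpsc::Sender` in the provider: returning mid-loop drops the sender and ends the stream before the final chunk is delivered.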
@@ -298,6 +298,7 @@ impl DatabricksProvider {
         let mut current_tool_calls: std::collections::HashMap<usize, (String, String, String)> =
             std::collections::HashMap::new(); // index -> (id, name, args)
         let mut incomplete_data_line = String::new(); // Buffer for incomplete data: lines
+        let mut chunk_count = 0;
         let accumulated_usage: Option<Usage> = None;
         let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences

@@ -305,6 +306,8 @@ impl DatabricksProvider {
             match chunk_result {
                 Ok(chunk) => {
                     // Debug: Log raw bytes received
+                    chunk_count += 1;
+                    debug!("Processing chunk #{}", chunk_count);
                     debug!("Raw SSE bytes received: {} bytes", chunk.len());

                     // Append new bytes to our buffer
@@ -589,13 +592,39 @@ impl DatabricksProvider {
                     }
                 }
                 Err(e) => {
-                    error!("Stream error: {}", e);
-                    let _ = tx.send(Err(anyhow!("Stream error: {}", e))).await;
+                    error!("Stream error at chunk {}: {}", chunk_count, e);
+
+                    // Check if this is a connection error that might be recoverable
+                    let error_msg = e.to_string();
+                    if error_msg.contains("unexpected EOF") || error_msg.contains("connection") {
+                        warn!("Connection terminated unexpectedly at chunk {}, treating as end of stream", chunk_count);
+                        // Don't send error, just break and finalize
+                        break;
+                    } else {
+                        let _ = tx.send(Err(anyhow!("Stream error: {}", e))).await;
+                    }
                     return accumulated_usage;
                 }
             }
         }

+        // Log final state
+        debug!("Stream ended after {} chunks", chunk_count);
+        debug!("Final state: buffer_len={}, incomplete_data_line_len={}, byte_buffer_len={}",
+            buffer.len(), incomplete_data_line.len(), byte_buffer.len());
+        debug!("Accumulated tool calls: {}", current_tool_calls.len());
+
+        // If we have any remaining data in buffers, log it for debugging
+        if !buffer.is_empty() {
+            debug!("Remaining buffer content: {:?}", buffer);
+        }
+        if !byte_buffer.is_empty() {
+            debug!("Remaining byte buffer: {} bytes", byte_buffer.len());
+        }
+        if !incomplete_data_line.is_empty() {
+            debug!("Remaining incomplete data line: {:?}", incomplete_data_line);
+        }
+
         // If we have any incomplete data line at the end, try to process it
         if !incomplete_data_line.is_empty() {
             debug!(
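The `incomplete_data_line` buffering above exists because transport chunks can split an SSE `data:` line at any byte, so only complete newline-terminated lines may be parsed and the tail must be carried into the next chunk. A minimal standalone sketch of that carry-buffer technique (the `split_lines` helper is illustrative, not code from this PR):

```rust
// Emit only complete lines from chunks that may split a line anywhere.
fn split_lines(chunks: &[&str]) -> Vec<String> {
    let mut buffer = String::new();
    let mut lines = Vec::new();
    for chunk in chunks {
        buffer.push_str(chunk);
        // Drain every complete line; the tail stays buffered for the next chunk.
        while let Some(pos) = buffer.find('\n') {
            let line = buffer[..pos].trim().to_string();
            buffer.drain(..pos + 1);
            if !line.is_empty() {
                lines.push(line);
            }
        }
    }
    lines
}

fn main() {
    // An SSE `data:` line split mid-JSON across two transport chunks.
    let lines = split_lines(&["data: {\"a\"", ": 1}\ndata: [DONE]\n"]);
    assert_eq!(lines, vec!["data: {\"a\": 1}", "data: [DONE]"]);
    println!("{:?}", lines);
}
```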
@@ -88,11 +88,13 @@ pub mod anthropic;
 pub mod databricks;
 pub mod embedded;
 pub mod oauth;
+pub mod ollama;
 pub mod openai;

 pub use anthropic::AnthropicProvider;
 pub use databricks::DatabricksProvider;
 pub use embedded::EmbeddedProvider;
+pub use ollama::OllamaProvider;
 pub use openai::OpenAIProvider;

 /// Provider registry for managing multiple LLM providers
crates/g3-providers/src/ollama.rs (new file, 751 lines)
@@ -0,0 +1,751 @@
//! Ollama LLM provider implementation for the g3-providers crate.
//!
//! This module provides an implementation of the `LLMProvider` trait for Ollama,
//! supporting both completion and streaming modes with native tool calling.
//!
//! # Features
//!
//! - Support for any Ollama model (llama3.2, mistral, qwen, etc.)
//! - Both completion and streaming response modes
//! - Native tool calling support for compatible models
//! - Configurable base URL (defaults to http://localhost:11434)
//! - Simple configuration with no authentication required
//!
//! # Usage
//!
//! ```rust,no_run
//! use g3_providers::{OllamaProvider, LLMProvider, CompletionRequest, Message, MessageRole};
//!
//! #[tokio::main]
//! async fn main() -> anyhow::Result<()> {
//!     // Create the provider with default settings (localhost:11434)
//!     let provider = OllamaProvider::new(
//!         "llama3.2".to_string(),
//!         None, // Optional: base_url
//!         None, // Optional: max tokens
//!         None, // Optional: temperature
//!     )?;
//!
//!     // Create a completion request
//!     let request = CompletionRequest {
//!         messages: vec![
//!             Message {
//!                 role: MessageRole::User,
//!                 content: "Hello! How are you?".to_string(),
//!             },
//!         ],
//!         max_tokens: Some(1000),
//!         temperature: Some(0.7),
//!         stream: false,
//!         tools: None,
//!     };
//!
//!     // Get a completion
//!     let response = provider.complete(request).await?;
//!     println!("Response: {}", response.content);
//!
//!     Ok(())
//! }
//! ```

use anyhow::{anyhow, Result};
use bytes::Bytes;
use futures_util::stream::StreamExt;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info, warn};

use crate::{
    CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
    MessageRole, Tool, ToolCall, Usage,
};

const DEFAULT_BASE_URL: &str = "http://localhost:11434";
const DEFAULT_TIMEOUT_SECS: u64 = 600;

pub const OLLAMA_DEFAULT_MODEL: &str = "llama3.2";
pub const OLLAMA_KNOWN_MODELS: &[&str] = &[
    "llama3.2",
    "llama3.2:1b",
    "llama3.2:3b",
    "llama3.1",
    "llama3.1:8b",
    "llama3.1:70b",
    "mistral",
    "mistral-nemo",
    "mixtral",
    "qwen2.5",
    "qwen2.5:7b",
    "qwen2.5:14b",
    "qwen2.5:32b",
    "qwen2.5-coder",
    "qwen2.5-coder:7b",
    "qwen3-coder",
    "phi3",
    "gemma2",
];

#[derive(Debug, Clone)]
pub struct OllamaProvider {
    client: Client,
    base_url: String,
    model: String,
    max_tokens: Option<u32>,
    temperature: f32,
}

impl OllamaProvider {
    pub fn new(
        model: String,
        base_url: Option<String>,
        max_tokens: Option<u32>,
        temperature: Option<f32>,
    ) -> Result<Self> {
        let client = Client::builder()
            .timeout(Duration::from_secs(DEFAULT_TIMEOUT_SECS))
            .build()
            .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;

        let base_url = base_url
            .unwrap_or_else(|| DEFAULT_BASE_URL.to_string())
            .trim_end_matches('/')
            .to_string();

        info!(
            "Initialized Ollama provider with model: {} at {}",
            model, base_url
        );

        Ok(Self {
            client,
            base_url,
            model,
            max_tokens,
            temperature: temperature.unwrap_or(0.7),
        })
    }

    fn convert_tools(&self, tools: &[Tool]) -> Vec<OllamaTool> {
        tools
            .iter()
            .map(|tool| OllamaTool {
                r#type: "function".to_string(),
                function: OllamaFunction {
                    name: tool.name.clone(),
                    description: tool.description.clone(),
                    parameters: tool.input_schema.clone(),
                },
            })
            .collect()
    }

    fn convert_messages(&self, messages: &[Message]) -> Result<Vec<OllamaMessage>> {
        let mut ollama_messages = Vec::new();

        for message in messages {
            let role = match message.role {
                MessageRole::System => "system",
                MessageRole::User => "user",
                MessageRole::Assistant => "assistant",
            };

            ollama_messages.push(OllamaMessage {
                role: role.to_string(),
                content: message.content.clone(),
                tool_calls: None, // Only used in responses
            });
        }

        if ollama_messages.is_empty() {
            return Err(anyhow!("At least one message is required"));
        }

        Ok(ollama_messages)
    }

    fn create_request_body(
        &self,
        messages: &[Message],
        tools: Option<&[Tool]>,
        streaming: bool,
        max_tokens: Option<u32>,
        temperature: f32,
    ) -> Result<OllamaRequest> {
        let ollama_messages = self.convert_messages(messages)?;
        let ollama_tools = tools.map(|t| self.convert_tools(t));

        let mut options = OllamaOptions {
            temperature,
            num_predict: max_tokens,
        };

        // If max_tokens is provided, use it; otherwise use the instance default
        if max_tokens.is_none() {
            options.num_predict = self.max_tokens;
        }

        let request = OllamaRequest {
            model: self.model.clone(),
            messages: ollama_messages,
            tools: ollama_tools,
            stream: streaming,
            options,
        };

        Ok(request)
    }

    async fn parse_streaming_response(
        &self,
        mut stream: impl futures_util::Stream<Item = reqwest::Result<Bytes>> + Unpin,
        tx: mpsc::Sender<Result<CompletionChunk>>,
    ) -> Option<Usage> {
        let mut buffer = String::new();
        let mut accumulated_usage: Option<Usage> = None;
        let mut current_tool_calls: Vec<OllamaToolCall> = Vec::new();
        let mut byte_buffer = Vec::new();

        while let Some(chunk_result) = stream.next().await {
            match chunk_result {
                Ok(chunk) => {
                    // Append new bytes to our buffer
                    byte_buffer.extend_from_slice(&chunk);

                    // Try to convert the entire buffer to UTF-8
                    let chunk_str = match std::str::from_utf8(&byte_buffer) {
                        Ok(s) => {
                            let result = s.to_string();
                            byte_buffer.clear();
                            result
                        }
                        Err(e) => {
                            let valid_up_to = e.valid_up_to();
                            if valid_up_to > 0 {
                                let valid_bytes =
                                    byte_buffer.drain(..valid_up_to).collect::<Vec<_>>();
                                std::str::from_utf8(&valid_bytes).unwrap().to_string()
                            } else {
                                continue;
                            }
                        }
                    };

                    buffer.push_str(&chunk_str);

                    // Process complete lines
                    while let Some(line_end) = buffer.find('\n') {
                        let line = buffer[..line_end].trim().to_string();
                        buffer.drain(..line_end + 1);

                        if line.is_empty() {
                            continue;
                        }

                        // Ollama streaming sends JSON objects per line
                        match serde_json::from_str::<OllamaStreamChunk>(&line) {
                            Ok(chunk) => {
                                // Handle text content
                                if let Some(message) = &chunk.message {
                                    let content = &message.content;
                                    if !content.is_empty() {
                                        debug!("Sending text chunk: '{}'", content);
                                        let chunk = CompletionChunk {
                                            content: content.clone(),
                                            finished: false,
                                            usage: None,
                                            tool_calls: None,
                                        };
                                        if tx.send(Ok(chunk)).await.is_err() {
                                            debug!("Receiver dropped, stopping stream");
                                            return accumulated_usage;
                                        }
                                    }

                                    // Handle tool calls
                                    if let Some(tool_calls) = &message.tool_calls {
                                        current_tool_calls.extend(tool_calls.clone());
                                    }
                                }

                                // Check if stream is done
                                if chunk.done.unwrap_or(false) {
                                    debug!("Stream completed");

                                    // Update usage if available
                                    if let Some(eval_count) = chunk.eval_count {
                                        accumulated_usage = Some(Usage {
                                            prompt_tokens: chunk.prompt_eval_count.unwrap_or(0),
                                            completion_tokens: eval_count,
                                            total_tokens: chunk.prompt_eval_count.unwrap_or(0)
                                                + eval_count,
                                        });
                                    }

                                    // Send final chunk with tool calls if any
                                    let final_tool_calls: Vec<ToolCall> = current_tool_calls
                                        .iter()
                                        .map(|tc| ToolCall {
                                            id: tc.function.name.clone(), // Ollama doesn't provide IDs
                                            tool: tc.function.name.clone(),
                                            args: tc.function.arguments.clone(),
                                        })
                                        .collect();

                                    let final_chunk = CompletionChunk {
                                        content: String::new(),
                                        finished: true,
                                        usage: accumulated_usage.clone(),
                                        tool_calls: if final_tool_calls.is_empty() {
                                            None
                                        } else {
                                            Some(final_tool_calls)
                                        },
                                    };
                                    if tx.send(Ok(final_chunk)).await.is_err() {
                                        debug!("Receiver dropped, stopping stream");
                                    }
                                    return accumulated_usage;
                                }
                            }
                            Err(e) => {
                                debug!("Failed to parse Ollama stream chunk: {} - Line: {}", e, line);
                                // Don't error out, just continue
                            }
                        }
                    }
                }
                Err(e) => {
                    error!("Stream error: {}", e);
                    let error_msg = e.to_string();
                    if error_msg.contains("unexpected EOF") || error_msg.contains("connection") {
                        warn!("Connection terminated unexpectedly, treating as end of stream");
                        break;
                    } else {
                        let _ = tx.send(Err(anyhow!("Stream error: {}", e))).await;
                    }
                    return accumulated_usage;
                }
            }
        }

        // Send final chunk if we haven't already
        let final_tool_calls: Vec<ToolCall> = current_tool_calls
            .iter()
            .map(|tc| ToolCall {
                id: tc.function.name.clone(),
                tool: tc.function.name.clone(),
                args: tc.function.arguments.clone(),
            })
            .collect();

        let final_chunk = CompletionChunk {
            content: String::new(),
            finished: true,
            usage: accumulated_usage.clone(),
            tool_calls: if final_tool_calls.is_empty() {
                None
            } else {
                Some(final_tool_calls)
            },
        };
        let _ = tx.send(Ok(final_chunk)).await;
        accumulated_usage
    }

    /// Fetch available models from the Ollama instance
    pub async fn fetch_available_models(&self) -> Result<Vec<String>> {
        let response = self
            .client
            .get(format!("{}/api/tags", self.base_url))
            .send()
            .await
            .map_err(|e| anyhow!("Failed to fetch Ollama models: {}", e))?;

        if !response.status().is_success() {
            let status = response.status();
            let error_text = response
                .text()
                .await
                .unwrap_or_else(|_| "Unknown error".to_string());
            return Err(anyhow!(
                "Failed to fetch Ollama models: {} - {}",
                status,
                error_text
            ));
        }

        let json: serde_json::Value = response.json().await?;
        let models = json
            .get("models")
            .and_then(|v| v.as_array())
            .ok_or_else(|| anyhow!("Unexpected response format: missing 'models' array"))?;

        let model_names: Vec<String> = models
            .iter()
            .filter_map(|model| model.get("name").and_then(|n| n.as_str()).map(String::from))
            .collect();

        debug!("Found {} models in Ollama", model_names.len());
        Ok(model_names)
    }
}

#[async_trait::async_trait]
impl LLMProvider for OllamaProvider {
    async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse> {
        debug!(
            "Processing Ollama completion request with {} messages",
            request.messages.len()
        );

        let max_tokens = request.max_tokens.or(self.max_tokens);
        let temperature = request.temperature.unwrap_or(self.temperature);

        let request_body = self.create_request_body(
            &request.messages,
            request.tools.as_deref(),
            false,
            max_tokens,
            temperature,
        )?;

        debug!(
            "Sending request to Ollama API: model={}, temperature={}",
            self.model, request_body.options.temperature
        );

        let response = self
            .client
            .post(format!("{}/api/chat", self.base_url))
            .json(&request_body)
            .send()
            .await
            .map_err(|e| anyhow!("Failed to send request to Ollama API: {}", e))?;

        let status = response.status();
        if !status.is_success() {
            let error_text = response
                .text()
                .await
                .unwrap_or_else(|_| "Unknown error".to_string());
            return Err(anyhow!("Ollama API error {}: {}", status, error_text));
        }

        let response_text = response.text().await?;
        debug!("Raw Ollama API response: {}", response_text);

        let ollama_response: OllamaResponse =
            serde_json::from_str(&response_text).map_err(|e| {
                anyhow!(
                    "Failed to parse Ollama response: {} - Response: {}",
                    e,
                    response_text
                )
            })?;

        let content = ollama_response.message.content.clone();

        let usage = Usage {
            prompt_tokens: ollama_response.prompt_eval_count.unwrap_or(0),
            completion_tokens: ollama_response.eval_count.unwrap_or(0),
            total_tokens: ollama_response.prompt_eval_count.unwrap_or(0)
                + ollama_response.eval_count.unwrap_or(0),
        };

        debug!(
            "Ollama completion successful: {} tokens generated",
            usage.completion_tokens
        );

        Ok(CompletionResponse {
            content,
            usage,
            model: self.model.clone(),
        })
    }

    async fn stream(&self, request: CompletionRequest) -> Result<CompletionStream> {
        debug!(
            "Processing Ollama request (non-streaming) with {} messages",
            request.messages.len()
        );

        if let Some(ref tools) = request.tools {
            debug!("Request has {} tools", tools.len());
            for tool in tools.iter().take(5) {
                debug!("  Tool: {}", tool.name);
            }
        }

        let max_tokens = request.max_tokens.or(self.max_tokens);
        let temperature = request.temperature.unwrap_or(self.temperature);

        let request_body = self.create_request_body(
            &request.messages,
            request.tools.as_deref(),
            false, // Use non-streaming mode to avoid streaming bugs
            max_tokens,
            temperature,
        )?;

        debug!(
            "Sending request to Ollama API (stream=false): model={}, temperature={}",
            self.model, request_body.options.temperature
        );

        let response = self
            .client
            .post(format!("{}/api/chat", self.base_url))
            .json(&request_body)
            .send()
            .await
            .map_err(|e| anyhow!("Failed to send request to Ollama API: {}", e))?;

        let status = response.status();
        if !status.is_success() {
            let error_text = response
                .text()
                .await
                .unwrap_or_else(|_| "Unknown error".to_string());
            return Err(anyhow!("Ollama API error {}: {}", status, error_text));
        }

        // For non-streaming, parse the complete JSON response
        let response_text = response.text().await?;
        debug!("Raw Ollama API response: {}", response_text);

        let ollama_response: OllamaResponse =
            serde_json::from_str(&response_text).map_err(|e| {
                anyhow!(
                    "Failed to parse Ollama response: {} - Response: {}",
                    e,
                    response_text
                )
            })?;

        let (tx, rx) = mpsc::channel(100);

        tokio::spawn(async move {
            let content = ollama_response.message.content;
            let usage = Usage {
                prompt_tokens: ollama_response.prompt_eval_count.unwrap_or(0),
                completion_tokens: ollama_response.eval_count.unwrap_or(0),
                total_tokens: ollama_response.prompt_eval_count.unwrap_or(0)
                    + ollama_response.eval_count.unwrap_or(0),
            };

            // Extract tool calls if present
            let tool_calls: Option<Vec<ToolCall>> = ollama_response.message.tool_calls.map(|tcs| {
                tcs.iter()
                    .map(|tc| ToolCall {
                        id: tc.function.name.clone(),
                        tool: tc.function.name.clone(),
                        args: tc.function.arguments.clone(),
                    })
                    .collect()
            });

            // Send content if any
            if !content.is_empty() {
                let _ = tx.send(Ok(CompletionChunk {
                    content,
                    finished: false,
                    usage: None,
                    tool_calls: None,
                })).await;
            }

            // Send final chunk with usage and tool calls
            let _ = tx.send(Ok(CompletionChunk {
                content: String::new(),
                finished: true,
                usage: Some(usage),
                tool_calls,
            })).await;
        });

        Ok(ReceiverStream::new(rx))
    }

    fn name(&self) -> &str {
        "ollama"
    }

    fn model(&self) -> &str {
        &self.model
    }

    fn has_native_tool_calling(&self) -> bool {
        // Most modern Ollama models support tool calling
        // Models like llama3.2, qwen2.5, mistral, etc. have good tool support
        true
    }
}

// Ollama API request/response structures

#[derive(Debug, Serialize)]
struct OllamaRequest {
    model: String,
    messages: Vec<OllamaMessage>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<OllamaTool>>,
    stream: bool,
    options: OllamaOptions,
}

#[derive(Debug, Serialize)]
struct OllamaOptions {
    temperature: f32,
    #[serde(skip_serializing_if = "Option::is_none")]
    num_predict: Option<u32>, // Ollama's equivalent of max_tokens
}

#[derive(Debug, Serialize)]
struct OllamaTool {
    r#type: String,
    function: OllamaFunction,
}

#[derive(Debug, Serialize)]
struct OllamaFunction {
    name: String,
    description: String,
    parameters: serde_json::Value,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct OllamaMessage {
    role: String,
    content: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<OllamaToolCall>>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct OllamaToolCall {
    function: OllamaToolCallFunction,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct OllamaToolCallFunction {
    name: String,
    arguments: serde_json::Value,
}

#[derive(Debug, Deserialize)]
struct OllamaResponse {
    message: OllamaMessage,
    #[allow(dead_code)]
    done: bool,
    #[allow(dead_code)]
    total_duration: Option<u64>,
    #[allow(dead_code)]
    load_duration: Option<u64>,
    prompt_eval_count: Option<u32>,
    eval_count: Option<u32>,
}

#[derive(Debug, Deserialize)]
struct OllamaStreamChunk {
    message: Option<OllamaMessage>,
    done: Option<bool>,
    prompt_eval_count: Option<u32>,
    eval_count: Option<u32>,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_provider_creation() {
        let provider = OllamaProvider::new(
            "llama3.2".to_string(),
            None,
            Some(1000),
            Some(0.7),
        )
        .unwrap();

        assert_eq!(provider.model(), "llama3.2");
        assert_eq!(provider.name(), "ollama");
        assert!(provider.has_native_tool_calling());
    }

    #[test]
    fn test_message_conversion() {
        let provider = OllamaProvider::new(
            "llama3.2".to_string(),
            None,
            None,
            None,
        )
        .unwrap();

        let messages = vec![
            Message {
                role: MessageRole::System,
                content: "You are a helpful assistant.".to_string(),
            },
            Message {
                role: MessageRole::User,
                content: "Hello!".to_string(),
            },
        ];

        let ollama_messages = provider.convert_messages(&messages).unwrap();

        assert_eq!(ollama_messages.len(), 2);
        assert_eq!(ollama_messages[0].role, "system");
        assert_eq!(ollama_messages[1].role, "user");
    }

    #[test]
    fn test_tool_conversion() {
        let provider = OllamaProvider::new(
            "llama3.2".to_string(),
            None,
            None,
            None,
        )
        .unwrap();

        let tools = vec![Tool {
            name: "get_weather".to_string(),
            description: "Get the current weather".to_string(),
            input_schema: serde_json::json!({
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state"
                    }
                },
                "required": ["location"]
            }),
        }];

        let ollama_tools = provider.convert_tools(&tools);

        assert_eq!(ollama_tools.len(), 1);
        assert_eq!(ollama_tools[0].r#type, "function");
        assert_eq!(ollama_tools[0].function.name, "get_weather");
    }

    #[test]
    fn test_custom_base_url() {
        let provider = OllamaProvider::new(
            "llama3.2".to_string(),
            Some("http://custom:11434".to_string()),
            None,
            None,
        )
        .unwrap();

        assert_eq!(provider.base_url, "http://custom:11434");
    }
}
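`parse_streaming_response` above keeps a `byte_buffer` because a network chunk boundary can fall inside a multi-byte UTF-8 sequence; `Utf8Error::valid_up_to` reports how much of the buffer is decodable, and the partial tail is held back for the next chunk. A standalone sketch of the same technique (the `decode_chunks` helper is illustrative, not part of the crate):

```rust
// Decode a byte stream whose chunk boundaries may split UTF-8 sequences.
fn decode_chunks(chunks: &[&[u8]]) -> String {
    let mut byte_buffer: Vec<u8> = Vec::new();
    let mut out = String::new();
    for chunk in chunks {
        byte_buffer.extend_from_slice(chunk);
        match std::str::from_utf8(&byte_buffer) {
            Ok(s) => {
                out.push_str(s);
                byte_buffer.clear();
            }
            Err(e) => {
                // Everything up to `valid_up_to` is sound UTF-8; the trailing
                // partial sequence stays buffered for the next chunk.
                let valid_up_to = e.valid_up_to();
                if valid_up_to > 0 {
                    let valid: Vec<u8> = byte_buffer.drain(..valid_up_to).collect();
                    out.push_str(std::str::from_utf8(&valid).unwrap());
                }
            }
        }
    }
    out
}

fn main() {
    // "é" is 0xC3 0xA9; the chunk split falls between its two bytes.
    let chunks: &[&[u8]] = &[b"caf\xC3", b"\xA9 ok"];
    let decoded = decode_chunks(chunks);
    assert_eq!(decoded, "caf\u{e9} ok");
    println!("{}", decoded);
}
```

Naive per-chunk `String::from_utf8_lossy` would turn each split sequence into a replacement character, which is why both providers buffer raw bytes instead.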
docs/ACCUMULATIVE_MODE.md (new file, 389 lines)
@@ -0,0 +1,389 @@
# Accumulative Autonomous Mode

## Overview

Accumulative Autonomous Mode is the **new default interactive mode** for G3. It combines the ease of interactive chat with the power of autonomous implementation, allowing you to build projects iteratively by describing what you want, one requirement at a time.

## How It Works

### The Flow

1. **Start G3** in any directory (no arguments needed)
2. **Describe** what you want to build
3. **G3 automatically**:
   - Adds your input to accumulated requirements
   - Runs autonomous mode (coach-player feedback loop)
   - Implements your requirements with quality checks
4. **Continue** adding more requirements or refinements
5. **Repeat** until your project is complete

### Example Session

```bash
$ cd ~/projects/my-new-app
$ g3

🪿 G3 AI Coding Agent - Accumulative Mode
>> describe what you want, I'll build it iteratively

📁 Workspace: /Users/you/projects/my-new-app

💡 Each input you provide will be added to requirements
   and I'll automatically work on implementing them.

Type 'exit' or 'quit' to stop, Ctrl+D to finish

============================================================
📝 What would you like me to build? (describe your requirements)
============================================================
requirement> create a simple web server in Python with Flask that serves a homepage

📋 Current instructions and requirements (Turn 1):
create a simple web server in Python with Flask that serves a homepage

🚀 Starting autonomous implementation...

🤖 G3 AI Coding Agent - Autonomous Mode
📁 Using workspace: /Users/you/projects/my-new-app
📋 Requirements loaded from --requirements flag
🔄 Starting coach-player feedback loop...
📂 No existing implementation files detected
🎯 Starting with player implementation

=== TURN 1/5 - PLAYER MODE ===
🎯 Starting player implementation...
📋 Player starting initial implementation (no prior coach feedback)

[Player creates files, writes code...]

=== TURN 1/5 - COACH MODE ===
🎓 Starting coach review...
🎓 Coach review completed
Coach feedback:
The Flask server is implemented correctly with a homepage route.
The code follows best practices and meets the requirements.
IMPLEMENTATION_APPROVED

=== SESSION COMPLETED - IMPLEMENTATION APPROVED ===
✅ Coach approved the implementation!

============================================================
📊 AUTONOMOUS MODE SESSION REPORT
============================================================
⏱️ Total Duration: 12.34s
🔄 Turns Taken: 1/5
📝 Final Status: ✅ APPROVED
...
============================================================

✅ Autonomous run completed

============================================================
📝 Turn 2 - What's next? (add more requirements or refinements)
============================================================
requirement> add a /api/users endpoint that returns a list of users as JSON

📋 Current instructions and requirements (Turn 2):
add a /api/users endpoint that returns a list of users as JSON

🚀 Starting autonomous implementation...

[Autonomous mode runs again with BOTH requirements...]

============================================================
📝 Turn 3 - What's next? (add more requirements or refinements)
============================================================
requirement> exit

👋 Goodbye!
```
|
|
||||||
|
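The session above ends with a homepage route plus a `/api/users` JSON endpoint. For orientation, here is a minimal sketch of the kind of Flask app such a session might produce — this is illustrative only; the file name `app.py`, the route handlers, and the sample user data are assumptions, not G3's actual output:

```python
# app.py — illustrative sketch of the two accumulated requirements
from flask import Flask, jsonify

app = Flask(__name__)

# Requirement 1: serve a homepage
@app.route("/")
def home():
    return "<h1>Welcome</h1>"

# Requirement 2: a /api/users endpoint returning a list of users as JSON
@app.route("/api/users")
def users():
    return jsonify([
        {"id": 1, "name": "Ada", "email": "ada@example.com"},
        {"id": 2, "name": "Linus", "email": "linus@example.com"},
    ])

# start locally with: flask --app app run
```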
## Key Features

### 1. Requirement Accumulation

Each input you provide is:

- **Numbered sequentially** (1, 2, 3, ...)
- **Stored in memory** for the session
- **Included in all subsequent runs**

This means the agent always has the full context of what you've asked for.

### 2. Automatic Requirements Document

G3 automatically generates a structured requirements document:

```markdown
# Project Requirements

## Current Instructions and Requirements:

1. create a simple web server in Python with Flask that serves a homepage
2. add a /api/users endpoint that returns a list of users as JSON
3. add error handling for 404 and 500 errors

## Latest Requirement (Turn 3):

add error handling for 404 and 500 errors
```

This document is passed to autonomous mode, ensuring the agent:

- Knows all previous requirements
- Focuses on the latest addition
- Maintains consistency across iterations

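The generation step above can be sketched as follows. This is a hypothetical Python illustration of the formatting logic (G3 itself is written in Rust, and the function name here is an assumption):

```python
def build_requirements_doc(requirements: list[str]) -> str:
    """Format the accumulated requirements as the markdown document
    handed to autonomous mode (illustrative sketch, not G3's code)."""
    numbered = "\n".join(
        f"{i}. {req}" for i, req in enumerate(requirements, start=1)
    )
    turn = len(requirements)  # turns are numbered with the requirements
    return (
        "# Project Requirements\n\n"
        "## Current Instructions and Requirements:\n\n"
        f"{numbered}\n\n"
        f"## Latest Requirement (Turn {turn}):\n\n"
        f"{requirements[-1]}\n"
    )

reqs = [
    "create a simple web server in Python with Flask that serves a homepage",
    "add a /api/users endpoint that returns a list of users as JSON",
]
print(build_requirements_doc(reqs))
```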
### 3. Full Autonomous Quality

Each requirement triggers a complete autonomous run with:

- **Coach-Player Feedback Loop**: Quality assurance built in
- **Multiple Turns**: Up to 5 iterations per requirement (configurable with `--max-turns`)
- **Compilation Checks**: Ensures the code actually works
- **Testing**: The coach can run tests to verify functionality

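The loop behind these guarantees can be sketched roughly as below. This is pseudocode-style Python for illustration only — the real control flow lives in G3's Rust implementation, and the function names and the stand-in coach/player bodies are assumptions; only the turn limit and the `IMPLEMENTATION_APPROVED` marker come from the session output shown earlier:

```python
APPROVAL_MARKER = "IMPLEMENTATION_APPROVED"  # marker the coach emits on approval

def player_implement(requirements_doc, prior_feedback=None):
    """Stand-in for the player: in G3 this step writes and edits code."""
    pass

def coach_review(requirements_doc):
    """Stand-in for the coach: in G3 this step reviews and tests the code."""
    return "Meets the requirements. " + APPROVAL_MARKER

def run_autonomous(requirements_doc, max_turns=5):
    """Sketch of the coach-player feedback loop (illustrative only)."""
    feedback = None
    for turn in range(1, max_turns + 1):
        player_implement(requirements_doc, prior_feedback=feedback)
        feedback = coach_review(requirements_doc)
        if APPROVAL_MARKER in feedback:
            return True   # session completed - implementation approved
    return False          # turn budget exhausted without approval
```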
### 4. Error Recovery

If an autonomous run fails:

- You're notified of the error
- You can provide additional requirements to fix issues
- The session continues (doesn't crash)

### 5. Workspace Management

- Uses the **current directory** as the workspace
- All files are created in the current directory
- No need to specify a workspace path
- Works with existing projects or empty directories

## Command-Line Options

### Default (Accumulative Mode)

```bash
g3
```

Starts accumulative autonomous mode in the current directory.

### With Options

```bash
# Use a specific workspace
g3 --workspace ~/projects/my-app

# Limit autonomous turns per requirement
g3 --max-turns 3

# Enable macOS Accessibility tools
g3 --macax

# Enable WebDriver browser automation
g3 --webdriver

# Use a specific provider/model
g3 --provider anthropic --model claude-3-5-sonnet-20241022

# Show prompts and code during execution
g3 --show-prompt --show-code

# Disable log files
g3 --quiet
```

### Disable Accumulative Mode

To use the traditional chat mode (without automatic autonomous runs):

```bash
g3 --chat

# Alternative: the legacy flag also works
g3 --accumulative
```

This gives you the old behavior, where you chat with the agent without automatic autonomous runs.

## Use Cases

### 1. Rapid Prototyping

```bash
requirement> create a REST API for a todo app
requirement> add SQLite database storage
requirement> add authentication with JWT
requirement> add rate limiting
```

### 2. Iterative Refinement

```bash
requirement> create a data visualization dashboard
requirement> make the charts interactive
requirement> add dark mode support
requirement> optimize for mobile devices
```

### 3. Bug Fixing

```bash
requirement> fix the login form validation
requirement> handle edge case when username is empty
requirement> add better error messages
```

### 4. Feature Addition

```bash
requirement> add export to CSV functionality
requirement> add email notifications
requirement> add admin dashboard
```

## Tips and Best Practices

### 1. Start Simple

Begin with a basic requirement, let it be implemented, then add complexity:

```bash
✅ Good:
requirement> create a basic Flask web server
requirement> add a homepage with a form
requirement> add form validation

❌ Too Complex:
requirement> create a full-stack web app with authentication, database, API, and frontend
```

### 2. Be Specific

The more specific you are, the better the results:

```bash
✅ Good:
requirement> add a /api/users endpoint that returns JSON with id, name, and email fields

❌ Vague:
requirement> add users
```

### 3. One Thing at a Time

Focus each requirement on a single feature or fix:

```bash
✅ Good:
requirement> add error handling for database connections
requirement> add logging for all API requests

❌ Multiple Things:
requirement> add error handling and logging and monitoring and alerts
```

### 4. Review Between Turns

After each autonomous run completes:

- Check the generated files
- Test the functionality
- Decide what to add or fix next

### 5. Use Exit Commands

When done:

- Type `exit` or `quit`
- Press `Ctrl+D` (EOF)
- Press `Ctrl+C` to cancel the current input

## Comparison with Other Modes

| Feature | Accumulative (Default) | Traditional Interactive | Autonomous | Single-Shot |
|---------|------------------------|-------------------------|------------|-------------|
| **Command** | `g3` | `g3 --accumulative` | `g3 --autonomous` | `g3 "task"` |
| **Input Style** | Iterative prompts | Chat messages | requirements.md file | Command-line arg |
| **Auto-Autonomous** | ✅ Yes | ❌ No | ✅ Yes | ❌ No |
| **Coach-Player Loop** | ✅ Yes | ❌ No | ✅ Yes | ❌ No |
| **Accumulates Requirements** | ✅ Yes | ❌ No | ❌ No | ❌ No |
| **Multiple Iterations** | ✅ Yes | ✅ Yes | ✅ Yes | ❌ No |
| **Best For** | Iterative development | Quick questions | Pre-planned projects | One-off tasks |

## Technical Details

### Requirements Storage

- Stored in memory (not persisted to disk)
- Numbered sequentially starting from 1
- Formatted as a markdown list
- Passed to autonomous mode as a `--requirements` override

### History

- Saved to `~/.g3_accumulative_history`
- Separate from the traditional interactive history
- Persists across sessions
- Uses rustyline for readline support

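The history behavior described above follows the standard readline pattern: load the history file on startup, record each accepted input, and write the file back so entries persist across sessions. A rough analog using Python's stdlib `readline` module (G3 itself uses the Rust rustyline crate; the helper function names here are assumptions, only the history path comes from the doc):

```python
import os
import readline  # GNU readline bindings (Unix); rustyline plays this role in Rust

HISTORY_FILE = os.path.expanduser("~/.g3_accumulative_history")

def load_history():
    """Load prior sessions' entries, if the history file exists."""
    if os.path.exists(HISTORY_FILE):
        readline.read_history_file(HISTORY_FILE)

def record(entry: str):
    """Make an accepted input recallable with the Up arrow."""
    readline.add_history(entry)

def save_history():
    """Persist the session's entries for the next run."""
    readline.write_history_file(HISTORY_FILE)
```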
### Workspace

- Defaults to the current directory
- Can be overridden with `--workspace`
- All files are created in the workspace
- Logs are saved to `workspace/logs/`

### Autonomous Execution

- Full coach-player feedback loop
- Configurable max turns (default: 5)
- Respects all CLI flags (`--macax`, `--webdriver`, etc.)
- Error handling allows continuation

## Troubleshooting

### "No requirements provided"

This shouldn't happen in accumulative mode, but if it does:

- Check that you entered a requirement
- Ensure the requirement isn't empty
- Try restarting G3

### "Autonomous run failed"

If an autonomous run fails:

- Read the error message
- Provide a new requirement to fix the issue
- Or type `exit` and investigate manually

### "Context window full"

If you hit token limits:

- The agent will auto-summarize
- Or you can start a new session
- Consider using `--max-turns` to limit iterations

### "Coach never approves"

If the coach keeps rejecting:

- Check the coach feedback for specific issues
- Provide more specific requirements
- Consider increasing `--max-turns`

## Future Enhancements

Planned improvements:

1. **Persistence**: Save accumulated requirements to disk
2. **Editing**: Edit or remove previous requirements
3. **Branching**: Try different approaches
4. **Templates**: Pre-defined requirement sets
5. **Review**: Show all accumulated requirements
6. **Export**: Save to requirements.md
7. **Undo**: Remove the last requirement
8. **Replay**: Re-run with the same requirements

## Feedback

This is a new feature! Please provide feedback:

- What works well?
- What's confusing?
- What features would you like?
- Any bugs or issues?

Open an issue on GitHub or contribute improvements!