From 3128b5d8b9b51fdebfc95951b66d566c850d254f Mon Sep 17 00:00:00 2001 From: Michael Neale Date: Wed, 22 Oct 2025 14:19:00 +1100 Subject: [PATCH] can choose per mode models for auto mode --- CHANGELOG.md | 29 ++ README.md | 323 ++++++++++-------- crates/g3-cli/src/lib.rs | 48 ++- .../g3-config/src/autonomous_config_tests.rs | 131 +++++++ crates/g3-config/src/lib.rs | 94 +++++ 5 files changed, 484 insertions(+), 141 deletions(-) create mode 100644 CHANGELOG.md create mode 100644 crates/g3-config/src/autonomous_config_tests.rs diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..145734d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,29 @@ +# Changelog + +## [Unreleased] + +### Added + +**Interactive Requirements Mode** +- **Interactive Requirements Entry**: New `--interactive-requirements` flag for autonomous mode + - Prompts user to enter requirements via stdin (multi-line support) + - Automatically saves requirements to `requirements.md` in workspace + - Shows preview of entered requirements + - Seamlessly transitions to autonomous mode + +**Autonomous Mode Configuration** +- **Autonomous Mode Configuration**: Added ability to specify different models for coach and player agents in autonomous mode + - New `[autonomous]` configuration section in `g3.toml` + - `coach_provider` and `coach_model` options for coach agent + - `player_provider` and `player_model` options for player agent + - `Config::for_coach()` and `Config::for_player()` methods to generate role-specific configurations + - Comprehensive test suite for autonomous configuration + +### Changed +- Autonomous mode now uses `config.for_player()` for the player agent +- Coach agent creation now uses `config.for_coach()` for the coach agent + +### Benefits +- **Cost Optimization**: Use cheaper models for execution, expensive models for review +- **Speed Optimization**: Use faster models for iteration, thorough models for validation +- **Specialization**: Leverage different providers' 
strengths for different roles diff --git a/README.md b/README.md index a205213..5428098 100644 --- a/README.md +++ b/README.md @@ -2,122 +2,14 @@ G3 is a coding AI agent designed to help you complete tasks by writing code and executing commands. Built in Rust, it provides a flexible architecture for interacting with various Large Language Model (LLM) providers while offering powerful code generation and task automation capabilities. -## Architecture Overview - -G3 follows a modular architecture organized as a Rust workspace with multiple crates, each responsible for specific functionality: - -### Core Components - -#### **g3-core** -The heart of the agent system, containing: -- **Agent Engine**: Main orchestration logic for handling conversations, tool execution, and task management -- **Context Window Management**: Intelligent tracking of token usage with context thinning (50-80%) and auto-summarization at 80% capacity -- **Tool System**: Built-in tools for file operations, shell commands, computer control, TODO management, and structured output -- **Streaming Response Parser**: Real-time parsing of LLM responses with tool call detection and execution -- **Task Execution**: Support for single and iterative task execution with automatic retry logic - -#### **g3-providers** -Abstraction layer for LLM providers: -- **Provider Interface**: Common trait-based API for different LLM backends -- **Multiple Provider Support**: - - Anthropic (Claude models) - - Databricks (DBRX and other models) - - Local/embedded models via llama.cpp with Metal acceleration on macOS -- **OAuth Authentication**: Built-in OAuth flow support for secure provider authentication -- **Provider Registry**: Dynamic provider management and selection - -#### **g3-config** -Configuration management system: -- Environment-based configuration -- Provider credentials and settings -- Model selection and parameters -- Runtime configuration options - -#### **g3-execution** -Task execution framework: -- Task 
planning and decomposition -- Execution strategies (sequential, parallel) -- Error handling and retry mechanisms -- Progress tracking and reporting - -#### **g3-computer-control** -Computer control capabilities: -- Mouse and keyboard automation -- UI element inspection and interaction -- Screenshot capture and window management -- OCR text extraction via Tesseract - -#### **g3-cli** -Command-line interface: -- Interactive terminal interface -- Task submission and monitoring -- Configuration management commands -- Session management - -### Error Handling & Resilience - -G3 includes robust error handling with automatic retry logic: -- **Recoverable Error Detection**: Automatically identifies recoverable errors (rate limits, network issues, server errors, timeouts) -- **Exponential Backoff with Jitter**: Implements intelligent retry delays to avoid overwhelming services -- **Detailed Error Logging**: Captures comprehensive error context including stack traces, request/response data, and session information -- **Error Persistence**: Saves detailed error logs to `logs/errors/` for post-mortem analysis -- **Graceful Degradation**: Non-recoverable errors are logged with full context before terminating - ## Key Features -### Intelligent Context Management -- Automatic context window monitoring with percentage-based tracking -- Smart auto-summarization when approaching token limits -- **Context thinning** at 50%, 60%, 70%, 80% thresholds - automatically replaces large tool results with file references -- Conversation history preservation through summaries -- Dynamic token allocation for different providers (4k to 200k+ tokens) - -### Tool Ecosystem -- **File Operations**: Read, write, and edit files with line-range precision -- **Shell Integration**: Execute system commands with output capture -- **Code Generation**: Structured code generation with syntax awareness -- **TODO Management**: Read and write TODO lists with markdown checkbox format -- **Computer Control** 
(Experimental): Automate desktop applications - - Mouse and keyboard control - - UI element inspection - - Screenshot capture and window management - - OCR text extraction from images and screen regions - - Window listing and identification -- **Final Output**: Formatted result presentation - -### Provider Flexibility -- Support for multiple LLM providers through a unified interface -- Hot-swappable providers without code changes -- Provider-specific optimizations and feature support -- Local model support for offline operation - -### Task Automation -- Single-shot task execution for quick operations -- Iterative task mode for complex, multi-step workflows -- Automatic error recovery and retry logic -- Progress tracking and intermediate result handling - -## Language & Technology Stack - -- **Language**: Rust (2021 edition) -- **Async Runtime**: Tokio for concurrent operations -- **HTTP Client**: Reqwest for API communications -- **Serialization**: Serde for JSON handling -- **CLI Framework**: Clap for command-line parsing -- **Logging**: Tracing for structured logging -- **Local Models**: llama.cpp with Metal acceleration support - -## Use Cases - -G3 is designed for: -- Automated code generation and refactoring -- File manipulation and project scaffolding -- System administration tasks -- Data processing and transformation -- API integration and testing -- Documentation generation -- Complex multi-step workflows -- Desktop application automation and testing +- **Multiple LLM Providers**: Anthropic (Claude), Databricks, OpenAI, and local models via llama.cpp +- **Autonomous Mode**: Coach-player feedback loop for complex tasks +- **Intelligent Context Management**: Auto-summarization and context thinning at 50-80% thresholds +- **Rich Tool Ecosystem**: File operations, shell commands, computer control, browser automation +- **Streaming Responses**: Real-time output with tool call detection +- **Error Recovery**: Automatic retry logic with exponential backoff ## 
Getting Started @@ -125,56 +17,211 @@ G3 is designed for: # Build the project cargo build --release -# Run G3 -cargo run - -# Execute a task +# Execute a single task g3 "implement a function to calculate fibonacci numbers" + +# Start autonomous mode with interactive requirements +g3 --autonomous --interactive-requirements ``` +## Configuration + +Create `~/.config/g3/config.toml`: + +```toml +[providers] +default_provider = "databricks" + +[providers.anthropic] +api_key = "sk-ant-..." +model = "claude-3-5-sonnet-20241022" +max_tokens = 4096 + +[providers.databricks] +host = "https://your-workspace.cloud.databricks.com" +model = "databricks-meta-llama-3-1-70b-instruct" +max_tokens = 4096 +use_oauth = true + +[agent] +max_context_length = 8192 +enable_streaming = true + +# Optional: Use different models for coach and player in autonomous mode +[autonomous] +coach_provider = "anthropic" +coach_model = "claude-3-5-sonnet-20241022" # Thorough review +player_provider = "databricks" +player_model = "databricks-meta-llama-3-1-70b-instruct" # Fast execution +``` + +## Autonomous Mode (Coach-Player Loop) + +G3 features an autonomous mode where two agents collaborate: +- **Player Agent**: Executes tasks and implements solutions +- **Coach Agent**: Reviews work and provides feedback + +### Option 1: Interactive Requirements (Recommended) + +```bash +g3 --autonomous --interactive-requirements +``` + +Enter your requirements (multi-line), then press **Ctrl+D** (Unix/Mac) or **Ctrl+Z** followed by **Enter** (Windows) to start. + +### Option 2: Direct Requirements + +```bash +g3 --autonomous --requirements "Build a REST API with CRUD operations for user management" +``` + +### Option 3: Requirements File + +Create `requirements.md` in your workspace: + +```markdown +# Project Requirements + +1. Create a REST API with user endpoints +2. Use SQLite for storage +3. Include input validation +4. 
Write unit tests +``` + +Then run: + +```bash +g3 --autonomous +``` + +### Why Different Models for Coach and Player? + +Configure different models in the `[autonomous]` section to: +- **Optimize Cost**: Use a cheaper model for execution and a more expensive one for review +- **Optimize Speed**: Use a fast model for iteration and a thorough one for validation +- **Specialize**: Leverage provider strengths (e.g., Claude for analysis, Llama for code) + +If not configured, both agents use the `default_provider` and its model. + +## Command-Line Options + +```bash +# Autonomous mode +g3 --autonomous --interactive-requirements +g3 --autonomous --requirements "Your requirements" +g3 --autonomous --max-turns 10 + +# Single-shot mode +g3 "your task here" + +# Options +--workspace <DIR> # Set workspace directory +--provider <NAME> # Override provider (anthropic, databricks, openai) +--model <NAME> # Override model +--quiet # Disable log files +--webdriver # Enable browser automation +--show-prompt # Show system prompt +--show-code # Show generated code +``` + +## Architecture Overview + +G3 is organized as a Rust workspace with multiple crates: + +- **g3-core**: Agent engine, context management, tool system, streaming parser +- **g3-providers**: LLM provider abstraction (Anthropic, Databricks, OpenAI, local models) +- **g3-config**: Configuration management +- **g3-execution**: Task execution framework +- **g3-computer-control**: Mouse/keyboard automation, OCR, screenshots +- **g3-cli**: Command-line interface + +### Key Capabilities + +**Intelligent Context Management** +- Automatic context window monitoring with percentage-based tracking +- Smart auto-summarization when approaching token limits +- Context thinning at 50%, 60%, 70%, 80% thresholds +- Dynamic token allocation (4k to 200k+ tokens) + +**Tool Ecosystem** +- File operations (read, write, edit with line-range precision) +- Shell command execution +- TODO management +- Computer control (experimental): mouse, keyboard, OCR, screenshots +- Browser automation 
via WebDriver (Safari) + +**Error Handling** +- Automatic retry logic with exponential backoff +- Recoverable error detection (rate limits, network issues, timeouts) +- Detailed error logging to `logs/errors/` + ## WebDriver Browser Automation -G3 includes WebDriver support for browser automation tasks using Safari. - -**One-Time Setup** (macOS only): - -Safari Remote Automation must be enabled before using WebDriver tools. Run this once: +**One-Time Setup** (macOS): ```bash -# Option 1: Use the provided script -./scripts/enable-safari-automation.sh - -# Option 2: Enable manually +# Enable Safari Remote Automation safaridriver --enable # Requires password -# Option 3: Enable via Safari UI +# Or via Safari UI: # Safari → Preferences → Advanced → Show Develop menu # Then: Develop → Allow Remote Automation ``` -**For detailed setup instructions and troubleshooting**, see [WebDriver Setup Guide](docs/webdriver-setup.md). +**Usage**: -**Usage**: Run G3 with the `--webdriver` flag to enable browser automation tools. +```bash +g3 --webdriver "scrape the top stories from Hacker News" +``` + +See [docs/webdriver-setup.md](docs/webdriver-setup.md) for detailed setup. 
## Computer Control (Experimental) -G3 can interact with your computer's GUI for automation tasks: +Enable in config: + +```toml +[computer_control] +enabled = true +require_confirmation = true +``` + +Grant accessibility permissions: +- **macOS**: System Preferences → Security & Privacy → Accessibility +- **Linux**: Ensure X11 or Wayland access +- **Windows**: Run as administrator (first time) **Available Tools**: `mouse_click`, `type_text`, `find_element`, `take_screenshot`, `extract_text`, `find_text_on_screen`, `list_windows` -**Setup**: Enable in config with `computer_control.enabled = true` and grant OS accessibility permissions: -- **macOS**: System Preferences → Security & Privacy → Accessibility -- **Linux**: Ensure X11 or Wayland access -- **Windows**: Run as administrator (first time only) +## Use Cases + +- Automated code generation and refactoring +- File manipulation and project scaffolding +- System administration tasks +- Data processing and transformation +- API integration and testing +- Documentation generation +- Complex multi-step workflows +- Desktop application automation ## Session Logs -G3 automatically saves session logs for each interaction in the `logs/` directory. These logs contain: +G3 automatically saves session logs to `logs/` directory: - Complete conversation history - Token usage statistics - Timestamps and session status -The `logs/` directory is created automatically on first use and is excluded from version control. +Disable with `--quiet` flag. + +## Technology Stack + +- **Language**: Rust (2021 edition) +- **Async Runtime**: Tokio +- **HTTP Client**: Reqwest +- **Serialization**: Serde +- **CLI Framework**: Clap +- **Logging**: Tracing +- **Local Models**: llama.cpp with Metal acceleration ## License @@ -182,4 +229,4 @@ MIT License - see LICENSE file for details ## Contributing -G3 is an open-source project. Contributions are welcome! Please see CONTRIBUTING.md for guidelines. +Contributions welcome! 
Please see CONTRIBUTING.md for guidelines.
diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs
index 29d2f2a..5748478 100644
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -302,6 +302,10 @@ pub struct Cli {
     #[arg(long, value_name = "TEXT")]
     pub requirements: Option<String>,
 
+    /// Interactive mode: prompt for requirements and save to requirements.md before starting autonomous mode
+    #[arg(long, requires = "autonomous", conflicts_with = "requirements")]
+    pub interactive_requirements: bool,
+
     /// Use retro terminal UI (inspired by 80s sci-fi)
     #[arg(long)]
     pub retro: bool,
@@ -393,6 +397,51 @@ pub async fn run() -> Result<()> {
 
     // Create project model
     let project = if cli.autonomous {
+        // Handle interactive requirements mode
+        if cli.interactive_requirements {
+            use anyhow::Context as _;
+            use std::io::{self, Read};
+
+            // Number of requirement lines echoed back in the preview.
+            const PREVIEW_LINES: usize = 5;
+
+            println!("\n📝 Interactive Requirements Mode");
+            println!("================================\n");
+            println!("Please enter your project requirements.");
+            // A Windows console only reports end-of-input once Enter follows Ctrl+Z.
+            println!("You can enter multiple lines. Press Ctrl+D (Unix) or Ctrl+Z then Enter (Windows) when done.\n");
+
+            let mut requirements_input = String::new();
+            io::stdin().read_to_string(&mut requirements_input)?;
+
+            if requirements_input.trim().is_empty() {
+                anyhow::bail!("No requirements provided. Exiting.");
+            }
+
+            // Save to requirements.md in workspace
+            let requirements_path = workspace_dir.join("requirements.md");
+            std::fs::write(&requirements_path, &requirements_input).with_context(|| {
+                format!("Failed to write requirements to {}", requirements_path.display())
+            })?;
+
+            println!("\n✅ Requirements saved to: {}", requirements_path.display());
+            // `len()` would report bytes; count chars so the label stays truthful for non-ASCII text.
+            println!("📏 Length: {} characters\n", requirements_input.chars().count());
+
+            // Show a preview; the line total is computed once and reused below.
+            let total_lines = requirements_input.lines().count();
+            println!("Preview (first {} lines):", PREVIEW_LINES);
+            println!("---");
+            for line in requirements_input.lines().take(PREVIEW_LINES) {
+                println!("{}", line);
+            }
+            if total_lines > PREVIEW_LINES {
+                println!("... ({} more lines)", total_lines - PREVIEW_LINES);
+            }
+            println!("---\n");
+            println!("🚀 Starting autonomous mode...\n");
+        }
+
         if let Some(requirements_text) = cli.requirements {
             // Use requirements text override
             Project::new_autonomous_with_requirements(workspace_dir.clone(), requirements_text)?
@@ -451,7 +500,8 @@ pub async fn run() -> Result<()> {
 
     let mut agent = if cli.autonomous {
         Agent::new_autonomous_with_readme_and_quiet(
-            config.clone(),
+            // Use player-specific config in autonomous mode
+            config.for_player()?,
             ui_writer,
             combined_content.clone(),
             cli.quiet,
@@ -1522,14 +1572,17 @@ async fn run_autonomous(
 
         // Create a new agent instance for coach mode to ensure fresh context
-        // Use the same config with overrides that was passed to the player agent
-        let config = agent.get_config().clone();
+        // NOTE(review): `agent` looks like the player agent, whose config was built with
+        // `for_player()`; if so, the coach inherits every player provider/model override it
+        // does not replace itself. Prefer deriving this from the un-overridden config.
+        let base_config = agent.get_config().clone();
+        let coach_config = base_config.for_coach()?;
 
         // Reset filter suppression state before creating coach agent
         g3_core::fixed_filter_json::reset_fixed_json_tool_state();
 
         let ui_writer = ConsoleUiWriter::new();
         let mut coach_agent =
-            Agent::new_autonomous_with_readme_and_quiet(config, ui_writer, None, quiet).await?;
+            Agent::new_autonomous_with_readme_and_quiet(coach_config, ui_writer, None, quiet).await?;
 
         // Ensure coach agent is also in the workspace directory
         project.enter_workspace()?;
diff --git a/crates/g3-config/src/autonomous_config_tests.rs b/crates/g3-config/src/autonomous_config_tests.rs
new file mode 100644
index 0000000..0cbcb4f
--- /dev/null
+++ b/crates/g3-config/src/autonomous_config_tests.rs
@@ -0,0 +1,143 @@
+#[cfg(test)]
+mod autonomous_config_tests {
+    use crate::{Config, AnthropicConfig, DatabricksConfig};
+
+    #[test]
+    fn test_default_autonomous_config() {
+        let config = Config::default();
+        assert!(config.autonomous.coach_provider.is_none());
+        assert!(config.autonomous.coach_model.is_none());
+        assert!(config.autonomous.player_provider.is_none());
+        assert!(config.autonomous.player_model.is_none());
+    }
+
+    #[test]
+    fn test_for_coach_with_overrides() {
+        let mut config = Config::default();
+
+        // Set up base config with anthropic
+        config.providers.anthropic = Some(AnthropicConfig {
+            api_key: "test-key".to_string(),
+            model: "claude-3-5-sonnet-20241022".to_string(),
+            max_tokens: Some(4096),
+            temperature: Some(0.1),
+        });
+
+        // Set coach overrides
+        config.autonomous.coach_provider = Some("anthropic".to_string());
+        config.autonomous.coach_model = Some("claude-3-opus-20240229".to_string());
+
+        let coach_config = config.for_coach().unwrap();
+
+        // Verify coach uses overridden provider and model
+        assert_eq!(coach_config.providers.default_provider, "anthropic");
+        assert_eq!(
+            coach_config.providers.anthropic.as_ref().unwrap().model,
+            "claude-3-opus-20240229"
+        );
+    }
+
+    #[test]
+    fn test_for_player_with_overrides() {
+        let mut config = Config::default();
+
+        // Set up base config with databricks
+        config.providers.databricks = Some(DatabricksConfig {
+            host: "https://test.databricks.com".to_string(),
+            token: Some("test-token".to_string()),
+            model: "databricks-meta-llama-3-1-70b-instruct".to_string(),
+            max_tokens: Some(4096),
+            temperature: Some(0.1),
+            use_oauth: Some(false),
+        });
+
+        // Set player overrides
+        config.autonomous.player_provider = Some("databricks".to_string());
+        config.autonomous.player_model = Some("databricks-dbrx-instruct".to_string());
+
+        let player_config = config.for_player().unwrap();
+
+        // Verify player uses overridden provider and model
+        assert_eq!(player_config.providers.default_provider, "databricks");
+        assert_eq!(
+            player_config.providers.databricks.as_ref().unwrap().model,
+            "databricks-dbrx-instruct"
+        );
+    }
+
+    #[test]
+    fn test_model_override_requires_configured_provider() {
+        let mut config = Config::default();
+
+        // Model override targets a provider that has no configuration section
+        config.providers.anthropic = None;
+        config.autonomous.coach_provider = Some("anthropic".to_string());
+        config.autonomous.coach_model = Some("claude-3-opus-20240229".to_string());
+
+        assert!(config.for_coach().is_err());
+    }
+
+    #[test]
+    fn test_no_overrides_uses_defaults() {
+        let mut config = Config::default();
+        config.providers.default_provider = "databricks".to_string();
+
+        let coach_config = config.for_coach().unwrap();
+        let player_config = config.for_player().unwrap();
+
+        // Both should use the default provider when no overrides
+        assert_eq!(coach_config.providers.default_provider, "databricks");
+        assert_eq!(player_config.providers.default_provider, "databricks");
+    }
+
+    #[test]
+    fn test_provider_override_only() {
+        let mut config = Config::default();
+
+        config.providers.anthropic = Some(AnthropicConfig {
+            api_key: "test-key".to_string(),
+            model: "claude-3-5-sonnet-20241022".to_string(),
+            max_tokens: Some(4096),
+            temperature: Some(0.1),
+        });
+
+        // Only override provider, not model
+        config.autonomous.coach_provider = Some("anthropic".to_string());
+
+        let coach_config = config.for_coach().unwrap();
+
+        // Should use overridden provider with its default model
+        assert_eq!(coach_config.providers.default_provider, "anthropic");
+        assert_eq!(
+            coach_config.providers.anthropic.as_ref().unwrap().model,
+            "claude-3-5-sonnet-20241022"
+        );
+    }
+
+    #[test]
+    fn test_model_override_only() {
+        let mut config = Config::default();
+        config.providers.default_provider = "databricks".to_string();
+
+        config.providers.databricks = Some(DatabricksConfig {
+            host: "https://test.databricks.com".to_string(),
+            token: Some("test-token".to_string()),
+            model: "databricks-meta-llama-3-1-70b-instruct".to_string(),
+            max_tokens: Some(4096),
+            temperature: Some(0.1),
+            use_oauth: Some(false),
+        });
+
+        // Only override model, not provider
+        config.autonomous.player_model = Some("databricks-dbrx-instruct".to_string());
+
+        let player_config = config.for_player().unwrap();
+
+        // Should use default provider with overridden model
+        assert_eq!(player_config.providers.default_provider, "databricks");
+        assert_eq!(
+            player_config.providers.databricks.as_ref().unwrap().model,
+            "databricks-dbrx-instruct"
+        );
+    }
+}
diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs
index 99a0b87..6eec998 100644
--- a/crates/g3-config/src/lib.rs
+++ b/crates/g3-config/src/lib.rs
@@ -2,12 +2,18 @@ use serde::{Deserialize, Serialize};
 use anyhow::Result;
 use std::path::Path;
 
+#[cfg(test)]
+mod autonomous_config_tests;
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Config {
     pub providers: ProvidersConfig,
     pub agent: AgentConfig,
     pub computer_control: ComputerControlConfig,
     pub webdriver: WebDriverConfig,
+    /// Per-role overrides for autonomous mode; the `[autonomous]` section may be omitted.
+    #[serde(default)]
+    pub autonomous: AutonomousConfig,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -86,6 +92,21 @@ impl Default for WebDriverConfig {
     }
 }
 
+/// Per-role provider/model overrides for autonomous (coach-player) mode.
+///
+/// Every field is optional; an unset field keeps the value from `[providers]`.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub struct AutonomousConfig {
+    /// Provider used by the coach agent (falls back to `default_provider`).
+    pub coach_provider: Option<String>,
+    /// Model used by the coach agent (falls back to the provider's own model).
+    pub coach_model: Option<String>,
+    /// Provider used by the player agent (falls back to `default_provider`).
+    pub player_provider: Option<String>,
+    /// Model used by the player agent (falls back to the provider's own model).
+    pub player_model: Option<String>,
+}
+
 impl Default for ComputerControlConfig {
     fn default() -> Self {
         Self {
@@ -120,6 +141,7 @@ impl Default for Config {
             },
             computer_control: ComputerControlConfig::default(),
             webdriver: WebDriverConfig::default(),
+            autonomous: AutonomousConfig::default(),
         }
     }
 }
@@ -232,6 +254,7 @@ impl Config {
             },
             computer_control: ComputerControlConfig::default(),
             webdriver: WebDriverConfig::default(),
+            autonomous: AutonomousConfig::default(),
         }
     }
 
@@ -300,4 +323,93 @@ impl Config {
 
         Ok(config)
     }
+
+    /// Build the configuration for the coach agent in autonomous mode.
+    ///
+    /// Starts from a clone of `self` and applies `autonomous.coach_provider`
+    /// and `autonomous.coach_model`; an unset override keeps the inherited value.
+    /// Call this on the base configuration: the result of `for_player` already
+    /// carries the player overrides.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when `coach_model` is set but the effective provider
+    /// has no configuration section or is not `anthropic`/`databricks`.
+    pub fn for_coach(&self) -> Result<Self> {
+        self.with_role_overrides(
+            "Coach",
+            self.autonomous.coach_provider.as_deref(),
+            self.autonomous.coach_model.as_deref(),
+        )
+    }
+
+    /// Build the configuration for the player agent in autonomous mode.
+    ///
+    /// Starts from a clone of `self` and applies `autonomous.player_provider`
+    /// and `autonomous.player_model`; an unset override keeps the inherited value.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when `player_model` is set but the effective provider
+    /// has no configuration section or is not `anthropic`/`databricks`.
+    pub fn for_player(&self) -> Result<Self> {
+        self.with_role_overrides(
+            "Player",
+            self.autonomous.player_provider.as_deref(),
+            self.autonomous.player_model.as_deref(),
+        )
+    }
+
+    /// Shared implementation behind `for_coach` and `for_player`.
+    ///
+    /// `role` is only used to label error messages ("Coach" / "Player").
+    fn with_role_overrides(
+        &self,
+        role: &str,
+        provider: Option<&str>,
+        model: Option<&str>,
+    ) -> Result<Self> {
+        let mut config = self.clone();
+
+        if let Some(provider) = provider {
+            config.providers.default_provider = provider.to_string();
+        }
+
+        let model = match model {
+            Some(model) => model,
+            None => return Ok(config),
+        };
+
+        // The model is stored inside the section of whichever provider is active
+        // after the provider override, so resolve that section first.
+        let active = config.providers.default_provider.clone();
+        let model_slot = match active.as_str() {
+            "anthropic" => config.providers.anthropic.as_mut().map(|p| &mut p.model),
+            "databricks" => config.providers.databricks.as_mut().map(|p| &mut p.model),
+            // Fail loudly: ignoring the override would run the agent on a model
+            // the user did not ask for.
+            _ => {
+                return Err(anyhow::anyhow!(
+                    "{} model override '{}' cannot be applied: provider '{}' does not support model overrides in the [autonomous] section.",
+                    role,
+                    model,
+                    active
+                ))
+            }
+        };
+
+        match model_slot {
+            Some(slot) => *slot = model.to_string(),
+            None => {
+                return Err(anyhow::anyhow!(
+                    "{} provider '{}' is not configured. Please add {} configuration to your config file.",
+                    role,
+                    active,
+                    active
+                ))
+            }
+        }
+
+        Ok(config)
+    }
 }