From bb90cc782615772bc7381f0c7ea12b786317bb07 Mon Sep 17 00:00:00 2001
From: Dhanji Prasanna
Date: Tue, 14 Oct 2025 12:44:02 +1100
Subject: [PATCH] some fixes

---
 DESIGN.md                             | 66 +++++++++++++++++++--------
 crates/g3-cli/src/ui_writer_impl.rs   |  1 -
 crates/g3-providers/src/anthropic.rs  | 31 +++++++++----
 crates/g3-providers/src/databricks.rs | 48 ++++++++++++-------
 4 files changed, 101 insertions(+), 45 deletions(-)

diff --git a/DESIGN.md b/DESIGN.md
index 05c9230..410d6ab 100644
--- a/DESIGN.md
+++ b/DESIGN.md
@@ -1,4 +1,4 @@
-# G3 General Purpose AI Agent - Design Document
+# G3 - AI Coding Agent - Design Document
 
 ## Overview
 
@@ -8,7 +8,7 @@ The agent follows a **tool-first philosophy**: instead of just providing advice,
 
 ## Core Principles
 
-1. **Tool-First Philosophy**: Solve problems by actively using tools rather than just describing solutions
+1. **Tool-First Philosophy**: Solve problems by actively using tools rather than just providing advice
 2. **Modular Architecture**: Clear separation of concerns across multiple Rust crates
 3. **Provider Flexibility**: Support multiple LLM providers through a unified interface
 4. **Modularity**: Clear separation of concerns
@@ -23,11 +23,11 @@ G3 is organized as a Rust workspace with the following crates:
 
 ```
 g3/
-├── src/main.rs          # Main entry point
+├── src/main.rs          # Main entry point (delegates to g3-cli)
 ├── crates/
-│   ├── g3-cli/          # Command-line interface and TUI
-│   ├── g3-core/         # Core agent engine and logic
-│   ├── g3-providers/    # LLM provider abstractions
+│   ├── g3-cli/          # Command-line interface, TUI, and retro mode
+│   ├── g3-core/         # Core agent engine, tools, and streaming logic
+│   ├── g3-providers/    # LLM provider abstractions and implementations
 │   ├── g3-config/       # Configuration management
 │   └── g3-execution/    # Code execution engine
 ├── logs/                # Session logs (auto-created)
@@ -74,7 +74,7 @@ g3/
 - Error handling with automatic retry logic
 
 **Key Features:**
-- **Context Window Intelligence**: Automatic monitoring with percentage-based tracking (~80% capacity triggers auto-summarization)
+- **Context Window Intelligence**: Automatic monitoring with percentage-based tracking (80% capacity triggers auto-summarization)
 - **Tool System**: Built-in tools for file operations (read, write, edit), shell commands, and structured output
 - **Streaming Parser**: Real-time parsing of LLM responses with tool call detection and execution
 - **Session Management**: Automatic session logging with detailed conversation history and token usage
@@ -86,6 +86,7 @@
 - `write_file`: Create or overwrite files with content
 - `str_replace`: Apply unified diffs to files with precise editing
 - `final_output`: Signal task completion with detailed summaries
+- **Project Management**: Workspace handling, requirements.md processing for autonomous mode
 
 ### 2. g3-providers: LLM Provider Abstraction
 
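The 80% auto-summarization threshold mentioned in the g3-core hunk above is straightforward to picture in code. The sketch below is purely illustrative; the `ContextWindow` struct and its fields are hypothetical stand-ins, not types from g3-core.

```rust
/// Hypothetical illustration of the percentage-based context check described
/// in DESIGN.md; g3-core's real tracking types are not shown in this patch.
struct ContextWindow {
    used_tokens: usize,
    max_tokens: usize,
}

impl ContextWindow {
    /// True once usage reaches 80% of capacity, the point at which
    /// auto-summarization is described as kicking in.
    fn should_summarize(&self) -> bool {
        self.used_tokens as f64 / self.max_tokens as f64 >= 0.80
    }
}
```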
@@ -97,7 +98,7 @@
 
 **Supported Providers:**
 - **Anthropic**: Claude models via API with native tool calling support
-- **Databricks**: Foundation Model APIs with OAuth and token-based authentication
+- **Databricks**: Foundation Model APIs with OAuth and token-based authentication (default provider)
 - **Embedded**: Local models via llama.cpp with GPU acceleration (Metal/CUDA)
 - **Provider Registry**: Dynamic provider management and hot-swapping
 
@@ -119,7 +120,7 @@
 
 **Execution Modes:**
 - **Single-shot**: Execute one task and exit
-- **Interactive**: REPL-style conversation with the agent
+- **Interactive**: REPL-style conversation with the agent (default mode)
 - **Autonomous**: Coach-player feedback loop for complex projects
 - **Retro TUI**: Full-screen terminal interface with real-time updates
 
@@ -139,11 +140,10 @@
 - Multi-language code execution support
 - Error handling and result formatting
 
-**Supported Languages:**
-- **Bash/Shell**: Direct command execution with streaming output
-- **Python**: Script execution via temporary files
-- **JavaScript**: Node.js-based execution
-- **Extensible**: Framework for adding additional language support
+**Supported Execution:**
+- **Bash/Shell**: Direct command execution with streaming output (primary use case)
+- **Python**: Script execution via temporary files (legacy support)
+- **JavaScript**: Node.js-based execution (legacy support)
 
 **Key Features:**
 - **Streaming Output**: Real-time command output display
@@ -161,7 +161,7 @@
 - CLI argument integration
 
 **Configuration Hierarchy:**
-1. Default configuration (embedded in code)
+1. Default configuration (Databricks provider with OAuth)
 2. Configuration files (`~/.config/g3/config.toml`, `./g3.toml`)
 3. Environment variables (`G3_*`)
 4. CLI arguments (highest priority)
@@ -216,7 +216,7 @@ Advanced autonomous operation with coach-player feedback:
 
 ## Provider Comparison
 
-| Feature | Anthropic | Databricks | Embedded |
+| Feature | Anthropic | Databricks (Default) | Embedded |
 |---------|-----------|------------|----------|
 | **Cost** | Pay per token | Pay per token | Free after download |
 | **Privacy** | Data sent to API | Data sent to API | Completely local |
@@ -242,7 +242,7 @@ max_tokens = 8192
 temperature = 0.1
 ```
 
-### Enterprise Setup (Databricks)
+### Enterprise Setup (Databricks - Default)
 ```toml
 [providers]
 default_provider = "databricks"
@@ -314,7 +314,7 @@
 g3 --retro --theme dracula    # Full-screen terminal interface
 ```
 
-## Future Enhancements
+## Implementation Details
 
 ### Planned Features
 - **Plugin System**: Custom tool and provider plugins
@@ -341,10 +341,38 @@
 - **Testing**: Unit tests, integration tests, and property-based testing
 
 ### Performance Considerations
-- **Async-First**: All I/O operations are asynchronous
+- **Async-First**: All I/O operations are asynchronous (Tokio runtime)
 - **Streaming**: Real-time response processing where possible
 - **Memory Efficiency**: Careful memory management for large contexts
 - **Caching**: Strategic caching of expensive operations
 - **Profiling**: Regular performance profiling and optimization
 
 This design document reflects the current state of G3 as a mature, production-ready AI coding agent with sophisticated architecture and comprehensive feature set.
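The provider registry and hot-swapping described in the DESIGN.md changes above can be pictured with a small sketch. The trait and struct below are assumptions for illustration only; the actual trait lives in `crates/g3-providers/src/lib.rs` and is not part of this patch.

```rust
use std::collections::HashMap;

/// Hypothetical provider trait, standing in for whatever g3-providers defines.
trait LlmProvider {
    fn name(&self) -> &str;
    fn complete(&self, prompt: &str) -> Result<String, String>;
}

/// Registry keyed by provider name, so the default can be swapped at runtime
/// (e.g. "databricks" by default, "anthropic" or "embedded" on request).
struct ProviderRegistry {
    providers: HashMap<String, Box<dyn LlmProvider>>,
    default_name: String,
}

impl ProviderRegistry {
    fn register(&mut self, provider: Box<dyn LlmProvider>) {
        self.providers.insert(provider.name().to_string(), provider);
    }

    fn default_provider(&self) -> Option<&dyn LlmProvider> {
        self.providers.get(&self.default_name).map(|b| b.as_ref())
    }
}
```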
+
+## Current Implementation Status
+
+### Fully Implemented
+- ✅ **Core Agent Engine**: Complete with streaming, tool execution, and context management
+- ✅ **Provider System**: Anthropic, Databricks, and Embedded providers with OAuth support
+- ✅ **Tool System**: All 5 core tools (shell, read_file, write_file, str_replace, final_output)
+- ✅ **CLI Interface**: Interactive mode, single-shot mode, retro TUI
+- ✅ **Autonomous Mode**: Coach-player feedback loop with requirements.md processing
+- ✅ **Configuration**: TOML-based config with environment overrides
+- ✅ **Error Handling**: Comprehensive retry logic and error classification
+- ✅ **Session Logging**: Automatic session tracking and JSON logs
+- ✅ **Context Management**: Auto-summarization at 80% capacity
+
+### Architecture Highlights
+- **Workspace**: 5 crates with clear separation of concerns
+- **Dependencies**: Modern Rust ecosystem (Tokio, Clap, Serde, etc.)
+- **Streaming**: Real-time response processing with tool call detection
+- **Cross-Platform**: Works on macOS, Linux, and Windows
+- **GPU Support**: Metal acceleration for local models on macOS
+
+### Key Files
+- `src/main.rs`: 6-line entry point delegating to g3-cli
+- `crates/g3-core/src/lib.rs`: 2953 lines - main agent implementation
+- `crates/g3-cli/src/lib.rs`: 1354 lines - CLI and interaction modes
+- `crates/g3-providers/src/lib.rs`: 144 lines - provider trait and registry
+- `crates/g3-config/src/lib.rs`: 265 lines - configuration management
+- `crates/g3-execution/src/lib.rs`: 284 lines - code execution engine
diff --git a/crates/g3-cli/src/ui_writer_impl.rs b/crates/g3-cli/src/ui_writer_impl.rs
index dc60958..a041ce8 100644
--- a/crates/g3-cli/src/ui_writer_impl.rs
+++ b/crates/g3-cli/src/ui_writer_impl.rs
@@ -149,7 +149,6 @@ impl UiWriter for ConsoleUiWriter {
     }
 
     fn print_agent_prompt(&self) {
-        print!(" ");
         let _ = io::stdout().flush();
     }
 
diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs
index b84e92e..ae140f4 100644
--- a/crates/g3-providers/src/anthropic.rs
+++ b/crates/g3-providers/src/anthropic.rs
@@ -274,22 +274,37 @@ impl AnthropicProvider {
         let mut current_tool_calls: Vec = Vec::new();
         let mut partial_tool_json = String::new(); // Accumulate partial JSON for tool calls
         let mut accumulated_usage: Option = None;
+        let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
 
         while let Some(chunk_result) = stream.next().await {
             match chunk_result {
                 Ok(chunk) => {
-                    let chunk_str = match std::str::from_utf8(&chunk) {
-                        Ok(s) => s,
+                    // Append new bytes to our buffer
+                    byte_buffer.extend_from_slice(&chunk);
+
+                    // Try to convert the entire buffer to UTF-8
+                    let chunk_str = match std::str::from_utf8(&byte_buffer) {
+                        Ok(s) => {
+                            // Successfully converted entire buffer, clear it and use the string
+                            let result = s.to_string();
+                            byte_buffer.clear();
+                            result
+                        }
                         Err(e) => {
-                            error!("Invalid UTF-8 in stream chunk: {}", e);
-                            let _ = tx
-                                .send(Err(anyhow!("Invalid UTF-8 in stream chunk: {}", e)))
-                                .await;
-                            return accumulated_usage;
+                            // Check if this is an incomplete sequence at the end
+                            let valid_up_to = e.valid_up_to();
+                            if valid_up_to > 0 {
+                                // We have some valid UTF-8, extract it and keep the rest for next iteration
+                                let valid_bytes = byte_buffer.drain(..valid_up_to).collect::<Vec<u8>>();
+                                std::str::from_utf8(&valid_bytes).unwrap().to_string()
+                            } else {
+                                // No valid UTF-8 at all, skip this chunk and continue
+                                continue;
+                            }
                         }
                     };
 
-                    buffer.push_str(chunk_str);
+                    buffer.push_str(&chunk_str);
 
                     // Process complete lines
                     while let Some(line_end) = buffer.find('\n') {
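Both this hunk and the databricks.rs hunk below apply the same idea: buffer raw bytes and only decode the prefix that is currently valid UTF-8, so a multi-byte character split across two network chunks is no longer reported as an error. The following is a self-contained sketch of that technique using only the standard library, independent of the provider structs; a fuller version would also discard bytes that `error_len()` reports as genuinely invalid, a case the patch simply skips past with `continue`.

```rust
/// Drain the longest valid UTF-8 prefix of `byte_buffer` into a String,
/// leaving a trailing incomplete multi-byte sequence buffered for later.
fn drain_valid_utf8(byte_buffer: &mut Vec<u8>) -> String {
    match std::str::from_utf8(byte_buffer) {
        // Whole buffer is valid UTF-8: take everything.
        Ok(s) => {
            let out = s.to_string();
            byte_buffer.clear();
            out
        }
        Err(e) => {
            // Bytes before `valid_up_to()` are valid; the remainder is usually
            // the first bytes of a character that continues in the next chunk.
            // (A fuller version would also drop `e.error_len()` truly bad bytes.)
            let valid: Vec<u8> = byte_buffer.drain(..e.valid_up_to()).collect();
            String::from_utf8(valid).expect("prefix checked by valid_up_to")
        }
    }
}

fn main() {
    // "é" is 0xC3 0xA9 in UTF-8; simulate it arriving split across two chunks.
    let mut buf = Vec::new();
    buf.extend_from_slice(b"caf\xC3");
    assert_eq!(drain_valid_utf8(&mut buf), "caf"); // the lone 0xC3 stays buffered
    buf.extend_from_slice(b"\xA9!");
    assert_eq!(drain_valid_utf8(&mut buf), "é!");
    println!("ok");
}
```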
diff --git a/crates/g3-providers/src/databricks.rs b/crates/g3-providers/src/databricks.rs
index 47b361a..68fa413 100644
--- a/crates/g3-providers/src/databricks.rs
+++ b/crates/g3-providers/src/databricks.rs
@@ -299,6 +299,7 @@
             std::collections::HashMap::new(); // index -> (id, name, args)
         let mut incomplete_data_line = String::new(); // Buffer for incomplete data: lines
         let accumulated_usage: Option = None;
+        let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
 
         while let Some(chunk_result) = stream.next().await {
             match chunk_result {
@@ -306,29 +307,42 @@ Ok(chunk) => {
                     // Debug: Log raw bytes received
                     debug!("Raw SSE bytes received: {} bytes", chunk.len());
 
-                    let chunk_str = match std::str::from_utf8(&chunk) {
+                    // Append new bytes to our buffer
+                    byte_buffer.extend_from_slice(&chunk);
+
+                    // Try to convert the entire buffer to UTF-8
+                    let chunk_str = match std::str::from_utf8(&byte_buffer) {
                         Ok(s) => {
-                            // Debug: Log raw string content (truncated for large chunks)
-                            if s.len() > 1000 {
-                                debug!(
-                                    "Raw SSE string content (first 500 chars): {:?}...",
-                                    &s[..500]
-                                );
-                            } else {
-                                debug!("Raw SSE string content: {:?}", s);
-                            }
-                            s
+                            // Successfully converted entire buffer, clear it and use the string
+                            let result = s.to_string();
+                            byte_buffer.clear();
+                            result
                         }
                         Err(e) => {
-                            error!("Invalid UTF-8 in stream chunk: {}", e);
-                            let _ = tx
-                                .send(Err(anyhow!("Invalid UTF-8 in stream chunk: {}", e)))
-                                .await;
-                            return accumulated_usage;
+                            // Check if this is an incomplete sequence at the end
+                            let valid_up_to = e.valid_up_to();
+                            if valid_up_to > 0 {
+                                // We have some valid UTF-8, extract it and keep the rest for next iteration
+                                let valid_bytes = byte_buffer.drain(..valid_up_to).collect::<Vec<u8>>();
+                                std::str::from_utf8(&valid_bytes).unwrap().to_string()
+                            } else {
+                                // No valid UTF-8 at all, skip this chunk and continue
+                                continue;
+                            }
                         }
                     };
 
-                    buffer.push_str(chunk_str);
+                    // Debug: Log raw string content (truncated for large chunks)
+                    if chunk_str.len() > 1000 {
+                        debug!(
+                            "Raw SSE string content (first 500 chars): {:?}...",
+                            &chunk_str[..500]
+                        );
+                    } else {
+                        debug!("Raw SSE string content: {:?}", chunk_str);
+                    }
+
+                    buffer.push_str(&chunk_str);
 
                     // Process complete lines, but handle incomplete data: lines specially
                     while let Some(line_end) = buffer.find('\n') {
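The patch ends inside the providers' line-processing loop (`while let Some(line_end) = buffer.find('\n')`). As a standalone illustration of that buffering pattern, not g3's actual code: complete lines are handled as soon as they arrive, while a trailing partial SSE `data:` line stays in the buffer until the next chunk completes it.

```rust
/// Process every complete line currently in `buffer`, keeping any trailing
/// partial line buffered for the next chunk. (Illustrative sketch only.)
fn drain_complete_lines(buffer: &mut String, mut handle_line: impl FnMut(&str)) {
    while let Some(line_end) = buffer.find('\n') {
        // Remove one line, newline included, from the front of the buffer.
        let line: String = buffer.drain(..=line_end).collect();
        handle_line(line.trim_end());
    }
}

fn main() {
    let mut buffer = String::new();
    buffer.push_str("data: {\"a\":1}\ndata: {\"b\":");
    drain_complete_lines(&mut buffer, |line| println!("complete line: {line}"));
    // The incomplete `data:` line is still buffered, waiting for more bytes.
    assert_eq!(buffer, "data: {\"b\":");
}
```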