Compare commits


5 Commits

Author              SHA1        Message                                       Date
Michael Neale       6a4be9ddd7  gitignore awareness                           2025-10-28 17:55:12 +11:00
Michael Neale       2a44fbb7b2  suggest rg when available for performance     2025-10-28 15:27:22 +11:00
Dhanji R. Prasanna  4bf0f71bbd  Merge pull request #12 from dhanji/libvision  2025-10-28 15:12:51 +11:00
                                ("will need this for it to work")
Michael Neale       c1ce3038d8  will need this for it to work                 2025-10-28 15:07:24 +11:00
Dhanji Prasanna     4b1694b308  machine mode                                  2025-10-28 14:51:32 +11:00
7 changed files with 808 additions and 696 deletions

.cargo/config.toml (new file)

@@ -0,0 +1,5 @@
[target.aarch64-apple-darwin]
rustflags = ["-C", "link-args=-Wl,-rpath,@executable_path"]

[target.x86_64-apple-darwin]
rustflags = ["-C", "link-args=-Wl,-rpath,@executable_path"]


@@ -14,6 +14,7 @@ The heart of the agent system, containing:
 - **Context Window Management**: Intelligent tracking of token usage with context thinning (50-80%) and auto-summarization at 80% capacity
 - **Tool System**: Built-in tools for file operations, shell commands, computer control, TODO management, and structured output
 - **Streaming Response Parser**: Real-time parsing of LLM responses with tool call detection and execution
+- **Smart Project Awareness**: Automatically detects and respects `.gitignore` patterns, informing the agent about ignored files
 - **Task Execution**: Support for single and iterative task execution with automatic retry logic

 #### **g3-providers**
@@ -97,7 +98,10 @@ These commands give you fine-grained control over context management, allowing y
 - **Final Output**: Formatted result presentation

 ### Provider Flexibility
+- Support for multiple LLM providers through a unified interface
+### Smart Project Awareness
+- Automatically detects and respects `.gitignore` when present
 - Hot-swappable providers without code changes
 - Provider-specific optimizations and feature support
 - Local model support for offline operation
@@ -136,8 +140,12 @@ G3 is designed for:
 # Build the project
 cargo build --release

-# Run G3
-cargo run
+# Run from the build directory
+./target/release/g3
+
+# Or copy both files to somewhere in your PATH (macOS only needs both files)
+cp target/release/g3 ~/.local/bin/
+cp target/release/libVisionBridge.dylib ~/.local/bin/  # macOS only

 # Execute a task
 g3 "implement a function to calculate fibonacci numbers"


@@ -0,0 +1,76 @@
#[cfg(test)]
mod gitignore_prompt_tests {
    use crate::Agent;
    use crate::ui_writer::UiWriter;

    // Mock UI writer for testing
    struct MockUiWriter;

    impl UiWriter for MockUiWriter {
        fn print_agent_prompt(&self) {}
        fn print_agent_response(&self, _text: &str) {}
        fn print(&self, _message: &str) {}
        fn print_inline(&self, _message: &str) {}
        fn print_tool_output_line(&self, _line: &str) {}
        fn print_system_prompt(&self, _text: &str) {}
        fn print_tool_header(&self, _tool_name: &str) {}
        fn print_tool_arg(&self, _key: &str, _value: &str) {}
        fn print_tool_output_header(&self) {}
        fn update_tool_output_line(&self, _line: &str) {}
        fn print_tool_output_summary(&self, _total_lines: usize) {}
        fn print_tool_timing(&self, _duration: &str) {}
        fn print_context_status(&self, _message: &str) {}
        fn print_context_thinning(&self, _message: &str) {}
        fn println(&self, _text: &str) {}
        fn flush(&self) {}
        fn notify_sse_received(&self) {}
        fn wants_full_output(&self) -> bool { false }
    }

    #[test]
    fn test_gitignore_prompt_snippet_with_file() {
        // Create a temporary .gitignore file
        let test_gitignore = "# Test comment\ntarget/\n*.log\n\n# Another comment\nlogs/\n";
        std::fs::write(".gitignore.test", test_gitignore).unwrap();

        // Temporarily rename actual .gitignore if it exists
        let has_real_gitignore = std::path::Path::new(".gitignore").exists();
        if has_real_gitignore {
            std::fs::rename(".gitignore", ".gitignore.backup").unwrap();
        }

        // Rename test file to .gitignore
        std::fs::rename(".gitignore.test", ".gitignore").unwrap();

        let snippet = Agent::<MockUiWriter>::get_gitignore_prompt_snippet();

        // Restore original .gitignore
        std::fs::remove_file(".gitignore").unwrap();
        if has_real_gitignore {
            std::fs::rename(".gitignore.backup", ".gitignore").unwrap();
        }

        assert!(snippet.contains("IMPORTANT"));
        assert!(snippet.contains(".gitignore"));
        assert!(snippet.contains("target/"));
        assert!(snippet.contains("*.log"));
    }

    #[test]
    fn test_gitignore_prompt_snippet_without_file() {
        // Temporarily rename .gitignore if it exists
        let has_gitignore = std::path::Path::new(".gitignore").exists();
        if has_gitignore {
            std::fs::rename(".gitignore", ".gitignore.backup").unwrap();
        }

        let snippet = Agent::<MockUiWriter>::get_gitignore_prompt_snippet();

        // Restore .gitignore
        if has_gitignore {
            std::fs::rename(".gitignore.backup", ".gitignore").unwrap();
        }

        assert_eq!(snippet, "");
    }
}
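These tests pin down the contract of `Agent::get_gitignore_prompt_snippet`: an empty string when no `.gitignore` exists, otherwise a note containing "IMPORTANT", the word ".gitignore", and the non-comment patterns. A standalone sketch consistent with those assertions (the real implementation is presumably in the suppressed g3-core diff below):

use std::fs;
use std::path::Path;

// Sketch only; mirrors what the tests above assert rather than the actual code.
fn gitignore_prompt_snippet() -> String {
    if !Path::new(".gitignore").exists() {
        return String::new(); // tests expect "" when no .gitignore is present
    }
    let contents = fs::read_to_string(".gitignore").unwrap_or_default();
    let patterns: Vec<&str> = contents
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .collect();
    // The snippet must mention IMPORTANT, .gitignore, and each pattern.
    format!(
        "IMPORTANT: this project has a .gitignore; treat these paths as ignored:\n{}",
        patterns.join("\n")
    )
}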

File diff suppressed because it is too large.


@@ -276,7 +276,6 @@ impl AnthropicProvider {
         let mut partial_tool_json = String::new(); // Accumulate partial JSON for tool calls
         let mut accumulated_usage: Option<Usage> = None;
         let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
-        let mut actual_completion_tokens: u32 = 0; // Track actual completion tokens

         while let Some(chunk_result) = stream.next().await {
             match chunk_result {
@@ -324,12 +323,7 @@ impl AnthropicProvider {
                     let final_chunk = CompletionChunk {
                         content: String::new(),
                         finished: true,
-                        usage: accumulated_usage.as_ref().map(|u| Usage {
-                            prompt_tokens: u.prompt_tokens,
-                            // Use actual completion tokens if we tracked them, otherwise use the estimate
-                            completion_tokens: if actual_completion_tokens > 0 { actual_completion_tokens } else { u.completion_tokens },
-                            total_tokens: u.prompt_tokens + if actual_completion_tokens > 0 { actual_completion_tokens } else { u.completion_tokens },
-                        }),
+                        usage: accumulated_usage.clone(),
                         tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
                     };
                     if tx.send(Ok(final_chunk)).await.is_err() {
@@ -343,7 +337,6 @@ impl AnthropicProvider {
                 match serde_json::from_str::<AnthropicStreamEvent>(data) {
                     Ok(event) => {
                         debug!("Parsed event type: {}, event: {:?}", event.event_type, event);
-
                         match event.event_type.as_str() {
                             "message_start" => {
                                 // Extract usage data from message_start event
@@ -354,10 +347,7 @@ impl AnthropicProvider {
                                         completion_tokens: usage.output_tokens,
                                         total_tokens: usage.input_tokens + usage.output_tokens,
                                     });
-                                    debug!("Captured initial usage from message_start - prompt: {}, completion: {} (estimated), total: {}",
-                                        usage.input_tokens,
-                                        usage.output_tokens,
-                                        usage.input_tokens + usage.output_tokens);
+                                    debug!("Captured usage from message_start: {:?}", accumulated_usage);
                                 }
                             }
                         }
@@ -406,9 +396,6 @@ impl AnthropicProvider {
                             "content_block_delta" => {
                                 if let Some(delta) = event.delta {
                                     if let Some(text) = delta.text {
-                                        // Track actual completion tokens (rough estimate: 4 chars per token)
-                                        actual_completion_tokens += (text.len() as f32 / 4.0).ceil() as u32;
-
                                         debug!("Sending text chunk of length {}: '{}'", text.len(), text);
                                         let chunk = CompletionChunk {
                                             content: text,
@@ -429,19 +416,6 @@ impl AnthropicProvider {
                                     }
                                 }
                             }
-                            "message_delta" => {
-                                // Check if message_delta contains updated usage data
-                                if let Some(delta) = event.delta {
-                                    if let Some(usage) = delta.usage {
-                                        accumulated_usage = Some(Usage {
-                                            prompt_tokens: usage.input_tokens,
-                                            completion_tokens: usage.output_tokens,
-                                            total_tokens: usage.input_tokens + usage.output_tokens,
-                                        });
-                                        debug!("Updated usage from message_delta - prompt: {}, completion: {}, total: {}", usage.input_tokens, usage.output_tokens, usage.input_tokens + usage.output_tokens);
-                                    }
-                                }
-                            }
                             "content_block_stop" => {
                                 // Tool call block is complete - now parse the accumulated JSON
                                 if !current_tool_calls.is_empty() && !partial_tool_json.is_empty() {
@@ -476,44 +450,11 @@ impl AnthropicProvider {
                                 }
                             }
                             "message_stop" => {
-                                debug!("Received message_stop event: {:?}", event);
-
-                                // Check if message_stop contains final usage data
-                                if let Some(message) = event.message {
-                                    if let Some(usage) = message.usage {
-                                        // Update with final accurate usage data from message_stop
-                                        // This should have the actual completion token count
-                                        accumulated_usage = Some(Usage {
-                                            prompt_tokens: usage.input_tokens,
-                                            // Prefer the actual output_tokens from message_stop if available
-                                            // Otherwise use our tracked count, and as last resort the initial estimate
-                                            completion_tokens: if usage.output_tokens > 0 {
-                                                usage.output_tokens
-                                            } else if actual_completion_tokens > 0 {
-                                                actual_completion_tokens
-                                            } else { usage.output_tokens },
-                                            total_tokens: usage.input_tokens + usage.output_tokens,
-                                        });
-                                        debug!("Updated with final usage from message_stop - prompt: {}, completion: {}, total: {}",
-                                            usage.input_tokens,
-                                            usage.output_tokens,
-                                            usage.input_tokens + usage.output_tokens);
-                                    }
-                                }
-
+                                debug!("Received message stop event");
                                 let final_chunk = CompletionChunk {
                                     content: String::new(),
                                     finished: true,
-                                    usage: accumulated_usage.as_ref().map(|u| Usage {
-                                        prompt_tokens: u.prompt_tokens,
-                                        // Use actual completion tokens if we tracked them and they're higher
-                                        completion_tokens: if actual_completion_tokens > u.completion_tokens {
-                                            actual_completion_tokens
-                                        } else {
-                                            u.completion_tokens
-                                        },
-                                        total_tokens: u.prompt_tokens + u32::max(actual_completion_tokens, u.completion_tokens),
-                                    }),
+                                    usage: accumulated_usage.clone(),
                                     tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
                                 };
                                 if tx.send(Ok(final_chunk)).await.is_err() {
@@ -555,27 +496,10 @@ impl AnthropicProvider {
         let final_chunk = CompletionChunk {
             content: String::new(),
             finished: true,
-            usage: accumulated_usage.as_ref().map(|u| Usage {
-                prompt_tokens: u.prompt_tokens,
-                completion_tokens: if actual_completion_tokens > u.completion_tokens {
-                    actual_completion_tokens
-                } else {
-                    u.completion_tokens
-                },
-                total_tokens: u.prompt_tokens + u32::max(actual_completion_tokens, u.completion_tokens),
-            }),
+            usage: accumulated_usage.clone(),
             tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls) },
         };
         let _ = tx.send(Ok(final_chunk)).await;
-
-        // Log final usage for debugging
-        if let Some(ref usage) = accumulated_usage {
-            info!("Anthropic stream completed with final usage - prompt: {}, completion: {}, total: {}",
-                usage.prompt_tokens, usage.completion_tokens, usage.total_tokens);
-        } else {
-            warn!("Anthropic stream completed without usage data - token accounting will fall back to estimation");
-        }

         accumulated_usage
     }
 }
@@ -813,8 +737,6 @@ struct AnthropicStreamMessage {
 struct AnthropicDelta {
     text: Option<String>,
     partial_json: Option<String>,
-    #[serde(default)]
-    usage: Option<AnthropicUsage>,
 }

 #[derive(Debug, Deserialize)]
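Net effect of this change: the provider stops re-estimating completion tokens locally (roughly 4 characters per token), deletes the `message_delta`/`message_stop` usage-merging paths and the `AnthropicDelta.usage` field, and simply propagates the `Usage` captured once from `message_start`. A before/after sketch of the final-chunk usage field (struct fields taken from this diff, everything else assumed):

#[derive(Clone, Debug)]
struct Usage {
    prompt_tokens: u32,
    completion_tokens: u32,
    total_tokens: u32,
}

// Before: prefer a local chars-per-token estimate when it exceeds the API value.
fn final_usage_before(accumulated: &Option<Usage>, estimated_completion: u32) -> Option<Usage> {
    accumulated.as_ref().map(|u| Usage {
        prompt_tokens: u.prompt_tokens,
        completion_tokens: u32::max(estimated_completion, u.completion_tokens),
        total_tokens: u.prompt_tokens + u32::max(estimated_completion, u.completion_tokens),
    })
}

// After: propagate the provider-reported usage unchanged.
fn final_usage_after(accumulated: &Option<Usage>) -> Option<Usage> {
    accumulated.clone()
}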


@@ -1,164 +0,0 @@
#!/usr/bin/env python3
"""
Test script to verify token accounting is working correctly with the Anthropic provider.
This script will send multiple messages and verify that token counts accumulate properly.
"""

import subprocess
import json
import re
import sys
import time


def run_g3_command(prompt, provider="anthropic"):
    """Run a g3 command and capture the output."""
    cmd = [
        "cargo", "run", "--release", "--",
        "--provider", provider,
        prompt
    ]
    env = {
        "RUST_LOG": "g3_providers=debug,g3_core=info",
        "RUST_BACKTRACE": "1"
    }
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        env={**subprocess.os.environ, **env}
    )
    return result.stdout + result.stderr


def extract_token_info(output):
    """Extract token usage information from the output."""
    token_info = {}

    # Look for token usage updates
    usage_pattern = r"Updated token usage.*was: (\d+), now: (\d+).*prompt=(\d+), completion=(\d+), total=(\d+)"
    matches = re.findall(usage_pattern, output)
    if matches:
        last_match = matches[-1]
        token_info['was'] = int(last_match[0])
        token_info['now'] = int(last_match[1])
        token_info['prompt'] = int(last_match[2])
        token_info['completion'] = int(last_match[3])
        token_info['total'] = int(last_match[4])

    # Look for context percentage
    context_pattern = r"Context usage at (\d+)%.*\((\d+)/(\d+) tokens\)"
    matches = re.findall(context_pattern, output)
    if matches:
        last_match = matches[-1]
        token_info['percentage'] = int(last_match[0])
        token_info['used'] = int(last_match[1])
        token_info['total_context'] = int(last_match[2])

    # Look for thinning triggers
    thinning_pattern = r"Context thinning triggered.*usage: (\d+)%.*\((\d+)/(\d+) tokens\)"
    matches = re.findall(thinning_pattern, output)
    if matches:
        token_info['thinning_triggered'] = True
        token_info['thinning_percentage'] = int(matches[-1][0])

    # Look for final usage from Anthropic
    final_usage_pattern = r"Anthropic stream completed with final usage.*prompt: (\d+), completion: (\d+), total: (\d+)"
    matches = re.findall(final_usage_pattern, output)
    if matches:
        last_match = matches[-1]
        token_info['final_prompt'] = int(last_match[0])
        token_info['final_completion'] = int(last_match[1])
        token_info['final_total'] = int(last_match[2])

    return token_info


def main():
    print("Testing Anthropic Provider Token Accounting")
    print("="*50)

    # Build the project first
    print("Building project...")
    subprocess.run(["cargo", "build", "--release"], capture_output=True)

    # Test 1: Simple prompt
    print("\nTest 1: Simple prompt")
    print("-"*30)
    output = run_g3_command("Say 'Hello, World!' and nothing else.")
    tokens = extract_token_info(output)
    if tokens:
        print(f"Token usage: {tokens.get('now', 'N/A')} tokens")
        print(f" Prompt tokens: {tokens.get('prompt', 'N/A')}")
        print(f" Completion tokens: {tokens.get('completion', 'N/A')}")
        print(f" Total from provider: {tokens.get('total', 'N/A')}")
        if 'final_total' in tokens:
            print(f" Final total from stream: {tokens['final_total']}")
            if tokens.get('now') != tokens['final_total']:
                print(f" ⚠️ WARNING: Mismatch between tracked ({tokens.get('now')}) and final ({tokens['final_total']})")
        # Check if the completion tokens are reasonable (should be small for "Hello, World!")
        if tokens.get('completion', 0) > 50:
            print(f" ⚠️ WARNING: Completion tokens seem high for a simple response: {tokens.get('completion')}")
    else:
        print(" ❌ No token information found in output")

    # Test 2: Longer response
    print("\nTest 2: Longer response")
    print("-"*30)
    output = run_g3_command("Write a 3-paragraph essay about the importance of accurate token counting in LLM applications.")
    tokens = extract_token_info(output)
    if tokens:
        print(f"Token usage: {tokens.get('now', 'N/A')} tokens")
        print(f" Prompt tokens: {tokens.get('prompt', 'N/A')}")
        print(f" Completion tokens: {tokens.get('completion', 'N/A')}")
        print(f" Total from provider: {tokens.get('total', 'N/A')}")
        if 'final_total' in tokens:
            print(f" Final total from stream: {tokens['final_total']}")
            if tokens.get('now') != tokens['final_total']:
                print(f" ⚠️ WARNING: Mismatch between tracked ({tokens.get('now')}) and final ({tokens['final_total']})")
        # Check if completion tokens are reasonable for a longer response
        if tokens.get('completion', 0) < 100:
            print(f" ⚠️ WARNING: Completion tokens seem low for a 3-paragraph essay: {tokens.get('completion')}")
    else:
        print(" ❌ No token information found in output")

    # Test 3: Check for proper accumulation
    print("\nTest 3: Token accumulation (multiple messages)")
    print("-"*30)

    # First message
    output1 = run_g3_command("Count from 1 to 5.")
    tokens1 = extract_token_info(output1)

    # Second message (this would need to be in a conversation, but for now we test separately)
    output2 = run_g3_command("Now count from 6 to 10.")
    tokens2 = extract_token_info(output2)

    if tokens1 and tokens2:
        print(f"First message: {tokens1.get('now', 'N/A')} tokens")
        print(f"Second message: {tokens2.get('now', 'N/A')} tokens")
        # In a real conversation, tokens2['now'] should be greater than tokens1['now']
        # But since these are separate invocations, we just check they're both reasonable
        if tokens1.get('now', 0) > 0 and tokens2.get('now', 0) > 0:
            print(" ✅ Both messages have token counts")
        else:
            print(" ❌ Missing token counts")

    print("\n" + "="*50)
    print("Test Summary:")
    print("Check the output above for any warnings or errors.")
    print("Key things to verify:")
    print(" 1. Token counts are being captured from the provider")
    print(" 2. Completion tokens are reasonable for the response length")
    print(" 3. No mismatch between tracked and final token counts")
    print(" 4. Context thinning triggers at appropriate thresholds")


if __name__ == "__main__":
    main()


@@ -1,46 +0,0 @@
#!/bin/bash
# Test script to verify token accounting with Anthropic provider
echo "Testing token accounting with Anthropic provider..."
echo "This test will send a few messages and check if token counts are properly tracked."
echo ""
# Set up environment for testing
export RUST_LOG=g3_providers=debug,g3_core=info
export RUST_BACKTRACE=1
# Build the project first
echo "Building project..."
cargo build --release 2>&1 | grep -E "(Compiling|Finished)" || true
echo ""
echo "Running test with Anthropic provider..."
echo "Watch for these log messages:"
echo " - 'Captured initial usage from message_start'"
echo " - 'Updated usage from message_delta' (if available)"
echo " - 'Updated with final usage from message_stop' (if available)"
echo " - 'Anthropic stream completed with final usage'"
echo " - 'Updated token usage from provider'"
echo " - 'Context thinning triggered' (when reaching thresholds)"
echo ""
# Create a simple test that will generate some tokens
cat << 'EOF' > /tmp/test_prompt.txt
Please write a short paragraph about the importance of accurate token counting in LLM applications. Then list 3 reasons why token accounting might fail.
EOF
# Run the test
echo "Sending test prompt..."
cargo run --release -- --provider anthropic "$(cat /tmp/test_prompt.txt)" 2>&1 | tee /tmp/token_test.log
echo ""
echo "Analyzing results..."
echo ""
# Check for token accounting messages
echo "Token accounting messages found:"
grep -E "(usage from|token usage|Context thinning|Context usage)" /tmp/token_test.log | head -20
echo ""
echo "Test complete. Check /tmp/token_test.log for full output."