Compare commits
7 Commits
micn/alway
...
micn/fix-a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a457d46446 | ||
|
|
7c2c433746 | ||
|
|
98f4220544 | ||
|
|
a4476a555c | ||
|
|
b3d18d02ea | ||
|
|
442ca76cd6 | ||
|
|
738c3ac53e |
@@ -1,5 +0,0 @@
|
||||
[target.aarch64-apple-darwin]
|
||||
rustflags = ["-C", "link-args=-Wl,-rpath,@executable_path"]
|
||||
|
||||
[target.x86_64-apple-darwin]
|
||||
rustflags = ["-C", "link-args=-Wl,-rpath,@executable_path"]
|
||||
12
Cargo.lock
generated
12
Cargo.lock
generated
@@ -990,7 +990,7 @@ dependencies = [
|
||||
"libc",
|
||||
"option-ext",
|
||||
"redox_users 0.5.2",
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1062,7 +1062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2333,7 +2333,7 @@ version = "0.50.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
||||
dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2904,7 +2904,7 @@ dependencies = [
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.11.0",
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3292,7 +3292,7 @@ dependencies = [
|
||||
"getrandom 0.3.4",
|
||||
"once_cell",
|
||||
"rustix 1.1.2",
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3935,7 +3935,7 @@ version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||
dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
42
README.md
42
README.md
@@ -132,50 +132,12 @@ G3 is designed for:
|
||||
|
||||
## Getting Started
|
||||
|
||||
### Default Mode: Accumulative Autonomous
|
||||
|
||||
The default interactive mode now uses **accumulative autonomous mode**, which combines the best of interactive and autonomous workflows:
|
||||
|
||||
```bash
|
||||
# Simply run g3 in any directory
|
||||
g3
|
||||
|
||||
# You'll be prompted to describe what you want to build
|
||||
# Each input you provide:
|
||||
# 1. Gets added to accumulated requirements
|
||||
# 2. Automatically triggers autonomous mode (coach-player loop)
|
||||
# 3. Implements your requirements iteratively
|
||||
|
||||
# Example session:
|
||||
requirement> create a simple web server in Python with Flask
|
||||
# ... autonomous mode runs and implements it ...
|
||||
requirement> add a /health endpoint that returns JSON
|
||||
# ... autonomous mode runs again with both requirements ...
|
||||
```
|
||||
|
||||
### Other Modes
|
||||
|
||||
```bash
|
||||
# Single-shot mode (one task, then exit)
|
||||
g3 "implement a function to calculate fibonacci numbers"
|
||||
|
||||
# Traditional autonomous mode (reads requirements.md)
|
||||
g3 --autonomous
|
||||
|
||||
# Traditional chat mode (simple interactive chat without autonomous runs)
|
||||
g3 --chat
|
||||
```
|
||||
|
||||
```bash
|
||||
# Build the project
|
||||
cargo build --release
|
||||
|
||||
# Run from the build directory
|
||||
./target/release/g3
|
||||
|
||||
# Or copy both files to somewhere in your PATH (macOS only needs both files)
|
||||
cp target/release/g3 ~/.local/bin/
|
||||
cp target/release/libVisionBridge.dylib ~/.local/bin/ # macOS only
|
||||
# Run G3
|
||||
cargo run
|
||||
|
||||
# Execute a task
|
||||
g3 "implement a function to calculate fibonacci numbers"
|
||||
|
||||
@@ -174,7 +174,7 @@ mod machine_ui_writer;
|
||||
use machine_ui_writer::MachineUiWriter;
|
||||
use ui_writer_impl::ConsoleUiWriter;
|
||||
|
||||
#[derive(Parser, Clone)]
|
||||
#[derive(Parser)]
|
||||
#[command(name = "g3")]
|
||||
#[command(about = "A modular, composable AI coding agent")]
|
||||
#[command(version)]
|
||||
@@ -214,9 +214,9 @@ pub struct Cli {
|
||||
#[arg(long, value_name = "TEXT")]
|
||||
pub requirements: Option<String>,
|
||||
|
||||
/// Enable accumulative autonomous mode (default is chat mode)
|
||||
/// Interactive mode: prompt for requirements and save to requirements.md before starting autonomous mode
|
||||
#[arg(long)]
|
||||
pub auto: bool,
|
||||
pub interactive_requirements: bool,
|
||||
|
||||
/// Enable machine-friendly output mode with JSON markers and stats
|
||||
#[arg(long)]
|
||||
@@ -309,6 +309,112 @@ pub async fn run() -> Result<()> {
|
||||
|
||||
// Create project model
|
||||
let project = if cli.autonomous {
|
||||
// Handle interactive requirements mode with AI enhancement
|
||||
if cli.interactive_requirements {
|
||||
println!("\n📝 Interactive Requirements Mode");
|
||||
println!("================================\n");
|
||||
println!("Describe what you want to build (can be brief):");
|
||||
println!("Press Ctrl+D (Unix) or Ctrl+Z (Windows) when done.\n");
|
||||
|
||||
use std::io::{self, Read, Write};
|
||||
let mut requirements_input = String::new();
|
||||
io::stdin().read_to_string(&mut requirements_input)?;
|
||||
|
||||
if requirements_input.trim().is_empty() {
|
||||
anyhow::bail!("No requirements provided. Exiting.");
|
||||
}
|
||||
|
||||
println!("\n🤖 Enhancing your requirements with AI...\n");
|
||||
|
||||
// Create a temporary agent to enhance the requirements
|
||||
let temp_config = Config::load_with_overrides(
|
||||
cli.config.as_deref(),
|
||||
cli.provider.clone(),
|
||||
cli.model.clone(),
|
||||
)?;
|
||||
|
||||
let ui_writer = ConsoleUiWriter::new();
|
||||
let mut temp_agent = Agent::new_with_readme_and_quiet(
|
||||
temp_config,
|
||||
ui_writer,
|
||||
None,
|
||||
true, // quiet mode
|
||||
).await?;
|
||||
|
||||
// Craft the enhancement prompt
|
||||
let enhancement_prompt = format!(
|
||||
r#"You are a requirements analyst. Take this brief user input and expand it into a structured requirements document.
|
||||
|
||||
USER INPUT:
|
||||
{}
|
||||
|
||||
Create a professional requirements document with:
|
||||
1. A clear project title (# heading)
|
||||
2. An overview section explaining what will be built
|
||||
3. Organized requirements (functional, technical, quality)
|
||||
4. Acceptance criteria
|
||||
5. Any technical constraints or preferences mentioned
|
||||
|
||||
Format as proper markdown. Be specific and actionable. If the user's input is vague, make reasonable assumptions but keep it focused on what they described.
|
||||
|
||||
Output ONLY the markdown content, no explanations or meta-commentary."#,
|
||||
requirements_input.trim()
|
||||
);
|
||||
|
||||
// Execute enhancement task
|
||||
let result = temp_agent
|
||||
.execute_task_with_timing(&enhancement_prompt, None, false, false, false, false)
|
||||
.await?;
|
||||
|
||||
let enhanced_requirements = result.response.trim().to_string();
|
||||
|
||||
// Show the enhanced requirements
|
||||
println!("\n📋 Enhanced Requirements Document:");
|
||||
println!("{}\n", "=".repeat(60));
|
||||
println!("{}", enhanced_requirements);
|
||||
println!("{}\n", "=".repeat(60));
|
||||
|
||||
// Ask for confirmation
|
||||
println!("\n❓ Is this requirements document acceptable?");
|
||||
println!(" [y] Yes, proceed with autonomous mode");
|
||||
println!(" [e] Edit and save manually");
|
||||
println!(" [n] No, cancel\n");
|
||||
|
||||
print!("Your choice (y/e/n): ");
|
||||
io::stdout().flush()?;
|
||||
|
||||
let mut choice = String::new();
|
||||
io::stdin().read_line(&mut choice)?;
|
||||
let choice = choice.trim().to_lowercase();
|
||||
|
||||
let requirements_path = workspace_dir.join("requirements.md");
|
||||
|
||||
match choice.as_str() {
|
||||
"y" | "yes" => {
|
||||
// Save enhanced requirements
|
||||
std::fs::write(&requirements_path, &enhanced_requirements)?;
|
||||
println!("\n✅ Requirements saved to: {}", requirements_path.display());
|
||||
println!("🚀 Starting autonomous mode...\n");
|
||||
}
|
||||
"e" | "edit" => {
|
||||
// Save enhanced requirements for manual editing
|
||||
std::fs::write(&requirements_path, &enhanced_requirements)?;
|
||||
println!("\n✅ Requirements saved to: {}", requirements_path.display());
|
||||
println!("📝 Please edit the file and run: g3 --autonomous");
|
||||
println!(" Exiting for now.\n");
|
||||
return Ok(());
|
||||
}
|
||||
"n" | "no" => {
|
||||
println!("\n❌ Cancelled. No files were saved.\n");
|
||||
return Ok(());
|
||||
}
|
||||
_ => {
|
||||
println!("\n❌ Invalid choice. Cancelled.\n");
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(requirements_text) = &cli.requirements {
|
||||
// Use requirements text override
|
||||
Project::new_autonomous_with_requirements(workspace_dir.clone(), requirements_text.clone())?
|
||||
@@ -376,7 +482,6 @@ pub async fn run() -> Result<()> {
|
||||
// Execute task, autonomous mode, or start interactive mode based on machine mode
|
||||
if cli.machine {
|
||||
// Machine mode - use MachineUiWriter
|
||||
|
||||
let ui_writer = MachineUiWriter::new();
|
||||
|
||||
let agent = if cli.autonomous {
|
||||
@@ -400,20 +505,6 @@ pub async fn run() -> Result<()> {
|
||||
run_with_machine_mode(agent, cli, project).await?;
|
||||
} else {
|
||||
// Normal mode - use ConsoleUiWriter
|
||||
|
||||
// DEFAULT: Chat mode for interactive sessions
|
||||
// It runs when:
|
||||
// 1. No task is provided (not single-shot)
|
||||
// 2. Not in autonomous mode
|
||||
// 3. Not explicitly enabled with --auto flag
|
||||
let use_accumulative = cli.task.is_none() && !cli.autonomous && cli.auto;
|
||||
|
||||
if use_accumulative {
|
||||
// Run accumulative mode and return early
|
||||
run_accumulative_mode(workspace_dir.clone(), cli.clone(), combined_content.clone()).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let ui_writer = ConsoleUiWriter::new();
|
||||
|
||||
let agent = if cli.autonomous {
|
||||
@@ -440,273 +531,6 @@ pub async fn run() -> Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Accumulative autonomous mode: accumulates requirements from user input
|
||||
/// and runs autonomous mode after each input
|
||||
async fn run_accumulative_mode(
|
||||
workspace_dir: PathBuf,
|
||||
cli: Cli,
|
||||
combined_content: Option<String>,
|
||||
) -> Result<()> {
|
||||
let output = SimpleOutput::new();
|
||||
|
||||
output.print("");
|
||||
output.print("🪿 G3 AI Coding Agent - Autonomous Mode");
|
||||
output.print(" >> describe what you want, I'll build it iteratively");
|
||||
output.print("");
|
||||
output.print(&format!("📁 Workspace: {}", workspace_dir.display()));
|
||||
output.print("");
|
||||
output.print("💡 Each input you provide will be added to requirements");
|
||||
output.print(" and I'll automatically work on implementing them. You can");
|
||||
output.print(" interrupt at any time (Ctrl+C) to add clarifications or more requirements.");
|
||||
output.print("");
|
||||
output.print(" Type '/help' for commands, 'exit' or 'quit' to stop, Ctrl+D to finish");
|
||||
output.print("");
|
||||
|
||||
// Initialize rustyline editor with history
|
||||
let mut rl = DefaultEditor::new()?;
|
||||
let history_file = dirs::home_dir().map(|mut path| {
|
||||
path.push(".g3_accumulative_history");
|
||||
path
|
||||
});
|
||||
|
||||
if let Some(ref history_path) = history_file {
|
||||
let _ = rl.load_history(history_path);
|
||||
}
|
||||
|
||||
// Accumulated requirements stored in memory
|
||||
let mut accumulated_requirements = Vec::new();
|
||||
let mut turn_number = 0;
|
||||
|
||||
loop {
|
||||
output.print(&format!("\n{}", "=".repeat(60)));
|
||||
if accumulated_requirements.is_empty() {
|
||||
output.print("📝 What would you like me to build? (describe your requirements)");
|
||||
} else {
|
||||
output.print(&format!("📝 Turn {} - What's next? (add more requirements or refinements)", turn_number + 1));
|
||||
}
|
||||
output.print(&format!("{}", "=".repeat(60)));
|
||||
|
||||
let readline = rl.readline("requirement> ");
|
||||
match readline {
|
||||
Ok(line) => {
|
||||
let input = line.trim().to_string();
|
||||
|
||||
if input.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if input == "exit" || input == "quit" {
|
||||
output.print("\n👋 Goodbye!");
|
||||
break;
|
||||
}
|
||||
|
||||
// Check for slash commands
|
||||
if input.starts_with('/') {
|
||||
match input.as_str() {
|
||||
"/help" => {
|
||||
output.print("");
|
||||
output.print("📖 Available Commands:");
|
||||
output.print(" /requirements - Show all accumulated requirements");
|
||||
output.print(" /chat - Switch to interactive chat mode");
|
||||
output.print(" /help - Show this help message");
|
||||
output.print(" exit/quit - Exit the session");
|
||||
output.print("");
|
||||
continue;
|
||||
}
|
||||
"/requirements" => {
|
||||
output.print("");
|
||||
if accumulated_requirements.is_empty() {
|
||||
output.print("📋 No requirements accumulated yet");
|
||||
} else {
|
||||
output.print("📋 Accumulated Requirements:");
|
||||
output.print("");
|
||||
for req in &accumulated_requirements {
|
||||
output.print(&format!(" {}", req));
|
||||
}
|
||||
}
|
||||
output.print("");
|
||||
continue;
|
||||
}
|
||||
"/chat" => {
|
||||
output.print("");
|
||||
output.print("🔄 Switching to interactive chat mode...");
|
||||
output.print("");
|
||||
|
||||
// Build context message with accumulated requirements
|
||||
let requirements_context = if accumulated_requirements.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(format!(
|
||||
"📋 Context from Accumulative Mode:\n\n\
|
||||
We were working on these requirements. There may be unstaged or in-progress changes or recent changes to this branch. This is for your information.\n\n\
|
||||
Requirements:\n{}\n",
|
||||
accumulated_requirements.join("\n")
|
||||
))
|
||||
};
|
||||
|
||||
// Combine with existing content (README/AGENTS.md)
|
||||
let chat_combined_content = match (requirements_context, combined_content.clone()) {
|
||||
(Some(req_ctx), Some(existing)) => Some(format!("{}\n\n{}", req_ctx, existing)),
|
||||
(Some(req_ctx), None) => Some(req_ctx),
|
||||
(None, existing) => existing,
|
||||
};
|
||||
|
||||
// Load configuration
|
||||
let mut config = Config::load_with_overrides(
|
||||
cli.config.as_deref(),
|
||||
cli.provider.clone(),
|
||||
cli.model.clone(),
|
||||
)?;
|
||||
|
||||
// Apply macax flag override
|
||||
if cli.macax {
|
||||
config.macax.enabled = true;
|
||||
}
|
||||
|
||||
// Apply webdriver flag override
|
||||
if cli.webdriver {
|
||||
config.webdriver.enabled = true;
|
||||
}
|
||||
|
||||
// Create agent for interactive mode with requirements context
|
||||
let ui_writer = ConsoleUiWriter::new();
|
||||
let agent = Agent::new_with_readme_and_quiet(
|
||||
config,
|
||||
ui_writer,
|
||||
chat_combined_content.clone(),
|
||||
cli.quiet,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Run interactive mode
|
||||
run_interactive(agent, cli.show_prompt, cli.show_code, chat_combined_content).await?;
|
||||
|
||||
// After returning from interactive mode, exit
|
||||
output.print("\n👋 Goodbye!");
|
||||
break;
|
||||
}
|
||||
_ => {
|
||||
output.print(&format!("❌ Unknown command: {}. Type /help for available commands.", input));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add to history
|
||||
rl.add_history_entry(&input)?;
|
||||
|
||||
// Add this requirement to accumulated list
|
||||
turn_number += 1;
|
||||
accumulated_requirements.push(format!("{}. {}", turn_number, input));
|
||||
|
||||
// Build the complete requirements document
|
||||
let requirements_doc = format!(
|
||||
"# Project Requirements\n\n\
|
||||
## Current Instructions and Requirements:\n\n\
|
||||
{}\n\n\
|
||||
## Latest Requirement (Turn {}):\n\n\
|
||||
{}",
|
||||
accumulated_requirements.join("\n"),
|
||||
turn_number,
|
||||
input
|
||||
);
|
||||
|
||||
output.print("");
|
||||
output.print(&format!("📋 Current instructions and requirements (Turn {}):", turn_number));
|
||||
output.print(&format!(" {}", input));
|
||||
output.print("");
|
||||
output.print("🚀 Starting autonomous implementation...");
|
||||
output.print("");
|
||||
|
||||
// Create a project with the accumulated requirements
|
||||
let project = Project::new_autonomous_with_requirements(
|
||||
workspace_dir.clone(),
|
||||
requirements_doc.clone()
|
||||
)?;
|
||||
|
||||
// Ensure workspace exists and enter it
|
||||
project.ensure_workspace_exists()?;
|
||||
project.enter_workspace()?;
|
||||
|
||||
// Load configuration with CLI overrides
|
||||
let mut config = Config::load_with_overrides(
|
||||
cli.config.as_deref(),
|
||||
cli.provider.clone(),
|
||||
cli.model.clone(),
|
||||
)?;
|
||||
|
||||
// Apply macax flag override
|
||||
if cli.macax {
|
||||
config.macax.enabled = true;
|
||||
}
|
||||
|
||||
// Apply webdriver flag override
|
||||
if cli.webdriver {
|
||||
config.webdriver.enabled = true;
|
||||
}
|
||||
|
||||
// Create agent for this autonomous run
|
||||
let ui_writer = ConsoleUiWriter::new();
|
||||
let agent = Agent::new_autonomous_with_readme_and_quiet(
|
||||
config.clone(),
|
||||
ui_writer,
|
||||
combined_content.clone(),
|
||||
cli.quiet,
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Run autonomous mode with the accumulated requirements
|
||||
let autonomous_result = tokio::select! {
|
||||
result = run_autonomous(
|
||||
agent,
|
||||
project,
|
||||
cli.show_prompt,
|
||||
cli.show_code,
|
||||
cli.max_turns,
|
||||
cli.quiet,
|
||||
) => result,
|
||||
_ = tokio::signal::ctrl_c() => {
|
||||
output.print("\n⚠️ Autonomous run cancelled by user (Ctrl+C)");
|
||||
Ok(())
|
||||
}
|
||||
};
|
||||
|
||||
match autonomous_result
|
||||
{
|
||||
Ok(_) => {
|
||||
output.print("");
|
||||
output.print("✅ Autonomous run completed");
|
||||
}
|
||||
Err(e) => {
|
||||
output.print("");
|
||||
output.print(&format!("❌ Autonomous run failed: {}", e));
|
||||
output.print(" You can provide more requirements to continue.");
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(ReadlineError::Interrupted) => {
|
||||
output.print("\n👋 Interrupted. Goodbye!");
|
||||
break;
|
||||
}
|
||||
Err(ReadlineError::Eof) => {
|
||||
output.print("\n👋 Goodbye!");
|
||||
break;
|
||||
}
|
||||
Err(err) => {
|
||||
error!("Error: {:?}", err);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Save history before exiting
|
||||
if let Some(ref history_path) = history_file {
|
||||
let _ = rl.save_history(history_path);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Simplified machine mode version of autonomous mode
|
||||
async fn run_autonomous_machine(
|
||||
mut agent: Agent<MachineUiWriter>,
|
||||
|
||||
@@ -325,10 +325,19 @@ impl ContextWindow {
|
||||
|
||||
/// Update token usage from provider response
|
||||
pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
|
||||
// Add the tokens from this response to our running total
|
||||
// The usage.total_tokens represents tokens used in this single API call
|
||||
self.used_tokens += usage.total_tokens;
|
||||
self.cumulative_tokens += usage.total_tokens;
|
||||
// Always use the provider's count as the authoritative value
|
||||
// The provider knows best how many tokens were actually used
|
||||
|
||||
let old_used = self.used_tokens;
|
||||
|
||||
// Use the provider's total as the current used tokens
|
||||
self.used_tokens = usage.total_tokens;
|
||||
self.cumulative_tokens += usage.total_tokens - old_used;
|
||||
|
||||
info!(
|
||||
"Updated token usage from provider - was: {}, now: {} (prompt={}, completion={}, total={})",
|
||||
old_used, self.used_tokens, usage.prompt_tokens, usage.completion_tokens, usage.total_tokens
|
||||
);
|
||||
|
||||
debug!(
|
||||
"Added {} tokens from provider response (used: {}/{}, cumulative: {})",
|
||||
@@ -445,8 +454,18 @@ Format this as a detailed but concise summary that can be used to resume the con
|
||||
if current_percentage >= 50 {
|
||||
let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10%
|
||||
if current_threshold > self.last_thinning_percentage && current_threshold <= 80 {
|
||||
info!(
|
||||
"Context thinning triggered - usage: {}% ({}/{} tokens), threshold: {}%, last thinned at: {}%",
|
||||
current_percentage,
|
||||
self.used_tokens,
|
||||
self.total_tokens,
|
||||
current_threshold,
|
||||
self.last_thinning_percentage
|
||||
);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
debug!("Context usage at {}% ({}/{} tokens) - no thinning needed", current_percentage, self.used_tokens, self.total_tokens);
|
||||
}
|
||||
|
||||
false
|
||||
@@ -2675,7 +2694,12 @@ Template:
|
||||
|
||||
// Display tool execution result with proper indentation
|
||||
if tool_call.tool != "final_output" {
|
||||
let output_lines: Vec<&str> = tool_result.lines().collect();
|
||||
// Skip displaying output for shell tool since it was already streamed
|
||||
let should_display_output = tool_call.tool != "shell";
|
||||
|
||||
let output_lines: Vec<&str> = if should_display_output {
|
||||
tool_result.lines().collect()
|
||||
} else { vec![] };
|
||||
|
||||
// Check if UI wants full output (machine mode) or truncated (human mode)
|
||||
let wants_full = self.ui_writer.wants_full_output();
|
||||
@@ -3187,13 +3211,16 @@ Template:
|
||||
{
|
||||
Ok(result) => {
|
||||
if result.success {
|
||||
Ok(if result.stdout.is_empty() {
|
||||
"✅ Command executed successfully".to_string()
|
||||
// Don't return stdout - it was already streamed to the UI
|
||||
// Returning it would cause duplicate output
|
||||
Ok("✅ Command executed successfully".to_string())
|
||||
} else {
|
||||
result.stdout.trim().to_string()
|
||||
// For errors, return stderr since it wasn't streamed
|
||||
Ok(if result.stderr.is_empty() {
|
||||
"❌ Command failed".to_string()
|
||||
} else {
|
||||
format!("❌ Command failed: {}", result.stderr.trim())
|
||||
})
|
||||
} else {
|
||||
Ok(format!("❌ Command failed: {}", result.stderr.trim()))
|
||||
}
|
||||
}
|
||||
Err(e) => Ok(format!("❌ Execution error: {}", e)),
|
||||
|
||||
@@ -276,6 +276,7 @@ impl AnthropicProvider {
|
||||
let mut partial_tool_json = String::new(); // Accumulate partial JSON for tool calls
|
||||
let mut accumulated_usage: Option<Usage> = None;
|
||||
let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
|
||||
let mut actual_completion_tokens: u32 = 0; // Track actual completion tokens
|
||||
|
||||
while let Some(chunk_result) = stream.next().await {
|
||||
match chunk_result {
|
||||
@@ -323,7 +324,12 @@ impl AnthropicProvider {
|
||||
let final_chunk = CompletionChunk {
|
||||
content: String::new(),
|
||||
finished: true,
|
||||
usage: accumulated_usage.clone(),
|
||||
usage: accumulated_usage.as_ref().map(|u| Usage {
|
||||
prompt_tokens: u.prompt_tokens,
|
||||
// Use actual completion tokens if we tracked them, otherwise use the estimate
|
||||
completion_tokens: if actual_completion_tokens > 0 { actual_completion_tokens } else { u.completion_tokens },
|
||||
total_tokens: u.prompt_tokens + if actual_completion_tokens > 0 { actual_completion_tokens } else { u.completion_tokens },
|
||||
}),
|
||||
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
|
||||
};
|
||||
if tx.send(Ok(final_chunk)).await.is_err() {
|
||||
@@ -337,6 +343,7 @@ impl AnthropicProvider {
|
||||
match serde_json::from_str::<AnthropicStreamEvent>(data) {
|
||||
Ok(event) => {
|
||||
debug!("Parsed event type: {}, event: {:?}", event.event_type, event);
|
||||
|
||||
match event.event_type.as_str() {
|
||||
"message_start" => {
|
||||
// Extract usage data from message_start event
|
||||
@@ -347,7 +354,10 @@ impl AnthropicProvider {
|
||||
completion_tokens: usage.output_tokens,
|
||||
total_tokens: usage.input_tokens + usage.output_tokens,
|
||||
});
|
||||
debug!("Captured usage from message_start: {:?}", accumulated_usage);
|
||||
debug!("Captured initial usage from message_start - prompt: {}, completion: {} (estimated), total: {}",
|
||||
usage.input_tokens,
|
||||
usage.output_tokens,
|
||||
usage.input_tokens + usage.output_tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -396,6 +406,9 @@ impl AnthropicProvider {
|
||||
"content_block_delta" => {
|
||||
if let Some(delta) = event.delta {
|
||||
if let Some(text) = delta.text {
|
||||
// Track actual completion tokens (rough estimate: 4 chars per token)
|
||||
actual_completion_tokens += (text.len() as f32 / 4.0).ceil() as u32;
|
||||
|
||||
debug!("Sending text chunk of length {}: '{}'", text.len(), text);
|
||||
let chunk = CompletionChunk {
|
||||
content: text,
|
||||
@@ -416,6 +429,19 @@ impl AnthropicProvider {
|
||||
}
|
||||
}
|
||||
}
|
||||
"message_delta" => {
|
||||
// Check if message_delta contains updated usage data
|
||||
if let Some(delta) = event.delta {
|
||||
if let Some(usage) = delta.usage {
|
||||
accumulated_usage = Some(Usage {
|
||||
prompt_tokens: usage.input_tokens,
|
||||
completion_tokens: usage.output_tokens,
|
||||
total_tokens: usage.input_tokens + usage.output_tokens,
|
||||
});
|
||||
debug!("Updated usage from message_delta - prompt: {}, completion: {}, total: {}", usage.input_tokens, usage.output_tokens, usage.input_tokens + usage.output_tokens);
|
||||
}
|
||||
}
|
||||
}
|
||||
"content_block_stop" => {
|
||||
// Tool call block is complete - now parse the accumulated JSON
|
||||
if !current_tool_calls.is_empty() && !partial_tool_json.is_empty() {
|
||||
@@ -450,11 +476,44 @@ impl AnthropicProvider {
|
||||
}
|
||||
}
|
||||
"message_stop" => {
|
||||
debug!("Received message stop event");
|
||||
debug!("Received message_stop event: {:?}", event);
|
||||
|
||||
// Check if message_stop contains final usage data
|
||||
if let Some(message) = event.message {
|
||||
if let Some(usage) = message.usage {
|
||||
// Update with final accurate usage data from message_stop
|
||||
// This should have the actual completion token count
|
||||
accumulated_usage = Some(Usage {
|
||||
prompt_tokens: usage.input_tokens,
|
||||
// Prefer the actual output_tokens from message_stop if available
|
||||
// Otherwise use our tracked count, and as last resort the initial estimate
|
||||
completion_tokens: if usage.output_tokens > 0 {
|
||||
usage.output_tokens
|
||||
} else if actual_completion_tokens > 0 {
|
||||
actual_completion_tokens
|
||||
} else { usage.output_tokens },
|
||||
total_tokens: usage.input_tokens + usage.output_tokens,
|
||||
});
|
||||
debug!("Updated with final usage from message_stop - prompt: {}, completion: {}, total: {}",
|
||||
usage.input_tokens,
|
||||
usage.output_tokens,
|
||||
usage.input_tokens + usage.output_tokens);
|
||||
}
|
||||
}
|
||||
|
||||
let final_chunk = CompletionChunk {
|
||||
content: String::new(),
|
||||
finished: true,
|
||||
usage: accumulated_usage.clone(),
|
||||
usage: accumulated_usage.as_ref().map(|u| Usage {
|
||||
prompt_tokens: u.prompt_tokens,
|
||||
// Use actual completion tokens if we tracked them and they're higher
|
||||
completion_tokens: if actual_completion_tokens > u.completion_tokens {
|
||||
actual_completion_tokens
|
||||
} else {
|
||||
u.completion_tokens
|
||||
},
|
||||
total_tokens: u.prompt_tokens + u32::max(actual_completion_tokens, u.completion_tokens),
|
||||
}),
|
||||
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
|
||||
};
|
||||
if tx.send(Ok(final_chunk)).await.is_err() {
|
||||
@@ -496,10 +555,27 @@ impl AnthropicProvider {
|
||||
let final_chunk = CompletionChunk {
|
||||
content: String::new(),
|
||||
finished: true,
|
||||
usage: accumulated_usage.clone(),
|
||||
usage: accumulated_usage.as_ref().map(|u| Usage {
|
||||
prompt_tokens: u.prompt_tokens,
|
||||
completion_tokens: if actual_completion_tokens > u.completion_tokens {
|
||||
actual_completion_tokens
|
||||
} else {
|
||||
u.completion_tokens
|
||||
},
|
||||
total_tokens: u.prompt_tokens + u32::max(actual_completion_tokens, u.completion_tokens),
|
||||
}),
|
||||
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls) },
|
||||
};
|
||||
let _ = tx.send(Ok(final_chunk)).await;
|
||||
|
||||
// Log final usage for debugging
|
||||
if let Some(ref usage) = accumulated_usage {
|
||||
info!("Anthropic stream completed with final usage - prompt: {}, completion: {}, total: {}",
|
||||
usage.prompt_tokens, usage.completion_tokens, usage.total_tokens);
|
||||
} else {
|
||||
warn!("Anthropic stream completed without usage data - token accounting will fall back to estimation");
|
||||
}
|
||||
|
||||
accumulated_usage
|
||||
}
|
||||
}
|
||||
@@ -737,6 +813,8 @@ struct AnthropicStreamMessage {
|
||||
struct AnthropicDelta {
|
||||
text: Option<String>,
|
||||
partial_json: Option<String>,
|
||||
#[serde(default)]
|
||||
usage: Option<AnthropicUsage>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
|
||||
@@ -1,389 +0,0 @@
|
||||
# Accumulative Autonomous Mode
|
||||
|
||||
## Overview
|
||||
|
||||
Accumulative Autonomous Mode is the **new default interactive mode** for G3. It combines the ease of interactive chat with the power of autonomous implementation, allowing you to build projects iteratively by describing what you want, one requirement at a time.
|
||||
|
||||
## How It Works
|
||||
|
||||
### The Flow
|
||||
|
||||
1. **Start G3** in any directory (no arguments needed)
|
||||
2. **Describe** what you want to build
|
||||
3. **G3 automatically**:
|
||||
- Adds your input to accumulated requirements
|
||||
- Runs autonomous mode (coach-player feedback loop)
|
||||
- Implements your requirements with quality checks
|
||||
4. **Continue** adding more requirements or refinements
|
||||
5. **Repeat** until your project is complete
|
||||
|
||||
### Example Session
|
||||
|
||||
```bash
|
||||
$ cd ~/projects/my-new-app
|
||||
$ g3
|
||||
|
||||
🪿 G3 AI Coding Agent - Accumulative Mode
|
||||
>> describe what you want, I'll build it iteratively
|
||||
|
||||
📁 Workspace: /Users/you/projects/my-new-app
|
||||
|
||||
💡 Each input you provide will be added to requirements
|
||||
and I'll automatically work on implementing them.
|
||||
|
||||
Type 'exit' or 'quit' to stop, Ctrl+D to finish
|
||||
|
||||
============================================================
|
||||
📝 What would you like me to build? (describe your requirements)
|
||||
============================================================
|
||||
requirement> create a simple web server in Python with Flask that serves a homepage
|
||||
|
||||
📋 Current instructions and requirements (Turn 1):
|
||||
create a simple web server in Python with Flask that serves a homepage
|
||||
|
||||
🚀 Starting autonomous implementation...
|
||||
|
||||
🤖 G3 AI Coding Agent - Autonomous Mode
|
||||
📁 Using workspace: /Users/you/projects/my-new-app
|
||||
📋 Requirements loaded from --requirements flag
|
||||
🔄 Starting coach-player feedback loop...
|
||||
📂 No existing implementation files detected
|
||||
🎯 Starting with player implementation
|
||||
|
||||
=== TURN 1/5 - PLAYER MODE ===
|
||||
🎯 Starting player implementation...
|
||||
📋 Player starting initial implementation (no prior coach feedback)
|
||||
|
||||
[Player creates files, writes code...]
|
||||
|
||||
=== TURN 1/5 - COACH MODE ===
|
||||
🎓 Starting coach review...
|
||||
🎓 Coach review completed
|
||||
Coach feedback:
|
||||
The Flask server is implemented correctly with a homepage route.
|
||||
The code follows best practices and meets the requirements.
|
||||
IMPLEMENTATION_APPROVED
|
||||
|
||||
=== SESSION COMPLETED - IMPLEMENTATION APPROVED ===
|
||||
✅ Coach approved the implementation!
|
||||
|
||||
============================================================
|
||||
📊 AUTONOMOUS MODE SESSION REPORT
|
||||
============================================================
|
||||
⏱️ Total Duration: 12.34s
|
||||
🔄 Turns Taken: 1/5
|
||||
📝 Final Status: ✅ APPROVED
|
||||
...
|
||||
============================================================
|
||||
|
||||
✅ Autonomous run completed
|
||||
|
||||
============================================================
|
||||
📝 Turn 2 - What's next? (add more requirements or refinements)
|
||||
============================================================
|
||||
requirement> add a /api/users endpoint that returns a list of users as JSON
|
||||
|
||||
📋 Current instructions and requirements (Turn 2):
|
||||
add a /api/users endpoint that returns a list of users as JSON
|
||||
|
||||
🚀 Starting autonomous implementation...
|
||||
|
||||
[Autonomous mode runs again with BOTH requirements...]
|
||||
|
||||
============================================================
|
||||
📝 Turn 3 - What's next? (add more requirements or refinements)
|
||||
============================================================
|
||||
requirement> exit
|
||||
|
||||
👋 Goodbye!
|
||||
```
|
||||
|
||||
## Key Features
|
||||
|
||||
### 1. Requirement Accumulation
|
||||
|
||||
Each input you provide is:
|
||||
- **Numbered sequentially** (1, 2, 3, ...)
|
||||
- **Stored in memory** for the session
|
||||
- **Included in all subsequent runs**
|
||||
|
||||
This means the agent always has the full context of what you've asked for.
|
||||
|
||||
### 2. Automatic Requirements Document
|
||||
|
||||
G3 automatically generates a structured requirements document:
|
||||
|
||||
```markdown
|
||||
# Project Requirements
|
||||
|
||||
## Current Instructions and Requirements:
|
||||
|
||||
1. create a simple web server in Python with Flask that serves a homepage
|
||||
2. add a /api/users endpoint that returns a list of users as JSON
|
||||
3. add error handling for 404 and 500 errors
|
||||
|
||||
## Latest Requirement (Turn 3):
|
||||
|
||||
add error handling for 404 and 500 errors
|
||||
```
|
||||
|
||||
This document is passed to autonomous mode, ensuring the agent:
|
||||
- Knows all previous requirements
|
||||
- Focuses on the latest addition
|
||||
- Maintains consistency across iterations
|
||||
|
||||
### 3. Full Autonomous Quality
|
||||
|
||||
Each requirement triggers a complete autonomous run with:
|
||||
- **Coach-Player Feedback Loop**: Quality assurance built-in
|
||||
- **Multiple Turns**: Up to 5 iterations per requirement (configurable with `--max-turns`)
|
||||
- **Compilation Checks**: Ensures code actually works
|
||||
- **Testing**: Coach can run tests to verify functionality
|
||||
|
||||
### 4. Error Recovery
|
||||
|
||||
If an autonomous run fails:
|
||||
- You're notified of the error
|
||||
- You can provide additional requirements to fix issues
|
||||
- The session continues (doesn't crash)
|
||||
|
||||
### 5. Workspace Management
|
||||
|
||||
- Uses **current directory** as workspace
|
||||
- All files created in current directory
|
||||
- No need to specify workspace path
|
||||
- Works with existing projects or empty directories
|
||||
|
||||
## Command-Line Options
|
||||
|
||||
### Default (Accumulative Mode)
|
||||
|
||||
```bash
|
||||
g3
|
||||
```
|
||||
|
||||
Starts accumulative autonomous mode in the current directory.
|
||||
|
||||
### With Options
|
||||
|
||||
```bash
|
||||
# Use a specific workspace
|
||||
g3 --workspace ~/projects/my-app
|
||||
|
||||
# Limit autonomous turns per requirement
|
||||
g3 --max-turns 3
|
||||
|
||||
# Enable macOS Accessibility tools
|
||||
g3 --macax
|
||||
|
||||
# Enable WebDriver browser automation
|
||||
g3 --webdriver
|
||||
|
||||
# Use a specific provider/model
|
||||
g3 --provider anthropic --model claude-3-5-sonnet-20241022
|
||||
|
||||
# Show prompts and code during execution
|
||||
g3 --show-prompt --show-code
|
||||
|
||||
# Disable log files
|
||||
g3 --quiet
|
||||
```
|
||||
|
||||
### Disable Accumulative Mode
|
||||
|
||||
To use the traditional chat mode (without automatic autonomous runs):
|
||||
|
||||
```bash
|
||||
g3 --chat
|
||||
|
||||
# Alternative: the legacy --accumulative flag also selects traditional chat mode
|
||||
g3 --accumulative
|
||||
```
|
||||
|
||||
This gives you the old behavior where you chat with the agent without automatic autonomous runs.
|
||||
|
||||
## Use Cases
|
||||
|
||||
### 1. Rapid Prototyping
|
||||
|
||||
```bash
|
||||
requirement> create a REST API for a todo app
|
||||
requirement> add SQLite database storage
|
||||
requirement> add authentication with JWT
|
||||
requirement> add rate limiting
|
||||
```
|
||||
|
||||
### 2. Iterative Refinement
|
||||
|
||||
```bash
|
||||
requirement> create a data visualization dashboard
|
||||
requirement> make the charts interactive
|
||||
requirement> add dark mode support
|
||||
requirement> optimize for mobile devices
|
||||
```
|
||||
|
||||
### 3. Bug Fixing
|
||||
|
||||
```bash
|
||||
requirement> fix the login form validation
|
||||
requirement> handle edge case when username is empty
|
||||
requirement> add better error messages
|
||||
```
|
||||
|
||||
### 4. Feature Addition
|
||||
|
||||
```bash
|
||||
requirement> add export to CSV functionality
|
||||
requirement> add email notifications
|
||||
requirement> add admin dashboard
|
||||
```
|
||||
|
||||
## Tips and Best Practices
|
||||
|
||||
### 1. Start Simple
|
||||
|
||||
Begin with a basic requirement, let it be implemented, then add complexity:
|
||||
|
||||
```bash
|
||||
✅ Good:
|
||||
requirement> create a basic Flask web server
|
||||
requirement> add a homepage with a form
|
||||
requirement> add form validation
|
||||
|
||||
❌ Too Complex:
|
||||
requirement> create a full-stack web app with authentication, database, API, and frontend
|
||||
```
|
||||
|
||||
### 2. Be Specific
|
||||
|
||||
The more specific you are, the better the results:
|
||||
|
||||
```bash
|
||||
✅ Good:
|
||||
requirement> add a /api/users endpoint that returns JSON with id, name, and email fields
|
||||
|
||||
❌ Vague:
|
||||
requirement> add users
|
||||
```
|
||||
|
||||
### 3. One Thing at a Time
|
||||
|
||||
Focus each requirement on a single feature or fix:
|
||||
|
||||
```bash
|
||||
✅ Good:
|
||||
requirement> add error handling for database connections
|
||||
requirement> add logging for all API requests
|
||||
|
||||
❌ Multiple Things:
|
||||
requirement> add error handling and logging and monitoring and alerts
|
||||
```
|
||||
|
||||
### 4. Review Between Turns
|
||||
|
||||
After each autonomous run completes:
|
||||
- Check the generated files
|
||||
- Test the functionality
|
||||
- Decide what to add or fix next
|
||||
|
||||
### 5. Use Exit Commands
|
||||
|
||||
When done:
|
||||
- Type `exit` or `quit`
|
||||
- Press `Ctrl+D` (EOF)
|
||||
- Press `Ctrl+C` to cancel current input
|
||||
|
||||
## Comparison with Other Modes
|
||||
|
||||
| Feature | Accumulative (Default) | Traditional Interactive | Autonomous | Single-Shot |
|
||||
|---------|----------------------|------------------------|------------|-------------|
|
||||
| **Command** | `g3` | `g3 --chat` | `g3 --autonomous` | `g3 "task"` |
|
||||
| **Input Style** | Iterative prompts | Chat messages | requirements.md file | Command-line arg |
|
||||
| **Auto-Autonomous** | ✅ Yes | ❌ No | ✅ Yes | ❌ No |
|
||||
| **Coach-Player Loop** | ✅ Yes | ❌ No | ✅ Yes | ❌ No |
|
||||
| **Accumulates Requirements** | ✅ Yes | ❌ No | ❌ No | ❌ No |
|
||||
| **Multiple Iterations** | ✅ Yes | ✅ Yes | ✅ Yes | ❌ No |
|
||||
| **Best For** | Iterative development | Quick questions | Pre-planned projects | One-off tasks |
|
||||
|
||||
## Technical Details
|
||||
|
||||
### Requirements Storage
|
||||
|
||||
- Stored in memory (not persisted to disk)
|
||||
- Numbered sequentially starting from 1
|
||||
- Formatted as markdown list
|
||||
- Passed to autonomous mode as `--requirements` override
|
||||
|
||||
### History
|
||||
|
||||
- Saved to `~/.g3_accumulative_history`
|
||||
- Separate from traditional interactive history
|
||||
- Persists across sessions
|
||||
- Uses rustyline for readline support
|
||||
|
||||
### Workspace
|
||||
|
||||
- Defaults to current directory
|
||||
- Can be overridden with `--workspace`
|
||||
- All files created in workspace
|
||||
- Logs saved to `workspace/logs/`
|
||||
|
||||
### Autonomous Execution
|
||||
|
||||
- Full coach-player feedback loop
|
||||
- Configurable max turns (default: 5)
|
||||
- Respects all CLI flags (--macax, --webdriver, etc.)
|
||||
- Error handling allows continuation
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### "No requirements provided"
|
||||
|
||||
This shouldn't happen in accumulative mode, but if it does:
|
||||
- Check that you entered a requirement
|
||||
- Ensure the requirement isn't empty
|
||||
- Try restarting G3
|
||||
|
||||
### "Autonomous run failed"
|
||||
|
||||
If an autonomous run fails:
|
||||
- Read the error message
|
||||
- Provide a new requirement to fix the issue
|
||||
- Or type `exit` and investigate manually
|
||||
|
||||
### "Context window full"
|
||||
|
||||
If you hit token limits:
|
||||
- The agent will auto-summarize
|
||||
- Or you can start a new session
|
||||
- Consider using `--max-turns` to limit iterations
|
||||
|
||||
### "Coach never approves"
|
||||
|
||||
If the coach keeps rejecting:
|
||||
- Check the coach feedback for specific issues
|
||||
- Provide more specific requirements
|
||||
- Consider increasing `--max-turns`
|
||||
|
||||
## Future Enhancements
|
||||
|
||||
Planned improvements:
|
||||
|
||||
1. **Persistence**: Save accumulated requirements to disk
|
||||
2. **Editing**: Edit or remove previous requirements
|
||||
3. **Branching**: Try different approaches
|
||||
4. **Templates**: Pre-defined requirement sets
|
||||
5. **Review**: Show all accumulated requirements
|
||||
6. **Export**: Save to requirements.md
|
||||
7. **Undo**: Remove last requirement
|
||||
8. **Replay**: Re-run with same requirements
|
||||
|
||||
## Feedback
|
||||
|
||||
This is a new feature! Please provide feedback:
|
||||
- What works well?
|
||||
- What's confusing?
|
||||
- What features would you like?
|
||||
- Any bugs or issues?
|
||||
|
||||
Open an issue on GitHub or contribute improvements!
|
||||
164
test_token_accounting.py
Normal file
164
test_token_accounting.py
Normal file
@@ -0,0 +1,164 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script to verify token accounting is working correctly with the Anthropic provider.
|
||||
This script will send multiple messages and verify that token counts accumulate properly.
|
||||
"""
|
||||
|
||||
import subprocess
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
def run_g3_command(prompt, provider="anthropic"):
    """Run g3 once via ``cargo run --release`` and return its combined output.

    Args:
        prompt: Text passed to g3 as its positional argument.
        provider: Value passed to g3's ``--provider`` flag.

    Returns:
        The child's captured stdout followed by its captured stderr, as one
        string.
    """
    # Imported locally so the block is self-contained; the previous code
    # reached the environment through the undocumented ``subprocess.os``
    # attribute instead of the ``os`` module itself.
    import os

    cmd = [
        "cargo", "run", "--release", "--",
        "--provider", provider,
        prompt,
    ]

    # Inherit the caller's environment and layer the logging overrides on top.
    env = {
        **os.environ,
        "RUST_LOG": "g3_providers=debug,g3_core=info",
        "RUST_BACKTRACE": "1",
    }

    result = subprocess.run(cmd, capture_output=True, text=True, env=env)
    return result.stdout + result.stderr
|
||||
|
||||
def extract_token_info(output):
    """Pull token-accounting figures out of captured g3 log output.

    Four kinds of log line are searched for independently. When a kind occurs
    more than once, only its last occurrence is used; when it does not occur,
    its keys are simply absent from the result.

    Args:
        output: Captured text to scan.

    Returns:
        A dict with any of the keys ``was``, ``now``, ``prompt``,
        ``completion``, ``total``, ``percentage``, ``used``,
        ``total_context``, ``thinning_triggered``, ``thinning_percentage``,
        ``final_prompt``, ``final_completion`` and ``final_total``.
    """

    def last_groups(pattern):
        # Capture groups of the final match of ``pattern``, or None.
        found = re.findall(pattern, output)
        return found[-1] if found else None

    info = {}

    # Token usage updates
    usage = last_groups(
        r"Updated token usage.*was: (\d+), now: (\d+).*prompt=(\d+), completion=(\d+), total=(\d+)"
    )
    if usage is not None:
        for key, value in zip(('was', 'now', 'prompt', 'completion', 'total'), usage):
            info[key] = int(value)

    # Context percentage
    context = last_groups(r"Context usage at (\d+)%.*\((\d+)/(\d+) tokens\)")
    if context is not None:
        for key, value in zip(('percentage', 'used', 'total_context'), context):
            info[key] = int(value)

    # Thinning triggers
    thinning = last_groups(
        r"Context thinning triggered.*usage: (\d+)%.*\((\d+)/(\d+) tokens\)"
    )
    if thinning is not None:
        info['thinning_triggered'] = True
        info['thinning_percentage'] = int(thinning[0])

    # Final usage reported at the end of the Anthropic stream
    final = last_groups(
        r"Anthropic stream completed with final usage.*prompt: (\d+), completion: (\d+), total: (\d+)"
    )
    if final is not None:
        for key, value in zip(('final_prompt', 'final_completion', 'final_total'), final):
            info[key] = int(value)

    return info
|
||||
|
||||
def _print_token_report(tokens):
    """Print the usage figures shared by the single-prompt tests.

    Also warns when the tracked total (``now``) differs from the final total
    reported at the end of the stream, if the latter is present.
    """
    print(f"Token usage: {tokens.get('now', 'N/A')} tokens")
    print(f" Prompt tokens: {tokens.get('prompt', 'N/A')}")
    print(f" Completion tokens: {tokens.get('completion', 'N/A')}")
    print(f" Total from provider: {tokens.get('total', 'N/A')}")

    if 'final_total' in tokens:
        print(f" Final total from stream: {tokens['final_total']}")
        if tokens.get('now') != tokens['final_total']:
            print(f" ⚠️ WARNING: Mismatch between tracked ({tokens.get('now')}) and final ({tokens['final_total']})")


def main():
    """Build g3, run three prompts through it, and print a token-accounting report.

    Exits with status 1 if the release build fails, because every later step
    would otherwise only report missing token information.
    """
    print("Testing Anthropic Provider Token Accounting")
    print("="*50)

    # Build the project first; surface a failed build instead of ignoring it.
    print("Building project...")
    build = subprocess.run(["cargo", "build", "--release"], capture_output=True, text=True)
    if build.returncode != 0:
        print("❌ Build failed:")
        print(build.stderr)
        sys.exit(1)

    # Test 1: Simple prompt
    print("\nTest 1: Simple prompt")
    print("-"*30)
    output = run_g3_command("Say 'Hello, World!' and nothing else.")
    tokens = extract_token_info(output)

    if tokens:
        _print_token_report(tokens)
        # Completion tokens should be small for "Hello, World!"
        if tokens.get('completion', 0) > 50:
            print(f" ⚠️ WARNING: Completion tokens seem high for a simple response: {tokens.get('completion')}")
    else:
        print(" ❌ No token information found in output")

    # Test 2: Longer response
    print("\nTest 2: Longer response")
    print("-"*30)
    output = run_g3_command("Write a 3-paragraph essay about the importance of accurate token counting in LLM applications.")
    tokens = extract_token_info(output)

    if tokens:
        _print_token_report(tokens)
        # Completion tokens should not be tiny for a multi-paragraph answer
        if tokens.get('completion', 0) < 100:
            print(f" ⚠️ WARNING: Completion tokens seem low for a 3-paragraph essay: {tokens.get('completion')}")
    else:
        print(" ❌ No token information found in output")

    # Test 3: Check for proper accumulation
    print("\nTest 3: Token accumulation (multiple messages)")
    print("-"*30)

    # The two prompts run as separate invocations, not as one conversation,
    # so this only checks that each run reports a positive token count.
    tokens1 = extract_token_info(run_g3_command("Count from 1 to 5."))
    tokens2 = extract_token_info(run_g3_command("Now count from 6 to 10."))

    if tokens1 and tokens2:
        print(f"First message: {tokens1.get('now', 'N/A')} tokens")
        print(f"Second message: {tokens2.get('now', 'N/A')} tokens")

        if tokens1.get('now', 0) > 0 and tokens2.get('now', 0) > 0:
            print(" ✅ Both messages have token counts")
        else:
            print(" ❌ Missing token counts")
    else:
        # Previously this case printed nothing at all.
        print(" ❌ No token information found in output")

    print("\n" + "="*50)
    print("Test Summary:")
    print("Check the output above for any warnings or errors.")
    print("Key things to verify:")
    print(" 1. Token counts are being captured from the provider")
    print(" 2. Completion tokens are reasonable for the response length")
    print(" 3. No mismatch between tracked and final token counts")
    print(" 4. Context thinning triggers at appropriate thresholds")


if __name__ == "__main__":
    main()
|
||||
46
test_token_accounting.sh
Executable file
46
test_token_accounting.sh
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/bin/bash

# Test script to verify token accounting with Anthropic provider.
#
# Builds g3 in release mode, sends one prompt through the Anthropic provider
# with debug logging enabled, tees the full output to a log file, and then
# greps that log for token-accounting messages.

# Make a pipeline report the failure of any stage, so a failing `cargo run`
# is not hidden behind `tee`'s successful exit status.
set -o pipefail

LOG_FILE=/tmp/token_test.log

# Prompt is kept in a variable; no intermediate prompt file is needed.
PROMPT='Please write a short paragraph about the importance of accurate token counting in LLM applications. Then list 3 reasons why token accounting might fail.'

echo "Testing token accounting with Anthropic provider..."
echo "This test will send a few messages and check if token counts are properly tracked."
echo ""

# Set up environment for testing
export RUST_LOG=g3_providers=debug,g3_core=info
export RUST_BACKTRACE=1

# Build the project first; only the Compiling/Finished lines are shown, and a
# grep with no matches must not be treated as an error.
echo "Building project..."
cargo build --release 2>&1 | grep -E "(Compiling|Finished)" || true

echo ""
echo "Running test with Anthropic provider..."
echo "Watch for these log messages:"
echo " - 'Captured initial usage from message_start'"
echo " - 'Updated usage from message_delta' (if available)"
echo " - 'Updated with final usage from message_stop' (if available)"
echo " - 'Anthropic stream completed with final usage'"
echo " - 'Updated token usage from provider'"
echo " - 'Context thinning triggered' (when reaching thresholds)"
echo ""

# Run the test
echo "Sending test prompt..."
if ! cargo run --release -- --provider anthropic "$PROMPT" 2>&1 | tee "$LOG_FILE"; then
    echo "WARNING: g3 exited with a non-zero status; the results below may be incomplete." >&2
fi

echo ""
echo "Analyzing results..."
echo ""

# Check for token accounting messages
echo "Token accounting messages found:"
grep -E "(usage from|token usage|Context thinning|Context usage)" "$LOG_FILE" | head -20

echo ""
echo "Test complete. Check $LOG_FILE for full output."
|
||||
Reference in New Issue
Block a user