Compare commits

...

7 Commits

Author SHA1 Message Date
Michael Neale
a457d46446 Merge branch 'main' into micn/fix-anthropic-1p
* main:
  control commands for machine mode
  Fix duplicate dump at end
  minor
  --machine mode flag for verbose CLI output
  fixed x,y detection in vision click
  screenshotting bug fix
  test
  Native api for screen capture
  replace tesseract with apple vision
  more macax tooling
  coach rigor +++
  thinning message highlighted
  warnings fix
  macax tools
  control commands
  Add --interactive-requirements flag for AI-enhanced requirements mode
2025-10-28 13:55:01 +11:00
Dhanji Prasanna
7c2c433746 control commands for machine mode 2025-10-28 12:35:58 +11:00
Dhanji Prasanna
98f4220544 Fix duplicate dump at end 2025-10-27 13:48:46 +11:00
Dhanji Prasanna
a4476a555c minor 2025-10-27 13:32:14 +11:00
Michael Neale
b3d18d02ea prefer provider count 2025-10-22 15:09:47 +11:00
Michael Neale
442ca76cd6 Merge branch 'main' into micn/fix-anthropic-1p
* main:
  fix panic in CLI parser
  coach/player provider split + add OpenAI
2025-10-22 15:01:18 +11:00
Michael Neale
738c3ac53e to get anthropic provider more reliable with tokens 2025-10-22 09:47:24 +11:00
7 changed files with 394 additions and 53 deletions

75
Cargo.lock generated
View File

@@ -318,9 +318,9 @@ dependencies = [
[[package]] [[package]]
name = "cc" name = "cc"
version = "1.2.41" version = "1.2.43"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2"
dependencies = [ dependencies = [
"find-msvc-tools", "find-msvc-tools",
"jobserver", "jobserver",
@@ -900,9 +900,9 @@ dependencies = [
[[package]] [[package]]
name = "deranged" name = "deranged"
version = "0.5.4" version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071" checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
dependencies = [ dependencies = [
"powerfmt", "powerfmt",
] ]
@@ -990,7 +990,7 @@ dependencies = [
"libc", "libc",
"option-ext", "option-ext",
"redox_users 0.5.2", "redox_users 0.5.2",
"windows-sys 0.61.2", "windows-sys 0.59.0",
] ]
[[package]] [[package]]
@@ -1015,9 +1015,9 @@ dependencies = [
[[package]] [[package]]
name = "document-features" name = "document-features"
version = "0.2.11" version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d" checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
dependencies = [ dependencies = [
"litrs", "litrs",
] ]
@@ -1062,7 +1062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
dependencies = [ dependencies = [
"libc", "libc",
"windows-sys 0.61.2", "windows-sys 0.52.0",
] ]
[[package]] [[package]]
@@ -1144,9 +1144,9 @@ checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.1.4" version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
dependencies = [ dependencies = [
"crc32fast", "crc32fast",
"miniz_oxide", "miniz_oxide",
@@ -1571,11 +1571,11 @@ checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
[[package]] [[package]]
name = "home" name = "home"
version = "0.5.11" version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
dependencies = [ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.52.0",
] ]
[[package]] [[package]]
@@ -1922,9 +1922,12 @@ dependencies = [
[[package]] [[package]]
name = "indoc" name = "indoc"
version = "2.0.6" version = "2.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
dependencies = [
"rustversion",
]
[[package]] [[package]]
name = "instability" name = "instability"
@@ -1947,9 +1950,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]] [[package]]
name = "is_terminal_polyfill" name = "is_terminal_polyfill"
version = "1.70.1" version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]] [[package]]
name = "itertools" name = "itertools"
@@ -2133,9 +2136,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
[[package]] [[package]]
name = "litrs" name = "litrs"
version = "0.4.2" version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed" checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
[[package]] [[package]]
name = "llama_cpp" name = "llama_cpp"
@@ -2251,14 +2254,14 @@ dependencies = [
[[package]] [[package]]
name = "mio" name = "mio"
version = "1.0.4" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873"
dependencies = [ dependencies = [
"libc", "libc",
"log", "log",
"wasi", "wasi",
"windows-sys 0.59.0", "windows-sys 0.61.2",
] ]
[[package]] [[package]]
@@ -2330,7 +2333,7 @@ version = "0.50.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
dependencies = [ dependencies = [
"windows-sys 0.61.2", "windows-sys 0.59.0",
] ]
[[package]] [[package]]
@@ -2406,9 +2409,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]] [[package]]
name = "once_cell_polyfill" name = "once_cell_polyfill"
version = "1.70.1" version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]] [[package]]
name = "openssl" name = "openssl"
@@ -2627,9 +2630,9 @@ dependencies = [
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.101" version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
dependencies = [ dependencies = [
"unicode-ident", "unicode-ident",
] ]
@@ -2901,7 +2904,7 @@ dependencies = [
"errno", "errno",
"libc", "libc",
"linux-raw-sys 0.11.0", "linux-raw-sys 0.11.0",
"windows-sys 0.61.2", "windows-sys 0.52.0",
] ]
[[package]] [[package]]
@@ -3122,9 +3125,9 @@ dependencies = [
[[package]] [[package]]
name = "signal-hook-mio" name = "signal-hook-mio"
version = "0.2.4" version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc"
dependencies = [ dependencies = [
"libc", "libc",
"mio", "mio",
@@ -3226,9 +3229,9 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.107" version = "2.0.108"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@@ -3289,7 +3292,7 @@ dependencies = [
"getrandom 0.3.4", "getrandom 0.3.4",
"once_cell", "once_cell",
"rustix 1.1.2", "rustix 1.1.2",
"windows-sys 0.61.2", "windows-sys 0.52.0",
] ]
[[package]] [[package]]
@@ -3631,9 +3634,9 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.19" version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
[[package]] [[package]]
name = "unicode-segmentation" name = "unicode-segmentation"
@@ -3932,7 +3935,7 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
dependencies = [ dependencies = [
"windows-sys 0.61.2", "windows-sys 0.48.0",
] ]
[[package]] [[package]]

View File

@@ -1145,6 +1145,27 @@ async fn run_interactive_machine(
println!("{}", summary); println!("{}", summary);
continue; continue;
} }
"/readme" => {
println!("COMMAND: readme");
match agent.reload_readme() {
Ok(true) => println!("RESULT: README content reloaded successfully"),
Ok(false) => println!("RESULT: No README was loaded at startup, cannot reload"),
Err(e) => println!("ERROR: {}", e),
}
continue;
}
"/stats" => {
println!("COMMAND: stats");
let stats = agent.get_stats();
// Emit stats as structured data (name: value pairs)
println!("{}", stats);
continue;
}
"/help" => {
println!("COMMAND: help");
println!("AVAILABLE_COMMANDS: /compact /thinnify /readme /stats /help");
continue;
}
_ => { _ => {
println!("ERROR: Unknown command: {}", input); println!("ERROR: Unknown command: {}", input);
continue; continue;

View File

@@ -71,6 +71,7 @@ impl UiWriter for MachineUiWriter {
} }
fn print_agent_prompt(&self) { fn print_agent_prompt(&self) {
println!("AGENT_RESPONSE:");
let _ = io::stdout().flush(); let _ = io::stdout().flush();
} }

View File

@@ -325,10 +325,19 @@ impl ContextWindow {
/// Update token usage from provider response /// Update token usage from provider response
pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) { pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
// Add the tokens from this response to our running total // Always use the provider's count as the authoritative value
// The usage.total_tokens represents tokens used in this single API call // The provider knows best how many tokens were actually used
self.used_tokens += usage.total_tokens;
self.cumulative_tokens += usage.total_tokens; let old_used = self.used_tokens;
// Use the provider's total as the current used tokens
self.used_tokens = usage.total_tokens;
self.cumulative_tokens += usage.total_tokens - old_used;
info!(
"Updated token usage from provider - was: {}, now: {} (prompt={}, completion={}, total={})",
old_used, self.used_tokens, usage.prompt_tokens, usage.completion_tokens, usage.total_tokens
);
debug!( debug!(
"Added {} tokens from provider response (used: {}/{}, cumulative: {})", "Added {} tokens from provider response (used: {}/{}, cumulative: {})",
@@ -445,8 +454,18 @@ Format this as a detailed but concise summary that can be used to resume the con
if current_percentage >= 50 { if current_percentage >= 50 {
let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10% let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10%
if current_threshold > self.last_thinning_percentage && current_threshold <= 80 { if current_threshold > self.last_thinning_percentage && current_threshold <= 80 {
info!(
"Context thinning triggered - usage: {}% ({}/{} tokens), threshold: {}%, last thinned at: {}%",
current_percentage,
self.used_tokens,
self.total_tokens,
current_threshold,
self.last_thinning_percentage
);
return true; return true;
} }
} else {
debug!("Context usage at {}% ({}/{} tokens) - no thinning needed", current_percentage, self.used_tokens, self.total_tokens);
} }
false false
@@ -2675,7 +2694,12 @@ Template:
// Display tool execution result with proper indentation // Display tool execution result with proper indentation
if tool_call.tool != "final_output" { if tool_call.tool != "final_output" {
let output_lines: Vec<&str> = tool_result.lines().collect(); // Skip displaying output for shell tool since it was already streamed
let should_display_output = tool_call.tool != "shell";
let output_lines: Vec<&str> = if should_display_output {
tool_result.lines().collect()
} else { vec![] };
// Check if UI wants full output (machine mode) or truncated (human mode) // Check if UI wants full output (machine mode) or truncated (human mode)
let wants_full = self.ui_writer.wants_full_output(); let wants_full = self.ui_writer.wants_full_output();
@@ -2722,7 +2746,8 @@ Template:
// Check if this was a final_output tool call // Check if this was a final_output tool call
if tool_call.tool == "final_output" { if tool_call.tool == "final_output" {
full_response.push_str(final_display_content); // Don't add final_display_content here - it was already added before tool execution
// Adding it again would duplicate the output
if let Some(summary) = tool_call.args.get("summary") { if let Some(summary) = tool_call.args.get("summary") {
if let Some(summary_str) = summary.as_str() { if let Some(summary_str) = summary.as_str() {
full_response.push_str(&format!("\n\n{}", summary_str)); full_response.push_str(&format!("\n\n{}", summary_str));
@@ -3186,13 +3211,16 @@ Template:
{ {
Ok(result) => { Ok(result) => {
if result.success { if result.success {
Ok(if result.stdout.is_empty() { // Don't return stdout - it was already streamed to the UI
"✅ Command executed successfully".to_string() // Returning it would cause duplicate output
} else { Ok("✅ Command executed successfully".to_string())
result.stdout.trim().to_string()
})
} else { } else {
Ok(format!("❌ Command failed: {}", result.stderr.trim())) // For errors, return stderr since it wasn't streamed
Ok(if result.stderr.is_empty() {
"❌ Command failed".to_string()
} else {
format!("❌ Command failed: {}", result.stderr.trim())
})
} }
} }
Err(e) => Ok(format!("❌ Execution error: {}", e)), Err(e) => Ok(format!("❌ Execution error: {}", e)),

View File

@@ -276,6 +276,7 @@ impl AnthropicProvider {
let mut partial_tool_json = String::new(); // Accumulate partial JSON for tool calls let mut partial_tool_json = String::new(); // Accumulate partial JSON for tool calls
let mut accumulated_usage: Option<Usage> = None; let mut accumulated_usage: Option<Usage> = None;
let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
let mut actual_completion_tokens: u32 = 0; // Track actual completion tokens
while let Some(chunk_result) = stream.next().await { while let Some(chunk_result) = stream.next().await {
match chunk_result { match chunk_result {
@@ -323,7 +324,12 @@ impl AnthropicProvider {
let final_chunk = CompletionChunk { let final_chunk = CompletionChunk {
content: String::new(), content: String::new(),
finished: true, finished: true,
usage: accumulated_usage.clone(), usage: accumulated_usage.as_ref().map(|u| Usage {
prompt_tokens: u.prompt_tokens,
// Use actual completion tokens if we tracked them, otherwise use the estimate
completion_tokens: if actual_completion_tokens > 0 { actual_completion_tokens } else { u.completion_tokens },
total_tokens: u.prompt_tokens + if actual_completion_tokens > 0 { actual_completion_tokens } else { u.completion_tokens },
}),
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) }, tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
}; };
if tx.send(Ok(final_chunk)).await.is_err() { if tx.send(Ok(final_chunk)).await.is_err() {
@@ -337,6 +343,7 @@ impl AnthropicProvider {
match serde_json::from_str::<AnthropicStreamEvent>(data) { match serde_json::from_str::<AnthropicStreamEvent>(data) {
Ok(event) => { Ok(event) => {
debug!("Parsed event type: {}, event: {:?}", event.event_type, event); debug!("Parsed event type: {}, event: {:?}", event.event_type, event);
match event.event_type.as_str() { match event.event_type.as_str() {
"message_start" => { "message_start" => {
// Extract usage data from message_start event // Extract usage data from message_start event
@@ -347,7 +354,10 @@ impl AnthropicProvider {
completion_tokens: usage.output_tokens, completion_tokens: usage.output_tokens,
total_tokens: usage.input_tokens + usage.output_tokens, total_tokens: usage.input_tokens + usage.output_tokens,
}); });
debug!("Captured usage from message_start: {:?}", accumulated_usage); debug!("Captured initial usage from message_start - prompt: {}, completion: {} (estimated), total: {}",
usage.input_tokens,
usage.output_tokens,
usage.input_tokens + usage.output_tokens);
} }
} }
} }
@@ -396,6 +406,9 @@ impl AnthropicProvider {
"content_block_delta" => { "content_block_delta" => {
if let Some(delta) = event.delta { if let Some(delta) = event.delta {
if let Some(text) = delta.text { if let Some(text) = delta.text {
// Track actual completion tokens (rough estimate: 4 chars per token)
actual_completion_tokens += (text.len() as f32 / 4.0).ceil() as u32;
debug!("Sending text chunk of length {}: '{}'", text.len(), text); debug!("Sending text chunk of length {}: '{}'", text.len(), text);
let chunk = CompletionChunk { let chunk = CompletionChunk {
content: text, content: text,
@@ -416,6 +429,19 @@ impl AnthropicProvider {
} }
} }
} }
"message_delta" => {
// Check if message_delta contains updated usage data
if let Some(delta) = event.delta {
if let Some(usage) = delta.usage {
accumulated_usage = Some(Usage {
prompt_tokens: usage.input_tokens,
completion_tokens: usage.output_tokens,
total_tokens: usage.input_tokens + usage.output_tokens,
});
debug!("Updated usage from message_delta - prompt: {}, completion: {}, total: {}", usage.input_tokens, usage.output_tokens, usage.input_tokens + usage.output_tokens);
}
}
}
"content_block_stop" => { "content_block_stop" => {
// Tool call block is complete - now parse the accumulated JSON // Tool call block is complete - now parse the accumulated JSON
if !current_tool_calls.is_empty() && !partial_tool_json.is_empty() { if !current_tool_calls.is_empty() && !partial_tool_json.is_empty() {
@@ -450,11 +476,44 @@ impl AnthropicProvider {
} }
} }
"message_stop" => { "message_stop" => {
debug!("Received message stop event"); debug!("Received message_stop event: {:?}", event);
// Check if message_stop contains final usage data
if let Some(message) = event.message {
if let Some(usage) = message.usage {
// Update with final accurate usage data from message_stop
// This should have the actual completion token count
accumulated_usage = Some(Usage {
prompt_tokens: usage.input_tokens,
// Prefer the actual output_tokens from message_stop if available
// Otherwise use our tracked count, and as last resort the initial estimate
completion_tokens: if usage.output_tokens > 0 {
usage.output_tokens
} else if actual_completion_tokens > 0 {
actual_completion_tokens
} else { usage.output_tokens },
total_tokens: usage.input_tokens + usage.output_tokens,
});
debug!("Updated with final usage from message_stop - prompt: {}, completion: {}, total: {}",
usage.input_tokens,
usage.output_tokens,
usage.input_tokens + usage.output_tokens);
}
}
let final_chunk = CompletionChunk { let final_chunk = CompletionChunk {
content: String::new(), content: String::new(),
finished: true, finished: true,
usage: accumulated_usage.clone(), usage: accumulated_usage.as_ref().map(|u| Usage {
prompt_tokens: u.prompt_tokens,
// Use actual completion tokens if we tracked them and they're higher
completion_tokens: if actual_completion_tokens > u.completion_tokens {
actual_completion_tokens
} else {
u.completion_tokens
},
total_tokens: u.prompt_tokens + u32::max(actual_completion_tokens, u.completion_tokens),
}),
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) }, tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
}; };
if tx.send(Ok(final_chunk)).await.is_err() { if tx.send(Ok(final_chunk)).await.is_err() {
@@ -496,10 +555,27 @@ impl AnthropicProvider {
let final_chunk = CompletionChunk { let final_chunk = CompletionChunk {
content: String::new(), content: String::new(),
finished: true, finished: true,
usage: accumulated_usage.clone(), usage: accumulated_usage.as_ref().map(|u| Usage {
prompt_tokens: u.prompt_tokens,
completion_tokens: if actual_completion_tokens > u.completion_tokens {
actual_completion_tokens
} else {
u.completion_tokens
},
total_tokens: u.prompt_tokens + u32::max(actual_completion_tokens, u.completion_tokens),
}),
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls) }, tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls) },
}; };
let _ = tx.send(Ok(final_chunk)).await; let _ = tx.send(Ok(final_chunk)).await;
// Log final usage for debugging
if let Some(ref usage) = accumulated_usage {
info!("Anthropic stream completed with final usage - prompt: {}, completion: {}, total: {}",
usage.prompt_tokens, usage.completion_tokens, usage.total_tokens);
} else {
warn!("Anthropic stream completed without usage data - token accounting will fall back to estimation");
}
accumulated_usage accumulated_usage
} }
} }
@@ -737,6 +813,8 @@ struct AnthropicStreamMessage {
struct AnthropicDelta { struct AnthropicDelta {
text: Option<String>, text: Option<String>,
partial_json: Option<String>, partial_json: Option<String>,
#[serde(default)]
usage: Option<AnthropicUsage>,
} }
#[derive(Debug, Deserialize)] #[derive(Debug, Deserialize)]

164
test_token_accounting.py Normal file
View File

@@ -0,0 +1,164 @@
#!/usr/bin/env python3
"""
Test script to verify token accounting is working correctly with the Anthropic provider.
This script will send multiple messages and verify that token counts accumulate properly.
"""
import json
import os
import re
import subprocess
import sys
import time
def run_g3_command(prompt, provider="anthropic"):
    """Run the g3 CLI once and return its combined stdout+stderr.

    Args:
        prompt: Prompt string passed as the final CLI argument.
        provider: Provider name forwarded via --provider (default "anthropic").

    Returns:
        A single string of stdout followed by stderr, so callers can
        regex-scan log lines regardless of which stream they landed on.
    """
    cmd = [
        "cargo", "run", "--release", "--",
        "--provider", provider,
        prompt
    ]
    # Enable the debug/info log targets that extract_token_info() parses.
    env = {
        "RUST_LOG": "g3_providers=debug,g3_core=info",
        "RUST_BACKTRACE": "1"
    }
    # Merge with the inherited environment. Use os.environ directly rather
    # than subprocess.os.environ — reaching into subprocess's module
    # internals is an undocumented access path that can break.
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        env={**os.environ, **env}
    )
    return result.stdout + result.stderr
def extract_token_info(output):
    """Scrape token-accounting figures from captured g3 log output.

    Looks for the structured log lines emitted by the provider and the
    context-window tracker, keeps only the most recent occurrence of each,
    and returns them as a flat dict (empty when nothing matched).
    """
    info = {}

    # Provider-side running-total update line.
    usage_hits = re.findall(
        r"Updated token usage.*was: (\d+), now: (\d+).*prompt=(\d+), completion=(\d+), total=(\d+)",
        output,
    )
    if usage_hits:
        was, now, prompt, completion, total = usage_hits[-1]
        info['was'] = int(was)
        info['now'] = int(now)
        info['prompt'] = int(prompt)
        info['completion'] = int(completion)
        info['total'] = int(total)

    # Context-window fill level: "Context usage at N% (used/total tokens)".
    context_hits = re.findall(r"Context usage at (\d+)%.*\((\d+)/(\d+) tokens\)", output)
    if context_hits:
        pct, used, total_ctx = context_hits[-1]
        info['percentage'] = int(pct)
        info['used'] = int(used)
        info['total_context'] = int(total_ctx)

    # Thinning trigger — only the percentage of the last trigger is kept.
    thinning_hits = re.findall(
        r"Context thinning triggered.*usage: (\d+)%.*\((\d+)/(\d+) tokens\)",
        output,
    )
    if thinning_hits:
        info['thinning_triggered'] = True
        info['thinning_percentage'] = int(thinning_hits[-1][0])

    # Final authoritative usage logged when the Anthropic stream closes.
    final_hits = re.findall(
        r"Anthropic stream completed with final usage.*prompt: (\d+), completion: (\d+), total: (\d+)",
        output,
    )
    if final_hits:
        f_prompt, f_completion, f_total = final_hits[-1]
        info['final_prompt'] = int(f_prompt)
        info['final_completion'] = int(f_completion)
        info['final_total'] = int(f_total)

    return info
def main():
    """Drive three manual token-accounting checks against the Anthropic provider.

    Each test shells out to the g3 CLI (run_g3_command), scrapes the debug
    logs with extract_token_info, and prints warnings when the numbers look
    inconsistent. This is a diagnostic harness, not an automated pass/fail
    test — a human reads the printed summary.
    """
    print("Testing Anthropic Provider Token Accounting")
    print("="*50)

    # Build once up front so the per-test `cargo run` calls are fast.
    print("Building project...")
    build = subprocess.run(["cargo", "build", "--release"], capture_output=True, text=True)
    if build.returncode != 0:
        # Fail loudly instead of silently running all three tests against a
        # stale or missing binary (the original ignored the build result).
        print("❌ Build failed; aborting tests.")
        print(build.stderr)
        sys.exit(1)

    # Test 1: Simple prompt — completion tokens should be tiny.
    print("\nTest 1: Simple prompt")
    print("-"*30)
    output = run_g3_command("Say 'Hello, World!' and nothing else.")
    tokens = extract_token_info(output)
    if tokens:
        print(f"Token usage: {tokens.get('now', 'N/A')} tokens")
        print(f" Prompt tokens: {tokens.get('prompt', 'N/A')}")
        print(f" Completion tokens: {tokens.get('completion', 'N/A')}")
        print(f" Total from provider: {tokens.get('total', 'N/A')}")
        if 'final_total' in tokens:
            print(f" Final total from stream: {tokens['final_total']}")
            if tokens.get('now') != tokens['final_total']:
                print(f" ⚠️ WARNING: Mismatch between tracked ({tokens.get('now')}) and final ({tokens['final_total']})")
        # Check if the completion tokens are reasonable (should be small for "Hello, World!")
        if tokens.get('completion', 0) > 50:
            print(f" ⚠️ WARNING: Completion tokens seem high for a simple response: {tokens.get('completion')}")
    else:
        print(" ❌ No token information found in output")

    # Test 2: Longer response — completion tokens should be substantial.
    print("\nTest 2: Longer response")
    print("-"*30)
    output = run_g3_command("Write a 3-paragraph essay about the importance of accurate token counting in LLM applications.")
    tokens = extract_token_info(output)
    if tokens:
        print(f"Token usage: {tokens.get('now', 'N/A')} tokens")
        print(f" Prompt tokens: {tokens.get('prompt', 'N/A')}")
        print(f" Completion tokens: {tokens.get('completion', 'N/A')}")
        print(f" Total from provider: {tokens.get('total', 'N/A')}")
        if 'final_total' in tokens:
            print(f" Final total from stream: {tokens['final_total']}")
            if tokens.get('now') != tokens['final_total']:
                print(f" ⚠️ WARNING: Mismatch between tracked ({tokens.get('now')}) and final ({tokens['final_total']})")
        # Check if completion tokens are reasonable for a longer response
        if tokens.get('completion', 0) < 100:
            print(f" ⚠️ WARNING: Completion tokens seem low for a 3-paragraph essay: {tokens.get('completion')}")
    else:
        print(" ❌ No token information found in output")

    # Test 3: Two independent invocations — both should report counts.
    print("\nTest 3: Token accumulation (multiple messages)")
    print("-"*30)
    output1 = run_g3_command("Count from 1 to 5.")
    tokens1 = extract_token_info(output1)
    # Second message (this would need to be in a conversation, but for now we test separately)
    output2 = run_g3_command("Now count from 6 to 10.")
    tokens2 = extract_token_info(output2)
    if tokens1 and tokens2:
        print(f"First message: {tokens1.get('now', 'N/A')} tokens")
        print(f"Second message: {tokens2.get('now', 'N/A')} tokens")
        # These are separate invocations, so we only sanity-check that each
        # produced a nonzero count rather than asserting accumulation.
        if tokens1.get('now', 0) > 0 and tokens2.get('now', 0) > 0:
            print(" ✅ Both messages have token counts")
        else:
            print(" ❌ Missing token counts")

    print("\n" + "="*50)
    print("Test Summary:")
    print("Check the output above for any warnings or errors.")
    print("Key things to verify:")
    print(" 1. Token counts are being captured from the provider")
    print(" 2. Completion tokens are reasonable for the response length")
    print(" 3. No mismatch between tracked and final token counts")
    print(" 4. Context thinning triggers at appropriate thresholds")

if __name__ == "__main__":
    main()

46
test_token_accounting.sh Executable file
View File

@@ -0,0 +1,46 @@
#!/bin/bash
# Test script to verify token accounting with the Anthropic provider.
#
# Builds the project, sends one prompt through the CLI with debug logging
# enabled, and greps the captured output for the token-accounting log lines.

echo "Testing token accounting with Anthropic provider..."
echo "This test will send a few messages and check if token counts are properly tracked."
echo ""

# Enable the log targets that emit the token-accounting messages.
export RUST_LOG=g3_providers=debug,g3_core=info
export RUST_BACKTRACE=1

# Build the project first
echo "Building project..."
cargo build --release 2>&1 | grep -E "(Compiling|Finished)" || true
echo ""

echo "Running test with Anthropic provider..."
echo "Watch for these log messages:"
echo " - 'Captured initial usage from message_start'"
echo " - 'Updated usage from message_delta' (if available)"
echo " - 'Updated with final usage from message_stop' (if available)"
echo " - 'Anthropic stream completed with final usage'"
echo " - 'Updated token usage from provider'"
echo " - 'Context thinning triggered' (when reaching thresholds)"
echo ""

# Use mktemp instead of fixed /tmp names: fixed paths are predictable
# (symlink/collision hazard when the script runs on a shared machine).
PROMPT_FILE="$(mktemp /tmp/test_prompt.XXXXXX)"
LOG_FILE="$(mktemp /tmp/token_test.XXXXXX)"
# Remove the prompt file on exit; the log is kept for inspection.
trap 'rm -f "$PROMPT_FILE"' EXIT

# Create a simple test that will generate some tokens
cat << 'EOF' > "$PROMPT_FILE"
Please write a short paragraph about the importance of accurate token counting in LLM applications. Then list 3 reasons why token accounting might fail.
EOF

# Run the test
echo "Sending test prompt..."
cargo run --release -- --provider anthropic "$(cat "$PROMPT_FILE")" 2>&1 | tee "$LOG_FILE"

echo ""
echo "Analyzing results..."
echo ""

# Check for token accounting messages
echo "Token accounting messages found:"
grep -E "(usage from|token usage|Context thinning|Context usage)" "$LOG_FILE" | head -20

echo ""
echo "Test complete. Check $LOG_FILE for full output."