Compare commits
7 Commits
micn/testi
...
micn/fix-a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a457d46446 | ||
|
|
7c2c433746 | ||
|
|
98f4220544 | ||
|
|
a4476a555c | ||
|
|
b3d18d02ea | ||
|
|
442ca76cd6 | ||
|
|
738c3ac53e |
75
Cargo.lock
generated
75
Cargo.lock
generated
@@ -318,9 +318,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.2.41"
|
version = "1.2.43"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7"
|
checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"find-msvc-tools",
|
"find-msvc-tools",
|
||||||
"jobserver",
|
"jobserver",
|
||||||
@@ -900,9 +900,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "deranged"
|
name = "deranged"
|
||||||
version = "0.5.4"
|
version = "0.5.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071"
|
checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"powerfmt",
|
"powerfmt",
|
||||||
]
|
]
|
||||||
@@ -990,7 +990,7 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
"option-ext",
|
"option-ext",
|
||||||
"redox_users 0.5.2",
|
"redox_users 0.5.2",
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1015,9 +1015,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "document-features"
|
name = "document-features"
|
||||||
version = "0.2.11"
|
version = "0.2.12"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d"
|
checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"litrs",
|
"litrs",
|
||||||
]
|
]
|
||||||
@@ -1062,7 +1062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1144,9 +1144,9 @@ checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "flate2"
|
name = "flate2"
|
||||||
version = "1.1.4"
|
version = "1.1.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9"
|
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"crc32fast",
|
"crc32fast",
|
||||||
"miniz_oxide",
|
"miniz_oxide",
|
||||||
@@ -1571,11 +1571,11 @@ checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "home"
|
name = "home"
|
||||||
version = "0.5.11"
|
version = "0.5.9"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
|
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -1922,9 +1922,12 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indoc"
|
name = "indoc"
|
||||||
version = "2.0.6"
|
version = "2.0.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
|
checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
|
||||||
|
dependencies = [
|
||||||
|
"rustversion",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "instability"
|
name = "instability"
|
||||||
@@ -1947,9 +1950,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "is_terminal_polyfill"
|
name = "is_terminal_polyfill"
|
||||||
version = "1.70.1"
|
version = "1.70.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itertools"
|
name = "itertools"
|
||||||
@@ -2133,9 +2136,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "litrs"
|
name = "litrs"
|
||||||
version = "0.4.2"
|
version = "1.0.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed"
|
checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "llama_cpp"
|
name = "llama_cpp"
|
||||||
@@ -2251,14 +2254,14 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mio"
|
name = "mio"
|
||||||
version = "1.0.4"
|
version = "1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
|
checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"log",
|
"log",
|
||||||
"wasi",
|
"wasi",
|
||||||
"windows-sys 0.59.0",
|
"windows-sys 0.61.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2330,7 +2333,7 @@ version = "0.50.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2406,9 +2409,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "once_cell_polyfill"
|
name = "once_cell_polyfill"
|
||||||
version = "1.70.1"
|
version = "1.70.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "openssl"
|
name = "openssl"
|
||||||
@@ -2627,9 +2630,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.101"
|
version = "1.0.103"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
|
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"unicode-ident",
|
"unicode-ident",
|
||||||
]
|
]
|
||||||
@@ -2901,7 +2904,7 @@ dependencies = [
|
|||||||
"errno",
|
"errno",
|
||||||
"libc",
|
"libc",
|
||||||
"linux-raw-sys 0.11.0",
|
"linux-raw-sys 0.11.0",
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3122,9 +3125,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "signal-hook-mio"
|
name = "signal-hook-mio"
|
||||||
version = "0.2.4"
|
version = "0.2.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd"
|
checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
"mio",
|
"mio",
|
||||||
@@ -3226,9 +3229,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "syn"
|
name = "syn"
|
||||||
version = "2.0.107"
|
version = "2.0.108"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b"
|
checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@@ -3289,7 +3292,7 @@ dependencies = [
|
|||||||
"getrandom 0.3.4",
|
"getrandom 0.3.4",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
"rustix 1.1.2",
|
"rustix 1.1.2",
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.52.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -3631,9 +3634,9 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-ident"
|
name = "unicode-ident"
|
||||||
version = "1.0.19"
|
version = "1.0.20"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
|
checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "unicode-segmentation"
|
name = "unicode-segmentation"
|
||||||
@@ -3932,7 +3935,7 @@ version = "0.1.11"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"windows-sys 0.61.2",
|
"windows-sys 0.48.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
@@ -1145,6 +1145,27 @@ async fn run_interactive_machine(
|
|||||||
println!("{}", summary);
|
println!("{}", summary);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
"/readme" => {
|
||||||
|
println!("COMMAND: readme");
|
||||||
|
match agent.reload_readme() {
|
||||||
|
Ok(true) => println!("RESULT: README content reloaded successfully"),
|
||||||
|
Ok(false) => println!("RESULT: No README was loaded at startup, cannot reload"),
|
||||||
|
Err(e) => println!("ERROR: {}", e),
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
"/stats" => {
|
||||||
|
println!("COMMAND: stats");
|
||||||
|
let stats = agent.get_stats();
|
||||||
|
// Emit stats as structured data (name: value pairs)
|
||||||
|
println!("{}", stats);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
"/help" => {
|
||||||
|
println!("COMMAND: help");
|
||||||
|
println!("AVAILABLE_COMMANDS: /compact /thinnify /readme /stats /help");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
_ => {
|
_ => {
|
||||||
println!("ERROR: Unknown command: {}", input);
|
println!("ERROR: Unknown command: {}", input);
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ impl UiWriter for MachineUiWriter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn print_agent_prompt(&self) {
|
fn print_agent_prompt(&self) {
|
||||||
|
println!("AGENT_RESPONSE:");
|
||||||
let _ = io::stdout().flush();
|
let _ = io::stdout().flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -325,10 +325,19 @@ impl ContextWindow {
|
|||||||
|
|
||||||
/// Update token usage from provider response
|
/// Update token usage from provider response
|
||||||
pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
|
pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
|
||||||
// Add the tokens from this response to our running total
|
// Always use the provider's count as the authoritative value
|
||||||
// The usage.total_tokens represents tokens used in this single API call
|
// The provider knows best how many tokens were actually used
|
||||||
self.used_tokens += usage.total_tokens;
|
|
||||||
self.cumulative_tokens += usage.total_tokens;
|
let old_used = self.used_tokens;
|
||||||
|
|
||||||
|
// Use the provider's total as the current used tokens
|
||||||
|
self.used_tokens = usage.total_tokens;
|
||||||
|
self.cumulative_tokens += usage.total_tokens - old_used;
|
||||||
|
|
||||||
|
info!(
|
||||||
|
"Updated token usage from provider - was: {}, now: {} (prompt={}, completion={}, total={})",
|
||||||
|
old_used, self.used_tokens, usage.prompt_tokens, usage.completion_tokens, usage.total_tokens
|
||||||
|
);
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"Added {} tokens from provider response (used: {}/{}, cumulative: {})",
|
"Added {} tokens from provider response (used: {}/{}, cumulative: {})",
|
||||||
@@ -445,8 +454,18 @@ Format this as a detailed but concise summary that can be used to resume the con
|
|||||||
if current_percentage >= 50 {
|
if current_percentage >= 50 {
|
||||||
let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10%
|
let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10%
|
||||||
if current_threshold > self.last_thinning_percentage && current_threshold <= 80 {
|
if current_threshold > self.last_thinning_percentage && current_threshold <= 80 {
|
||||||
|
info!(
|
||||||
|
"Context thinning triggered - usage: {}% ({}/{} tokens), threshold: {}%, last thinned at: {}%",
|
||||||
|
current_percentage,
|
||||||
|
self.used_tokens,
|
||||||
|
self.total_tokens,
|
||||||
|
current_threshold,
|
||||||
|
self.last_thinning_percentage
|
||||||
|
);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
debug!("Context usage at {}% ({}/{} tokens) - no thinning needed", current_percentage, self.used_tokens, self.total_tokens);
|
||||||
}
|
}
|
||||||
|
|
||||||
false
|
false
|
||||||
@@ -2675,7 +2694,12 @@ Template:
|
|||||||
|
|
||||||
// Display tool execution result with proper indentation
|
// Display tool execution result with proper indentation
|
||||||
if tool_call.tool != "final_output" {
|
if tool_call.tool != "final_output" {
|
||||||
let output_lines: Vec<&str> = tool_result.lines().collect();
|
// Skip displaying output for shell tool since it was already streamed
|
||||||
|
let should_display_output = tool_call.tool != "shell";
|
||||||
|
|
||||||
|
let output_lines: Vec<&str> = if should_display_output {
|
||||||
|
tool_result.lines().collect()
|
||||||
|
} else { vec![] };
|
||||||
|
|
||||||
// Check if UI wants full output (machine mode) or truncated (human mode)
|
// Check if UI wants full output (machine mode) or truncated (human mode)
|
||||||
let wants_full = self.ui_writer.wants_full_output();
|
let wants_full = self.ui_writer.wants_full_output();
|
||||||
@@ -2722,7 +2746,8 @@ Template:
|
|||||||
|
|
||||||
// Check if this was a final_output tool call
|
// Check if this was a final_output tool call
|
||||||
if tool_call.tool == "final_output" {
|
if tool_call.tool == "final_output" {
|
||||||
full_response.push_str(final_display_content);
|
// Don't add final_display_content here - it was already added before tool execution
|
||||||
|
// Adding it again would duplicate the output
|
||||||
if let Some(summary) = tool_call.args.get("summary") {
|
if let Some(summary) = tool_call.args.get("summary") {
|
||||||
if let Some(summary_str) = summary.as_str() {
|
if let Some(summary_str) = summary.as_str() {
|
||||||
full_response.push_str(&format!("\n\n{}", summary_str));
|
full_response.push_str(&format!("\n\n{}", summary_str));
|
||||||
@@ -3186,13 +3211,16 @@ Template:
|
|||||||
{
|
{
|
||||||
Ok(result) => {
|
Ok(result) => {
|
||||||
if result.success {
|
if result.success {
|
||||||
Ok(if result.stdout.is_empty() {
|
// Don't return stdout - it was already streamed to the UI
|
||||||
"✅ Command executed successfully".to_string()
|
// Returning it would cause duplicate output
|
||||||
} else {
|
Ok("✅ Command executed successfully".to_string())
|
||||||
result.stdout.trim().to_string()
|
|
||||||
})
|
|
||||||
} else {
|
} else {
|
||||||
Ok(format!("❌ Command failed: {}", result.stderr.trim()))
|
// For errors, return stderr since it wasn't streamed
|
||||||
|
Ok(if result.stderr.is_empty() {
|
||||||
|
"❌ Command failed".to_string()
|
||||||
|
} else {
|
||||||
|
format!("❌ Command failed: {}", result.stderr.trim())
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(e) => Ok(format!("❌ Execution error: {}", e)),
|
Err(e) => Ok(format!("❌ Execution error: {}", e)),
|
||||||
|
|||||||
@@ -276,6 +276,7 @@ impl AnthropicProvider {
|
|||||||
let mut partial_tool_json = String::new(); // Accumulate partial JSON for tool calls
|
let mut partial_tool_json = String::new(); // Accumulate partial JSON for tool calls
|
||||||
let mut accumulated_usage: Option<Usage> = None;
|
let mut accumulated_usage: Option<Usage> = None;
|
||||||
let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
|
let mut byte_buffer = Vec::new(); // Buffer for incomplete UTF-8 sequences
|
||||||
|
let mut actual_completion_tokens: u32 = 0; // Track actual completion tokens
|
||||||
|
|
||||||
while let Some(chunk_result) = stream.next().await {
|
while let Some(chunk_result) = stream.next().await {
|
||||||
match chunk_result {
|
match chunk_result {
|
||||||
@@ -323,7 +324,12 @@ impl AnthropicProvider {
|
|||||||
let final_chunk = CompletionChunk {
|
let final_chunk = CompletionChunk {
|
||||||
content: String::new(),
|
content: String::new(),
|
||||||
finished: true,
|
finished: true,
|
||||||
usage: accumulated_usage.clone(),
|
usage: accumulated_usage.as_ref().map(|u| Usage {
|
||||||
|
prompt_tokens: u.prompt_tokens,
|
||||||
|
// Use actual completion tokens if we tracked them, otherwise use the estimate
|
||||||
|
completion_tokens: if actual_completion_tokens > 0 { actual_completion_tokens } else { u.completion_tokens },
|
||||||
|
total_tokens: u.prompt_tokens + if actual_completion_tokens > 0 { actual_completion_tokens } else { u.completion_tokens },
|
||||||
|
}),
|
||||||
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
|
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
|
||||||
};
|
};
|
||||||
if tx.send(Ok(final_chunk)).await.is_err() {
|
if tx.send(Ok(final_chunk)).await.is_err() {
|
||||||
@@ -337,6 +343,7 @@ impl AnthropicProvider {
|
|||||||
match serde_json::from_str::<AnthropicStreamEvent>(data) {
|
match serde_json::from_str::<AnthropicStreamEvent>(data) {
|
||||||
Ok(event) => {
|
Ok(event) => {
|
||||||
debug!("Parsed event type: {}, event: {:?}", event.event_type, event);
|
debug!("Parsed event type: {}, event: {:?}", event.event_type, event);
|
||||||
|
|
||||||
match event.event_type.as_str() {
|
match event.event_type.as_str() {
|
||||||
"message_start" => {
|
"message_start" => {
|
||||||
// Extract usage data from message_start event
|
// Extract usage data from message_start event
|
||||||
@@ -347,7 +354,10 @@ impl AnthropicProvider {
|
|||||||
completion_tokens: usage.output_tokens,
|
completion_tokens: usage.output_tokens,
|
||||||
total_tokens: usage.input_tokens + usage.output_tokens,
|
total_tokens: usage.input_tokens + usage.output_tokens,
|
||||||
});
|
});
|
||||||
debug!("Captured usage from message_start: {:?}", accumulated_usage);
|
debug!("Captured initial usage from message_start - prompt: {}, completion: {} (estimated), total: {}",
|
||||||
|
usage.input_tokens,
|
||||||
|
usage.output_tokens,
|
||||||
|
usage.input_tokens + usage.output_tokens);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -396,6 +406,9 @@ impl AnthropicProvider {
|
|||||||
"content_block_delta" => {
|
"content_block_delta" => {
|
||||||
if let Some(delta) = event.delta {
|
if let Some(delta) = event.delta {
|
||||||
if let Some(text) = delta.text {
|
if let Some(text) = delta.text {
|
||||||
|
// Track actual completion tokens (rough estimate: 4 chars per token)
|
||||||
|
actual_completion_tokens += (text.len() as f32 / 4.0).ceil() as u32;
|
||||||
|
|
||||||
debug!("Sending text chunk of length {}: '{}'", text.len(), text);
|
debug!("Sending text chunk of length {}: '{}'", text.len(), text);
|
||||||
let chunk = CompletionChunk {
|
let chunk = CompletionChunk {
|
||||||
content: text,
|
content: text,
|
||||||
@@ -416,6 +429,19 @@ impl AnthropicProvider {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
"message_delta" => {
|
||||||
|
// Check if message_delta contains updated usage data
|
||||||
|
if let Some(delta) = event.delta {
|
||||||
|
if let Some(usage) = delta.usage {
|
||||||
|
accumulated_usage = Some(Usage {
|
||||||
|
prompt_tokens: usage.input_tokens,
|
||||||
|
completion_tokens: usage.output_tokens,
|
||||||
|
total_tokens: usage.input_tokens + usage.output_tokens,
|
||||||
|
});
|
||||||
|
debug!("Updated usage from message_delta - prompt: {}, completion: {}, total: {}", usage.input_tokens, usage.output_tokens, usage.input_tokens + usage.output_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
"content_block_stop" => {
|
"content_block_stop" => {
|
||||||
// Tool call block is complete - now parse the accumulated JSON
|
// Tool call block is complete - now parse the accumulated JSON
|
||||||
if !current_tool_calls.is_empty() && !partial_tool_json.is_empty() {
|
if !current_tool_calls.is_empty() && !partial_tool_json.is_empty() {
|
||||||
@@ -450,11 +476,44 @@ impl AnthropicProvider {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
"message_stop" => {
|
"message_stop" => {
|
||||||
debug!("Received message stop event");
|
debug!("Received message_stop event: {:?}", event);
|
||||||
|
|
||||||
|
// Check if message_stop contains final usage data
|
||||||
|
if let Some(message) = event.message {
|
||||||
|
if let Some(usage) = message.usage {
|
||||||
|
// Update with final accurate usage data from message_stop
|
||||||
|
// This should have the actual completion token count
|
||||||
|
accumulated_usage = Some(Usage {
|
||||||
|
prompt_tokens: usage.input_tokens,
|
||||||
|
// Prefer the actual output_tokens from message_stop if available
|
||||||
|
// Otherwise use our tracked count, and as last resort the initial estimate
|
||||||
|
completion_tokens: if usage.output_tokens > 0 {
|
||||||
|
usage.output_tokens
|
||||||
|
} else if actual_completion_tokens > 0 {
|
||||||
|
actual_completion_tokens
|
||||||
|
} else { usage.output_tokens },
|
||||||
|
total_tokens: usage.input_tokens + usage.output_tokens,
|
||||||
|
});
|
||||||
|
debug!("Updated with final usage from message_stop - prompt: {}, completion: {}, total: {}",
|
||||||
|
usage.input_tokens,
|
||||||
|
usage.output_tokens,
|
||||||
|
usage.input_tokens + usage.output_tokens);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let final_chunk = CompletionChunk {
|
let final_chunk = CompletionChunk {
|
||||||
content: String::new(),
|
content: String::new(),
|
||||||
finished: true,
|
finished: true,
|
||||||
usage: accumulated_usage.clone(),
|
usage: accumulated_usage.as_ref().map(|u| Usage {
|
||||||
|
prompt_tokens: u.prompt_tokens,
|
||||||
|
// Use actual completion tokens if we tracked them and they're higher
|
||||||
|
completion_tokens: if actual_completion_tokens > u.completion_tokens {
|
||||||
|
actual_completion_tokens
|
||||||
|
} else {
|
||||||
|
u.completion_tokens
|
||||||
|
},
|
||||||
|
total_tokens: u.prompt_tokens + u32::max(actual_completion_tokens, u.completion_tokens),
|
||||||
|
}),
|
||||||
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
|
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls.clone()) },
|
||||||
};
|
};
|
||||||
if tx.send(Ok(final_chunk)).await.is_err() {
|
if tx.send(Ok(final_chunk)).await.is_err() {
|
||||||
@@ -496,10 +555,27 @@ impl AnthropicProvider {
|
|||||||
let final_chunk = CompletionChunk {
|
let final_chunk = CompletionChunk {
|
||||||
content: String::new(),
|
content: String::new(),
|
||||||
finished: true,
|
finished: true,
|
||||||
usage: accumulated_usage.clone(),
|
usage: accumulated_usage.as_ref().map(|u| Usage {
|
||||||
|
prompt_tokens: u.prompt_tokens,
|
||||||
|
completion_tokens: if actual_completion_tokens > u.completion_tokens {
|
||||||
|
actual_completion_tokens
|
||||||
|
} else {
|
||||||
|
u.completion_tokens
|
||||||
|
},
|
||||||
|
total_tokens: u.prompt_tokens + u32::max(actual_completion_tokens, u.completion_tokens),
|
||||||
|
}),
|
||||||
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls) },
|
tool_calls: if current_tool_calls.is_empty() { None } else { Some(current_tool_calls) },
|
||||||
};
|
};
|
||||||
let _ = tx.send(Ok(final_chunk)).await;
|
let _ = tx.send(Ok(final_chunk)).await;
|
||||||
|
|
||||||
|
// Log final usage for debugging
|
||||||
|
if let Some(ref usage) = accumulated_usage {
|
||||||
|
info!("Anthropic stream completed with final usage - prompt: {}, completion: {}, total: {}",
|
||||||
|
usage.prompt_tokens, usage.completion_tokens, usage.total_tokens);
|
||||||
|
} else {
|
||||||
|
warn!("Anthropic stream completed without usage data - token accounting will fall back to estimation");
|
||||||
|
}
|
||||||
|
|
||||||
accumulated_usage
|
accumulated_usage
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -737,6 +813,8 @@ struct AnthropicStreamMessage {
|
|||||||
struct AnthropicDelta {
|
struct AnthropicDelta {
|
||||||
text: Option<String>,
|
text: Option<String>,
|
||||||
partial_json: Option<String>,
|
partial_json: Option<String>,
|
||||||
|
#[serde(default)]
|
||||||
|
usage: Option<AnthropicUsage>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
|
|||||||
164
test_token_accounting.py
Normal file
164
test_token_accounting.py
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Test script to verify token accounting is working correctly with the Anthropic provider.
|
||||||
|
This script will send multiple messages and verify that token counts accumulate properly.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
|
def run_g3_command(prompt, provider="anthropic"):
|
||||||
|
"""Run a g3 command and capture the output."""
|
||||||
|
cmd = [
|
||||||
|
"cargo", "run", "--release", "--",
|
||||||
|
"--provider", provider,
|
||||||
|
prompt
|
||||||
|
]
|
||||||
|
|
||||||
|
env = {
|
||||||
|
"RUST_LOG": "g3_providers=debug,g3_core=info",
|
||||||
|
"RUST_BACKTRACE": "1"
|
||||||
|
}
|
||||||
|
|
||||||
|
result = subprocess.run(
|
||||||
|
cmd,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
env={**subprocess.os.environ, **env}
|
||||||
|
)
|
||||||
|
|
||||||
|
return result.stdout + result.stderr
|
||||||
|
|
||||||
|
def extract_token_info(output):
|
||||||
|
"""Extract token usage information from the output."""
|
||||||
|
token_info = {}
|
||||||
|
|
||||||
|
# Look for token usage updates
|
||||||
|
usage_pattern = r"Updated token usage.*was: (\d+), now: (\d+).*prompt=(\d+), completion=(\d+), total=(\d+)"
|
||||||
|
matches = re.findall(usage_pattern, output)
|
||||||
|
if matches:
|
||||||
|
last_match = matches[-1]
|
||||||
|
token_info['was'] = int(last_match[0])
|
||||||
|
token_info['now'] = int(last_match[1])
|
||||||
|
token_info['prompt'] = int(last_match[2])
|
||||||
|
token_info['completion'] = int(last_match[3])
|
||||||
|
token_info['total'] = int(last_match[4])
|
||||||
|
|
||||||
|
# Look for context percentage
|
||||||
|
context_pattern = r"Context usage at (\d+)%.*\((\d+)/(\d+) tokens\)"
|
||||||
|
matches = re.findall(context_pattern, output)
|
||||||
|
if matches:
|
||||||
|
last_match = matches[-1]
|
||||||
|
token_info['percentage'] = int(last_match[0])
|
||||||
|
token_info['used'] = int(last_match[1])
|
||||||
|
token_info['total_context'] = int(last_match[2])
|
||||||
|
|
||||||
|
# Look for thinning triggers
|
||||||
|
thinning_pattern = r"Context thinning triggered.*usage: (\d+)%.*\((\d+)/(\d+) tokens\)"
|
||||||
|
matches = re.findall(thinning_pattern, output)
|
||||||
|
if matches:
|
||||||
|
token_info['thinning_triggered'] = True
|
||||||
|
token_info['thinning_percentage'] = int(matches[-1][0])
|
||||||
|
|
||||||
|
# Look for final usage from Anthropic
|
||||||
|
final_usage_pattern = r"Anthropic stream completed with final usage.*prompt: (\d+), completion: (\d+), total: (\d+)"
|
||||||
|
matches = re.findall(final_usage_pattern, output)
|
||||||
|
if matches:
|
||||||
|
last_match = matches[-1]
|
||||||
|
token_info['final_prompt'] = int(last_match[0])
|
||||||
|
token_info['final_completion'] = int(last_match[1])
|
||||||
|
token_info['final_total'] = int(last_match[2])
|
||||||
|
|
||||||
|
return token_info
|
||||||
|
|
||||||
|
def _report_token_details(tokens):
    """Print the token counts from one test run, warning on tracker mismatch.

    Args:
        tokens: dict from extract_token_info(); any key may be absent.
    """
    print(f"Token usage: {tokens.get('now', 'N/A')} tokens")
    print(f"  Prompt tokens: {tokens.get('prompt', 'N/A')}")
    print(f"  Completion tokens: {tokens.get('completion', 'N/A')}")
    print(f"  Total from provider: {tokens.get('total', 'N/A')}")
    if 'final_total' in tokens:
        print(f"  Final total from stream: {tokens['final_total']}")
        # The running tracker and the provider's final report should agree.
        if tokens.get('now') != tokens['final_total']:
            print(f"  ⚠️  WARNING: Mismatch between tracked ({tokens.get('now')}) and final ({tokens['final_total']})")


def main():
    """Run end-to-end token-accounting checks against the g3 binary."""
    print("Testing Anthropic Provider Token Accounting")
    print("=" * 50)

    # Build the project first. BUGFIX: the build result was previously
    # ignored, so a broken build silently ran the tests against a stale
    # (or missing) binary. Fail fast instead.
    print("Building project...")
    build = subprocess.run(
        ["cargo", "build", "--release"], capture_output=True, text=True
    )
    if build.returncode != 0:
        print("❌ Build failed:")
        print(build.stderr)
        return

    # Test 1: Simple prompt — completion token count should be tiny.
    print("\nTest 1: Simple prompt")
    print("-" * 30)
    output = run_g3_command("Say 'Hello, World!' and nothing else.")
    tokens = extract_token_info(output)
    if tokens:
        _report_token_details(tokens)
        # A bare "Hello, World!" reply should not cost many completion tokens.
        if tokens.get('completion', 0) > 50:
            print(f"  ⚠️  WARNING: Completion tokens seem high for a simple response: {tokens.get('completion')}")
    else:
        print("  ❌ No token information found in output")

    # Test 2: Longer response — completion token count should be substantial.
    print("\nTest 2: Longer response")
    print("-" * 30)
    output = run_g3_command("Write a 3-paragraph essay about the importance of accurate token counting in LLM applications.")
    tokens = extract_token_info(output)
    if tokens:
        _report_token_details(tokens)
        if tokens.get('completion', 0) < 100:
            print(f"  ⚠️  WARNING: Completion tokens seem low for a 3-paragraph essay: {tokens.get('completion')}")
    else:
        print("  ❌ No token information found in output")

    # Test 3: Check for proper accumulation across messages.
    print("\nTest 3: Token accumulation (multiple messages)")
    print("-" * 30)
    output1 = run_g3_command("Count from 1 to 5.")
    tokens1 = extract_token_info(output1)
    # NOTE: these are separate invocations, not one conversation, so we can
    # only verify that each run produced a count — not true accumulation.
    output2 = run_g3_command("Now count from 6 to 10.")
    tokens2 = extract_token_info(output2)
    if tokens1 and tokens2:
        print(f"First message: {tokens1.get('now', 'N/A')} tokens")
        print(f"Second message: {tokens2.get('now', 'N/A')} tokens")
        if tokens1.get('now', 0) > 0 and tokens2.get('now', 0) > 0:
            print("  ✅ Both messages have token counts")
        else:
            print("  ❌ Missing token counts")

    print("\n" + "=" * 50)
    print("Test Summary:")
    print("Check the output above for any warnings or errors.")
    print("Key things to verify:")
    print("  1. Token counts are being captured from the provider")
    print("  2. Completion tokens are reasonable for the response length")
    print("  3. No mismatch between tracked and final token counts")
    print("  4. Context thinning triggers at appropriate thresholds")


if __name__ == "__main__":
    main()
|
||||||
46
test_token_accounting.sh
Executable file
46
test_token_accounting.sh
Executable file
@@ -0,0 +1,46 @@
|
|||||||
|
#!/bin/bash
#
# Test script to verify token accounting with the Anthropic provider.
# Requires: cargo on PATH; an Anthropic API key in the environment for a
# real end-to-end run.
set -euo pipefail

echo "Testing token accounting with Anthropic provider..."
echo "This test will send a few messages and check if token counts are properly tracked."
echo ""

# Verbose provider logging so the token-accounting messages are emitted.
export RUST_LOG=g3_providers=debug,g3_core=info
export RUST_BACKTRACE=1

# Unpredictable temp paths instead of fixed names in /tmp (avoids clobbering
# another user's files and symlink attacks); prompt file cleaned up on exit,
# log file is kept for post-mortem inspection.
prompt_file=$(mktemp)
log_file=$(mktemp)
cleanup() { rm -f -- "$prompt_file"; }
trap cleanup EXIT

# Build the project first — fail fast if the build breaks, but still show
# only the interesting progress lines on success.
echo "Building project..."
if ! build_output=$(cargo build --release 2>&1); then
  printf '%s\n' "$build_output" >&2
  echo "ERROR: cargo build failed" >&2
  exit 1
fi
printf '%s\n' "$build_output" | grep -E "(Compiling|Finished)" || true

echo ""
echo "Running test with Anthropic provider..."
echo "Watch for these log messages:"
echo "  - 'Captured initial usage from message_start'"
echo "  - 'Updated usage from message_delta' (if available)"
echo "  - 'Updated with final usage from message_stop' (if available)"
echo "  - 'Anthropic stream completed with final usage'"
echo "  - 'Updated token usage from provider'"
echo "  - 'Context thinning triggered' (when reaching thresholds)"
echo ""

# Create a simple test prompt that will generate some tokens.
cat << 'EOF' > "$prompt_file"
Please write a short paragraph about the importance of accurate token counting in LLM applications. Then list 3 reasons why token accounting might fail.
EOF

# Run the test. tee preserves the live output while capturing it for
# analysis; don't abort on a failed run — a partial log is still useful.
echo "Sending test prompt..."
cargo run --release -- --provider anthropic "$(cat "$prompt_file")" 2>&1 | tee "$log_file" || true

echo ""
echo "Analyzing results..."
echo ""

# Check for token-accounting messages; grep exiting non-zero just means
# "no matches", which must not abort the script under set -e/pipefail.
echo "Token accounting messages found:"
grep -E "(usage from|token usage|Context thinning|Context usage)" "$log_file" | head -20 || true

echo ""
echo "Test complete. Check $log_file for full output."
|
||||||
Reference in New Issue
Block a user