From b3d18d02ea2d2ef2a9bcb70e9c7f310c353986f6 Mon Sep 17 00:00:00 2001
From: Michael Neale
Date: Wed, 22 Oct 2025 15:09:47 +1100
Subject: [PATCH] prefer provider count

---
 crates/g3-core/src/lib.rs | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index b88bd4d..13308b8 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -319,24 +319,17 @@ impl ContextWindow {
 
     /// Update token usage from provider response
     pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
-        // The provider's usage represents the tokens used in the last API call
-        // We need to be smarter about how we update our running total
+        // Always use the provider's count as the authoritative value
+        // The provider knows best how many tokens were actually used
 
         let old_used = self.used_tokens;
 
-        // If the provider's total is greater than our current count, use it as the authoritative value
-        // This handles cases where our estimation was off
-        if usage.total_tokens > self.used_tokens {
-            self.used_tokens = usage.total_tokens;
-            self.cumulative_tokens += usage.total_tokens - old_used;
-        } else {
-            // Otherwise, add the tokens from this response
-            self.used_tokens += usage.completion_tokens; // Add only the new completion tokens
-            self.cumulative_tokens += usage.completion_tokens;
-        }
+        // Adopt the provider's total; saturating_sub keeps a total below the old count from underflowing cumulative_tokens
+        self.used_tokens = usage.total_tokens;
+        self.cumulative_tokens += usage.total_tokens.saturating_sub(old_used);
 
         info!(
-            "Updated token usage - was: {}, now: {} (provider reported: prompt={}, completion={}, total={})",
+            "Updated token usage from provider - was: {}, now: {} (prompt={}, completion={}, total={})",
             old_used, self.used_tokens, usage.prompt_tokens, usage.completion_tokens, usage.total_tokens
         );
 