prefer provider count

2025-10-22 15:09:47 +11:00
parent 442ca76cd6
commit b3d18d02ea
1 changed file with 6 additions and 13 deletions
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -319,24 +319,17 @@ impl ContextWindow {
    /// Update token usage from provider response
    pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
-        // The provider's usage represents the tokens used in the last API call
+        // Always use the provider's count as the authoritative value
-        // We need to be smarter about how we update our running total
+        // The provider knows best how many tokens were actually used
        let old_used = self.used_tokens;
-        // If the provider's total is greater than our current count, use it as the authoritative value
+        // Use the provider's total as the current used tokens
-        // This handles cases where our estimation was off
+        self.used_tokens = usage.total_tokens;
-        if usage.total_tokens > self.used_tokens {
+        self.cumulative_tokens += usage.total_tokens - old_used;
-            self.used_tokens = usage.total_tokens;
-            self.cumulative_tokens += usage.total_tokens - old_used;
-        } else {
-            // Otherwise, add the tokens from this response
-            self.used_tokens += usage.completion_tokens; // Add only the new completion tokens
-            self.cumulative_tokens += usage.completion_tokens;
-        }
        info!(
-            "Updated token usage - was: {}, now: {} (provider reported: prompt={}, completion={}, total={})",
+            "Updated token usage from provider - was: {}, now: {} (prompt={}, completion={}, total={})",
            old_used, self.used_tokens, usage.prompt_tokens, usage.completion_tokens, usage.total_tokens
        );