context window counting bug
This commit is contained in:
@@ -271,15 +271,14 @@ impl ContextWindow {
|
|||||||
|
|
||||||
/// Update token usage from provider response
|
/// Update token usage from provider response
|
||||||
pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
|
pub fn update_usage_from_response(&mut self, usage: &g3_providers::Usage) {
|
||||||
// Update with actual token usage from the provider
|
// Add the tokens from this response to our running total
|
||||||
// This replaces our estimate with the actual count
|
// The usage.total_tokens represents tokens used in this single API call
|
||||||
let old_used = self.used_tokens;
|
self.used_tokens += usage.total_tokens;
|
||||||
self.used_tokens = usage.total_tokens;
|
self.cumulative_tokens += usage.total_tokens;
|
||||||
self.cumulative_tokens = self.cumulative_tokens - old_used + usage.total_tokens;
|
|
||||||
|
|
||||||
debug!(
|
debug!(
|
||||||
"Updated token usage from provider: {} -> {} (cumulative: {})",
|
"Added {} tokens from provider response (used: {}/{}, cumulative: {})",
|
||||||
old_used, self.used_tokens, self.cumulative_tokens
|
usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,154 +1,94 @@
|
|||||||
use g3_core::ContextWindow;
|
use g3_core::ContextWindow;
|
||||||
use g3_providers::{Message, MessageRole, Usage};
|
use g3_providers::Usage;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_context_window_with_actual_tokens() {
|
fn test_token_accumulation() {
|
||||||
let mut context = ContextWindow::new(10000);
|
let mut window = ContextWindow::new(10000);
|
||||||
|
|
||||||
// Add a message with known token count
|
// First API call: 100 prompt + 50 completion = 150 total
|
||||||
let message = Message {
|
let usage1 = Usage {
|
||||||
role: MessageRole::User,
|
prompt_tokens: 100,
|
||||||
content: "Hello, how are you today?".to_string(),
|
completion_tokens: 50,
|
||||||
|
total_tokens: 150,
|
||||||
};
|
};
|
||||||
|
window.update_usage_from_response(&usage1);
|
||||||
// Add with actual token count (let's say this is 7 tokens)
|
assert_eq!(window.used_tokens, 150, "First call should have 150 tokens");
|
||||||
context.add_message_with_tokens(message.clone(), Some(7));
|
assert_eq!(window.cumulative_tokens, 150, "Cumulative should be 150");
|
||||||
|
|
||||||
assert_eq!(context.used_tokens, 7);
|
// Second API call: 200 prompt + 75 completion = 275 total
|
||||||
assert_eq!(context.cumulative_tokens, 7);
|
let usage2 = Usage {
|
||||||
|
prompt_tokens: 200,
|
||||||
// Add another message with estimation (no token count provided)
|
completion_tokens: 75,
|
||||||
let message2 = Message {
|
total_tokens: 275,
|
||||||
role: MessageRole::Assistant,
|
|
||||||
content: "I'm doing well, thank you for asking!".to_string(),
|
|
||||||
};
|
};
|
||||||
|
window.update_usage_from_response(&usage2);
|
||||||
|
assert_eq!(window.used_tokens, 425, "Second call should accumulate to 425 tokens");
|
||||||
|
assert_eq!(window.cumulative_tokens, 425, "Cumulative should be 425");
|
||||||
|
|
||||||
|
// Third API call with SMALLER token count: 50 prompt + 25 completion = 75 total
|
||||||
|
let usage3 = Usage {
|
||||||
|
prompt_tokens: 50,
|
||||||
|
completion_tokens: 25,
|
||||||
|
total_tokens: 75,
|
||||||
|
};
|
||||||
|
window.update_usage_from_response(&usage3);
|
||||||
|
assert_eq!(window.used_tokens, 500, "Third call should accumulate to 500 tokens");
|
||||||
|
assert_eq!(window.cumulative_tokens, 500, "Cumulative should be 500");
|
||||||
|
|
||||||
context.add_message_with_tokens(message2, None);
|
// Verify tokens never decrease
|
||||||
|
assert!(window.used_tokens >= 425, "Token count should never decrease!");
|
||||||
// Should have added estimated tokens (roughly 10-11 tokens for this text)
|
|
||||||
assert!(context.used_tokens > 7);
|
|
||||||
assert_eq!(context.cumulative_tokens, context.used_tokens);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_context_window_update_from_response() {
|
fn test_add_streaming_tokens() {
|
||||||
let mut context = ContextWindow::new(10000);
|
let mut window = ContextWindow::new(10000);
|
||||||
|
|
||||||
// Add initial messages with estimation
|
// Add some streaming tokens
|
||||||
let message1 = Message {
|
window.add_streaming_tokens(100);
|
||||||
role: MessageRole::User,
|
assert_eq!(window.used_tokens, 100);
|
||||||
content: "What is the capital of France?".to_string(),
|
assert_eq!(window.cumulative_tokens, 100);
|
||||||
};
|
|
||||||
context.add_message(message1);
|
|
||||||
|
|
||||||
let initial_estimate = context.used_tokens;
|
// Add more
|
||||||
let initial_cumulative = context.cumulative_tokens;
|
window.add_streaming_tokens(50);
|
||||||
|
assert_eq!(window.used_tokens, 150);
|
||||||
|
assert_eq!(window.cumulative_tokens, 150);
|
||||||
|
|
||||||
// Now update with actual usage from provider
|
// Now update from provider response
|
||||||
let usage = Usage {
|
let usage = Usage {
|
||||||
prompt_tokens: 8,
|
prompt_tokens: 80,
|
||||||
completion_tokens: 15,
|
completion_tokens: 40,
|
||||||
total_tokens: 23,
|
total_tokens: 120,
|
||||||
};
|
};
|
||||||
|
window.update_usage_from_response(&usage);
|
||||||
|
|
||||||
context.update_usage_from_response(&usage);
|
// Should ADD to existing, not replace
|
||||||
|
assert_eq!(window.used_tokens, 270, "Should add 120 to existing 150");
|
||||||
// Should have replaced estimate with actual
|
assert_eq!(window.cumulative_tokens, 270);
|
||||||
assert_eq!(context.used_tokens, 23);
|
|
||||||
// Cumulative should be adjusted
|
|
||||||
assert_eq!(context.cumulative_tokens, context.cumulative_tokens);
|
|
||||||
assert!(context.cumulative_tokens >= 23);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_streaming_token_accumulation() {
|
fn test_percentage_calculation() {
|
||||||
let mut context = ContextWindow::new(10000);
|
let mut window = ContextWindow::new(1000);
|
||||||
|
|
||||||
// Simulate streaming tokens being added
|
// Add tokens via provider response
|
||||||
context.add_streaming_tokens(5);
|
let usage = Usage {
|
||||||
assert_eq!(context.used_tokens, 5);
|
prompt_tokens: 150,
|
||||||
assert_eq!(context.cumulative_tokens, 5);
|
completion_tokens: 100,
|
||||||
|
total_tokens: 250,
|
||||||
|
};
|
||||||
|
window.update_usage_from_response(&usage);
|
||||||
|
|
||||||
context.add_streaming_tokens(3);
|
assert_eq!(window.percentage_used(), 25.0);
|
||||||
assert_eq!(context.used_tokens, 8);
|
assert_eq!(window.remaining_tokens(), 750);
|
||||||
assert_eq!(context.cumulative_tokens, 8);
|
|
||||||
|
|
||||||
context.add_streaming_tokens(7);
|
// Add more tokens
|
||||||
assert_eq!(context.used_tokens, 15);
|
let usage2 = Usage {
|
||||||
assert_eq!(context.cumulative_tokens, 15);
|
prompt_tokens: 300,
|
||||||
|
completion_tokens: 200,
|
||||||
|
total_tokens: 500,
|
||||||
|
};
|
||||||
|
window.update_usage_from_response(&usage2);
|
||||||
|
|
||||||
|
assert_eq!(window.percentage_used(), 75.0);
|
||||||
|
assert_eq!(window.remaining_tokens(), 250);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_context_window_percentage_with_actual_tokens() {
|
|
||||||
let mut context = ContextWindow::new(1000);
|
|
||||||
|
|
||||||
// Add messages with known token counts
|
|
||||||
let message1 = Message {
|
|
||||||
role: MessageRole::User,
|
|
||||||
content: "First message".to_string(),
|
|
||||||
};
|
|
||||||
context.add_message_with_tokens(message1, Some(100));
|
|
||||||
|
|
||||||
assert_eq!(context.percentage_used(), 10.0);
|
|
||||||
|
|
||||||
let message2 = Message {
|
|
||||||
role: MessageRole::Assistant,
|
|
||||||
content: "Second message".to_string(),
|
|
||||||
};
|
|
||||||
context.add_message_with_tokens(message2, Some(400));
|
|
||||||
|
|
||||||
assert_eq!(context.percentage_used(), 50.0);
|
|
||||||
|
|
||||||
// Test should_summarize threshold (80%)
|
|
||||||
let message3 = Message {
|
|
||||||
role: MessageRole::User,
|
|
||||||
content: "Third message".to_string(),
|
|
||||||
};
|
|
||||||
context.add_message_with_tokens(message3, Some(300));
|
|
||||||
|
|
||||||
assert_eq!(context.percentage_used(), 80.0);
|
|
||||||
assert!(context.should_summarize());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_fallback_to_estimation() {
|
|
||||||
let mut context = ContextWindow::new(10000);
|
|
||||||
|
|
||||||
// Add message without token count (should use estimation)
|
|
||||||
let message = Message {
|
|
||||||
role: MessageRole::User,
|
|
||||||
content: "This is a test message without token count".to_string(),
|
|
||||||
};
|
|
||||||
|
|
||||||
context.add_message_with_tokens(message.clone(), None);
|
|
||||||
|
|
||||||
// Should have estimated tokens (roughly 11-12 tokens for this text)
|
|
||||||
assert!(context.used_tokens > 0);
|
|
||||||
assert!(context.used_tokens < 20); // Reasonable upper bound
|
|
||||||
|
|
||||||
// Verify estimation is reasonable
|
|
||||||
let text_len = message.content.len();
|
|
||||||
let estimated = context.used_tokens;
|
|
||||||
let ratio = text_len as f32 / estimated as f32;
|
|
||||||
|
|
||||||
// Should be roughly 3-4 characters per token
|
|
||||||
assert!(ratio > 2.0 && ratio < 6.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_empty_message_handling() {
|
|
||||||
let mut context = ContextWindow::new(10000);
|
|
||||||
|
|
||||||
// Empty messages should be skipped
|
|
||||||
let empty_message = Message {
|
|
||||||
role: MessageRole::User,
|
|
||||||
content: " ".to_string(), // Only whitespace
|
|
||||||
};
|
|
||||||
|
|
||||||
context.add_message_with_tokens(empty_message, Some(10));
|
|
||||||
|
|
||||||
// Should not have added anything
|
|
||||||
assert_eq!(context.used_tokens, 0);
|
|
||||||
assert_eq!(context.cumulative_tokens, 0);
|
|
||||||
assert_eq!(context.conversation_history.len(), 0);
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user