Files
g3/crates/g3-core/tests/test_token_counting.rs
Dhanji R. Prasanna 0410efd41b Add 1% safety buffer to context window to prevent API token limit errors
Our token estimation heuristic (chars/3 * 1.1 for code, chars/4 * 1.1 for text)
slightly undercounts over long sessions with hundreds of tool calls. This
accumulated drift of ~89 tokens caused Anthropic API 400 errors:
  'prompt is too long: 200089 tokens > 200000 maximum'

Fix: ContextWindow::new() now applies a 1% buffer, setting total_tokens to 99%
of the provider-reported limit. For a 200k window this gives 198k, providing a
2000-token safety margin that absorbs estimation drift.

All percentage calculations, compaction thresholds, and thinning triggers
operate against the buffered limit, so compaction fires earlier and we never
send a request the API will reject.
2026-02-13 15:46:53 +11:00

178 lines
6.6 KiB
Rust

use g3_core::ContextWindow;
use g3_providers::{Message, MessageRole, Usage};
/// Test that used_tokens is tracked via add_message, not update_usage_from_response.
/// This is critical for the 80% compaction threshold to work correctly.
#[test]
fn test_used_tokens_tracked_via_messages() {
let mut window = ContextWindow::new(10000);
// Add a user message - this should update used_tokens
let user_msg = Message::new(MessageRole::User, "Hello, how are you?".to_string());
window.add_message(user_msg);
// used_tokens should be non-zero after adding a message
assert!(window.used_tokens > 0, "used_tokens should increase after add_message");
let tokens_after_user_msg = window.used_tokens;
// Add an assistant message
let assistant_msg = Message::new(MessageRole::Assistant, "I'm doing well, thank you!".to_string());
window.add_message(assistant_msg);
// used_tokens should increase further
assert!(window.used_tokens > tokens_after_user_msg, "used_tokens should increase after adding assistant message");
}
/// Test that update_usage_from_response only updates cumulative_tokens, not used_tokens.
/// This prevents double-counting which was causing the 80% threshold to be reached at 200%+.
#[test]
fn test_update_usage_only_affects_cumulative() {
let mut window = ContextWindow::new(10000);
// Initial state
assert_eq!(window.used_tokens, 0);
assert_eq!(window.cumulative_tokens, 0);
// Simulate API response with usage data
let usage = Usage {
prompt_tokens: 100,
completion_tokens: 50,
total_tokens: 150,
cache_creation_tokens: 0,
cache_read_tokens: 0,
};
window.update_usage_from_response(&usage);
// used_tokens should NOT change - it's tracked via add_message
assert_eq!(window.used_tokens, 0, "used_tokens should not be updated by update_usage_from_response");
// cumulative_tokens SHOULD be updated for API usage tracking
assert_eq!(window.cumulative_tokens, 150, "cumulative_tokens should track total API usage");
// Another API call
let usage2 = Usage {
prompt_tokens: 200,
completion_tokens: 75,
total_tokens: 275,
cache_creation_tokens: 0,
cache_read_tokens: 0,
};
window.update_usage_from_response(&usage2);
// used_tokens still unchanged
assert_eq!(window.used_tokens, 0, "used_tokens should remain unchanged");
// cumulative_tokens accumulates
assert_eq!(window.cumulative_tokens, 425, "cumulative_tokens should accumulate");
}
/// Test that add_streaming_tokens only updates cumulative_tokens.
/// The assistant message will be added via add_message which tracks used_tokens.
#[test]
fn test_add_streaming_tokens_only_affects_cumulative() {
let mut window = ContextWindow::new(10000);
// Add streaming tokens (fallback when no usage data available)
window.add_streaming_tokens(100);
// used_tokens should NOT change
assert_eq!(window.used_tokens, 0, "used_tokens should not be updated by add_streaming_tokens");
// cumulative_tokens SHOULD be updated
assert_eq!(window.cumulative_tokens, 100, "cumulative_tokens should be updated");
// Add more streaming tokens
window.add_streaming_tokens(50);
assert_eq!(window.used_tokens, 0);
assert_eq!(window.cumulative_tokens, 150);
}
/// Test percentage calculation is based on used_tokens (actual context content).
#[test]
fn test_percentage_based_on_used_tokens() {
let mut window = ContextWindow::new(1000);
// Initially 0%
assert_eq!(window.percentage_used(), 0.0);
// After 1% buffer: total_tokens = 990
assert_eq!(window.remaining_tokens(), 990);
// Add messages to increase used_tokens
// A message with ~100 chars should be roughly 25-30 tokens
let msg = Message::new(MessageRole::User, "x".repeat(400)); // ~100 tokens estimated
window.add_message(msg);
// Percentage should be based on used_tokens
let percentage = window.percentage_used();
assert!(percentage > 0.0, "percentage should be > 0 after adding message");
assert!(percentage < 100.0, "percentage should be < 100");
// remaining_tokens should decrease
assert!(window.remaining_tokens() < 990, "remaining tokens should decrease");
}
/// Test that the 80% compaction threshold works correctly.
/// This was the original bug - used_tokens was being double/triple counted.
#[test]
fn test_should_compact_threshold() {
let mut window = ContextWindow::new(1000);
// Add messages until we approach 80%
// Each message of ~320 chars is roughly 80 tokens (at 4 chars/token)
for _ in 0..9 {
let msg = Message::new(MessageRole::User, "x".repeat(320));
window.add_message(msg);
}
// Should be around 720 tokens (72%) - not yet at threshold
// Note: actual token count depends on estimation algorithm
let percentage = window.percentage_used();
println!("After 9 messages: {}% used ({} tokens)", percentage, window.used_tokens);
// Add one more message to push over 80%
let msg = Message::new(MessageRole::User, "x".repeat(320));
window.add_message(msg);
let percentage_after = window.percentage_used();
println!("After 10 messages: {}% used ({} tokens)", percentage_after, window.used_tokens);
// Now should_compact should return true if we're at 80%+
if percentage_after >= 80.0 {
assert!(window.should_compact(), "should_compact should be true at 80%+");
}
}
/// Test that cumulative_tokens and used_tokens are independent.
#[test]
fn test_cumulative_vs_used_independence() {
let mut window = ContextWindow::new(10000);
// Add a message (affects used_tokens)
let msg = Message::new(MessageRole::User, "Hello world".to_string());
window.add_message(msg);
let used_after_msg = window.used_tokens;
let cumulative_after_msg = window.cumulative_tokens;
// Both should be equal at this point (message adds to both)
assert_eq!(used_after_msg, cumulative_after_msg);
// Now simulate API response (only affects cumulative_tokens)
let usage = Usage {
prompt_tokens: 500,
completion_tokens: 200,
total_tokens: 700,
cache_creation_tokens: 0,
cache_read_tokens: 0,
};
window.update_usage_from_response(&usage);
// used_tokens unchanged
assert_eq!(window.used_tokens, used_after_msg, "used_tokens should not change from API response");
// cumulative_tokens increased
assert_eq!(window.cumulative_tokens, cumulative_after_msg + 700, "cumulative_tokens should increase");
// They should now be different
assert!(window.cumulative_tokens > window.used_tokens, "cumulative should be greater than used");
}