Add prompt cache statistics tracking to /stats command

- Extend Usage struct with cache_creation_tokens and cache_read_tokens fields
- Parse Anthropic cache_creation_input_tokens and cache_read_input_tokens
- Parse OpenAI prompt_tokens_details.cached_tokens for automatic prefix caching
- Add CacheStats struct to Agent for cumulative tracking across API calls
- Add "Prompt Cache Statistics" section to /stats output showing:
  - API call count and cache hit count
  - Hit rate percentage
  - Total input tokens and cache read/creation tokens
  - Cache efficiency (% of input served from cache)
- Update all provider implementations and test files
2026-01-27 11:32:45 +11:00
parent 96899230a4
commit 5b4079e861
13 changed files with 214 additions and 2 deletions
--- a/crates/g3-core/tests/test_token_counting.rs
+++ b/crates/g3-core/tests/test_token_counting.rs
@@ -38,6 +38,8 @@ fn test_update_usage_only_affects_cumulative() {
        prompt_tokens: 100,
        completion_tokens: 50,
        total_tokens: 150,
+        cache_creation_tokens: 0,
+        cache_read_tokens: 0,
    };
    window.update_usage_from_response(&usage);

@@ -52,6 +54,8 @@ fn test_update_usage_only_affects_cumulative() {
        prompt_tokens: 200,
        completion_tokens: 75,
        total_tokens: 275,
+        cache_creation_tokens: 0,
+        cache_read_tokens: 0,
    };
    window.update_usage_from_response(&usage2);

@@ -156,6 +160,8 @@ fn test_cumulative_vs_used_independence() {
        prompt_tokens: 500,
        completion_tokens: 200,
        total_tokens: 700,
+        cache_creation_tokens: 0,
+        cache_read_tokens: 0,
    };
    window.update_usage_from_response(&usage);