Add prompt cache statistics tracking to /stats command

- Extend Usage struct with cache_creation_tokens and cache_read_tokens fields
- Parse Anthropic's cache_creation_input_tokens and cache_read_input_tokens fields
- Parse OpenAI's prompt_tokens_details.cached_tokens field for automatic prefix caching
- Add CacheStats struct to Agent for cumulative tracking across API calls
- Add "Prompt Cache Statistics" section to /stats output showing:
  - API call count and cache hit count
  - Hit rate percentage
  - Total input tokens and cache read/creation tokens
  - Cache efficiency (% of input served from cache)
- Update all provider implementations and test files
This commit is contained in:
Dhanji R. Prasanna
2026-01-27 11:32:45 +11:00
parent 96899230a4
commit 5b4079e861
13 changed files with 214 additions and 2 deletions

View File

@@ -60,6 +60,8 @@ fn default_usage() -> Usage {
prompt_tokens: 0,
completion_tokens: 0,
total_tokens: 0,
cache_creation_tokens: 0,
cache_read_tokens: 0,
}
}
@@ -169,6 +171,8 @@ impl LLMProvider for MockStreamingProvider {
prompt_tokens: 100,
completion_tokens: 50,
total_tokens: 150,
cache_creation_tokens: 0,
cache_read_tokens: 0,
}),
stop_reason: Some("end_turn".to_string()),
tool_call_streaming: None,
@@ -201,6 +205,8 @@ impl LLMProvider for MockStreamingProvider {
prompt_tokens: 50,
completion_tokens: 10,
total_tokens: 60,
cache_creation_tokens: 0,
cache_read_tokens: 0,
}),
stop_reason: Some("end_turn".to_string()),
tool_call_streaming: None,
@@ -407,6 +413,8 @@ async fn test_finished_signal_terminates_stream() {
prompt_tokens: 0,
completion_tokens: 0,
total_tokens: 0,
cache_creation_tokens: 0,
cache_read_tokens: 0,
},
model: "simple".to_string(),
})
@@ -439,6 +447,8 @@ async fn test_finished_signal_terminates_stream() {
prompt_tokens: 10,
completion_tokens: 10,
total_tokens: 20,
cache_creation_tokens: 0,
cache_read_tokens: 0,
}),
stop_reason: Some("end_turn".to_string()),
tool_call_streaming: None,