Add prompt cache statistics tracking to /stats command

- Extend Usage struct with cache_creation_tokens and cache_read_tokens fields
- Parse Anthropic cache_creation_input_tokens and cache_read_input_tokens
- Parse OpenAI prompt_tokens_details.cached_tokens for automatic prefix caching
- Add CacheStats struct to Agent for cumulative tracking across API calls
- Add "Prompt Cache Statistics" section to /stats output showing:
  - API call count and cache hit count
  - Hit rate percentage
  - Total input tokens and cache read/creation tokens
  - Cache efficiency (% of input served from cache)
- Update all provider implementations and test files
2026-01-27 11:32:45 +11:00
parent 96899230a4
commit 5b4079e861
13 changed files with 214 additions and 2 deletions
--- a/crates/g3-providers/src/openai.rs
+++ b/crates/g3-providers/src/openai.rs
@@ -220,6 +220,12 @@ impl OpenAIProvider {
                                            prompt_tokens: usage.prompt_tokens,
                                            completion_tokens: usage.completion_tokens,
                                            total_tokens: usage.total_tokens,
+                                            cache_creation_tokens: 0, // OpenAI doesn't report cache creation
+                                            cache_read_tokens: usage
+                                                .prompt_tokens_details
+                                                .as_ref()
+                                                .map(|d| d.cached_tokens)
+                                                .unwrap_or(0),
                                        });
                                    }
                                }
@@ -306,6 +312,13 @@ impl LLMProvider for OpenAIProvider {
            prompt_tokens: openai_response.usage.prompt_tokens,
            completion_tokens: openai_response.usage.completion_tokens,
            total_tokens: openai_response.usage.total_tokens,
+            cache_creation_tokens: 0, // OpenAI doesn't report cache creation
+            cache_read_tokens: openai_response
+                .usage
+                .prompt_tokens_details
+                .as_ref()
+                .map(|d| d.cached_tokens)
+                .unwrap_or(0),
        };

        debug!(
@@ -495,6 +508,16 @@ struct OpenAIUsage {
    prompt_tokens: u32,
    completion_tokens: u32,
    total_tokens: u32,
+    /// Detailed breakdown of prompt tokens including cache info
+    #[serde(default)]
+    prompt_tokens_details: Option<OpenAIPromptTokensDetails>,
+}
+
+#[derive(Debug, Deserialize, Default)]
+struct OpenAIPromptTokensDetails {
+    /// Tokens retrieved from cache (cache hit)
+    #[serde(default)]
+    cached_tokens: u32,
 }

 // Streaming response structures