Add prompt cache statistics tracking to /stats command

- Extend Usage struct with cache_creation_tokens and cache_read_tokens fields
- Parse Anthropic cache_creation_input_tokens and cache_read_input_tokens
- Parse OpenAI prompt_tokens_details.cached_tokens for automatic prefix caching
- Add CacheStats struct to Agent for cumulative tracking across API calls (see the sketch before the diff below)
- Add "Prompt Cache Statistics" section to /stats output showing:
  - API call count and cache hit count
  - Hit rate percentage
  - Total input tokens and cache read/creation tokens
  - Cache efficiency (% of input served from cache)
- Update all provider implementations and test files
Dhanji R. Prasanna
2026-01-27 11:32:45 +11:00
parent 96899230a4
commit 5b4079e861
13 changed files with 214 additions and 2 deletions
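
The diff shown below covers only the OpenAI provider changes; the extended Usage struct, the CacheStats accumulator on Agent, and the hit-rate / cache-efficiency math behind the new /stats section live in other files of this commit. A minimal sketch of how those pieces could fit together, following the commit message bullets above (field names, method names, and the exact formulas are assumptions, not the code in this commit):

```rust
// Sketch of the shared Usage struct extended with the two cache fields.
#[derive(Debug, Clone, Copy, Default)]
pub struct Usage {
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
    /// Tokens written to the provider's prompt cache (Anthropic only; OpenAI reports 0)
    pub cache_creation_tokens: u32,
    /// Tokens served from the provider's prompt cache
    pub cache_read_tokens: u32,
}

// Hypothetical CacheStats accumulator kept on the Agent across API calls.
#[derive(Debug, Default)]
pub struct CacheStats {
    pub api_calls: u64,
    pub cache_hits: u64,
    pub total_input_tokens: u64,
    pub cache_read_tokens: u64,
    pub cache_creation_tokens: u64,
}

impl CacheStats {
    /// Fold one API call's usage into the running totals.
    pub fn record(&mut self, usage: &Usage) {
        self.api_calls += 1;
        if usage.cache_read_tokens > 0 {
            self.cache_hits += 1;
        }
        self.total_input_tokens += u64::from(usage.prompt_tokens);
        self.cache_read_tokens += u64::from(usage.cache_read_tokens);
        self.cache_creation_tokens += u64::from(usage.cache_creation_tokens);
    }

    /// Percentage of API calls that read anything from the cache.
    pub fn hit_rate(&self) -> f64 {
        if self.api_calls == 0 {
            0.0
        } else {
            self.cache_hits as f64 / self.api_calls as f64 * 100.0
        }
    }

    /// Percentage of all input tokens that were served from the cache.
    pub fn cache_efficiency(&self) -> f64 {
        if self.total_input_tokens == 0 {
            0.0
        } else {
            self.cache_read_tokens as f64 / self.total_input_tokens as f64 * 100.0
        }
    }
}
```

The two percentages answer different questions: the call-based hit rate says how often caching kicked in at all, while the token-based efficiency says how much of the input volume was actually served from cache.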


@@ -220,6 +220,12 @@ impl OpenAIProvider {
                     prompt_tokens: usage.prompt_tokens,
                     completion_tokens: usage.completion_tokens,
                     total_tokens: usage.total_tokens,
+                    cache_creation_tokens: 0, // OpenAI doesn't report cache creation
+                    cache_read_tokens: usage
+                        .prompt_tokens_details
+                        .as_ref()
+                        .map(|d| d.cached_tokens)
+                        .unwrap_or(0),
                 });
             }
         }
@@ -306,6 +312,13 @@ impl LLMProvider for OpenAIProvider {
             prompt_tokens: openai_response.usage.prompt_tokens,
             completion_tokens: openai_response.usage.completion_tokens,
             total_tokens: openai_response.usage.total_tokens,
+            cache_creation_tokens: 0, // OpenAI doesn't report cache creation
+            cache_read_tokens: openai_response
+                .usage
+                .prompt_tokens_details
+                .as_ref()
+                .map(|d| d.cached_tokens)
+                .unwrap_or(0),
         };
         debug!(
@@ -495,6 +508,16 @@ struct OpenAIUsage {
     prompt_tokens: u32,
     completion_tokens: u32,
     total_tokens: u32,
+    /// Detailed breakdown of prompt tokens including cache info
+    #[serde(default)]
+    prompt_tokens_details: Option<OpenAIPromptTokensDetails>,
 }
+
+#[derive(Debug, Deserialize, Default)]
+struct OpenAIPromptTokensDetails {
+    /// Tokens retrieved from cache (cache hit)
+    #[serde(default)]
+    cached_tokens: u32,
+}
 
 // Streaming response structures
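
The hunks above touch only the OpenAI provider. Per the commit message, the Anthropic provider parses cache_creation_input_tokens and cache_read_input_tokens into the same Usage fields; that hunk is not shown here. A minimal sketch of what that mapping might look like, reusing the Usage struct sketched earlier (the struct and helper names below are hypothetical, not the actual code in this commit):

```rust
use serde::Deserialize;

// Hypothetical sketch of the Anthropic side of the usage parsing.
// The field names mirror Anthropic's API response; the struct and helper
// names are assumptions.
#[derive(Debug, Deserialize, Default)]
struct AnthropicUsage {
    #[serde(default)]
    input_tokens: u32,
    #[serde(default)]
    output_tokens: u32,
    /// Tokens written to the prompt cache on this call
    #[serde(default)]
    cache_creation_input_tokens: u32,
    /// Tokens served from the prompt cache on this call
    #[serde(default)]
    cache_read_input_tokens: u32,
}

impl AnthropicUsage {
    /// Map the provider-specific usage block onto the shared Usage struct.
    /// Note: Anthropic reports cached tokens separately from input_tokens;
    /// how the commit folds them into prompt/total counts is not visible in this diff.
    fn to_usage(&self) -> Usage {
        Usage {
            prompt_tokens: self.input_tokens,
            completion_tokens: self.output_tokens,
            total_tokens: self.input_tokens + self.output_tokens,
            cache_creation_tokens: self.cache_creation_input_tokens,
            cache_read_tokens: self.cache_read_input_tokens,
        }
    }
}
```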