Add prompt cache statistics tracking to /stats command

- Extend Usage struct with cache_creation_tokens and cache_read_tokens fields
- Parse Anthropic cache_creation_input_tokens and cache_read_input_tokens
- Parse OpenAI prompt_tokens_details.cached_tokens for automatic prefix caching
- Add CacheStats struct to Agent for cumulative tracking across API calls
- Add "Prompt Cache Statistics" section to /stats output showing:
  - API call count and cache hit count
  - Hit rate percentage
  - Total input tokens and cache read/creation tokens
  - Cache efficiency (% of input served from cache)
- Update all provider implementations and test files
2026-01-27 11:32:45 +11:00
parent 96899230a4
commit 5b4079e861
13 changed files with 214 additions and 2 deletions
--- a/crates/g3-providers/src/mock.rs
+++ b/crates/g3-providers/src/mock.rs
@@ -120,6 +120,8 @@ impl MockResponse {
                prompt_tokens: 100,
                completion_tokens: content.len() as u32 / 4,
                total_tokens: 100 + content.len() as u32 / 4,
+                cache_creation_tokens: 0,
+                cache_read_tokens: 0,
            },
        }
    }
@@ -139,6 +141,8 @@ impl MockResponse {
                prompt_tokens: 100,
                completion_tokens: total_content.len() as u32 / 4,
                total_tokens: 100 + total_content.len() as u32 / 4,
+                cache_creation_tokens: 0,
+                cache_read_tokens: 0,
            },
        }
    }
@@ -155,6 +159,8 @@ impl MockResponse {
                prompt_tokens: 100,
                completion_tokens: 50,
                total_tokens: 150,
+                cache_creation_tokens: 0,
+                cache_read_tokens: 0,
            },
        }
    }
@@ -172,6 +178,8 @@ impl MockResponse {
                prompt_tokens: 100,
                completion_tokens: 50 + text.len() as u32 / 4,
                total_tokens: 150 + text.len() as u32 / 4,
+                cache_creation_tokens: 0,
+                cache_read_tokens: 0,
            },
        }
    }
@@ -192,6 +200,8 @@ impl MockResponse {
                prompt_tokens: 100,
                completion_tokens: 100,
                total_tokens: 200,
+                cache_creation_tokens: 0,
+                cache_read_tokens: 0,
            },
        }
    }
@@ -215,6 +225,8 @@ impl MockResponse {
                prompt_tokens: 100,
                completion_tokens: full_content.len() as u32 / 4,
                total_tokens: 100 + full_content.len() as u32 / 4,
+                cache_creation_tokens: 0,
+                cache_read_tokens: 0,
            },
        }
    }
@@ -230,6 +242,8 @@ impl MockResponse {
                prompt_tokens: 100,
                completion_tokens: content.len() as u32 / 4,
                total_tokens: 100 + content.len() as u32 / 4,
+                cache_creation_tokens: 0,
+                cache_read_tokens: 0,
            },
        }
    }