adds cache_control

This commit is contained in:
Jochen
2025-11-18 22:38:52 +11:00
parent 39efa24c55
commit 296bf5a449
7 changed files with 466 additions and 125 deletions

View File

@@ -21,22 +21,18 @@
//! // Create the provider with your API key
//! let provider = AnthropicProvider::new(
//! "your-api-key".to_string(),
//! Some("claude-3-5-sonnet-20241022".to_string()), // Optional: defaults to claude-3-5-sonnet-20241022
//! Some(4096), // Optional: max tokens
//! Some(0.1), // Optional: temperature
//! Some("claude-3-5-sonnet-20241022".to_string()),
//! Some(4096),
//! Some(0.1),
//! None, // cache_config
//! None, // enable_1m_context
//! )?;
//!
//! // Create a completion request
//! let request = CompletionRequest {
//! messages: vec![
//! Message {
//! role: MessageRole::System,
//! content: "You are a helpful assistant.".to_string(),
//! },
//! Message {
//! role: MessageRole::User,
//! content: "Hello! How are you?".to_string(),
//! },
//! Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
//! Message::new(MessageRole::User, "Hello! How are you?".to_string()),
//! ],
//! max_tokens: Some(1000),
//! temperature: Some(0.7),
@@ -62,15 +58,16 @@
//! async fn main() -> anyhow::Result<()> {
//! let provider = AnthropicProvider::new(
//! "your-api-key".to_string(),
//! None, None, None,
//! None,
//! None,
//! None,
//! None, // cache_config
//! None, // enable_1m_context
//! )?;
//!
//! let request = CompletionRequest {
//! messages: vec![
//! Message {
//! role: MessageRole::User,
//! content: "Write a short story about a robot.".to_string(),
//! },
//! Message::new(MessageRole::User, "Write a short story about a robot.".to_string()),
//! ],
//! max_tokens: Some(1000),
//! temperature: Some(0.7),
@@ -123,6 +120,8 @@ pub struct AnthropicProvider {
model: String,
max_tokens: u32,
temperature: f32,
cache_config: Option<String>,
enable_1m_context: bool,
}
impl AnthropicProvider {
@@ -131,6 +130,8 @@ impl AnthropicProvider {
model: Option<String>,
max_tokens: Option<u32>,
temperature: Option<f32>,
cache_config: Option<String>,
enable_1m_context: Option<bool>,
) -> Result<Self> {
let client = Client::builder()
.timeout(Duration::from_secs(300))
@@ -147,6 +148,8 @@ impl AnthropicProvider {
model,
max_tokens: max_tokens.unwrap_or(4096),
temperature: temperature.unwrap_or(0.1),
cache_config,
enable_1m_context: enable_1m_context.unwrap_or(false),
})
}
@@ -156,9 +159,12 @@ impl AnthropicProvider {
.post(ANTHROPIC_API_URL)
.header("x-api-key", &self.api_key)
.header("anthropic-version", ANTHROPIC_VERSION)
// Anthropic beta 1m context window. Enable if needed. It costs extra, so check first.
// .header("anthropic-beta", "context-1m-2025-08-07")
.header("content-type", "application/json");
if self.enable_1m_context {
builder = builder.header("anthropic-beta", "context-1m-2025-08-07");
}
if streaming {
builder = builder.header("accept", "text/event-stream");
}
@@ -166,6 +172,17 @@ impl AnthropicProvider {
builder
}
fn convert_cache_control(cache_control: &crate::CacheControl) -> AnthropicCacheControl {
let cache_type = match cache_control {
crate::CacheControl::Ephemeral => "ephemeral",
crate::CacheControl::FiveMinute => "5minute",
crate::CacheControl::OneHour => "1hour",
};
AnthropicCacheControl {
cache_type: cache_type.to_string(),
}
}
fn convert_tools(&self, tools: &[Tool]) -> Vec<AnthropicTool> {
tools
.iter()
@@ -214,6 +231,8 @@ impl AnthropicProvider {
role: "user".to_string(),
content: vec![AnthropicContent::Text {
text: message.content.clone(),
cache_control: message.cache_control.as_ref()
.map(Self::convert_cache_control),
}],
});
}
@@ -222,6 +241,8 @@ impl AnthropicProvider {
role: "assistant".to_string(),
content: vec![AnthropicContent::Text {
text: message.content.clone(),
cache_control: message.cache_control.as_ref()
.map(Self::convert_cache_control),
}],
});
}
@@ -564,7 +585,7 @@ impl LLMProvider for AnthropicProvider {
.content
.iter()
.filter_map(|c| match c {
AnthropicContent::Text { text } => Some(text.as_str()),
AnthropicContent::Text { text, .. } => Some(text.as_str()),
_ => None,
})
.collect::<Vec<_>>()
@@ -658,6 +679,11 @@ impl LLMProvider for AnthropicProvider {
// Claude models support native tool calling
true
}
fn supports_cache_control(&self) -> bool {
// Anthropic supports cache control
true
}
}
// Anthropic API request/response structures
@@ -697,11 +723,21 @@ struct AnthropicMessage {
content: Vec<AnthropicContent>,
}
#[derive(Debug, Serialize, Deserialize)]
struct AnthropicCacheControl {
#[serde(rename = "type")]
cache_type: String,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(tag = "type")]
enum AnthropicContent {
#[serde(rename = "text")]
Text { text: String },
Text {
text: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<AnthropicCacheControl>,
},
#[serde(rename = "tool_use")]
ToolUse {
id: String,
@@ -771,21 +807,14 @@ mod tests {
None,
None,
None,
None,
None,
).unwrap();
let messages = vec![
Message {
role: MessageRole::System,
content: "You are a helpful assistant.".to_string(),
},
Message {
role: MessageRole::User,
content: "Hello!".to_string(),
},
Message {
role: MessageRole::Assistant,
content: "Hi there!".to_string(),
},
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
Message::new(MessageRole::User, "Hello!".to_string()),
Message::new(MessageRole::Assistant, "Hi there!".to_string()),
];
let (system, anthropic_messages) = provider.convert_messages(&messages).unwrap();
@@ -803,14 +832,11 @@ mod tests {
Some("claude-3-haiku-20240307".to_string()),
Some(1000),
Some(0.5),
None,
None,
).unwrap();
let messages = vec![
Message {
role: MessageRole::User,
content: "Test message".to_string(),
},
];
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
let request_body = provider
.create_request_body(&messages, None, false, 1000, 0.5)
@@ -831,6 +857,8 @@ mod tests {
None,
None,
None,
None,
None,
).unwrap();
let tools = vec![
@@ -859,4 +887,48 @@ mod tests {
assert!(anthropic_tools[0].input_schema.required.is_some());
assert_eq!(anthropic_tools[0].input_schema.required.as_ref().unwrap()[0], "location");
}
#[test]
fn test_cache_control_serialization() {
let provider = AnthropicProvider::new(
"test-key".to_string(),
None,
None,
None,
None,
None,
).unwrap();
// Test message WITHOUT cache_control
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
let (_, anthropic_messages_without) = provider.convert_messages(&messages_without).unwrap();
let json_without = serde_json::to_string(&anthropic_messages_without).unwrap();
println!("Anthropic JSON without cache_control: {}", json_without);
// Check if cache_control appears in the JSON
if json_without.contains("cache_control") {
println!("WARNING: JSON contains 'cache_control' field when not configured!");
assert!(!json_without.contains("\"cache_control\":null"),
"JSON should not contain 'cache_control: null'");
}
// Test message WITH cache_control
let messages_with = vec![Message::with_cache_control(
MessageRole::User,
"Hello".to_string(),
crate::CacheControl::Ephemeral,
)];
let (_, anthropic_messages_with) = provider.convert_messages(&messages_with).unwrap();
let json_with = serde_json::to_string(&anthropic_messages_with).unwrap();
println!("Anthropic JSON with cache_control: {}", json_with);
assert!(json_with.contains("cache_control"),
"JSON should contain 'cache_control' field when configured");
assert!(json_with.contains("ephemeral"),
"JSON should contain 'ephemeral' type");
// The key assertion: when cache_control is None, it should not appear in JSON
assert!(!json_without.contains("cache_control") || !json_without.contains("null"),
"JSON should not contain 'cache_control' field or null values when not configured");
}
}

View File

@@ -39,10 +39,7 @@
//! // Create a completion request
//! let request = CompletionRequest {
//! messages: vec![
//! Message {
//! role: MessageRole::User,
//! content: "Hello! How are you?".to_string(),
//! },
//! Message::new(MessageRole::User, "Hello! How are you?".to_string()),
//! ],
//! max_tokens: Some(1000),
//! temperature: Some(0.7),
@@ -241,6 +238,15 @@ impl DatabricksProvider {
.collect()
}
fn convert_cache_control(cache_control: &crate::CacheControl) -> DatabricksCacheControl {
let cache_type = match cache_control {
crate::CacheControl::Ephemeral => "ephemeral",
crate::CacheControl::FiveMinute => "5minute",
crate::CacheControl::OneHour => "1hour",
};
DatabricksCacheControl { cache_type: cache_type.to_string() }
}
fn convert_messages(&self, messages: &[Message]) -> Result<Vec<DatabricksMessage>> {
let mut databricks_messages = Vec::new();
@@ -251,9 +257,24 @@ impl DatabricksProvider {
MessageRole::Assistant => "assistant",
};
// If message has cache_control, use content array format
let content = if message.cache_control.is_some() {
// Use array format with cache_control
let content_block = DatabricksContent::Text {
content_type: "text".to_string(),
text: message.content.clone(),
cache_control: message.cache_control.as_ref()
.map(Self::convert_cache_control),
};
serde_json::to_value(vec![content_block])?
} else {
// Use simple string format
serde_json::Value::String(message.content.clone())
};
databricks_messages.push(DatabricksMessage {
role: role.to_string(),
content: Some(message.content.clone()),
content: Some(content),
tool_calls: None, // Only used in responses, not requests
});
}
@@ -864,8 +885,22 @@ impl LLMProvider for DatabricksProvider {
let content = databricks_response
.choices
.first()
.and_then(|choice| choice.message.content.as_ref())
.cloned()
.and_then(|choice| {
choice.message.content.as_ref().map(|c| {
// Handle both string and array formats
if let Some(s) = c.as_str() {
s.to_string()
} else if let Some(arr) = c.as_array() {
// Extract text from content blocks
arr.iter()
.filter_map(|block| block.get("text").and_then(|t| t.as_str()))
.collect::<Vec<_>>()
.join("")
} else {
String::new()
}
})
})
.unwrap_or_default();
// Check if there are tool calls in the response
@@ -1037,6 +1072,11 @@ impl LLMProvider for DatabricksProvider {
// This includes Claude, Llama, DBRX, and most other models on the platform
true
}
fn supports_cache_control(&self) -> bool {
// Databricks supports cache control when using Anthropic models
self.model.contains("claude")
}
}
// Databricks API request/response structures
@@ -1064,10 +1104,29 @@ struct DatabricksFunction {
parameters: serde_json::Value,
}
#[derive(Debug, Serialize, Deserialize)]
struct DatabricksCacheControl {
#[serde(rename = "type")]
cache_type: String,
}
#[derive(Debug, Serialize, Deserialize)]
#[serde(untagged)]
enum DatabricksContent {
Text {
#[serde(rename = "type")]
content_type: String,
text: String,
#[serde(skip_serializing_if = "Option::is_none")]
cache_control: Option<DatabricksCacheControl>,
},
}
#[derive(Debug, Serialize, Deserialize)]
struct DatabricksMessage {
role: String,
content: Option<String>, // Make content optional since tool calls might not have content
#[serde(skip_serializing_if = "Option::is_none")]
content: Option<serde_json::Value>, // Can be string or array of content blocks
#[serde(skip_serializing_if = "Option::is_none")]
tool_calls: Option<Vec<DatabricksToolCall>>, // Add tool_calls field for responses
}
@@ -1154,18 +1213,9 @@ mod tests {
.unwrap();
let messages = vec![
Message {
role: MessageRole::System,
content: "You are a helpful assistant.".to_string(),
},
Message {
role: MessageRole::User,
content: "Hello!".to_string(),
},
Message {
role: MessageRole::Assistant,
content: "Hi there!".to_string(),
},
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
Message::new(MessageRole::User, "Hello!".to_string()),
Message::new(MessageRole::Assistant, "Hi there!".to_string()),
];
let databricks_messages = provider.convert_messages(&messages).unwrap();
@@ -1187,10 +1237,7 @@ mod tests {
)
.unwrap();
let messages = vec![Message {
role: MessageRole::User,
content: "Test message".to_string(),
}];
let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];
let request_body = provider
.create_request_body(&messages, None, false, 1000, 0.5)
@@ -1273,4 +1320,53 @@ mod tests {
assert!(llama_provider.has_native_tool_calling());
assert!(dbrx_provider.has_native_tool_calling());
}
#[test]
fn test_cache_control_serialization() {
let provider = DatabricksProvider::from_token(
"https://test.databricks.com".to_string(),
"test-token".to_string(),
"databricks-claude-sonnet-4".to_string(),
None,
None,
)
.unwrap();
// Test message WITHOUT cache_control - should use string format
let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
let databricks_messages_without = provider.convert_messages(&messages_without).unwrap();
let json_without = serde_json::to_string(&databricks_messages_without).unwrap();
println!("JSON without cache_control: {}", json_without);
assert!(!json_without.contains("cache_control"),
"JSON should not contain 'cache_control' field when not configured");
// Test message WITH cache_control - should use array format
let messages_with = vec![Message::with_cache_control(
MessageRole::User,
"Hello".to_string(),
crate::CacheControl::Ephemeral,
)];
let databricks_messages_with = provider.convert_messages(&messages_with).unwrap();
let json_with = serde_json::to_string(&databricks_messages_with).unwrap();
println!("JSON with cache_control: {}", json_with);
assert!(json_with.contains("cache_control"),
"JSON should contain 'cache_control' field when configured");
assert!(json_with.contains("ephemeral"),
"JSON should contain 'ephemeral' type");
assert!(!json_with.contains("null"),
"JSON should not contain null values");
// Verify the structure is correct
let msg_with = &databricks_messages_with[0];
if let Some(content) = &msg_with.content {
if let Some(arr) = content.as_array() {
assert_eq!(arr.len(), 1, "Content array should have one element");
assert!(arr[0].get("cache_control").is_some(), "Content should have cache_control");
} else {
panic!("Content should be an array when cache_control is present");
}
}
}
}

View File

@@ -21,6 +21,11 @@ pub trait LLMProvider: Send + Sync {
fn has_native_tool_calling(&self) -> bool {
false
}
/// Check if the provider supports cache control
fn supports_cache_control(&self) -> bool {
false
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -32,10 +37,22 @@ pub struct CompletionRequest {
pub tools: Option<Vec<Tool>>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CacheControl {
Ephemeral,
#[serde(rename = "5minute")]
FiveMinute,
#[serde(rename = "1hour")]
OneHour,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Message {
pub role: MessageRole,
pub content: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub cache_control: Option<CacheControl>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -95,6 +112,45 @@ pub use databricks::DatabricksProvider;
pub use embedded::EmbeddedProvider;
pub use openai::OpenAIProvider;
impl Message {
/// Create a new message with optional cache control
pub fn new(role: MessageRole, content: String) -> Self {
Self {
role,
content,
cache_control: None,
}
}
/// Create a new message with cache control
pub fn with_cache_control(role: MessageRole, content: String, cache_control: CacheControl) -> Self {
Self {
role,
content,
cache_control: Some(cache_control),
}
}
/// Create a message with cache control, with provider validation
pub fn with_cache_control_validated(
role: MessageRole,
content: String,
cache_control: CacheControl,
provider: &dyn LLMProvider
) -> Self {
if !provider.supports_cache_control() {
tracing::warn!(
"Cache control requested for provider '{}' which does not support it. \
Cache control is only supported by Anthropic and Anthropic via Databricks.",
provider.name()
);
return Self::new(role, content);
}
Self::with_cache_control(role, content, cache_control)
}
}
/// Provider registry for managing multiple LLM providers
pub struct ProviderRegistry {
providers: HashMap<String, Box<dyn LLMProvider>>,
@@ -144,3 +200,36 @@ impl Default for ProviderRegistry {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_message_serialization_without_cache_control() {
let msg = Message::new(MessageRole::User, "Hello".to_string());
let json = serde_json::to_string(&msg).unwrap();
println!("Message JSON without cache_control: {}", json);
assert!(!json.contains("cache_control"),
"JSON should not contain 'cache_control' field when not configured");
}
#[test]
fn test_message_serialization_with_cache_control() {
let msg = Message::with_cache_control(
MessageRole::User,
"Hello".to_string(),
CacheControl::Ephemeral,
);
let json = serde_json::to_string(&msg).unwrap();
println!("Message JSON with cache_control: {}", json);
assert!(json.contains("cache_control"),
"JSON should contain 'cache_control' field when configured");
assert!(json.contains("ephemeral"),
"JSON should contain 'ephemeral' value");
assert!(!json.contains("null"),
"JSON should not contain null values");
}
}