adds cache_control

2025-11-18 22:38:52 +11:00
parent 39efa24c55
commit 296bf5a449
7 changed files with 466 additions and 125 deletions
--- a/config.coach-player.example.toml
+++ b/config.coach-player.example.toml
@@ -11,12 +11,23 @@ model = "databricks-claude-sonnet-4"
 max_tokens = 4096
 temperature = 0.1
 use_oauth = true
+# cache_config = "ephemeral"  # Optional: Enable prompt caching for Claude models
+                              # Options: "ephemeral", "5minute", "1hour"
+                              # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
+                                # The cache control will be automatically applied to:
+                                # - The system prompt at the start of each session
+                                # - Assistant responses after every 10 tool calls
+                                # - 5minute costs $3/mtok, more details below
+                                # https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing

 [providers.anthropic]
 api_key = "your-anthropic-api-key"
 model = "claude-3-haiku-20240307"  # Using a faster model for player
 max_tokens = 4096
 temperature = 0.3  # Slightly higher temperature for more creative implementations
+# cache_config = "ephemeral"  # Optional: Enable prompt caching
+                              # Options: "ephemeral", "5minute", "1hour"
+                              # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.

 [agent]
 fallback_default_max_tokens = 8192
--- a/config.example.toml
+++ b/config.example.toml
@@ -14,6 +14,15 @@ max_tokens = 4096  # Per-request output limit (how many tokens the model can gen
                   # Note: This is different from max_context_length (total conversation history size)
 temperature = 0.1
 use_oauth = true
+# cache_config = "ephemeral"  # Optional: Enable prompt caching for Claude models on Databricks
+                              # Options: "ephemeral", "5minute", "1hour"
+                              # Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
+                                # The cache control will be automatically applied to:
+                                # - The system prompt at the start of each session
+                                # - Assistant responses after every 10 tool calls
+                                # - 5minute costs $3/mtok, more details below
+                                # https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing
+

 # Multiple OpenAI-compatible providers can be configured with custom names
 # Each provider gets its own section under [providers.openai_compatible.<name>]
--- a/crates/g3-config/src/lib.rs
+++ b/crates/g3-config/src/lib.rs
@@ -40,6 +40,8 @@ pub struct AnthropicConfig {
    pub model: String,
    pub max_tokens: Option<u32>,
    pub temperature: Option<f32>,
+    pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable
+    pub enable_1m_context: Option<bool>, // Enable 1m context window (costs extra)
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -49,6 +51,7 @@ pub struct DatabricksConfig {
    pub model: String,
    pub max_tokens: Option<u32>,
    pub temperature: Option<f32>,
+    pub cache_config: Option<String>, // "ephemeral", "5minute", "1hour", or None to disable
    pub use_oauth: Option<bool>, // Default to true if token not provided
 }

@@ -132,6 +135,7 @@ impl Default for Config {
                    model: "databricks-claude-sonnet-4".to_string(),
                    max_tokens: Some(4096),
                    temperature: Some(0.1),
+                    cache_config: None,
                    use_oauth: Some(true),
                }),
                embedded: None,
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -23,7 +23,7 @@ use anyhow::Result;
 use g3_computer_control::WebDriverController;
 use g3_config::Config;
 use g3_execution::CodeExecutor;
-use g3_providers::{CompletionRequest, Message, MessageRole, ProviderRegistry, Tool};
+use g3_providers::{CacheControl, CompletionRequest, Message, MessageRole, ProviderRegistry, Tool};
 #[allow(unused_imports)]
 use regex::Regex;
 use serde::{Deserialize, Serialize};
@@ -423,18 +423,12 @@ Format this as a detailed but concise summary that can be used to resume the con
        self.used_tokens = 0;

        // Add the summary as a system message
-        let summary_message = Message {
-            role: MessageRole::System,
-            content: format!("Previous conversation summary:\n\n{}", summary),
-        };
+        let summary_message = Message::new(MessageRole::System, format!("Previous conversation summary:\n\n{}", summary));
        self.add_message(summary_message);

        // Add the latest user message if provided
        if let Some(user_msg) = latest_user_message {
-            self.add_message(Message {
-                role: MessageRole::User,
-                content: user_msg,
-            });
+            self.add_message(Message::new(MessageRole::User, user_msg));
        }

        let new_chars: usize = self
@@ -756,6 +750,7 @@ pub struct Agent<W: UiWriter> {
    safaridriver_process: std::sync::Arc<tokio::sync::RwLock<Option<tokio::process::Child>>>,
    macax_controller:
        std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
+    tool_call_count: usize,
 }

 impl<W: UiWriter> Agent<W> {
@@ -898,6 +893,8 @@ impl<W: UiWriter> Agent<W> {
                    Some(anthropic_config.model.clone()),
                    anthropic_config.max_tokens,
                    anthropic_config.temperature,
+                    anthropic_config.cache_config.clone(),
+                    anthropic_config.enable_1m_context,
                )?;
                providers.register(anthropic_provider);
            }
@@ -944,10 +941,7 @@ impl<W: UiWriter> Agent<W> {

        // If README content is provided, add it as the first system message
        if let Some(readme) = readme_content {
-            let readme_message = Message {
-                role: MessageRole::System,
-                content: readme,
-            };
+            let readme_message = Message::new(MessageRole::System, readme);
            context_window.add_message(readme_message);
        }

@@ -1003,9 +997,23 @@ impl<W: UiWriter> Agent<W> {
                    None
                }))
            },
+            tool_call_count: 0,
        })
    }

+    /// Convert cache config string to CacheControl enum
+    fn parse_cache_control(cache_config: &str) -> Option<CacheControl> {
+        match cache_config {
+            "ephemeral" => Some(CacheControl::Ephemeral),
+            "5minute" => Some(CacheControl::FiveMinute),
+            "1hour" => Some(CacheControl::OneHour),
+            _ => {
+                warn!("Invalid cache_config value: '{}'. Valid values are: ephemeral, 5minute, 1hour", cache_config);
+                None
+            }
+        }
+    }
+
    fn get_configured_context_length(config: &Config, providers: &ProviderRegistry) -> Result<u32> {
        // First, check if there's a global max_context_length override in agent config
        if let Some(max_context_length) = config.agent.max_context_length {
@@ -1185,7 +1193,11 @@ impl<W: UiWriter> Agent<W> {
        // Only add system message if this is the first interaction (empty conversation history)
        if self.context_window.conversation_history.is_empty() {
            let provider = self.providers.get(None)?;
-            let system_prompt = if provider.has_native_tool_calling() {
+            let provider_has_native_tool_calling = provider.has_native_tool_calling();
+            let provider_name_for_system = provider.name().to_string();
+            drop(provider); // Drop provider reference to avoid borrowing issues
+            
+            let system_prompt = if provider_has_native_tool_calling {
                // For native tool calling providers, use a more explicit system prompt
                "You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You analyze given tasks and write code to achieve goals.

@@ -1493,18 +1505,34 @@ If you can complete it with 1-2 tool calls, skip TODO.
            }

            // Add system message to context window
-            let system_message = Message {
-                role: MessageRole::System,
-                content: system_prompt,
+            let system_message = {
+                // Check if we should use cache control for system message
+                if let Some(cache_config) = match provider_name_for_system.as_str() {
+                    "anthropic" => self.config.providers.anthropic.as_ref()
+                        .and_then(|c| c.cache_config.as_ref())
+                        .and_then(|config| Self::parse_cache_control(config)),
+                    "databricks" => self.config.providers.databricks.as_ref()
+                        .and_then(|c| {
+                            if c.model.contains("claude") {
+                                c.cache_config.as_ref()
+                                    .and_then(|config| Self::parse_cache_control(config))
+                            } else {
+                                None
+                            }
+                        }),
+                    _ => None,
+                } {
+                    let provider = self.providers.get(None)?;
+                    Message::with_cache_control_validated(MessageRole::System, system_prompt, cache_config, provider)
+                } else {
+                    Message::new(MessageRole::System, system_prompt)
+                }
            };
            self.context_window.add_message(system_message);
        }

        // Add user message to context window
-        let user_message = Message {
-            role: MessageRole::User,
-            content: format!("Task: {}", description),
-        };
+        let user_message = Message::new(MessageRole::User, format!("Task: {}", description));
        self.context_window.add_message(user_message);

        // Use the complete conversation history for the request
@@ -1512,6 +1540,9 @@ If you can complete it with 1-2 tool calls, skip TODO.

        // Check if provider supports native tool calling and add tools if so
        let provider = self.providers.get(None)?;
+        let provider_name = provider.name().to_string();
+        let has_native_tool_calling = provider.has_native_tool_calling();
+        let supports_cache_control = provider.supports_cache_control();
        let tools = if provider.has_native_tool_calling() {
            Some(Self::create_tool_definitions(
                self.config.webdriver.enabled,
@@ -1521,9 +1552,10 @@ If you can complete it with 1-2 tool calls, skip TODO.
        } else {
            None
        };
+        drop(provider); // Drop the provider reference to avoid borrowing issues

        // Get max_tokens from provider configuration
-        let max_tokens = match provider.name() {
+        let max_tokens = match provider_name.as_str() {
            "databricks" => {
                // Use the model's maximum limit for Databricks to allow large file generation
                Some(32000)
@@ -1578,9 +1610,32 @@ If you can complete it with 1-2 tool calls, skip TODO.
        // Add assistant response to context window only if not empty
        // This prevents the "Skipping empty message" warning when only tools were executed
        if !response_content.trim().is_empty() {
-            let assistant_message = Message {
-                role: MessageRole::Assistant,
-                content: response_content.clone(),
+            let assistant_message = {
+                // Check if we should use cache control (every 10 tool calls)
+                if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
+                    let provider = self.providers.get(None)?;
+                    if let Some(cache_config) = match provider.name() {
+                        "anthropic" => self.config.providers.anthropic.as_ref()
+                            .and_then(|c| c.cache_config.as_ref())
+                            .and_then(|config| Self::parse_cache_control(config)),
+                        "databricks" => self.config.providers.databricks.as_ref()
+                            .and_then(|c| {
+                                if c.model.contains("claude") {
+                                    c.cache_config.as_ref()
+                                        .and_then(|config| Self::parse_cache_control(config))
+                                } else {
+                                    None
+                                }
+                            }),
+                        _ => None,
+                    } {
+                        Message::with_cache_control_validated(MessageRole::Assistant, response_content.clone(), cache_config, provider)
+                    } else {
+                        Message::new(MessageRole::Assistant, response_content.clone())
+                    }
+                } else {
+                    Message::new(MessageRole::Assistant, response_content.clone())
+                }
            };
            self.context_window.add_message(assistant_message);
        } else {
@@ -1783,17 +1838,11 @@ If you can complete it with 1-2 tool calls, skip TODO.
            .join("\n\n");

        let summary_messages = vec![
-            Message {
-                role: MessageRole::System,
-                content: "You are a helpful assistant that creates concise summaries.".to_string(),
-            },
-            Message {
-                role: MessageRole::User,
-                content: format!(
+            Message::new(MessageRole::System, "You are a helpful assistant that creates concise summaries.".to_string()),
+            Message::new(MessageRole::User, format!(
                    "Based on this conversation history, {}\n\nConversation:\n{}",
                    summary_prompt, conversation_text
-                ),
-            },
+                )),
        ];

        let provider = self.providers.get(None)?;
@@ -2776,18 +2825,11 @@ If you can complete it with 1-2 tool calls, skip TODO.
                .join("\n\n");

            let summary_messages = vec![
-                Message {
-                    role: MessageRole::System,
-                    content: "You are a helpful assistant that creates concise summaries."
-                        .to_string(),
-                },
-                Message {
-                    role: MessageRole::User,
-                    content: format!(
+                Message::new(MessageRole::System, "You are a helpful assistant that creates concise summaries.".to_string()),
+                Message::new(MessageRole::User, format!(
                        "Based on this conversation history, {}\n\nConversation:\n{}",
                        summary_prompt, conversation_text
-                    ),
-                },
+                )),
            ];

            let provider = self.providers.get(None)?;
@@ -3273,29 +3315,20 @@ If you can complete it with 1-2 tool calls, skip TODO.
                            // Add the tool call and result to the context window using RAW unfiltered content
                            // This ensures the log file contains the true raw content including JSON tool calls
                            let tool_message = if !raw_content_for_log.trim().is_empty() {
-                                Message {
-                                    role: MessageRole::Assistant,
-                                    content: format!(
+                                Message::new(MessageRole::Assistant, format!(
                                        "{}\n\n{{\"tool\": \"{}\", \"args\": {}}}",
                                        raw_content_for_log.trim(),
                                        tool_call.tool,
                                        tool_call.args
-                                    ),
-                                }
+                                ))
                            } else {
                                // No text content before tool call, just include the tool call
-                                Message {
-                                    role: MessageRole::Assistant,
-                                    content: format!(
+                                Message::new(MessageRole::Assistant, format!(
                                        "{{\"tool\": \"{}\", \"args\": {}}}",
                                        tool_call.tool, tool_call.args
-                                    ),
-                                }
-                            };
-                            let result_message = Message {
-                                role: MessageRole::User,
-                                content: format!("Tool result: {}", tool_result),
+                                ))
                            };
+                            let result_message = Message::new(MessageRole::User, format!("Tool result: {}", tool_result));

                            self.context_window.add_message(tool_message);
                            self.context_window.add_message(result_message);
@@ -3304,7 +3337,8 @@ If you can complete it with 1-2 tool calls, skip TODO.
                            request.messages = self.context_window.conversation_history.clone();

                            // Ensure tools are included for native providers in subsequent iterations
-                            if provider.has_native_tool_calling() {
+                            let provider_for_tools = self.providers.get(None)?;
+                            if provider_for_tools.has_native_tool_calling() {
                                request.tools = Some(Self::create_tool_definitions(
                                    self.config.webdriver.enabled,
                                    self.config.macax.enabled,
@@ -3635,9 +3669,32 @@ If you can complete it with 1-2 tool calls, skip TODO.
                        .replace("<</SYS>>", "");

                    if !raw_clean.trim().is_empty() {
-                        let assistant_message = Message {
-                            role: MessageRole::Assistant,
-                            content: raw_clean,
+                        let assistant_message = {
+                            // Check if we should use cache control (every 10 tool calls)
+                            if self.tool_call_count > 0 && self.tool_call_count % 10 == 0 {
+                                let provider = self.providers.get(None)?;
+                                if let Some(cache_config) = match provider.name() {
+                                    "anthropic" => self.config.providers.anthropic.as_ref()
+                                        .and_then(|c| c.cache_config.as_ref())
+                                        .and_then(|config| Self::parse_cache_control(config)),
+                                    "databricks" => self.config.providers.databricks.as_ref()
+                                        .and_then(|c| {
+                                            if c.model.contains("claude") {
+                                                c.cache_config.as_ref()
+                                                    .and_then(|config| Self::parse_cache_control(config))
+                                            } else {
+                                                None
+                                            }
+                                        }),
+                                    _ => None,
+                                } {
+                                    Message::with_cache_control_validated(MessageRole::Assistant, raw_clean, cache_config, provider)
+                                } else {
+                                    Message::new(MessageRole::Assistant, raw_clean)
+                                }
+                            } else {
+                                Message::new(MessageRole::Assistant, raw_clean)
+                            }
                        };
                        self.context_window.add_message(assistant_message);
                    }
@@ -3679,7 +3736,10 @@ If you can complete it with 1-2 tool calls, skip TODO.
        Ok(TaskResult::new(final_response, self.context_window.clone()))
    }

-    pub async fn execute_tool(&self, tool_call: &ToolCall) -> Result<String> {
+    pub async fn execute_tool(&mut self, tool_call: &ToolCall) -> Result<String> {
+        // Increment tool call count
+        self.tool_call_count += 1;
+        
        debug!("=== EXECUTING TOOL ===");
        debug!("Tool name: {}", tool_call.tool);
        debug!("Tool args (raw): {:?}", tool_call.args);
--- a/crates/g3-providers/src/anthropic.rs
+++ b/crates/g3-providers/src/anthropic.rs
@@ -21,22 +21,18 @@
 //!     // Create the provider with your API key
 //!     let provider = AnthropicProvider::new(
 //!         "your-api-key".to_string(),
-//!         Some("claude-3-5-sonnet-20241022".to_string()), // Optional: defaults to claude-3-5-sonnet-20241022
-//!         Some(4096),  // Optional: max tokens
-//!         Some(0.1),   // Optional: temperature
+//!         Some("claude-3-5-sonnet-20241022".to_string()),
+//!         Some(4096),
+//!         Some(0.1),
+//!         None, // cache_config
+//!         None, // enable_1m_context
 //!     )?;
 //!
 //!     // Create a completion request
 //!     let request = CompletionRequest {
 //!         messages: vec![
-//!             Message {
-//!                 role: MessageRole::System,
-//!                 content: "You are a helpful assistant.".to_string(),
-//!             },
-//!             Message {
-//!                 role: MessageRole::User,
-//!                 content: "Hello! How are you?".to_string(),
-//!             },
+//!             Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
+//!             Message::new(MessageRole::User, "Hello! How are you?".to_string()),
 //!         ],
 //!         max_tokens: Some(1000),
 //!         temperature: Some(0.7),
@@ -62,15 +58,16 @@
 //! async fn main() -> anyhow::Result<()> {
 //!     let provider = AnthropicProvider::new(
 //!         "your-api-key".to_string(),
-//!         None, None, None,
+//!         None,
+//!         None,
+//!         None,
+//!         None, // cache_config
+//!         None, // enable_1m_context
 //!     )?;
 //!
 //!     let request = CompletionRequest {
 //!         messages: vec![
-//!             Message {
-//!                 role: MessageRole::User,
-//!                 content: "Write a short story about a robot.".to_string(),
-//!             },
+//!             Message::new(MessageRole::User, "Write a short story about a robot.".to_string()),
 //!         ],
 //!         max_tokens: Some(1000),
 //!         temperature: Some(0.7),
@@ -123,6 +120,8 @@ pub struct AnthropicProvider {
    model: String,
    max_tokens: u32,
    temperature: f32,
+    cache_config: Option<String>,
+    enable_1m_context: bool,
 }

 impl AnthropicProvider {
@@ -131,6 +130,8 @@ impl AnthropicProvider {
        model: Option<String>,
        max_tokens: Option<u32>,
        temperature: Option<f32>,
+        cache_config: Option<String>,
+        enable_1m_context: Option<bool>,
    ) -> Result<Self> {
        let client = Client::builder()
            .timeout(Duration::from_secs(300))
@@ -147,6 +148,8 @@ impl AnthropicProvider {
            model,
            max_tokens: max_tokens.unwrap_or(4096),
            temperature: temperature.unwrap_or(0.1),
+            cache_config,
+            enable_1m_context: enable_1m_context.unwrap_or(false),
        })
    }

@@ -156,9 +159,12 @@ impl AnthropicProvider {
            .post(ANTHROPIC_API_URL)
            .header("x-api-key", &self.api_key)
            .header("anthropic-version", ANTHROPIC_VERSION)
-            // Anthropic beta 1m context window. Enable if needed. It costs extra, so check first.
-            // .header("anthropic-beta", "context-1m-2025-08-07")
            .header("content-type", "application/json");
+        
+        if self.enable_1m_context {
+            builder = builder.header("anthropic-beta", "context-1m-2025-08-07");
+        }
+        
        if streaming {
            builder = builder.header("accept", "text/event-stream");
        }
@@ -166,6 +172,17 @@ impl AnthropicProvider {
        builder
    }

+    fn convert_cache_control(cache_control: &crate::CacheControl) -> AnthropicCacheControl {
+        let cache_type = match cache_control {
+            crate::CacheControl::Ephemeral => "ephemeral",
+            crate::CacheControl::FiveMinute => "5minute", 
+            crate::CacheControl::OneHour => "1hour",
+        };
+        AnthropicCacheControl {
+            cache_type: cache_type.to_string(),
+        }
+    }
+
    fn convert_tools(&self, tools: &[Tool]) -> Vec<AnthropicTool> {
        tools
            .iter()
@@ -214,6 +231,8 @@ impl AnthropicProvider {
                        role: "user".to_string(),
                        content: vec![AnthropicContent::Text {
                            text: message.content.clone(),
+                            cache_control: message.cache_control.as_ref()
+                                .map(Self::convert_cache_control),
                        }],
                    });
                }
@@ -222,6 +241,8 @@ impl AnthropicProvider {
                        role: "assistant".to_string(),
                        content: vec![AnthropicContent::Text {
                            text: message.content.clone(),
+                            cache_control: message.cache_control.as_ref()
+                                .map(Self::convert_cache_control),
                        }],
                    });
                }
@@ -564,7 +585,7 @@ impl LLMProvider for AnthropicProvider {
            .content
            .iter()
            .filter_map(|c| match c {
-                AnthropicContent::Text { text } => Some(text.as_str()),
+                AnthropicContent::Text { text, .. } => Some(text.as_str()),
                _ => None,
            })
            .collect::<Vec<_>>()
@@ -658,6 +679,11 @@ impl LLMProvider for AnthropicProvider {
        // Claude models support native tool calling
        true
    }
+    
+    fn supports_cache_control(&self) -> bool {
+        // Anthropic supports cache control
+        true
+    }
 }

 // Anthropic API request/response structures
@@ -697,11 +723,21 @@ struct AnthropicMessage {
    content: Vec<AnthropicContent>,
 }

+#[derive(Debug, Serialize, Deserialize)]
+struct AnthropicCacheControl {
+    #[serde(rename = "type")]
+    cache_type: String,
+}
+
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(tag = "type")]
 enum AnthropicContent {
    #[serde(rename = "text")]
-    Text { text: String },
+    Text { 
+        text: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        cache_control: Option<AnthropicCacheControl>,
+    },
    #[serde(rename = "tool_use")]
    ToolUse {
        id: String,
@@ -771,21 +807,14 @@ mod tests {
            None,
            None,
            None,
+            None,
+            None,
        ).unwrap();

        let messages = vec![
-            Message {
-                role: MessageRole::System,
-                content: "You are a helpful assistant.".to_string(),
-            },
-            Message {
-                role: MessageRole::User,
-                content: "Hello!".to_string(),
-            },
-            Message {
-                role: MessageRole::Assistant,
-                content: "Hi there!".to_string(),
-            },
+            Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
+            Message::new(MessageRole::User, "Hello!".to_string()),
+            Message::new(MessageRole::Assistant, "Hi there!".to_string()),
        ];

        let (system, anthropic_messages) = provider.convert_messages(&messages).unwrap();
@@ -803,14 +832,11 @@ mod tests {
            Some("claude-3-haiku-20240307".to_string()),
            Some(1000),
            Some(0.5),
+            None,
+            None,
        ).unwrap();

-        let messages = vec![
-            Message {
-                role: MessageRole::User,
-                content: "Test message".to_string(),
-            },
-        ];
+        let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];

        let request_body = provider
            .create_request_body(&messages, None, false, 1000, 0.5)
@@ -831,6 +857,8 @@ mod tests {
            None,
            None,
            None,
+            None,
+            None,
        ).unwrap();

        let tools = vec![
@@ -859,4 +887,48 @@ mod tests {
        assert!(anthropic_tools[0].input_schema.required.is_some());
        assert_eq!(anthropic_tools[0].input_schema.required.as_ref().unwrap()[0], "location");
    }
+
+    #[test]
+    fn test_cache_control_serialization() {
+        let provider = AnthropicProvider::new(
+            "test-key".to_string(),
+            None,
+            None,
+            None,
+            None,
+            None,
+        ).unwrap();
+
+        // Test message WITHOUT cache_control
+        let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
+        let (_, anthropic_messages_without) = provider.convert_messages(&messages_without).unwrap();
+        let json_without = serde_json::to_string(&anthropic_messages_without).unwrap();
+        
+        println!("Anthropic JSON without cache_control: {}", json_without);
+        // Check if cache_control appears in the JSON
+        if json_without.contains("cache_control") {
+            println!("WARNING: JSON contains 'cache_control' field when not configured!");
+            assert!(!json_without.contains("\"cache_control\":null"), 
+                    "JSON should not contain 'cache_control: null'");
+        }
+
+        // Test message WITH cache_control
+        let messages_with = vec![Message::with_cache_control(
+            MessageRole::User,
+            "Hello".to_string(),
+            crate::CacheControl::Ephemeral,
+        )];
+        let (_, anthropic_messages_with) = provider.convert_messages(&messages_with).unwrap();
+        let json_with = serde_json::to_string(&anthropic_messages_with).unwrap();
+        
+        println!("Anthropic JSON with cache_control: {}", json_with);
+        assert!(json_with.contains("cache_control"), 
+                "JSON should contain 'cache_control' field when configured");
+        assert!(json_with.contains("ephemeral"), 
+                "JSON should contain 'ephemeral' type");
+        
+        // The key assertion: when cache_control is None, it should not appear in JSON
+        assert!(!json_without.contains("cache_control") || !json_without.contains("null"),
+                "JSON should not contain 'cache_control' field or null values when not configured");
+    }
 }
--- a/crates/g3-providers/src/databricks.rs
+++ b/crates/g3-providers/src/databricks.rs
@@ -39,10 +39,7 @@
 //!     // Create a completion request
 //!     let request = CompletionRequest {
 //!         messages: vec![
-//!             Message {
-//!                 role: MessageRole::User,
-//!                 content: "Hello! How are you?".to_string(),
-//!             },
+//!             Message::new(MessageRole::User, "Hello! How are you?".to_string()),
 //!         ],
 //!         max_tokens: Some(1000),
 //!         temperature: Some(0.7),
@@ -241,6 +238,15 @@ impl DatabricksProvider {
            .collect()
    }

+    fn convert_cache_control(cache_control: &crate::CacheControl) -> DatabricksCacheControl {
+        let cache_type = match cache_control {
+            crate::CacheControl::Ephemeral => "ephemeral",
+            crate::CacheControl::FiveMinute => "5minute",
+            crate::CacheControl::OneHour => "1hour",
+        };
+        DatabricksCacheControl { cache_type: cache_type.to_string() }
+    }
+
    fn convert_messages(&self, messages: &[Message]) -> Result<Vec<DatabricksMessage>> {
        let mut databricks_messages = Vec::new();

@@ -251,9 +257,24 @@ impl DatabricksProvider {
                MessageRole::Assistant => "assistant",
            };

+            // If message has cache_control, use content array format
+            let content = if message.cache_control.is_some() {
+                // Use array format with cache_control
+                let content_block = DatabricksContent::Text {
+                    content_type: "text".to_string(),
+                    text: message.content.clone(),
+                    cache_control: message.cache_control.as_ref()
+                        .map(Self::convert_cache_control),
+                };
+                serde_json::to_value(vec![content_block])?
+            } else {
+                // Use simple string format
+                serde_json::Value::String(message.content.clone())
+            };
+
            databricks_messages.push(DatabricksMessage {
                role: role.to_string(),
-                content: Some(message.content.clone()),
+                content: Some(content),
                tool_calls: None, // Only used in responses, not requests
            });
        }
@@ -864,8 +885,22 @@ impl LLMProvider for DatabricksProvider {
        let content = databricks_response
            .choices
            .first()
-            .and_then(|choice| choice.message.content.as_ref())
-            .cloned()
+            .and_then(|choice| {
+                choice.message.content.as_ref().map(|c| {
+                    // Handle both string and array formats
+                    if let Some(s) = c.as_str() {
+                        s.to_string()
+                    } else if let Some(arr) = c.as_array() {
+                        // Extract text from content blocks
+                        arr.iter()
+                            .filter_map(|block| block.get("text").and_then(|t| t.as_str()))
+                            .collect::<Vec<_>>()
+                            .join("")
+                    } else {
+                        String::new()
+                    }
+                })
+            })
            .unwrap_or_default();

        // Check if there are tool calls in the response
@@ -1037,6 +1072,11 @@ impl LLMProvider for DatabricksProvider {
        // This includes Claude, Llama, DBRX, and most other models on the platform
        true
    }
+    
+    fn supports_cache_control(&self) -> bool {
+        // Databricks supports cache control when using Anthropic models
+        self.model.contains("claude")
+    }
 }

 // Databricks API request/response structures
@@ -1064,10 +1104,29 @@ struct DatabricksFunction {
    parameters: serde_json::Value,
 }

+#[derive(Debug, Serialize, Deserialize)]
+struct DatabricksCacheControl {
+    #[serde(rename = "type")]
+    cache_type: String,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(untagged)]
+enum DatabricksContent {
+    Text {
+        #[serde(rename = "type")]
+        content_type: String,
+        text: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        cache_control: Option<DatabricksCacheControl>,
+    },
+}
+
 #[derive(Debug, Serialize, Deserialize)]
 struct DatabricksMessage {
    role: String,
-    content: Option<String>, // Make content optional since tool calls might not have content
+    #[serde(skip_serializing_if = "Option::is_none")]
+    content: Option<serde_json::Value>, // Can be string or array of content blocks
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<DatabricksToolCall>>, // Add tool_calls field for responses
 }
@@ -1154,18 +1213,9 @@ mod tests {
        .unwrap();

        let messages = vec![
-            Message {
-                role: MessageRole::System,
-                content: "You are a helpful assistant.".to_string(),
-            },
-            Message {
-                role: MessageRole::User,
-                content: "Hello!".to_string(),
-            },
-            Message {
-                role: MessageRole::Assistant,
-                content: "Hi there!".to_string(),
-            },
+            Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
+            Message::new(MessageRole::User, "Hello!".to_string()),
+            Message::new(MessageRole::Assistant, "Hi there!".to_string()),
        ];

        let databricks_messages = provider.convert_messages(&messages).unwrap();
@@ -1187,10 +1237,7 @@ mod tests {
        )
        .unwrap();

-        let messages = vec![Message {
-            role: MessageRole::User,
-            content: "Test message".to_string(),
-        }];
+        let messages = vec![Message::new(MessageRole::User, "Test message".to_string())];

        let request_body = provider
            .create_request_body(&messages, None, false, 1000, 0.5)
@@ -1273,4 +1320,53 @@ mod tests {
        assert!(llama_provider.has_native_tool_calling());
        assert!(dbrx_provider.has_native_tool_calling());
    }
+
+    #[test]
+    fn test_cache_control_serialization() {
+        let provider = DatabricksProvider::from_token(
+            "https://test.databricks.com".to_string(),
+            "test-token".to_string(),
+            "databricks-claude-sonnet-4".to_string(),
+            None,
+            None,
+        )
+        .unwrap();
+
+        // Test message WITHOUT cache_control - should use string format
+        let messages_without = vec![Message::new(MessageRole::User, "Hello".to_string())];
+        let databricks_messages_without = provider.convert_messages(&messages_without).unwrap();
+        let json_without = serde_json::to_string(&databricks_messages_without).unwrap();
+        
+        println!("JSON without cache_control: {}", json_without);
+        assert!(!json_without.contains("cache_control"), 
+                "JSON should not contain 'cache_control' field when not configured");
+
+        // Test message WITH cache_control - should use array format
+        let messages_with = vec![Message::with_cache_control(
+            MessageRole::User,
+            "Hello".to_string(),
+            crate::CacheControl::Ephemeral,
+        )];
+        let databricks_messages_with = provider.convert_messages(&messages_with).unwrap();
+        let json_with = serde_json::to_string(&databricks_messages_with).unwrap();
+        
+        println!("JSON with cache_control: {}", json_with);
+        assert!(json_with.contains("cache_control"), 
+                "JSON should contain 'cache_control' field when configured");
+        assert!(json_with.contains("ephemeral"), 
+                "JSON should contain 'ephemeral' type");
+        assert!(!json_with.contains("null"), 
+                "JSON should not contain null values");
+
+        // Verify the structure is correct
+        let msg_with = &databricks_messages_with[0];
+        if let Some(content) = &msg_with.content {
+            if let Some(arr) = content.as_array() {
+                assert_eq!(arr.len(), 1, "Content array should have one element");
+                assert!(arr[0].get("cache_control").is_some(), "Content should have cache_control");
+            } else {
+                panic!("Content should be an array when cache_control is present");
+            }
+        }
+    }
 }
--- a/crates/g3-providers/src/lib.rs
+++ b/crates/g3-providers/src/lib.rs
@@ -21,6 +21,11 @@ pub trait LLMProvider: Send + Sync {
    fn has_native_tool_calling(&self) -> bool {
        false
    }
+    
+    /// Check if the provider supports cache control
+    fn supports_cache_control(&self) -> bool {
+        false
+    }
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -32,10 +37,22 @@ pub struct CompletionRequest {
    pub tools: Option<Vec<Tool>>,
 }

+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum CacheControl {
+    Ephemeral,
+    #[serde(rename = "5minute")]
+    FiveMinute,
+    #[serde(rename = "1hour")]
+    OneHour,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Message {
    pub role: MessageRole,
    pub content: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub cache_control: Option<CacheControl>,
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -95,6 +112,45 @@ pub use databricks::DatabricksProvider;
 pub use embedded::EmbeddedProvider;
 pub use openai::OpenAIProvider;

+impl Message {
+    /// Create a new message with optional cache control
+    pub fn new(role: MessageRole, content: String) -> Self {
+        Self {
+            role,
+            content,
+            cache_control: None,
+        }
+    }
+
+    /// Create a new message with cache control
+    pub fn with_cache_control(role: MessageRole, content: String, cache_control: CacheControl) -> Self {
+        Self {
+            role,
+            content,
+            cache_control: Some(cache_control),
+        }
+    }
+    
+    /// Create a message with cache control, with provider validation
+    pub fn with_cache_control_validated(
+        role: MessageRole, 
+        content: String, 
+        cache_control: CacheControl,
+        provider: &dyn LLMProvider
+    ) -> Self {
+        if !provider.supports_cache_control() {
+            tracing::warn!(
+                "Cache control requested for provider '{}' which does not support it. \
+                Cache control is only supported by Anthropic and Anthropic via Databricks.",
+                provider.name()
+            );
+            return Self::new(role, content);
+        }
+        
+        Self::with_cache_control(role, content, cache_control)
+    }
+}
+
 /// Provider registry for managing multiple LLM providers
 pub struct ProviderRegistry {
    providers: HashMap<String, Box<dyn LLMProvider>>,
@@ -144,3 +200,36 @@ impl Default for ProviderRegistry {
        Self::new()
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_message_serialization_without_cache_control() {
+        let msg = Message::new(MessageRole::User, "Hello".to_string());
+        let json = serde_json::to_string(&msg).unwrap();
+        
+        println!("Message JSON without cache_control: {}", json);
+        assert!(!json.contains("cache_control"), 
+                "JSON should not contain 'cache_control' field when not configured");
+    }
+
+    #[test]
+    fn test_message_serialization_with_cache_control() {
+        let msg = Message::with_cache_control(
+            MessageRole::User,
+            "Hello".to_string(),
+            CacheControl::Ephemeral,
+        );
+        let json = serde_json::to_string(&msg).unwrap();
+        
+        println!("Message JSON with cache_control: {}", json);
+        assert!(json.contains("cache_control"), 
+                "JSON should contain 'cache_control' field when configured");
+        assert!(json.contains("ephemeral"), 
+                "JSON should contain 'ephemeral' value");
+        assert!(!json.contains("null"), 
+                "JSON should not contain null values");
+    }
+}