From fb2cf6f898d81d6556840d60057fc3f41855788f Mon Sep 17 00:00:00 2001
From: Jochen <jochenx@bekmann.com>
Date: Tue, 9 Dec 2025 12:41:52 +1100
Subject: [PATCH] fix for thinking budget and hardcoded max token on summary

---
 crates/g3-core/src/lib.rs            | 24 ++++++++--
 crates/g3-providers/src/anthropic.rs | 67 +++++++++++++++++++++++++---
 2 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index f95746d..08a4b7d 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -1475,12 +1475,16 @@ impl<W: UiWriter> Agent<W> {
         let model_limit = self.context_window.total_tokens;
         let current_usage = self.context_window.used_tokens;
         
+        // Get the configured max_tokens for this provider
+        let configured_max_tokens = self.resolve_max_tokens(provider_name);
+        
         // Calculate available tokens with buffer
         let buffer = (model_limit / 40).clamp(1000, 10000); // 2.5% buffer
         let available = model_limit
             .saturating_sub(current_usage)
             .saturating_sub(buffer);
-        let proposed_max_tokens = available.min(10_000);
+        // Use the smaller of available tokens or configured max_tokens
+        let proposed_max_tokens = available.min(configured_max_tokens);
 
         // Validate against thinking budget constraint
         let (adjusted, needs_reduction) = self.preflight_validate_max_tokens(provider_name, proposed_max_tokens);
@@ -2423,8 +2427,15 @@ impl<W: UiWriter> Agent<W> {
         let mut summary_max_tokens = self.apply_summary_fallback_sequence(&provider_name);
 
         // Apply provider-specific caps
+        // For Anthropic with thinking enabled, we need max_tokens > thinking.budget_tokens
+        // So we set a higher cap when thinking is configured
+        let anthropic_cap = match self.get_thinking_budget_tokens() {
+            Some(budget) => (budget + 2000).max(10_000), // At least budget + 2000 for response
+            None => 10_000,
+        };
         summary_max_tokens = match provider_name.as_str() {
-            "databricks" | "anthropic" => summary_max_tokens.min(10_000),
+            "anthropic" => summary_max_tokens.min(anthropic_cap),
+            "databricks" => summary_max_tokens.min(10_000),
             "embedded" => summary_max_tokens.min(3000),
             _ => summary_max_tokens.min(5000),
         };
@@ -3436,8 +3447,15 @@ impl<W: UiWriter> Agent<W> {
                 let mut summary_max_tokens = self.apply_summary_fallback_sequence(&provider_name);
 
                 // Apply provider-specific caps
+                // For Anthropic with thinking enabled, we need max_tokens > thinking.budget_tokens
+                // So we set a higher cap when thinking is configured
+                let anthropic_cap = match self.get_thinking_budget_tokens() {
+                    Some(budget) => (budget + 2000).max(10_000), // At least budget + 2000 for response
+                    None => 10_000,
+                };
                 summary_max_tokens = match provider_name.as_str() {
-                    "databricks" | "anthropic" => summary_max_tokens.min(10_000),
+                    "anthropic" => summary_max_tokens.min(anthropic_cap),
+                    "databricks" => summary_max_tokens.min(10_000),
                     "embedded" => summary_max_tokens.min(3000),
                     _ => summary_max_tokens.min(5000),
                 };
diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs
index c2dc052..b2d63c7 100644
--- a/crates/g3-providers/src/anthropic.rs
+++ b/crates/g3-providers/src/anthropic.rs
@@ -284,9 +284,21 @@ impl AnthropicProvider {
         // Convert tools if provided
         let anthropic_tools = tools.map(|t| self.convert_tools(t));
 
-        // Add thinking configuration if budget_tokens is set
-        let thinking = self.thinking_budget_tokens.map(|budget| {
-            ThinkingConfig::enabled(budget)
+        // Add thinking configuration if budget_tokens is set AND max_tokens is sufficient
+        // Anthropic requires: max_tokens > thinking.budget_tokens
+        // We add 1024 as minimum buffer for actual response content
+        let thinking = self.thinking_budget_tokens.and_then(|budget| {
+            let min_required = budget + 1024;
+            if max_tokens > min_required {
+                Some(ThinkingConfig::enabled(budget))
+            } else {
+                tracing::warn!(
+                    "Disabling thinking mode: max_tokens ({}) is not greater than thinking.budget_tokens ({}) + 1024 buffer. \
+                     Required: max_tokens > {}",
+                    max_tokens, budget, min_required
+                );
+                None
+            }
         });
 
         let request = AnthropicRequest {
@@ -847,6 +859,12 @@ enum AnthropicContent {
         #[serde(skip_serializing_if = "Option::is_none")]
         cache_control: Option<crate::CacheControl>,
     },
+    #[serde(rename = "thinking")]
+    Thinking {
+        thinking: String,
+        #[serde(default)]
+        signature: Option<String>,
+    },
     #[serde(rename = "tool_use")]
     ToolUse {
         id: String,
@@ -1058,11 +1076,12 @@ mod tests {
         let json_without = serde_json::to_string(&request_without).unwrap();
         assert!(!json_without.contains("thinking"), "JSON should not contain 'thinking' field when not configured");
 
-        // Test WITH thinking parameter
+        // Test WITH thinking parameter - max_tokens must be > budget_tokens + 1024
+        // Using budget=10000 requires max_tokens > 11024
         let provider_with = AnthropicProvider::new(
             "test-key".to_string(),
             Some("claude-sonnet-4-5".to_string()),
-            Some(1000),
+            Some(20000),  // Sufficient for thinking budget
             Some(0.5),
             None,
             None,
@@ -1071,11 +1090,47 @@ mod tests {
         .unwrap();
 
         let request_with = provider_with
-            .create_request_body(&messages, None, false, 1000, 0.5)
+            .create_request_body(&messages, None, false, 20000, 0.5)
             .unwrap();
         let json_with = serde_json::to_string(&request_with).unwrap();
         assert!(json_with.contains("thinking"), "JSON should contain 'thinking' field when configured");
         assert!(json_with.contains("\"type\":\"enabled\""), "JSON should contain type: enabled");
         assert!(json_with.contains("\"budget_tokens\":10000"), "JSON should contain budget_tokens: 10000");
+
+        // Test WITH thinking parameter but INSUFFICIENT max_tokens - thinking should be disabled
+        let request_insufficient = provider_with
+            .create_request_body(&messages, None, false, 5000, 0.5)  // Less than budget + 1024
+            .unwrap();
+        let json_insufficient = serde_json::to_string(&request_insufficient).unwrap();
+        assert!(!json_insufficient.contains("thinking"), "JSON should NOT contain 'thinking' field when max_tokens is insufficient");
+    }
+
+    #[test]
+    fn test_thinking_content_block_deserialization() {
+        // Test that we can deserialize a response containing a "thinking" content block
+        // This is what Anthropic returns when extended thinking is enabled
+        let json_response = r#"{
+            "content": [
+                {"type": "thinking", "thinking": "Let me analyze this...", "signature": "abc123"},
+                {"type": "text", "text": "Here is my response."}
+            ],
+            "model": "claude-sonnet-4-5",
+            "usage": {"input_tokens": 100, "output_tokens": 50}
+        }"#;
+
+        let response: AnthropicResponse = serde_json::from_str(json_response)
+            .expect("Should be able to deserialize response with thinking block");
+        
+        assert_eq!(response.content.len(), 2);
+        assert_eq!(response.model, "claude-sonnet-4-5");
+        
+        // Extract only text content (thinking should be filtered out)
+        let text_content: Vec<_> = response.content.iter().filter_map(|c| match c {
+            AnthropicContent::Text { text, .. } => Some(text.as_str()),
+            _ => None,
+        }).collect();
+        
+        assert_eq!(text_content.len(), 1);
+        assert_eq!(text_content[0], "Here is my response.");
     }
 }