From 01cb4f6691734bec2f083e1d6bd6828e494ae586 Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna" <d@wideplay.com>
Date: Fri, 16 Jan 2026 07:05:57 +0530
Subject: [PATCH] fix: use consistent max_tokens defaults across providers

- Fix bug where resolve_max_tokens() always fell back to
  fallback_default_max_tokens (8192), which made the provider-specific
  defaults unreachable
- Update fallback_default_max_tokens from 8192 to 32000
- Set provider-specific max_tokens defaults:
  - Anthropic: 32000
  - OpenAI: 32000 (was 16000)
  - Databricks: 32000 (was 50000; now matches Anthropic, since it is a passthrough to Anthropic)
  - Embedded: 2048
- Context window lengths unchanged:
  - OpenAI: 400,000
  - Anthropic: 200,000
  - Databricks (Claude): 200,000

This fixes the 'LLM response was cut off due to max_tokens limit' error
in agent mode that occurred because 8192 was being used instead of 32000.
---
 crates/g3-config/src/lib.rs           |  6 +++---
 crates/g3-config/src/tests.rs         | 12 ++++++------
 crates/g3-core/src/provider_config.rs | 16 +++++++++-------
 crates/g3-providers/src/databricks.rs |  8 ++++----
 crates/g3-providers/src/openai.rs     |  2 +-
 5 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs
index a261a80..c57e727 100644
--- a/crates/g3-config/src/lib.rs
+++ b/crates/g3-config/src/lib.rs
@@ -112,7 +112,7 @@ pub struct AgentConfig {
 }
 
 fn default_fallback_max_tokens() -> usize {
-    8192
+    32000
 }
 fn default_true() -> bool {
     true
@@ -185,7 +185,7 @@ impl Default for AgentConfig {
     fn default() -> Self {
         Self {
             max_context_length: None,
-            fallback_default_max_tokens: 8192,
+            fallback_default_max_tokens: 32000,
             enable_streaming: true,
             timeout_seconds: 120,
             auto_compact: true,
@@ -234,7 +234,7 @@ impl Default for Config {
             },
             agent: AgentConfig {
                 max_context_length: None,
-                fallback_default_max_tokens: 8192,
+                fallback_default_max_tokens: 32000,
                 enable_streaming: true,
                 timeout_seconds: 60,
                 auto_compact: true,
diff --git a/crates/g3-config/src/tests.rs b/crates/g3-config/src/tests.rs
index a725e8d..e6d0c05 100644
--- a/crates/g3-config/src/tests.rs
+++ b/crates/g3-config/src/tests.rs
@@ -44,7 +44,7 @@ model_path = "test.gguf"
 model_type = "llama"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -88,7 +88,7 @@ token = "test-token"
 model = "test-model"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -132,7 +132,7 @@ token = "test-token"
 model = "test-model"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -169,7 +169,7 @@ api_key = "test-key"
 model = "claude-3"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -210,7 +210,7 @@ model = "claude-opus"
 thinking_budget_tokens = 16000
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -248,7 +248,7 @@ token = "test-token"
 model = "test-model"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
diff --git a/crates/g3-core/src/provider_config.rs b/crates/g3-core/src/provider_config.rs
index 939147a..4ee4550 100644
--- a/crates/g3-core/src/provider_config.rs
+++ b/crates/g3-core/src/provider_config.rs
@@ -66,14 +66,16 @@ pub fn get_thinking_budget_tokens(config: &Config, provider_name: &str) -> Optio
 pub fn resolve_max_tokens(config: &Config, provider_name: &str) -> u32 {
     let (provider_type, _) = parse_provider_ref(provider_name);
     
-    let base = match provider_type {
-        "databricks" => get_max_tokens(config, provider_name)
-            .or(Some(config.agent.fallback_default_max_tokens as u32))
-            .unwrap_or(32000),
-        _ => get_max_tokens(config, provider_name)
-            .or(Some(config.agent.fallback_default_max_tokens as u32))
-            .unwrap_or(16000),
+    // Use provider-specific defaults that match the provider implementations
+    // These defaults should match what the providers use internally
+    let provider_default = match provider_type {
+        "anthropic" => 32000,   // Anthropic provider defaults to 32768, we use 32000
+        "databricks" => 32000,  // Databricks is passthru to Anthropic, match its defaults
+        "openai" => 32000,      // OpenAI models support large outputs
+        "embedded" => 2048,     // Embedded provider defaults to 2048
+        _ => 16000,             // Generic fallback
     };
+    let base = get_max_tokens(config, provider_name).unwrap_or(provider_default);
     
     // For Anthropic with thinking enabled, ensure max_tokens is sufficient
     // Anthropic requires: max_tokens > thinking.budget_tokens
diff --git a/crates/g3-providers/src/databricks.rs b/crates/g3-providers/src/databricks.rs
index db0dbcc..c309a78 100644
--- a/crates/g3-providers/src/databricks.rs
+++ b/crates/g3-providers/src/databricks.rs
@@ -227,7 +227,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::token(token),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -254,7 +254,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::token(token),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -281,7 +281,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::oauth(host.clone()),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -307,7 +307,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::oauth(host.clone()),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
diff --git a/crates/g3-providers/src/openai.rs b/crates/g3-providers/src/openai.rs
index c95471f..a60b333 100644
--- a/crates/g3-providers/src/openai.rs
+++ b/crates/g3-providers/src/openai.rs
@@ -393,7 +393,7 @@ impl LLMProvider for OpenAIProvider {
     }
 
     fn max_tokens(&self) -> u32 {
-        self.max_tokens.unwrap_or(16000)
+        self.max_tokens.unwrap_or(32000)
     }
 
     fn temperature(&self) -> f32 {