From 01cb4f6691734bec2f083e1d6bd6828e494ae586 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Fri, 16 Jan 2026 07:05:57 +0530 Subject: [PATCH] fix: use consistent max_tokens defaults across providers - Fix fallback-precedence bug where resolve_max_tokens() always fell back to fallback_default_max_tokens (8192), so the provider-specific defaults were never reached - Update fallback_default_max_tokens from 8192 to 32000 - Set provider-specific max_tokens defaults: - Anthropic: 32000 - OpenAI: 32000 (was 16000) - Databricks: 32000 (was 50000, now matches Anthropic as pass-through) - Embedded: 2048 - Context window lengths unchanged: - OpenAI: 400,000 - Anthropic: 200,000 - Databricks (Claude): 200,000 This fixes the 'LLM response was cut off due to max_tokens limit' error in agent mode that occurred because 8192 was being used instead of 32000. --- crates/g3-config/src/lib.rs | 6 +++--- crates/g3-config/src/tests.rs | 12 ++++++------ crates/g3-core/src/provider_config.rs | 16 +++++++++------- crates/g3-providers/src/databricks.rs | 8 ++++---- crates/g3-providers/src/openai.rs | 2 +- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs index a261a80..c57e727 100644 --- a/crates/g3-config/src/lib.rs +++ b/crates/g3-config/src/lib.rs @@ -112,7 +112,7 @@ pub struct AgentConfig { } fn default_fallback_max_tokens() -> usize { - 8192 + 32000 } fn default_true() -> bool { true @@ -185,7 +185,7 @@ impl Default for AgentConfig { fn default() -> Self { Self { max_context_length: None, - fallback_default_max_tokens: 8192, + fallback_default_max_tokens: 32000, enable_streaming: true, timeout_seconds: 120, auto_compact: true, @@ -234,7 +234,7 @@ impl Default for Config { }, agent: AgentConfig { max_context_length: None, - fallback_default_max_tokens: 8192, + fallback_default_max_tokens: 32000, enable_streaming: true, timeout_seconds: 60, auto_compact: true, diff --git a/crates/g3-config/src/tests.rs b/crates/g3-config/src/tests.rs index 
a725e8d..e6d0c05 100644 --- a/crates/g3-config/src/tests.rs +++ b/crates/g3-config/src/tests.rs @@ -44,7 +44,7 @@ model_path = "test.gguf" model_type = "llama" [agent] -fallback_default_max_tokens = 8192 +fallback_default_max_tokens = 32000 enable_streaming = true timeout_seconds = 60 auto_compact = true @@ -88,7 +88,7 @@ token = "test-token" model = "test-model" [agent] -fallback_default_max_tokens = 8192 +fallback_default_max_tokens = 32000 enable_streaming = true timeout_seconds = 60 auto_compact = true @@ -132,7 +132,7 @@ token = "test-token" model = "test-model" [agent] -fallback_default_max_tokens = 8192 +fallback_default_max_tokens = 32000 enable_streaming = true timeout_seconds = 60 auto_compact = true @@ -169,7 +169,7 @@ api_key = "test-key" model = "claude-3" [agent] -fallback_default_max_tokens = 8192 +fallback_default_max_tokens = 32000 enable_streaming = true timeout_seconds = 60 auto_compact = true @@ -210,7 +210,7 @@ model = "claude-opus" thinking_budget_tokens = 16000 [agent] -fallback_default_max_tokens = 8192 +fallback_default_max_tokens = 32000 enable_streaming = true timeout_seconds = 60 auto_compact = true @@ -248,7 +248,7 @@ token = "test-token" model = "test-model" [agent] -fallback_default_max_tokens = 8192 +fallback_default_max_tokens = 32000 enable_streaming = true timeout_seconds = 60 auto_compact = true diff --git a/crates/g3-core/src/provider_config.rs b/crates/g3-core/src/provider_config.rs index 939147a..4ee4550 100644 --- a/crates/g3-core/src/provider_config.rs +++ b/crates/g3-core/src/provider_config.rs @@ -66,14 +66,16 @@ pub fn get_thinking_budget_tokens(config: &Config, provider_name: &str) -> Optio pub fn resolve_max_tokens(config: &Config, provider_name: &str) -> u32 { let (provider_type, _) = parse_provider_ref(provider_name); - let base = match provider_type { - "databricks" => get_max_tokens(config, provider_name) - .or(Some(config.agent.fallback_default_max_tokens as u32)) - .unwrap_or(32000), - _ => get_max_tokens(config, 
provider_name) - .or(Some(config.agent.fallback_default_max_tokens as u32)) - .unwrap_or(16000), + // Use provider-specific defaults that match the provider implementations + // These defaults should match what the providers use internally + let provider_default = match provider_type { + "anthropic" => 32000, // Anthropic provider defaults to 32768, we use 32000 + "databricks" => 32000, // Databricks is passthru to Anthropic, match its defaults + "openai" => 32000, // OpenAI models support large outputs + "embedded" => 2048, // Embedded provider defaults to 2048 + _ => 16000, // Generic fallback }; + let base = get_max_tokens(config, provider_name).unwrap_or(provider_default); // For Anthropic with thinking enabled, ensure max_tokens is sufficient // Anthropic requires: max_tokens > thinking.budget_tokens diff --git a/crates/g3-providers/src/databricks.rs b/crates/g3-providers/src/databricks.rs index db0dbcc..c309a78 100644 --- a/crates/g3-providers/src/databricks.rs +++ b/crates/g3-providers/src/databricks.rs @@ -227,7 +227,7 @@ impl DatabricksProvider { host: host.trim_end_matches('/').to_string(), auth: DatabricksAuth::token(token), model, - max_tokens: max_tokens.unwrap_or(50000), + max_tokens: max_tokens.unwrap_or(32000), temperature: temperature.unwrap_or(0.1), }) } @@ -254,7 +254,7 @@ impl DatabricksProvider { host: host.trim_end_matches('/').to_string(), auth: DatabricksAuth::token(token), model, - max_tokens: max_tokens.unwrap_or(50000), + max_tokens: max_tokens.unwrap_or(32000), temperature: temperature.unwrap_or(0.1), }) } @@ -281,7 +281,7 @@ impl DatabricksProvider { host: host.trim_end_matches('/').to_string(), auth: DatabricksAuth::oauth(host.clone()), model, - max_tokens: max_tokens.unwrap_or(50000), + max_tokens: max_tokens.unwrap_or(32000), temperature: temperature.unwrap_or(0.1), }) } @@ -307,7 +307,7 @@ impl DatabricksProvider { host: host.trim_end_matches('/').to_string(), auth: DatabricksAuth::oauth(host.clone()), model, - max_tokens: 
max_tokens.unwrap_or(50000), + max_tokens: max_tokens.unwrap_or(32000), temperature: temperature.unwrap_or(0.1), }) } diff --git a/crates/g3-providers/src/openai.rs b/crates/g3-providers/src/openai.rs index c95471f..a60b333 100644 --- a/crates/g3-providers/src/openai.rs +++ b/crates/g3-providers/src/openai.rs @@ -393,7 +393,7 @@ impl LLMProvider for OpenAIProvider { } fn max_tokens(&self) -> u32 { - self.max_tokens.unwrap_or(16000) + self.max_tokens.unwrap_or(32000) } fn temperature(&self) -> f32 {