fix: use consistent max_tokens defaults across providers

- Fix resolve_max_tokens(), which unconditionally fell back to
  fallback_default_max_tokens (8192), shadowing the provider-specific defaults
- Update fallback_default_max_tokens from 8192 to 32000
- Set provider-specific max_tokens defaults:
  - Anthropic: 32000
  - OpenAI: 32000 (was 16000)
  - Databricks: 32000 (was 50000; matches Anthropic, since Databricks passes through to it)
  - Embedded: 2048
- Context window lengths unchanged:
  - OpenAI: 400,000
  - Anthropic: 200,000
  - Databricks (Claude): 200,000

This fixes the 'LLM response was cut off due to max_tokens limit' error in
agent mode, which occurred because the 8192 fallback was applied instead of 32000.
Author: Dhanji R. Prasanna
Date:   2026-01-16 07:05:57 +05:30
Parent: 65e0217c68
Commit: 01cb4f6691

5 changed files with 23 additions and 21 deletions
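
For context, the intended precedence after this change: an explicit per-provider
max_tokens from the config always wins, and the provider-specific default only
fills the gap. A minimal standalone sketch of that resolution order (the helper
below is illustrative, not the crate's actual get_max_tokens):

    // Illustrative: mirrors the post-fix precedence in resolve_max_tokens.
    fn pick_max_tokens(configured: Option<u32>, provider_type: &str) -> u32 {
        let provider_default = match provider_type {
            "anthropic" | "databricks" | "openai" => 32000,
            "embedded" => 2048,
            _ => 16000, // generic fallback
        };
        // An explicit config value takes priority; the default is a fallback.
        configured.unwrap_or(provider_default)
    }

    fn main() {
        assert_eq!(pick_max_tokens(None, "anthropic"), 32000);
        assert_eq!(pick_max_tokens(Some(4096), "openai"), 4096); // override wins
        assert_eq!(pick_max_tokens(None, "embedded"), 2048);
    }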

View File

@@ -112,7 +112,7 @@ pub struct AgentConfig {
 }

 fn default_fallback_max_tokens() -> usize {
-    8192
+    32000
 }

 fn default_true() -> bool {
     true
@@ -185,7 +185,7 @@ impl Default for AgentConfig {
     fn default() -> Self {
         Self {
             max_context_length: None,
-            fallback_default_max_tokens: 8192,
+            fallback_default_max_tokens: 32000,
             enable_streaming: true,
             timeout_seconds: 120,
             auto_compact: true,
@@ -234,7 +234,7 @@ impl Default for Config {
             },
             agent: AgentConfig {
                 max_context_length: None,
-                fallback_default_max_tokens: 8192,
+                fallback_default_max_tokens: 32000,
                 enable_streaming: true,
                 timeout_seconds: 60,
                 auto_compact: true,
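
The default_fallback_max_tokens function above is a serde-style field default;
presumably it is referenced from the struct via a serde(default = ...) attribute.
A minimal sketch of that pattern (struct trimmed to one field; the wiring shown
here is an assumption, not the crate's actual definition):

    use serde::Deserialize;

    fn default_fallback_max_tokens() -> usize {
        32000
    }

    #[derive(Deserialize)]
    pub struct AgentConfig {
        // When the key is absent from the config file, serde calls the
        // default function, so parsed configs now pick up 32000, not 8192.
        #[serde(default = "default_fallback_max_tokens")]
        pub fallback_default_max_tokens: usize,
        // other fields elided
    }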

View File

@@ -44,7 +44,7 @@ model_path = "test.gguf"
 model_type = "llama"

 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -88,7 +88,7 @@ token = "test-token"
 model = "test-model"

 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -132,7 +132,7 @@ token = "test-token"
 model = "test-model"

 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -169,7 +169,7 @@ api_key = "test-key"
 model = "claude-3"

 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -210,7 +210,7 @@ model = "claude-opus"
 thinking_budget_tokens = 16000

 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -248,7 +248,7 @@ token = "test-token"
 model = "test-model"

 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
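
These TOML blocks read like snapshot fixtures for config serialization, which is
why each had to be bumped in lockstep. A hedged sketch of the kind of round-trip
assertion such fixtures typically back (the test name and use of the toml crate
are assumptions):

    #[test]
    fn default_agent_section_uses_new_fallback() {
        // Assumes AgentConfig also derives Serialize.
        let agent = AgentConfig::default();
        let rendered = toml::to_string(&agent).unwrap();
        assert!(rendered.contains("fallback_default_max_tokens = 32000"));
    }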

View File

@@ -66,14 +66,16 @@ pub fn get_thinking_budget_tokens(config: &Config, provider_name: &str) -> Optio
 pub fn resolve_max_tokens(config: &Config, provider_name: &str) -> u32 {
     let (provider_type, _) = parse_provider_ref(provider_name);
-    let base = match provider_type {
-        "databricks" => get_max_tokens(config, provider_name)
-            .or(Some(config.agent.fallback_default_max_tokens as u32))
-            .unwrap_or(32000),
-        _ => get_max_tokens(config, provider_name)
-            .or(Some(config.agent.fallback_default_max_tokens as u32))
-            .unwrap_or(16000),
+    // Use provider-specific defaults that match the provider implementations
+    // These defaults should match what the providers use internally
+    let provider_default = match provider_type {
+        "anthropic" => 32000,  // Anthropic provider defaults to 32768, we use 32000
+        "databricks" => 32000, // Databricks is passthru to Anthropic, match its defaults
+        "openai" => 32000,     // OpenAI models support large outputs
+        "embedded" => 2048,    // Embedded provider defaults to 2048
+        _ => 16000,            // Generic fallback
     };
+    let base = get_max_tokens(config, provider_name).unwrap_or(provider_default);
     // For Anthropic with thinking enabled, ensure max_tokens is sufficient
     // Anthropic requires: max_tokens > thinking.budget_tokens
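
The trailing comments refer to a guard that falls outside this hunk. A sketch of
what that clamp plausibly looks like, given the stated constraint that max_tokens
must exceed thinking.budget_tokens (the headroom value is an assumption):

    // Hypothetical continuation of resolve_max_tokens: when extended thinking
    // is enabled, raise max_tokens above the thinking budget if needed.
    fn clamp_for_thinking(base: u32, thinking_budget: Option<u32>) -> u32 {
        match thinking_budget {
            // Anthropic rejects requests where max_tokens <= thinking.budget_tokens.
            Some(budget) if base <= budget => budget + 1024, // headroom is illustrative
            _ => base,
        }
    }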

View File

@@ -227,7 +227,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::token(token),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -254,7 +254,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::token(token),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -281,7 +281,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::oauth(host.clone()),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -307,7 +307,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::oauth(host.clone()),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
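
Since the same literal now appears in all four constructors, one way to keep them
aligned is a shared constant; a small sketch (the constant name is an assumption,
not part of this commit):

    // Hypothetical refactor: a single source of truth for the Databricks
    // default, so the four constructors cannot drift apart again.
    const DEFAULT_DATABRICKS_MAX_TOKENS: u32 = 32000;

    // Each constructor would then use:
    //     max_tokens: max_tokens.unwrap_or(DEFAULT_DATABRICKS_MAX_TOKENS),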

View File

@@ -393,7 +393,7 @@ impl LLMProvider for OpenAIProvider {
     }

     fn max_tokens(&self) -> u32 {
-        self.max_tokens.unwrap_or(16000)
+        self.max_tokens.unwrap_or(32000)
     }

     fn temperature(&self) -> f32 {