fix: use consistent max_tokens defaults across providers
- Fix aliasing issue where resolve_max_tokens() used fallback_default_max_tokens (8192) instead of provider-specific defaults
- Update fallback_default_max_tokens from 8192 to 32000
- Set provider-specific max_tokens defaults:
  - Anthropic: 32000
  - OpenAI: 32000 (was 16000)
  - Databricks: 32000 (was 50000, now matches Anthropic as passthru)
  - Embedded: 2048
- Context window lengths unchanged:
  - OpenAI: 400,000
  - Anthropic: 200,000
  - Databricks (Claude): 200,000

This fixes the "LLM response was cut off due to max_tokens limit" error in agent mode that occurred because 8192 was being used instead of 32000.
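These values are only fallbacks; an explicit `max_tokens` in the config still takes precedence. For reference, an `[agent]` override in the shape the test fixtures below use (the fixture layout is taken from this diff):

```toml
# Overrides the built-in fallback; the provider-specific defaults only
# apply when no explicit value is configured.
[agent]
fallback_default_max_tokens = 32000
enable_streaming = true
timeout_seconds = 60
auto_compact = true
```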
@@ -112,7 +112,7 @@ pub struct AgentConfig {
 }
 
 fn default_fallback_max_tokens() -> usize {
-    8192
+    32000
 }
 fn default_true() -> bool {
     true
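For context, helper functions named like `default_fallback_max_tokens` and `default_true` are the usual targets of serde's field-level `default` attribute. A minimal sketch of the assumed wiring in `AgentConfig` (the attributes themselves are not shown in this hunk):

```rust
use serde::Deserialize;

#[derive(Deserialize)]
pub struct AgentConfig {
    // Falls back to default_fallback_max_tokens() when the key is absent.
    #[serde(default = "default_fallback_max_tokens")]
    pub fallback_default_max_tokens: usize,
    // Falls back to default_true() when the key is absent.
    #[serde(default = "default_true")]
    pub enable_streaming: bool,
    // ...remaining fields elided
}

fn default_fallback_max_tokens() -> usize {
    32000
}

fn default_true() -> bool {
    true
}
```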
@@ -185,7 +185,7 @@ impl Default for AgentConfig {
     fn default() -> Self {
         Self {
             max_context_length: None,
-            fallback_default_max_tokens: 8192,
+            fallback_default_max_tokens: 32000,
             enable_streaming: true,
             timeout_seconds: 120,
             auto_compact: true,
@@ -234,7 +234,7 @@ impl Default for Config {
         },
         agent: AgentConfig {
             max_context_length: None,
-            fallback_default_max_tokens: 8192,
+            fallback_default_max_tokens: 32000,
             enable_streaming: true,
             timeout_seconds: 60,
             auto_compact: true,
@@ -44,7 +44,7 @@ model_path = "test.gguf"
 model_type = "llama"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -88,7 +88,7 @@ token = "test-token"
 model = "test-model"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -132,7 +132,7 @@ token = "test-token"
 model = "test-model"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -169,7 +169,7 @@ api_key = "test-key"
 model = "claude-3"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -210,7 +210,7 @@ model = "claude-opus"
 thinking_budget_tokens = 16000
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -248,7 +248,7 @@ token = "test-token"
 model = "test-model"
 
 [agent]
-fallback_default_max_tokens = 8192
+fallback_default_max_tokens = 32000
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
@@ -66,14 +66,16 @@ pub fn get_thinking_budget_tokens(config: &Config, provider_name: &str) -> Optio
 pub fn resolve_max_tokens(config: &Config, provider_name: &str) -> u32 {
     let (provider_type, _) = parse_provider_ref(provider_name);
 
-    let base = match provider_type {
-        "databricks" => get_max_tokens(config, provider_name)
-            .or(Some(config.agent.fallback_default_max_tokens as u32))
-            .unwrap_or(32000),
-        _ => get_max_tokens(config, provider_name)
-            .or(Some(config.agent.fallback_default_max_tokens as u32))
-            .unwrap_or(16000),
+    // Use provider-specific defaults that match the provider implementations
+    // These defaults should match what the providers use internally
+    let provider_default = match provider_type {
+        "anthropic" => 32000, // Anthropic provider defaults to 32768, we use 32000
+        "databricks" => 32000, // Databricks is passthru to Anthropic, match its defaults
+        "openai" => 32000, // OpenAI models support large outputs
+        "embedded" => 2048, // Embedded provider defaults to 2048
+        _ => 16000, // Generic fallback
     };
+    let base = get_max_tokens(config, provider_name).unwrap_or(provider_default);
 
     // For Anthropic with thinking enabled, ensure max_tokens is sufficient
     // Anthropic requires: max_tokens > thinking.budget_tokens
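The net resolution order: an explicit `max_tokens` from config (via `get_max_tokens`) always wins; only when it is absent does the provider-specific default apply, and `fallback_default_max_tokens` no longer shadows it. A self-contained sketch of that precedence, using plain types rather than the crate's `Config`:

```rust
// Hypothetical standalone mirror of the resolution logic above.
fn resolve(explicit: Option<u32>, provider_type: &str) -> u32 {
    let provider_default = match provider_type {
        "anthropic" | "databricks" | "openai" => 32000,
        "embedded" => 2048,
        _ => 16000,
    };
    explicit.unwrap_or(provider_default)
}

fn main() {
    assert_eq!(resolve(None, "anthropic"), 32000);   // provider default applies
    assert_eq!(resolve(None, "embedded"), 2048);     // provider default applies
    assert_eq!(resolve(Some(4096), "openai"), 4096); // explicit config wins
}
```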
@@ -227,7 +227,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::token(token),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -254,7 +254,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::token(token),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -281,7 +281,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::oauth(host.clone()),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
@@ -307,7 +307,7 @@ impl DatabricksProvider {
             host: host.trim_end_matches('/').to_string(),
             auth: DatabricksAuth::oauth(host.clone()),
             model,
-            max_tokens: max_tokens.unwrap_or(50000),
+            max_tokens: max_tokens.unwrap_or(32000),
             temperature: temperature.unwrap_or(0.1),
         })
     }
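All four Databricks constructors make the same one-line change, and the `Option::unwrap_or` semantics mean only previously-unset configurations are affected. A quick check:

```rust
fn main() {
    let explicit: Option<u32> = Some(50000);
    let unset: Option<u32> = None;
    assert_eq!(explicit.unwrap_or(32000), 50000); // explicit max_tokens still wins
    assert_eq!(unset.unwrap_or(32000), 32000);    // only the fallback moved off 50000
}
```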
@@ -393,7 +393,7 @@ impl LLMProvider for OpenAIProvider {
     }
 
     fn max_tokens(&self) -> u32 {
-        self.max_tokens.unwrap_or(16000)
+        self.max_tokens.unwrap_or(32000)
     }
 
     fn temperature(&self) -> f32 {