fix: use consistent max_tokens defaults across providers
- Fix aliasing issue where resolve_max_tokens() used fallback_default_max_tokens (8192) instead of provider-specific defaults
- Update fallback_default_max_tokens from 8192 to 32000
- Set provider-specific max_tokens defaults:
  - Anthropic: 32000
  - OpenAI: 32000 (was 16000)
  - Databricks: 32000 (was 50000, now matches Anthropic as passthru)
  - Embedded: 2048
- Context window lengths unchanged:
  - OpenAI: 400,000
  - Anthropic: 200,000
  - Databricks (Claude): 200,000

This fixes the "LLM response was cut off due to max_tokens limit" error in agent mode that occurred because 8192 was being used instead of 32000.
This commit is contained in:
@@ -112,7 +112,7 @@ pub struct AgentConfig {
|
||||
}
|
||||
|
||||
fn default_fallback_max_tokens() -> usize {
|
||||
8192
|
||||
32000
|
||||
}
|
||||
fn default_true() -> bool {
|
||||
true
|
||||
@@ -185,7 +185,7 @@ impl Default for AgentConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_context_length: None,
|
||||
fallback_default_max_tokens: 8192,
|
||||
fallback_default_max_tokens: 32000,
|
||||
enable_streaming: true,
|
||||
timeout_seconds: 120,
|
||||
auto_compact: true,
|
||||
@@ -234,7 +234,7 @@ impl Default for Config {
|
||||
},
|
||||
agent: AgentConfig {
|
||||
max_context_length: None,
|
||||
fallback_default_max_tokens: 8192,
|
||||
fallback_default_max_tokens: 32000,
|
||||
enable_streaming: true,
|
||||
timeout_seconds: 60,
|
||||
auto_compact: true,
|
||||
|
||||
@@ -44,7 +44,7 @@ model_path = "test.gguf"
|
||||
model_type = "llama"
|
||||
|
||||
[agent]
|
||||
fallback_default_max_tokens = 8192
|
||||
fallback_default_max_tokens = 32000
|
||||
enable_streaming = true
|
||||
timeout_seconds = 60
|
||||
auto_compact = true
|
||||
@@ -88,7 +88,7 @@ token = "test-token"
|
||||
model = "test-model"
|
||||
|
||||
[agent]
|
||||
fallback_default_max_tokens = 8192
|
||||
fallback_default_max_tokens = 32000
|
||||
enable_streaming = true
|
||||
timeout_seconds = 60
|
||||
auto_compact = true
|
||||
@@ -132,7 +132,7 @@ token = "test-token"
|
||||
model = "test-model"
|
||||
|
||||
[agent]
|
||||
fallback_default_max_tokens = 8192
|
||||
fallback_default_max_tokens = 32000
|
||||
enable_streaming = true
|
||||
timeout_seconds = 60
|
||||
auto_compact = true
|
||||
@@ -169,7 +169,7 @@ api_key = "test-key"
|
||||
model = "claude-3"
|
||||
|
||||
[agent]
|
||||
fallback_default_max_tokens = 8192
|
||||
fallback_default_max_tokens = 32000
|
||||
enable_streaming = true
|
||||
timeout_seconds = 60
|
||||
auto_compact = true
|
||||
@@ -210,7 +210,7 @@ model = "claude-opus"
|
||||
thinking_budget_tokens = 16000
|
||||
|
||||
[agent]
|
||||
fallback_default_max_tokens = 8192
|
||||
fallback_default_max_tokens = 32000
|
||||
enable_streaming = true
|
||||
timeout_seconds = 60
|
||||
auto_compact = true
|
||||
@@ -248,7 +248,7 @@ token = "test-token"
|
||||
model = "test-model"
|
||||
|
||||
[agent]
|
||||
fallback_default_max_tokens = 8192
|
||||
fallback_default_max_tokens = 32000
|
||||
enable_streaming = true
|
||||
timeout_seconds = 60
|
||||
auto_compact = true
|
||||
|
||||
Reference in New Issue
Block a user