fix: use consistent max_tokens defaults across providers

- Fix aliasing issue where resolve_max_tokens() used fallback_default_max_tokens
  (8192) instead of provider-specific defaults (see the sketch after this list)
- Update fallback_default_max_tokens from 8192 to 32000
- Set provider-specific max_tokens defaults:
  - Anthropic: 32000
  - OpenAI: 32000 (was 16000)
  - Databricks: 32000 (was 50000; now matches Anthropic as a pass-through)
  - Embedded: 2048
- Context window lengths unchanged:
  - OpenAI: 400,000
  - Anthropic: 200,000
  - Databricks (Claude): 200,000
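
A minimal sketch of the intended lookup order follows; the Provider enum and the
provider_default_max_tokens helper are hypothetical names for illustration, while
resolve_max_tokens() and fallback_default_max_tokens are the names referenced above:

    enum Provider { Anthropic, OpenAi, Databricks, Embedded }

    // Provider-specific defaults from this change; Databricks passes through to Claude.
    fn provider_default_max_tokens(provider: &Provider) -> usize {
        match provider {
            Provider::Anthropic | Provider::OpenAi | Provider::Databricks => 32000,
            Provider::Embedded => 2048,
        }
    }

    // An explicit request wins, then the provider default, and only then the
    // configured fallback_default_max_tokens (now 32000 rather than 8192).
    fn resolve_max_tokens(
        requested: Option<usize>,
        provider: Option<&Provider>,
        fallback_default_max_tokens: usize,
    ) -> usize {
        requested
            .or_else(|| provider.map(provider_default_max_tokens))
            .unwrap_or(fallback_default_max_tokens)
    }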

This fixes the 'LLM response was cut off due to max_tokens limit' error
in agent mode that occurred because 8192 was being used instead of 32000.

Author: Dhanji R. Prasanna
Date:   2026-01-16 07:05:57 +05:30
Parent: 65e0217c68
Commit: 01cb4f6691
5 changed files with 23 additions and 21 deletions

@@ -112,7 +112,7 @@ pub struct AgentConfig {
 }
 fn default_fallback_max_tokens() -> usize {
-    8192
+    32000
 }
 fn default_true() -> bool {
     true
@@ -185,7 +185,7 @@ impl Default for AgentConfig {
     fn default() -> Self {
         Self {
             max_context_length: None,
-            fallback_default_max_tokens: 8192,
+            fallback_default_max_tokens: 32000,
             enable_streaming: true,
             timeout_seconds: 120,
             auto_compact: true,
@@ -234,7 +234,7 @@ impl Default for Config {
             },
             agent: AgentConfig {
                 max_context_length: None,
-                fallback_default_max_tokens: 8192,
+                fallback_default_max_tokens: 32000,
                 enable_streaming: true,
                 timeout_seconds: 60,
                 auto_compact: true,
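
Note: pairing a free function such as default_fallback_max_tokens() with the config
field matches the usual serde per-field default pattern. The sketch below assumes that
wiring; the attribute placement is an assumption, not copied from this repo:

    use serde::Deserialize;

    fn default_fallback_max_tokens() -> usize {
        32000
    }

    #[derive(Deserialize)]
    pub struct AgentConfig {
        // Applied when the config file omits the field; must stay in sync with
        // the Default impls updated in this commit.
        #[serde(default = "default_fallback_max_tokens")]
        pub fallback_default_max_tokens: usize,
        // remaining fields omitted in this sketch
    }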