fix: use consistent max_tokens defaults across providers

- Fix defaulting issue where resolve_max_tokens() fell back to
  fallback_default_max_tokens (8192) instead of provider-specific defaults
- Update fallback_default_max_tokens from 8192 to 32000
- Set provider-specific max_tokens defaults:
  - Anthropic: 32000
  - OpenAI: 32000 (was 16000)
  - Databricks: 32000 (was 50000; now matches Anthropic as a passthrough)
  - Embedded: 2048
- Context window lengths unchanged:
  - OpenAI: 400,000
  - Anthropic: 200,000
  - Databricks (Claude): 200,000

This fixes the 'LLM response was cut off due to max_tokens limit' error
in agent mode that occurred because 8192 was being used instead of 32000.
This commit is contained in:
Dhanji R. Prasanna
2026-01-16 07:05:57 +05:30
parent 65e0217c68
commit 01cb4f6691
5 changed files with 23 additions and 21 deletions

View File

@@ -227,7 +227,7 @@ impl DatabricksProvider {
host: host.trim_end_matches('/').to_string(),
auth: DatabricksAuth::token(token),
model,
max_tokens: max_tokens.unwrap_or(50000),
max_tokens: max_tokens.unwrap_or(32000),
temperature: temperature.unwrap_or(0.1),
})
}
@@ -254,7 +254,7 @@ impl DatabricksProvider {
host: host.trim_end_matches('/').to_string(),
auth: DatabricksAuth::token(token),
model,
max_tokens: max_tokens.unwrap_or(50000),
max_tokens: max_tokens.unwrap_or(32000),
temperature: temperature.unwrap_or(0.1),
})
}
@@ -281,7 +281,7 @@ impl DatabricksProvider {
host: host.trim_end_matches('/').to_string(),
auth: DatabricksAuth::oauth(host.clone()),
model,
max_tokens: max_tokens.unwrap_or(50000),
max_tokens: max_tokens.unwrap_or(32000),
temperature: temperature.unwrap_or(0.1),
})
}
@@ -307,7 +307,7 @@ impl DatabricksProvider {
host: host.trim_end_matches('/').to_string(),
auth: DatabricksAuth::oauth(host.clone()),
model,
max_tokens: max_tokens.unwrap_or(50000),
max_tokens: max_tokens.unwrap_or(32000),
temperature: temperature.unwrap_or(0.1),
})
}

View File

@@ -393,7 +393,7 @@ impl LLMProvider for OpenAIProvider {
}
fn max_tokens(&self) -> u32 {
self.max_tokens.unwrap_or(16000)
self.max_tokens.unwrap_or(32000)
}
fn temperature(&self) -> f32 {