fix refresh and max_tokens bug

2025-11-07 09:50:43 +11:00
parent cb43fcdecf
commit 1f12ff6ca0
5 changed files with 200 additions and 24 deletions
--- a/config.example.toml
+++ b/config.example.toml
@@ -10,12 +10,18 @@ default_provider = "databricks"
 host = "https://your-workspace.cloud.databricks.com"
 # token = "your-databricks-token"  # Optional - will use OAuth if not provided
 model = "databricks-claude-sonnet-4"
-max_tokens = 4096
+max_tokens = 4096  # Per-request output limit (how many tokens the model can generate per response)
+                   # Note: This is different from max_context_length (total conversation history size)
 temperature = 0.1
 use_oauth = true

 [agent]
 fallback_default_max_tokens = 8192
+# max_context_length: Overrides the context window size for all providers.
+# This is the total size of the conversation history, not the per-request output limit.
+# Useful for models with large context windows (e.g., Claude with 200k tokens).
+# If not set, provider-specific defaults based on model capabilities are used.
+# max_context_length = 200000
 enable_streaming = true
 timeout_seconds = 60
 # Retry configuration for recoverable errors (timeouts, rate limits, etc.)