fix bad max_tokens and context_window logic

for non-Databricks code
Jochen
2025-11-19 13:51:16 +11:00
parent 3f21bdc7b2
commit 1069664e16
4 changed files with 119 additions and 44 deletions

@@ -14,14 +14,16 @@ max_tokens = 4096 # Per-request output limit (how many tokens the model can generate)
# Note: This is different from max_context_length (total conversation history size)
temperature = 0.1
use_oauth = true
# cache_config = "ephemeral" # Optional: Enable prompt caching for Claude models on Databricks
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# The cache control will be automatically applied to:
# - The system prompt at the start of each session
# - Assistant responses after every 10 tool calls
# - 5minute costs $3/MTok, more details below:
# https://docs.claude.com/en/docs/build-with-claude/prompt-caching#pricing
[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-sonnet-4-5"
max_tokens = 4096
temperature = 0.3 # Slightly higher temperature for more creative implementations
# cache_config = "ephemeral" # Optional: Enable prompt caching
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching with different TTLs.
# enable_1m_context = true # Optional: enables the 1M-token context window (more expensive)
# Multiple OpenAI-compatible providers can be configured with custom names
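
For reference, a minimal sketch of a complete provider block under the corrected logic. The key names (max_tokens, max_context_length, cache_config) come from the diff above; the [providers.databricks] section header, the model identifier, and all values are assumed placeholders, not what the commit actually ships:

# Sketch only: section name and values are placeholders.
[providers.databricks]
model = "claude-sonnet-4-5"     # placeholder model identifier
max_tokens = 4096               # per-request output limit (tokens the model may generate in one response)
max_context_length = 200000     # placeholder; total conversation history budget, distinct from max_tokens
temperature = 0.1
use_oauth = true
cache_config = "5minute"        # prompt caching TTL: "ephemeral", "5minute", or "1hour"

The distinction the comments draw is the crux of the fix: max_tokens caps a single response, while max_context_length caps the accumulated history sent with each request, so the two limits must be checked separately.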