Fixes a bug where the temperature parameter was not being passed to the LLM; it is now forwarded on every request. Also adds support for Anthropic models in 'thinking' (extended thinking) mode.
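
A minimal sketch of what this means at the API level, assuming the official `anthropic` Python SDK (the helper function and its defaults are illustrative, not the project's actual code):

import anthropic

client = anthropic.Anthropic(api_key="your-anthropic-api-key")

def create_message(prompt, temperature=0.3, thinking_budget_tokens=None):
    kwargs = {
        "model": "claude-sonnet-4-5",
        "max_tokens": 4096,
        "messages": [{"role": "user", "content": prompt}],
    }
    if thinking_budget_tokens:
        # Extended thinking: budget_tokens must stay below max_tokens, and the
        # API does not accept a custom temperature while thinking is enabled.
        kwargs["max_tokens"] = max(kwargs["max_tokens"], thinking_budget_tokens + 4096)
        kwargs["thinking"] = {"type": "enabled", "budget_tokens": thinking_budget_tokens}
    else:
        kwargs["temperature"] = temperature  # previously dropped, now forwarded
    return client.messages.create(**kwargs)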
68 lines · 2.9 KiB · TOML
[providers]
default_provider = "databricks"

# Optional: Specify different providers for coach and player in autonomous mode
# If not specified, will use default_provider for both
# coach = "databricks" # Provider for coach (code reviewer)
# player = "anthropic" # Provider for player (code implementer)
# Note: Make sure the specified providers are configured below

[providers.databricks]
host = "https://your-workspace.cloud.databricks.com"
# token = "your-databricks-token" # Optional - will use OAuth if not provided
model = "databricks-claude-sonnet-4"
max_tokens = 4096 # Per-request output limit (how many tokens the model can generate per response)
# Note: This is different from max_context_length (total conversation history size)
temperature = 0.1
use_oauth = true

[providers.anthropic]
api_key = "your-anthropic-api-key"
model = "claude-sonnet-4-5"
max_tokens = 4096
temperature = 0.3 # Slightly higher temperature for more creative implementations
# cache_config = "ephemeral" # Optional: Enable prompt caching
# Options: "ephemeral", "5minute", "1hour"
# Reduces costs and latency for repeated prompts. Uses Anthropic's prompt caching
# with different TTLs (see the sketch after this file).
# enable_1m_context = true # optional, more expensive
# thinking_budget_tokens = 10000 # Optional: Enable extended thinking mode with token budget
# Allows the model to "think" before responding. Useful for complex reasoning tasks.

# Multiple OpenAI-compatible providers can be configured with custom names
# Each provider gets its own section under [providers.openai_compatible.<name>]
# [providers.openai_compatible.openrouter]
# api_key = "your-openrouter-api-key"
# model = "anthropic/claude-3.5-sonnet"
# base_url = "https://openrouter.ai/api/v1"
# max_tokens = 4096
# temperature = 0.1

# [providers.openai_compatible.groq]
# api_key = "your-groq-api-key"
# model = "llama-3.3-70b-versatile"
# base_url = "https://api.groq.com/openai/v1"
# max_tokens = 4096
# temperature = 0.1

# To use one of these providers, set default_provider to the name you chose:
# default_provider = "openrouter"

[agent]
fallback_default_max_tokens = 8192
# max_context_length: Override the context window size for all providers
# This is the total size of conversation history, not the per-request output limit
# Useful for models with large context windows (e.g., Claude with 200k tokens)
# If not set, uses provider-specific defaults based on model capabilities
# max_context_length = 200000
enable_streaming = true
timeout_seconds = 60
# Retry configuration for recoverable errors (timeouts, rate limits, etc.)
max_retry_attempts = 3 # Default mode retry attempts
autonomous_max_retry_attempts = 6 # Autonomous mode retry attempts (higher for long-running tasks)
allow_multiple_tool_calls = true # Enable multiple tool calls

[computer_control]
enabled = false # Set to true to enable computer control (requires OS permissions)
require_confirmation = true
max_actions_per_second = 5
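
For reference, a minimal sketch of what the commented-out cache_config option corresponds to at the Anthropic API level, again assuming the official `anthropic` Python SDK. The mapping from this project's config values ("ephemeral", "5minute", "1hour") to the API is an assumption, and long_system_prompt is a placeholder:

import anthropic

client = anthropic.Anthropic(api_key="your-anthropic-api-key")

# Caching pays off for a large, stable prefix (system prompt, tool
# definitions) that is reused verbatim across many requests.
long_system_prompt = "...shared instructions reused across many requests..."

response = client.messages.create(
    model="claude-sonnet-4-5",
    max_tokens=4096,
    system=[{
        "type": "text",
        "text": long_system_prompt,
        # Marks this prefix as cacheable. Anthropic's default ephemeral cache
        # lives around five minutes; longer TTLs are a separate beta feature.
        "cache_control": {"type": "ephemeral"},
    }],
    messages=[{"role": "user", "content": "Review this diff."}],
)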