Add GLM-4 to context length defaults (32k)
GLM-4 models support 32k context but were falling back to the conservative 4096 default, causing context overflow on startup.
This commit is contained in:
@@ -663,6 +663,7 @@ impl<W: UiWriter> Agent<W> {
    match &embedded_config.model_type.to_lowercase()[..] {
        "codellama" => 16384, // CodeLlama supports 16k context
        "llama" => 4096,      // Base Llama models
        "glm4" => 32768,      // GLM-4 supports 32k context
        "mistral" => 8192,    // Mistral models
        "qwen" => 32768,      // Qwen2.5 supports 32k context
        _ => 4096,            // Conservative default
Reference in New Issue
Block a user