Add GLM-4 to context length defaults (32k)
GLM-4 models support a 32k-token context window, but the `glm4` model type had no entry in the context-length table, so it fell through to the conservative 4096-token default and caused context overflow on startup.
This commit is contained in:
@@ -663,6 +663,7 @@ impl<W: UiWriter> Agent<W> {
 match &embedded_config.model_type.to_lowercase()[..] {
     "codellama" => 16384, // CodeLlama supports 16k context
     "llama" => 4096, // Base Llama models
+    "glm4" => 32768, // GLM-4 supports 32k context
     "mistral" => 8192, // Mistral models
     "qwen" => 32768, // Qwen2.5 supports 32k context
     _ => 4096, // Conservative default
Reference in New Issue
Block a user