From 58fe74334df879b5bcc1430050d8ece6cdff9db3 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Wed, 28 Jan 2026 11:16:14 +1100 Subject: [PATCH] Auto-detect context window size from GGUF for embedded providers - Add context_window_size() method to LLMProvider trait - Implement for EmbeddedProvider to return the auto-detected context length - Update Agent to query provider directly instead of using hardcoded defaults - Removes need for model-specific context length mappings --- crates/g3-core/src/lib.rs | 23 +++++++++-------------- crates/g3-providers/src/embedded.rs | 4 ++++ crates/g3-providers/src/lib.rs | 6 ++++++ 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 60b2ed3..b4009cb 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -651,24 +651,19 @@ impl Agent { let model_name = provider.model(); // Parse provider name to get type and config name - let (provider_type, config_name) = provider_config::parse_provider_ref(provider_name); + let (provider_type, _config_name) = provider_config::parse_provider_ref(provider_name); // Use provider-specific context length if available let context_length = match provider_type { "embedded" | "embedded." => { - // For embedded models, use the configured context_length or model-specific defaults - if let Some(embedded_config) = config.providers.embedded.get(config_name) { - embedded_config.context_length.unwrap_or_else(|| { - // Model-specific defaults for embedded models - match &embedded_config.model_type.to_lowercase()[..] 
{ - "codellama" => 16384, // CodeLlama supports 16k context - "llama" => 4096, // Base Llama models - "glm4" => 32768, // GLM-4 supports 32k context - "mistral" => 8192, // Mistral models - "qwen" => 32768, // Qwen2.5 supports 32k context - _ => 4096, // Conservative default - } - }) + // For embedded models, query the provider directly for its context window + // The provider auto-detects this from the GGUF file + if let Some(ctx_size) = provider.context_window_size() { + debug!( + "Using context window size {} from embedded provider", + ctx_size + ); + ctx_size } else { config.agent.fallback_default_max_tokens as u32 } diff --git a/crates/g3-providers/src/embedded.rs b/crates/g3-providers/src/embedded.rs index ac99e03..ce62d42 100644 --- a/crates/g3-providers/src/embedded.rs +++ b/crates/g3-providers/src/embedded.rs @@ -700,6 +700,10 @@ impl LLMProvider for EmbeddedProvider { fn temperature(&self) -> f32 { self.temperature } + + fn context_window_size(&self) -> Option<u32> { + Some(self.context_length) + } } #[cfg(test)] diff --git a/crates/g3-providers/src/lib.rs b/crates/g3-providers/src/lib.rs index ab5c38a..18cce1f 100644 --- a/crates/g3-providers/src/lib.rs +++ b/crates/g3-providers/src/lib.rs @@ -39,6 +39,12 @@ pub trait LLMProvider: Send + Sync { /// Get the configured temperature for this provider fn temperature(&self) -> f32; + + /// Get the context window size for this provider + /// Returns None if the provider doesn't have a fixed context window + fn context_window_size(&self) -> Option<u32> { + None + } } #[derive(Debug, Clone, Serialize, Deserialize)]