diff --git a/config.coach-player.example.toml b/config.coach-player.example.toml index 2101564..999b674 100644 --- a/config.coach-player.example.toml +++ b/config.coach-player.example.toml @@ -19,6 +19,6 @@ max_tokens = 4096 temperature = 0.3 # Slightly higher temperature for more creative implementations [agent] -max_context_length = 8192 +fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 \ No newline at end of file diff --git a/config.example.toml b/config.example.toml index b58ae3f..56954f9 100644 --- a/config.example.toml +++ b/config.example.toml @@ -15,7 +15,7 @@ temperature = 0.1 use_oauth = true [agent] -max_context_length = 8192 +fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs index d9f0602..ba578e9 100644 --- a/crates/g3-config/src/lib.rs +++ b/crates/g3-config/src/lib.rs @@ -62,7 +62,7 @@ pub struct EmbeddedConfig { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AgentConfig { - pub max_context_length: usize, + pub fallback_default_max_tokens: usize, pub enable_streaming: bool, pub timeout_seconds: u64, pub auto_compact: bool, @@ -133,7 +133,7 @@ impl Default for Config { player: None, // Will use default_provider if not specified }, agent: AgentConfig { - max_context_length: 8192, + fallback_default_max_tokens: 8192, enable_streaming: true, timeout_seconds: 60, auto_compact: true, @@ -249,7 +249,7 @@ impl Config { player: None, // Will use default_provider if not specified }, agent: AgentConfig { - max_context_length: 8192, + fallback_default_max_tokens: 8192, enable_streaming: true, timeout_seconds: 60, auto_compact: true, diff --git a/crates/g3-config/src/tests.rs b/crates/g3-config/src/tests.rs index a1e1e9f..6899a8b 100644 --- a/crates/g3-config/src/tests.rs +++ b/crates/g3-config/src/tests.rs @@ -31,7 +31,7 @@ model_path = "test.gguf" model_type = "llama" [agent] -max_context_length = 8192 
+fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 "#; @@ -72,7 +72,7 @@ token = "test-token" model = "test-model" [agent] -max_context_length = 8192 +fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 "#; @@ -113,7 +113,7 @@ token = "test-token" model = "test-model" [agent] -max_context_length = 8192 +fallback_default_max_tokens = 8192 enable_streaming = true timeout_seconds = 60 "#; diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 1c754fa..b338852 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -865,7 +865,7 @@ impl Agent { debug!("Default provider set successfully"); // Determine context window size based on active provider - let context_length = Self::determine_context_length(&config, &providers)?; + let context_length = Self::get_configured_context_length(&config, &providers)?; let mut context_window = ContextWindow::new(context_length); // If README content is provided, add it as the first system message @@ -920,7 +920,7 @@ impl Agent { }) } - fn determine_context_length(config: &Config, providers: &ProviderRegistry) -> Result { + fn get_configured_context_length(config: &Config, providers: &ProviderRegistry) -> Result { // Get the configured max_tokens for the current provider fn get_provider_max_tokens(config: &Config, provider_name: &str) -> Option { match provider_name { @@ -959,7 +959,7 @@ impl Agent { } }) } else { - config.agent.max_context_length as u32 + config.agent.fallback_default_max_tokens as u32 } } "openai" => { @@ -983,7 +983,7 @@ impl Agent { } }) } - _ => config.agent.max_context_length as u32, + _ => config.agent.fallback_default_max_tokens as u32, }; debug!( @@ -2415,8 +2415,8 @@ Template: // Check if we need to summarize before starting if self.context_window.should_summarize() { - // First try thinning if we haven't reached 90% yet - if self.context_window.percentage_used() < 90.0 && self.context_window.should_thin() { + // First 
try thinning when usage is above 90% of capacity, so we avoid an LLM summarization call that might fail + if self.context_window.percentage_used() > 90.0 && self.context_window.should_thin() { self.ui_writer.print_context_status(&format!( "\n🥒 Context window at {}%. Trying thinning first...", self.context_window.percentage_used() as u32