From 76bfb77f84b1a381e6fc733fa2c6af179f2b590c Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna"
Date: Sat, 3 Jan 2026 15:47:04 +1100
Subject: [PATCH] further Fowler fixes and session fixes

---
 crates/g3-cli/src/lib.rs                   |  59 +++-
 crates/g3-core/src/lib.rs                  | 366 +++++----------------
 crates/g3-core/src/provider_config.rs      | 225 +++++++++++++
 crates/g3-core/src/session_continuation.rs |  21 +-
 4 files changed, 382 insertions(+), 289 deletions(-)
 create mode 100644 crates/g3-core/src/provider_config.rs

diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs
index 9212c51..98abf4a 100644
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -689,7 +689,9 @@ async fn run_agent_mode(
     std::env::set_current_dir(&workspace_dir)?;
     // Check for incomplete agent sessions before starting a new one
-    if let Ok(Some(incomplete_session)) = find_incomplete_agent_session(agent_name) {
+    let resuming_session = find_incomplete_agent_session(agent_name).ok().flatten();
+
+    if let Some(ref incomplete_session) = resuming_session {
         output.print(&format!(
             "\nšŸ”„ Found incomplete session for agent '{}'",
             agent_name
         ));
@@ -710,9 +712,6 @@
         output.print("");
         output.print(" Resuming incomplete session...");
         output.print("");
-
-        // TODO: Actually resume the session - for now we just notify and continue
-        // In a future iteration, we could restore the context and continue
     }
 
     // Load agent prompt from agents/<agent_name>.md
@@ -775,9 +774,55 @@
     // Set agent mode for session tracking
     agent.set_agent_mode(agent_name);
 
-    // The agent prompt should contain instructions to start working immediately
-    // Send an initial message to trigger the agent
-    let initial_task = "Begin your analysis and work on the current project. Follow your mission and workflow as specified in your instructions.";
+    // If resuming a session, restore context and TODO
+    let initial_task = if let Some(ref incomplete_session) = resuming_session {
+        // Restore the session context
+        match agent.restore_from_continuation(incomplete_session) {
+            Ok(full_restore) => {
+                if full_restore {
+                    output.print(" āœ… Full context restored from previous session");
+                } else {
+                    output.print(" āš ļø Restored from summary (context was > 80%)");
+                }
+            }
+            Err(e) => {
+                output.print(&format!(" āš ļø Could not restore context: {}", e));
+            }
+        }
+
+        // Copy TODO from old session to new session directory
+        let todo_content = if let Some(ref content) = incomplete_session.todo_snapshot {
+            Some(content.clone())
+        } else {
+            // Fallback: read from the actual todo.g3.md file in the old session directory
+            let old_session_dir = std::path::Path::new(".g3/sessions").join(&incomplete_session.session_id);
+            let old_todo_path = old_session_dir.join("todo.g3.md");
+            if old_todo_path.exists() {
+                std::fs::read_to_string(&old_todo_path).ok()
+            } else {
+                None
+            }
+        };
+
+        if let Some(ref content) = todo_content {
+            if let Some(session_id) = agent.get_session_id() {
+                let new_todo_path = g3_core::paths::get_session_todo_path(session_id);
+                let _ = g3_core::paths::ensure_session_dir(session_id);
+                if let Err(e) = std::fs::write(&new_todo_path, content) {
+                    output.print(&format!(" āš ļø Could not restore TODO: {}", e));
+                } else {
+                    output.print(" āœ… TODO list restored");
+                }
+            }
+        }
+        output.print("");
+
+        // Resume message instead of fresh start
+        "Continue working on the incomplete tasks. Use todo_read to see the current TODO list and resume from where you left off."
+    } else {
+        // Fresh start - the agent prompt should contain instructions to start working immediately
+        "Begin your analysis and work on the current project. Follow your mission and workflow as specified in your instructions."
+    };
 
     let _result = agent.execute_task(initial_task, None, true).await?;
 
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index fd4a6e3..b25367b 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -5,6 +5,7 @@ pub mod error_handling;
 pub mod feedback_extraction;
 pub mod paths;
 pub mod project;
+pub mod provider_config;
 pub mod retry;
 pub mod session_continuation;
 pub mod streaming_parser;
@@ -52,7 +53,7 @@ use tracing::{debug, error, warn};
 // Re-export path utilities for backward compatibility
 pub use paths::{
     G3_WORKSPACE_PATH_ENV, ensure_session_dir, get_context_summary_file, get_g3_dir, get_logs_dir,
-    get_session_file, get_session_logs_dir, get_thinned_dir, logs_dir,
+    get_session_file, get_session_logs_dir, get_session_todo_path, get_thinned_dir, logs_dir,
 };
 use paths::get_todo_path;
 
@@ -487,191 +488,56 @@ impl Agent {
             .count()
     }
 
-    /// Get the configured max_tokens for a provider from top-level config
-    fn provider_max_tokens(config: &Config, provider_name: &str) -> Option<u32> {
-        // Parse provider reference (format: "provider_type.config_name")
-        let parts: Vec<&str> = provider_name.split('.').collect();
-        let (provider_type, config_name) = if parts.len() == 2 {
-            (parts[0], parts[1])
-        } else {
-            // Fallback for simple provider names - assume "default" config
-            (provider_name, "default")
-        };
-
-        match provider_type {
-            "anthropic" => config.providers.anthropic.get(config_name)?.max_tokens,
-            "openai" => config.providers.openai.get(config_name)?.max_tokens,
-            "databricks" => config.providers.databricks.get(config_name)?.max_tokens,
-            "embedded" => config.providers.embedded.get(config_name)?.max_tokens,
-            _ => None,
-        }
-    }
-
-    /// Get the configured temperature for a provider from top-level config
-    fn provider_temperature(config: &Config, provider_name: &str) -> Option<f32> {
-        // Parse provider reference (format: "provider_type.config_name")
-        let parts: Vec<&str> = provider_name.split('.').collect();
-        let (provider_type, config_name) = if parts.len() == 2 {
-            (parts[0], parts[1])
-        } else {
-            // Fallback for simple provider names - assume "default" config
-            (provider_name, "default")
-        };
-
-        match provider_type {
-            "anthropic" => config.providers.anthropic.get(config_name)?.temperature,
-            "openai" => config.providers.openai.get(config_name)?.temperature,
-            "databricks" => config.providers.databricks.get(config_name)?.temperature,
-            "embedded" => config.providers.embedded.get(config_name)?.temperature,
-            _ => None,
-        }
-    }
-
-    /// Resolve the max_tokens to use for a given provider, applying fallbacks
+    /// Resolve the max_tokens to use for a given provider, applying fallbacks.
     fn resolve_max_tokens(&self, provider_name: &str) -> u32 {
-        let base = match provider_name {
-            "databricks" => Self::provider_max_tokens(&self.config, "databricks")
-                .or(Some(self.config.agent.fallback_default_max_tokens as u32))
-                .unwrap_or(32000),
-            other => Self::provider_max_tokens(&self.config, other)
-                .or(Some(self.config.agent.fallback_default_max_tokens as u32))
-                .unwrap_or(16000),
-        };
-
-        // For Anthropic with thinking enabled, ensure max_tokens is sufficient
-        // Anthropic requires: max_tokens > thinking.budget_tokens
-        if provider_name == "anthropic" {
-            if let Some(budget) = self.get_thinking_budget_tokens(provider_name) {
-                let minimum_for_thinking = budget + 1024;
-                return base.max(minimum_for_thinking);
-            }
-        }
-
-        base
+        provider_config::resolve_max_tokens(&self.config, provider_name)
     }
 
-    /// Get the thinking budget tokens for Anthropic provider, if configured
+    /// Get the thinking budget tokens for Anthropic provider, if configured.
     fn get_thinking_budget_tokens(&self, provider_name: &str) -> Option<u32> {
-        // Parse provider reference (format: "provider_type.config_name")
-        let parts: Vec<&str> = provider_name.split('.').collect();
-        let (provider_type, config_name) = if parts.len() == 2 {
-            (parts[0], parts[1])
-        } else {
-            // Fallback for simple provider names - assume "default" config
-            (provider_name, "default")
-        };
-
-        // Only Anthropic has thinking_budget_tokens
-        if provider_type != "anthropic" {
-            return None;
-        }
-
-        self.config.providers.anthropic
-            .get(config_name)
-            .and_then(|c| c.thinking_budget_tokens)
+        provider_config::get_thinking_budget_tokens(&self.config, provider_name)
     }
 
-    /// Pre-flight check to validate and adjust max_tokens for the thinking.budget_tokens constraint.
-    /// Returns the adjusted max_tokens that satisfies: max_tokens > thinking.budget_tokens
-    /// Also returns whether we need to apply fallback actions (thinnify/skinnify).
-    ///
-    /// Returns: (adjusted_max_tokens, needs_context_reduction)
-    fn preflight_validate_max_tokens(
-        &self,
-        provider_name: &str,
-        proposed_max_tokens: u32,
-    ) -> (u32, bool) {
-        // Parse provider type from provider_name (format: "provider_type.config_name")
-        let provider_type = provider_name.split('.').next().unwrap_or(provider_name);
-
-        // Only applies to Anthropic provider
-        if provider_type != "anthropic" {
-            return (proposed_max_tokens, false);
-        }
-
-        let budget_tokens = match self.get_thinking_budget_tokens(provider_name) {
-            Some(budget) => budget,
-            None => return (proposed_max_tokens, false), // No thinking enabled
-        };
-
-        // Anthropic requires: max_tokens > budget_tokens
-        // We add a minimum output buffer of 1024 tokens for actual response content
-        let minimum_required = budget_tokens + 1024;
-
-        if proposed_max_tokens >= minimum_required {
-            // We have enough headroom
-            (proposed_max_tokens, false)
-        } else {
-            // max_tokens is too low - need to either adjust or reduce context
-            warn!(
-                "max_tokens ({}) is below required minimum ({}) for thinking.budget_tokens ({}). Context reduction needed.",
-                proposed_max_tokens, minimum_required, budget_tokens
-            );
-            // Return the minimum required, but flag that we need context reduction
-            (minimum_required, true)
-        }
+    /// Pre-flight check to validate max_tokens for thinking.budget_tokens constraint.
+    fn preflight_validate_max_tokens(&self, provider_name: &str, proposed_max_tokens: u32) -> (u32, bool) {
+        provider_config::preflight_validate_max_tokens(&self.config, provider_name, proposed_max_tokens)
     }
 
-    /// Calculate max_tokens for a summary request, ensuring it satisfies the thinking constraint.
-    /// Applies fallback sequence: thinnify -> skinnify -> hard-coded minimum
-    /// Returns (max_tokens, whether_fallback_was_used)
-    ///
-    /// IMPORTANT: Always returns at least SUMMARY_MIN_TOKENS to avoid API errors
-    /// when context is nearly full (90%+).
-    fn calculate_summary_max_tokens(
-        &mut self,
-        provider_name: &str,
-    ) -> (u32, bool) {
-        let model_limit = self.context_window.total_tokens;
-        let current_usage = self.context_window.used_tokens;
-
-        // Get the configured max_tokens for this provider
-        let configured_max_tokens = self.resolve_max_tokens(provider_name);
-
-        // Calculate available tokens with buffer
-        let buffer = (model_limit / 40).clamp(1000, 10000); // 2.5% buffer
-        let available = model_limit
-            .saturating_sub(current_usage)
-            .saturating_sub(buffer);
-        // Ensure we have at least a minimum floor for summary requests
-        // This prevents max_tokens=0 errors when context is 90%+ full
-        let available = available.max(Self::SUMMARY_MIN_TOKENS);
-        // Use the smaller of available tokens (with floor) or configured max_tokens,
-        // but ensure we don't go below thinking budget floor for Anthropic
-        let proposed_max_tokens = available.min(configured_max_tokens);
-        let proposed_max_tokens = if provider_name == "anthropic" {
-            if let Some(budget) = self.get_thinking_budget_tokens(provider_name) {
-                proposed_max_tokens.max(budget + 1024)
-            } else {
-                proposed_max_tokens
-            }
-        } else {
-            proposed_max_tokens
-        };
-
-        // Validate against thinking budget constraint
-        let (adjusted, needs_reduction) = self.preflight_validate_max_tokens(provider_name, proposed_max_tokens);
-
-        if !needs_reduction {
-            return (adjusted, false);
-        }
-
-        // We need more headroom - the context is too full
-        // Return the adjusted value but flag that fallbacks are needed
-        (adjusted, true)
+    /// Calculate max_tokens for a summary request.
+    fn calculate_summary_max_tokens(&self, provider_name: &str) -> (u32, bool) {
+        provider_config::calculate_summary_max_tokens(
+            &self.config,
+            provider_name,
+            self.context_window.total_tokens,
+            self.context_window.used_tokens,
+        )
     }
 
     /// Apply the fallback sequence to free up context space for thinking budget.
-    /// Sequence: thinnify (first third) → skinnify (all) → hard-coded minimum
-    /// Returns the validated max_tokens that satisfies thinking.budget_tokens constraint.
-    fn apply_max_tokens_fallback_sequence(
+    fn apply_max_tokens_fallback_sequence(&mut self, provider_name: &str, initial_max_tokens: u32, hard_coded_minimum: u32) -> u32 {
+        self.apply_fallback_sequence_impl(provider_name, Some(initial_max_tokens), hard_coded_minimum)
+    }
+
+    /// Apply the fallback sequence for summary requests to free up context space.
+    fn apply_summary_fallback_sequence(&mut self, provider_name: &str) -> u32 {
+        self.apply_fallback_sequence_impl(provider_name, None, 5000)
+    }
+
+    /// Unified implementation of the fallback sequence for freeing context space.
+    /// If `initial_max_tokens` is Some, uses preflight_validate_max_tokens for validation.
+    /// If `initial_max_tokens` is None, uses calculate_summary_max_tokens for validation.
+    fn apply_fallback_sequence_impl(
         &mut self,
         provider_name: &str,
-        initial_max_tokens: u32,
+        initial_max_tokens: Option<u32>,
         hard_coded_minimum: u32,
     ) -> u32 {
-        let (mut max_tokens, needs_reduction) = self.preflight_validate_max_tokens(provider_name, initial_max_tokens);
-
+        // Initial validation
+        let (mut max_tokens, needs_reduction) = match initial_max_tokens {
+            Some(initial) => self.preflight_validate_max_tokens(provider_name, initial),
+            None => self.calculate_summary_max_tokens(provider_name),
+        };
+
         if !needs_reduction {
             return max_tokens;
         }
@@ -682,115 +548,50 @@ impl Agent {
 
         // Step 1: Try thinnify (first third of context)
         self.ui_writer.print_context_status("šŸ„’ Step 1: Trying thinnify...\n");
-        let (thin_msg, thin_saved) = self.context_window.thin_context(self.session_id.as_deref());
-        self.thinning_events.push(thin_saved);
+        let thin_msg = self.do_thin_context();
         self.ui_writer.print_context_thinning(&thin_msg);
 
-        // Recalculate max_tokens after thinnify
-        let recalc_max = self.resolve_max_tokens(provider_name);
-        let (new_max, still_needs_reduction) = self.preflight_validate_max_tokens(provider_name, recalc_max);
+        // Recalculate after thinnify
+        let (new_max, still_needs_reduction) = self.recalculate_max_tokens(provider_name, initial_max_tokens.is_some());
         max_tokens = new_max;
-
         if !still_needs_reduction {
-            self.ui_writer.print_context_status(
-                "āœ… Thinnify resolved capacity issue. Continuing...\n",
-            );
+            self.ui_writer.print_context_status("āœ… Thinnify resolved capacity issue. Continuing...\n");
             return max_tokens;
         }
 
         // Step 2: Try skinnify (entire context)
         self.ui_writer.print_context_status("🦓 Step 2: Trying skinnify...\n");
-        let (skinny_msg, skinny_saved) = self.context_window.thin_context_all(self.session_id.as_deref());
-        self.thinning_events.push(skinny_saved);
+        let skinny_msg = self.do_thin_context_all();
         self.ui_writer.print_context_thinning(&skinny_msg);
 
-        // Recalculate max_tokens after skinnify
-        let recalc_max = self.resolve_max_tokens(provider_name);
-        let (final_max, final_needs_reduction) = self.preflight_validate_max_tokens(provider_name, recalc_max);
-        max_tokens = final_max;
-
+        // Recalculate after skinnify
+        let (final_max, final_needs_reduction) = self.recalculate_max_tokens(provider_name, initial_max_tokens.is_some());
         if !final_needs_reduction {
-            self.ui_writer.print_context_status(
-                "āœ… Skinnify resolved capacity issue. Continuing...\n",
-            );
-            return max_tokens;
+            self.ui_writer.print_context_status("āœ… Skinnify resolved capacity issue. Continuing...\n");
+            return final_max;
         }
 
-        // Step 3: Nothing worked, use hard-coded minimum as last resort
+        // Step 3: Nothing worked, use hard-coded minimum
         self.ui_writer.print_context_status(&format!(
             "āš ļø Step 3: Context reduction insufficient. Using hard-coded max_tokens={} as last resort...\n",
             hard_coded_minimum
         ));
-
         hard_coded_minimum
     }
 
-    /// Apply the fallback sequence for summary requests to free up context space.
-    /// Uses calculate_summary_max_tokens for recalculation (based on available space).
-    /// Returns the validated max_tokens for summary requests.
-    fn apply_summary_fallback_sequence(
-        &mut self,
-        provider_name: &str,
-    ) -> u32 {
-        let (mut summary_max_tokens, needs_reduction) = self.calculate_summary_max_tokens(provider_name);
-
-        if !needs_reduction {
-            return summary_max_tokens;
+    /// Helper to recalculate max_tokens after context reduction.
+    fn recalculate_max_tokens(&self, provider_name: &str, use_preflight: bool) -> (u32, bool) {
+        if use_preflight {
+            let recalc_max = self.resolve_max_tokens(provider_name);
+            self.preflight_validate_max_tokens(provider_name, recalc_max)
+        } else {
+            self.calculate_summary_max_tokens(provider_name)
         }
-
-        self.ui_writer.print_context_status(
-            "āš ļø Context window too full for thinking budget. Applying fallback sequence...\n",
-        );
-
-        // Step 1: Try thinnify (first third of context)
-        self.ui_writer.print_context_status("šŸ„’ Step 1: Trying thinnify...\n");
-        let (thin_msg, thin_saved) = self.context_window.thin_context(self.session_id.as_deref());
-        self.thinning_events.push(thin_saved);
-        self.ui_writer.print_context_thinning(&thin_msg);
-
-        // Recalculate max_tokens after thinnify
-        let (new_max, still_needs_reduction) = self.calculate_summary_max_tokens(provider_name);
-        summary_max_tokens = new_max;
-
-        if !still_needs_reduction {
-            self.ui_writer.print_context_status(
-                "āœ… Thinnify resolved capacity issue. Continuing...\n",
-            );
-            return summary_max_tokens;
-        }
-
-        // Step 2: Try skinnify (entire context)
-        self.ui_writer.print_context_status("🦓 Step 2: Trying skinnify...\n");
-        let (skinny_msg, skinny_saved) = self.context_window.thin_context_all(self.session_id.as_deref());
-        self.thinning_events.push(skinny_saved);
-        self.ui_writer.print_context_thinning(&skinny_msg);
-
-        // Recalculate max_tokens after skinnify
-        let (final_max, final_needs_reduction) = self.calculate_summary_max_tokens(provider_name);
-        summary_max_tokens = final_max;
-
-        if !final_needs_reduction {
-            self.ui_writer.print_context_status(
-                "āœ… Skinnify resolved capacity issue. Continuing...\n",
-            );
-            return summary_max_tokens;
-        }
-
-        // Step 3: Nothing worked, use hard-coded minimum
-        self.ui_writer.print_context_status(
-            "āš ļø Step 3: Context reduction insufficient. Using hard-coded max_tokens=5000 as last resort...\n",
-        );
-        5000
     }
 
-    /// Resolve the temperature to use for a given provider, applying fallbacks
+    /// Resolve the temperature to use for a given provider, applying fallbacks.
     fn resolve_temperature(&self, provider_name: &str) -> f32 {
-        match provider_name {
-            "databricks" => Self::provider_temperature(&self.config, "databricks")
-                .unwrap_or(0.1),
-            other => Self::provider_temperature(&self.config, other)
-                .unwrap_or(0.1),
-        }
+        provider_config::resolve_temperature(&self.config, provider_name)
     }
 
     /// Print provider diagnostics through the UiWriter for visibility
@@ -845,13 +646,7 @@ impl Agent {
         let model_name = provider.model();
 
         // Parse provider name to get type and config name
-        let parts: Vec<&str> = provider_name.split('.').collect();
-        let (provider_type, config_name) = if parts.len() == 2 {
-            (parts[0], parts[1])
-        } else {
-            // Fallback for simple provider names
-            (provider_name, "default")
-        };
+        let (provider_type, config_name) = provider_config::parse_provider_ref(provider_name);
 
         // Use provider-specific context length if available
         let context_length = match provider_type {
@@ -874,7 +669,7 @@
             }
             "openai" => {
                 // OpenAI models have varying context windows
-                if let Some(max_tokens) = Self::provider_max_tokens(config, provider_name) {
+                if let Some(max_tokens) = provider_config::get_max_tokens(config, provider_name) {
                     warnings.push(format!(
                         "Context length falling back to max_tokens ({}) for provider={}",
                         max_tokens, provider_name
@@ -886,7 +681,7 @@
             }
             "anthropic" => {
                 // Claude models have large context windows
-                if let Some(max_tokens) = Self::provider_max_tokens(config, provider_name) {
+                if let Some(max_tokens) = provider_config::get_max_tokens(config, provider_name) {
                     warnings.push(format!(
                         "Context length falling back to max_tokens ({}) for provider={}",
                         max_tokens, provider_name
@@ -898,7 +693,7 @@
             }
             "databricks" => {
                 // Databricks models have varying context windows depending on the model
-                if let Some(max_tokens) = Self::provider_max_tokens(config, provider_name) {
+                if let Some(max_tokens) = provider_config::get_max_tokens(config, provider_name) {
                     warnings.push(format!(
                         "Context length falling back to max_tokens ({}) for provider={}",
                         max_tokens, provider_name
@@ -1045,8 +840,7 @@
         // But only if we haven't already added 4 cache_control annotations
         let provider = self.providers.get(None)?;
         let provider_name = provider.name();
-        let provider_type = provider_name.split('.').next().unwrap_or("");
-        let config_name = provider_name.split('.').nth(1).unwrap_or("default");
+        let (provider_type, config_name) = provider_config::parse_provider_ref(provider_name);
         if let Some(cache_config) = match provider_type {
             "anthropic" => {
                 self.config
@@ -1663,15 +1457,25 @@ impl Agent {
     /// Manually trigger context thinning regardless of thresholds
     pub fn force_thin(&mut self) -> String {
         debug!("Manual context thinning triggered");
-        let (message, chars_saved) = self.context_window.thin_context(self.session_id.as_deref());
-        self.thinning_events.push(chars_saved);
-        message
+        self.do_thin_context()
     }
 
     /// Manually trigger context thinning for the ENTIRE context window
     /// Unlike force_thin which only processes the first third, this processes all messages
     pub fn force_thin_all(&mut self) -> String {
         debug!("Manual full context skinnifying triggered");
+        self.do_thin_context_all()
+    }
+
+    /// Internal helper: thin context and track the event
+    fn do_thin_context(&mut self) -> String {
+        let (message, chars_saved) = self.context_window.thin_context(self.session_id.as_deref());
+        self.thinning_events.push(chars_saved);
+        message
+    }
+
+    /// Internal helper: thin all context and track the event
+    fn do_thin_context_all(&mut self) -> String {
         let (message, chars_saved) = self.context_window.thin_context_all(self.session_id.as_deref());
         self.thinning_events.push(chars_saved);
         message
     }
@@ -1921,8 +1725,14 @@ impl Agent {
         // Get the session log path (now in .g3/sessions/<session_id>/session.json)
         let session_log_path = get_session_file(&session_id);
 
-        // Get current TODO content
-        let todo_snapshot = std::fs::read_to_string(get_todo_path()).ok();
+        // Get current TODO content - try session-specific path first, then workspace path
+        let session_todo_path = crate::paths::get_session_todo_path(&session_id);
+        let todo_snapshot = if session_todo_path.exists() {
+            std::fs::read_to_string(&session_todo_path).ok()
+        } else {
+            // Fall back to workspace TODO path for backwards compatibility
+            std::fs::read_to_string(get_todo_path()).ok()
+        };
 
         // Get working directory
         let working_directory = std::env::current_dir()
@@ -2140,8 +1950,7 @@
             self.context_window.percentage_used() as u32
         ));
 
-        let (thin_summary, chars_saved) = self.context_window.thin_context(self.session_id.as_deref());
-        self.thinning_events.push(chars_saved);
+        let thin_summary = self.do_thin_context();
         self.ui_writer.print_context_thinning(&thin_summary);
 
         // Check if thinning was sufficient
@@ -2541,10 +2350,8 @@
         // Check if we should thin the context BEFORE executing the tool
         if self.context_window.should_thin() {
-            let (thin_summary, chars_saved) =
-                self.context_window.thin_context(self.session_id.as_deref());
-            self.thinning_events.push(chars_saved);
-            // Print the thinning summary to the user
+            let thin_summary = self.do_thin_context();
+            // Print the thinning summary
             self.ui_writer.print_context_thinning(&thin_summary);
         }
@@ -2752,8 +2559,7 @@
     {
         let provider = self.providers.get(None)?;
         let provider_name = provider.name();
-        let provider_type = provider_name.split('.').next().unwrap_or("");
-        let config_name = provider_name.split('.').nth(1).unwrap_or("default");
+        let (provider_type, config_name) = provider_config::parse_provider_ref(provider_name);
         if let Some(cache_config) = match provider_type {
             "anthropic" => {
                 self.config
diff --git a/crates/g3-core/src/provider_config.rs b/crates/g3-core/src/provider_config.rs
new file mode 100644
index 0000000..939147a
--- /dev/null
+++ b/crates/g3-core/src/provider_config.rs
@@ -0,0 +1,225 @@
+//! Provider configuration resolution.
+//!
+//! This module handles resolving provider-specific configuration values
+//! like max_tokens, temperature, and thinking budget tokens from the
+//! hierarchical config structure.
+
+use g3_config::Config;
+use tracing::warn;
+
+/// Minimum tokens for summary requests to avoid API errors when context is nearly full.
+pub const SUMMARY_MIN_TOKENS: u32 = 1000;
+
+/// Parse a provider reference into (provider_type, config_name).
+/// Format: "provider_type.config_name" (e.g., "anthropic.default")
+/// Falls back to (provider_name, "default") for simple names.
+pub fn parse_provider_ref(provider_name: &str) -> (&str, &str) {
+    let parts: Vec<&str> = provider_name.split('.').collect();
+    if parts.len() == 2 {
+        (parts[0], parts[1])
+    } else {
+        (provider_name, "default")
+    }
+}
+
+/// Get the configured max_tokens for a provider from config.
+pub fn get_max_tokens(config: &Config, provider_name: &str) -> Option<u32> {
+    let (provider_type, config_name) = parse_provider_ref(provider_name);
+
+    match provider_type {
+        "anthropic" => config.providers.anthropic.get(config_name)?.max_tokens,
+        "openai" => config.providers.openai.get(config_name)?.max_tokens,
+        "databricks" => config.providers.databricks.get(config_name)?.max_tokens,
+        "embedded" => config.providers.embedded.get(config_name)?.max_tokens,
+        _ => None,
+    }
+}
+
+/// Get the configured temperature for a provider from config.
+pub fn get_temperature(config: &Config, provider_name: &str) -> Option<f32> {
+    let (provider_type, config_name) = parse_provider_ref(provider_name);
+
+    match provider_type {
+        "anthropic" => config.providers.anthropic.get(config_name)?.temperature,
+        "openai" => config.providers.openai.get(config_name)?.temperature,
+        "databricks" => config.providers.databricks.get(config_name)?.temperature,
+        "embedded" => config.providers.embedded.get(config_name)?.temperature,
+        _ => None,
+    }
+}
+
+/// Get the thinking budget tokens for Anthropic provider, if configured.
+pub fn get_thinking_budget_tokens(config: &Config, provider_name: &str) -> Option<u32> {
+    let (provider_type, config_name) = parse_provider_ref(provider_name);
+
+    // Only Anthropic has thinking_budget_tokens
+    if provider_type != "anthropic" {
+        return None;
+    }
+
+    config.providers.anthropic
+        .get(config_name)
+        .and_then(|c| c.thinking_budget_tokens)
+}
+
+/// Resolve the max_tokens to use for a given provider, applying fallbacks.
+pub fn resolve_max_tokens(config: &Config, provider_name: &str) -> u32 {
+    let (provider_type, _) = parse_provider_ref(provider_name);
+
+    let base = match provider_type {
+        "databricks" => get_max_tokens(config, provider_name)
+            .or(Some(config.agent.fallback_default_max_tokens as u32))
+            .unwrap_or(32000),
+        _ => get_max_tokens(config, provider_name)
+            .or(Some(config.agent.fallback_default_max_tokens as u32))
+            .unwrap_or(16000),
+    };
+
+    // For Anthropic with thinking enabled, ensure max_tokens is sufficient
+    // Anthropic requires: max_tokens > thinking.budget_tokens
+    if provider_type == "anthropic" {
+        if let Some(budget) = get_thinking_budget_tokens(config, provider_name) {
+            let minimum_for_thinking = budget + 1024;
+            return base.max(minimum_for_thinking);
+        }
+    }
+
+    base
+}
+
+/// Resolve the temperature to use for a given provider, applying fallbacks.
+pub fn resolve_temperature(config: &Config, provider_name: &str) -> f32 {
+    let (provider_type, _) = parse_provider_ref(provider_name);
+
+    match provider_type {
+        "databricks" => get_temperature(config, provider_name).unwrap_or(0.1),
+        _ => get_temperature(config, provider_name).unwrap_or(0.1),
+    }
+}
+
+/// Pre-flight check to validate and adjust max_tokens for the thinking.budget_tokens constraint.
+/// Returns the adjusted max_tokens that satisfies: max_tokens > thinking.budget_tokens
+/// Also returns whether we need to apply fallback actions (thinnify/skinnify).
+///
+/// Returns: (adjusted_max_tokens, needs_context_reduction)
+pub fn preflight_validate_max_tokens(
+    config: &Config,
+    provider_name: &str,
+    proposed_max_tokens: u32,
+) -> (u32, bool) {
+    let (provider_type, _) = parse_provider_ref(provider_name);
+
+    // Only applies to Anthropic provider
+    if provider_type != "anthropic" {
+        return (proposed_max_tokens, false);
+    }
+
+    let budget_tokens = match get_thinking_budget_tokens(config, provider_name) {
+        Some(budget) => budget,
+        None => return (proposed_max_tokens, false), // No thinking enabled
+    };
+
+    // Anthropic requires: max_tokens > budget_tokens
+    // We add a minimum output buffer of 1024 tokens for actual response content
+    let minimum_required = budget_tokens + 1024;
+
+    if proposed_max_tokens >= minimum_required {
+        // We have enough headroom
+        (proposed_max_tokens, false)
+    } else {
+        // max_tokens is too low - need to either adjust or reduce context
+        warn!(
+            "max_tokens ({}) is below required minimum ({}) for thinking.budget_tokens ({}). Context reduction needed.",
+            proposed_max_tokens, minimum_required, budget_tokens
+        );
+        // Return the minimum required, but flag that we need context reduction
+        (minimum_required, true)
+    }
+}
+
+/// Calculate max_tokens for a summary request, ensuring it satisfies the thinking constraint.
+/// Returns (max_tokens, whether_fallback_is_needed)
+///
+/// IMPORTANT: Always returns at least SUMMARY_MIN_TOKENS to avoid API errors
+/// when context is nearly full (90%+).
+pub fn calculate_summary_max_tokens(
+    config: &Config,
+    provider_name: &str,
+    model_limit: u32,
+    current_usage: u32,
+) -> (u32, bool) {
+    let (provider_type, _) = parse_provider_ref(provider_name);
+
+    // Get the configured max_tokens for this provider
+    let configured_max_tokens = resolve_max_tokens(config, provider_name);
+
+    // Calculate available tokens with buffer
+    let buffer = (model_limit / 40).clamp(1000, 10000); // 2.5% buffer
+    let available = model_limit
+        .saturating_sub(current_usage)
+        .saturating_sub(buffer);
+    // Ensure we have at least a minimum floor for summary requests
+    // This prevents max_tokens=0 errors when context is 90%+ full
+    let available = available.max(SUMMARY_MIN_TOKENS);
+
+    // Use the smaller of available tokens (with floor) or configured max_tokens,
+    // but ensure we don't go below thinking budget floor for Anthropic
+    let proposed_max_tokens = available.min(configured_max_tokens);
+    let proposed_max_tokens = if provider_type == "anthropic" {
+        if let Some(budget) = get_thinking_budget_tokens(config, provider_name) {
+            proposed_max_tokens.max(budget + 1024)
+        } else {
+            proposed_max_tokens
+        }
+    } else {
+        proposed_max_tokens
+    };
+
+    // Validate against thinking budget constraint
+    preflight_validate_max_tokens(config, provider_name, proposed_max_tokens)
+}
+
+/// Get the provider-specific cap for summary max_tokens.
+pub fn get_summary_max_tokens_cap(config: &Config, provider_name: &str) -> u32 {
+    let (provider_type, _) = parse_provider_ref(provider_name);
+
+    // For Anthropic with thinking enabled, we need max_tokens > thinking.budget_tokens
+    // So we set a higher cap when thinking is configured
+    match provider_type {
+        "anthropic" => {
+            match get_thinking_budget_tokens(config, provider_name) {
+                Some(budget) => (budget + 2000).max(10_000),
+                None => 10_000,
+            }
+        }
+        "databricks" => 10_000,
+        "embedded" => 3000,
+        _ => 5000,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_provider_ref_with_dot() {
+        let (ptype, name) = parse_provider_ref("anthropic.default");
+        assert_eq!(ptype, "anthropic");
+        assert_eq!(name, "default");
+    }
+
+    #[test]
+    fn test_parse_provider_ref_simple() {
+        let (ptype, name) = parse_provider_ref("anthropic");
+        assert_eq!(ptype, "anthropic");
+        assert_eq!(name, "default");
+    }
+
+    #[test]
+    fn test_parse_provider_ref_with_custom_name() {
+        let (ptype, name) = parse_provider_ref("openai.gpt4");
+        assert_eq!(ptype, "openai");
+        assert_eq!(name, "gpt4");
+    }
+}
diff --git a/crates/g3-core/src/session_continuation.rs b/crates/g3-core/src/session_continuation.rs
index 7893dd2..5e13d56 100644
--- a/crates/g3-core/src/session_continuation.rs
+++ b/crates/g3-core/src/session_continuation.rs
@@ -347,8 +347,25 @@ pub fn find_incomplete_agent_session(agent_name: &str) -> Result