diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index 6331507..36c5296 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -540,7 +540,14 @@ async fn run_agent_mode( }; output.print(&format!("šŸ¤– Running as agent: {}", agent_name)); - output.print(&format!("šŸ“ Working directory: {:?}", workspace_dir)); + // Format workspace path, replacing home dir with ~ + let workspace_display = { + let path_str = workspace_dir.display().to_string(); + dirs::home_dir() + .and_then(|home| path_str.strip_prefix(&home.display().to_string()).map(|s| format!("~{}", s))) + .unwrap_or(path_str) + }; + output.print(&format!("-> {}", workspace_display)); // Load config let mut config = g3_config::Config::load(config_path)?; diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 8aa97b6..6edd312 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -959,9 +959,11 @@ impl Agent { } } - /// Manually trigger context compaction regardless of context window size + /// Manually trigger context compaction regardless of context window size /// Returns Ok(true) if compaction was successful, Ok(false) if it failed pub async fn force_compact(&mut self) -> Result { + use crate::compaction::{CompactionConfig, perform_compaction}; + debug!("Manual compaction triggered"); self.ui_writer.print_context_status(&format!( @@ -973,115 +975,41 @@ impl Agent { let provider_name = provider.name().to_string(); let _ = provider; // Release borrow early - // Apply fallback sequence: thinnify -> skinnify -> hard-coded 5000 - let mut summary_max_tokens = self.apply_summary_fallback_sequence(&provider_name); - - // Apply provider-specific caps - // For Anthropic with thinking enabled, we need max_tokens > thinking.budget_tokens - // So we set a higher cap when thinking is configured - let anthropic_cap = match self.get_thinking_budget_tokens(&provider_name) { - Some(budget) => (budget + 2000).max(10_000), // At least budget + 2000 for response - None => 10_000, - }; - summary_max_tokens = match provider_name.as_str() { - "anthropic" => summary_max_tokens.min(anthropic_cap), - "databricks" => summary_max_tokens.min(10_000), - "embedded" => summary_max_tokens.min(3000), - _ => summary_max_tokens.min(5000), - }; - - // Ensure minimum floor as defense-in-depth (primary protection is in calculate_summary_max_tokens) - summary_max_tokens = summary_max_tokens.max(Self::SUMMARY_MIN_TOKENS); - - debug!( - "Requesting summary with max_tokens: {} (current usage: {} tokens)", - summary_max_tokens, self.context_window.used_tokens - ); - - // Create summary request with FULL history - let summary_prompt = self.context_window.create_summary_prompt(); - - // Get the full conversation history - let conversation_text = self + // Get the latest user message to preserve it + let latest_user_msg = self .context_window .conversation_history .iter() - .map(|m| format!("{:?}: {}", m.role, m.content)) - .collect::>() - .join("\n\n"); + .rev() + .find(|m| matches!(m.role, MessageRole::User)) + .map(|m| m.content.clone()); - let summary_messages = vec![ - Message::new( - MessageRole::System, - "You are a helpful assistant that creates concise summaries.".to_string(), - ), - Message::new( - MessageRole::User, - format!( - "Based on this conversation history, {}\n\nConversation:\n{}", - summary_prompt, conversation_text - ), - ), - ]; - - let provider = self.providers.get(None)?; - - // Determine if we need to disable thinking mode for this request - // Anthropic requires: max_tokens > thinking.budget_tokens + 1024 - let disable_thinking = self.get_thinking_budget_tokens(provider.name()).map_or(false, |budget| { - let minimum_for_thinking = budget + 1024; - let should_disable = summary_max_tokens <= minimum_for_thinking; - if should_disable { - tracing::warn!("Disabling thinking mode for summary: max_tokens ({}) <= minimum_for_thinking ({})", summary_max_tokens, minimum_for_thinking); - } - should_disable - }); - - tracing::debug!("Creating summary request: max_tokens={}, disable_thinking={}", summary_max_tokens, disable_thinking); - - let summary_request = CompletionRequest { - messages: summary_messages, - max_tokens: Some(summary_max_tokens), - temperature: Some(self.resolve_temperature(provider.name())), - stream: false, - tools: None, - disable_thinking, + let compaction_config = CompactionConfig { + provider_name: &provider_name, + latest_user_msg, }; - // Get the summary - match provider.complete(summary_request).await { - Ok(summary_response) => { - self.ui_writer - .print_context_status("āœ… Context compacted successfully.\n"); + let result = perform_compaction( + &self.providers, + &mut self.context_window, + &self.config, + compaction_config, + &self.ui_writer, + &mut self.thinning_events, + ).await?; - // Get the latest user message to preserve it - let latest_user_msg = self - .context_window - .conversation_history - .iter() - .rev() - .find(|m| matches!(m.role, MessageRole::User)) - .map(|m| m.content.clone()); - - // Reset context with summary - let chars_saved = self - .context_window - .reset_with_summary(summary_response.content, latest_user_msg); - self.compaction_events.push(chars_saved); - - Ok(true) - } - Err(e) => { - error!("Failed to create summary: {}", e); - self.ui_writer.print_context_status( - "āš ļø Unable to create summary. Please try again or start a new session.\n", - ); - Ok(false) - } + if result.success { + self.ui_writer.print_context_status("āœ… Context compacted successfully.\n"); + self.compaction_events.push(result.chars_saved); + Ok(true) + } else { + self.ui_writer.print_context_status( + "āš ļø Unable to create summary. Please try again or start a new session.\n", + ); + Ok(false) } } - - /// Manually trigger context thinning regardless of thresholds +/// Manually trigger context thinning regardless of thresholds pub fn force_thin(&mut self) -> String { debug!("Manual context thinning triggered"); self.do_thin_context() @@ -1773,6 +1701,8 @@ impl Agent { // Only proceed with compaction if still needed after thinning if self.context_window.should_compact() { + use crate::compaction::{CompactionConfig, perform_compaction}; + // Notify user about compaction self.ui_writer.print_context_status(&format!( "\nšŸ—œļø Context window reaching capacity ({}%). Compacting...", @@ -1783,112 +1713,41 @@ impl Agent { let provider_name = provider.name().to_string(); let _ = provider; // Release borrow early - // Apply fallback sequence: thinnify -> skinnify -> hard-coded 5000 - let mut summary_max_tokens = self.apply_summary_fallback_sequence(&provider_name); - - // Apply provider-specific caps - // For Anthropic with thinking enabled, we need max_tokens > thinking.budget_tokens - // So we set a higher cap when thinking is configured - let anthropic_cap = match self.get_thinking_budget_tokens(&provider_name) { - Some(budget) => (budget + 2000).max(10_000), // At least budget + 2000 for response - None => 10_000, - }; - summary_max_tokens = match provider_name.as_str() { - "anthropic" => summary_max_tokens.min(anthropic_cap), - "databricks" => summary_max_tokens.min(10_000), - "embedded" => summary_max_tokens.min(3000), - _ => summary_max_tokens.min(5000), - }; - - // Ensure minimum floor as defense-in-depth (primary protection is in calculate_summary_max_tokens) - summary_max_tokens = summary_max_tokens.max(Self::SUMMARY_MIN_TOKENS); - - debug!( - "Requesting summary with max_tokens: {} (current usage: {} tokens)", - summary_max_tokens, self.context_window.used_tokens - ); - - // Create summary request with FULL history - let summary_prompt = self.context_window.create_summary_prompt(); - - // Get the full conversation history - let conversation_text = self - .context_window - .conversation_history + // Extract the latest user message from the request (not context_window) + let latest_user_msg = request + .messages .iter() - .map(|m| format!("{:?}: {}", m.role, m.content)) - .collect::>() - .join("\n\n"); + .rev() + .find(|m| matches!(m.role, MessageRole::User)) + .map(|m| m.content.clone()); - let summary_messages = vec![ - Message::new( - MessageRole::System, - "You are a helpful assistant that creates concise summaries.".to_string(), - ), - Message::new( - MessageRole::User, - format!( - "Based on this conversation history, {}\n\nConversation:\n{}", - summary_prompt, conversation_text - ), - ), - ]; - - let provider = self.providers.get(None)?; - - // Determine if we need to disable thinking mode for this request - // Anthropic requires: max_tokens > thinking.budget_tokens + 1024 - let disable_thinking = self.get_thinking_budget_tokens(provider.name()).map_or(false, |budget| { - let minimum_for_thinking = budget + 1024; - let should_disable = summary_max_tokens <= minimum_for_thinking; - if should_disable { - tracing::warn!("Disabling thinking mode for summary: max_tokens ({}) <= minimum_for_thinking ({})", summary_max_tokens, minimum_for_thinking); - } - should_disable - }); - - tracing::debug!("Creating auto-summary request: max_tokens={}, disable_thinking={}", summary_max_tokens, disable_thinking); - - let summary_request = CompletionRequest { - messages: summary_messages, - max_tokens: Some(summary_max_tokens), - temperature: Some(self.resolve_temperature(provider.name())), - stream: false, - tools: None, - disable_thinking, + let compaction_config = CompactionConfig { + provider_name: &provider_name, + latest_user_msg, }; - // Get the summary - match provider.complete(summary_request).await { - Ok(summary_response) => { - self.ui_writer.print_context_status( - "āœ… Context compacted successfully. Continuing...\n", - ); + let result = perform_compaction( + &self.providers, + &mut self.context_window, + &self.config, + compaction_config, + &self.ui_writer, + &mut self.thinning_events, + ).await?; - // Extract the latest user message from the request - let latest_user_msg = request - .messages - .iter() - .rev() - .find(|m| matches!(m.role, MessageRole::User)) - .map(|m| m.content.clone()); + if result.success { + self.ui_writer.print_context_status( + "āœ… Context compacted successfully. Continuing...\n", + ); + self.compaction_events.push(result.chars_saved); - // Reset context with summary - let chars_saved = self - .context_window - .reset_with_summary(summary_response.content, latest_user_msg); - self.compaction_events.push(chars_saved); - - // Update the request with new context - request.messages = self.context_window.conversation_history.clone(); - } - Err(e) => { - error!("Failed to create summary: {}", e); - self.ui_writer.print_context_status("āš ļø Unable to compact context. Consider starting a new session if you continue to see errors.\n"); - // Don't continue with the original request if compaction failed - // as we're likely at token limit - return Err(anyhow::anyhow!("Context window at capacity and compaction failed. Please start a new session.")); - } + // Update the request with new context + request.messages = self.context_window.conversation_history.clone(); + } else { + self.ui_writer.print_context_status("āš ļø Unable to compact context. Consider starting a new session if you continue to see errors.\n"); + // Don't continue with the original request if compaction failed + // as we're likely at token limit + return Err(anyhow::anyhow!("Context window at capacity and compaction failed. Please start a new session.")); } } }