diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index fc1c06b..978a689 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -1011,6 +1011,73 @@ impl Agent { message } + /// Ensure context window has capacity before streaming. + /// Tries thinning first (cheap), then compaction (requires LLM call). + /// Returns Ok(true) if request.messages was updated, Ok(false) if unchanged. + async fn ensure_context_capacity(&mut self, request: &mut CompletionRequest) -> Result { + if !self.context_window.should_compact() { + return Ok(false); + } + + // Try thinning first if at high capacity + if self.context_window.percentage_used() > 90.0 && self.context_window.should_thin() { + self.ui_writer.print_context_status(&format!( + "\nšŸ„’ Context window at {}%. Trying thinning first...", + self.context_window.percentage_used() as u32 + )); + + let thin_summary = self.do_thin_context(); + self.ui_writer.print_context_thinning(&thin_summary); + + if !self.context_window.should_compact() { + self.ui_writer.print_context_status("āœ… Thinning resolved capacity issue. Continuing...\n"); + return Ok(false); + } + self.ui_writer.print_context_status("āš ļø Thinning insufficient. Proceeding with compaction...\n"); + } + + // Compaction still needed + if !self.context_window.should_compact() { + return Ok(false); + } + + use crate::compaction::{CompactionConfig, perform_compaction}; + + self.ui_writer.print_context_status(&format!( + "\nšŸ—œļø Context window reaching capacity ({}%). Compacting...", + self.context_window.percentage_used() as u32 + )); + + let provider_name = self.providers.get(None)?.name().to_string(); + let latest_user_msg = request.messages.iter().rev() + .find(|m| matches!(m.role, MessageRole::User)) + .map(|m| m.content.clone()); + + let compaction_config = CompactionConfig { + provider_name: &provider_name, + latest_user_msg, + }; + + let result = perform_compaction( + &self.providers, + &mut self.context_window, + &self.config, + compaction_config, + &self.ui_writer, + &mut self.thinning_events, + ).await?; + + if result.success { + self.ui_writer.print_context_status("āœ… Context compacted successfully. Continuing...\n"); + self.compaction_events.push(result.chars_saved); + request.messages = self.context_window.conversation_history.clone(); + return Ok(true); + } + + self.ui_writer.print_context_status("āš ļø Unable to compact context. Consider starting a new session if you continue to see errors.\n"); + Err(anyhow::anyhow!("Context window at capacity and compaction failed. Please start a new session.")) + } + /// Check if a tool call is a duplicate of the last tool call in the previous assistant message. /// Returns Some("DUP IN MSG") if it's a duplicate, None otherwise. fn check_duplicate_in_previous_message(&self, tool_call: &ToolCall) -> Option { @@ -1703,82 +1770,7 @@ Skip if nothing new. Be brief."#; let mut turn_accumulated_usage: Option = None; // Track token usage for timing footer // --- Phase 1: Pre-loop Context Capacity Check --- - if self.context_window.should_compact() { - // First try thinning if we are at capacity, don't call the LLM for compaction (might fail) - if self.context_window.percentage_used() > 90.0 && self.context_window.should_thin() { - self.ui_writer.print_context_status(&format!( - "\nšŸ„’ Context window at {}%. Trying thinning first...", - self.context_window.percentage_used() as u32 - )); - - let thin_summary = self.do_thin_context(); - self.ui_writer.print_context_thinning(&thin_summary); - - // Check if thinning was sufficient - if !self.context_window.should_compact() { - self.ui_writer.print_context_status( - "āœ… Thinning resolved capacity issue. Continuing...\n", - ); - // Continue with the original request without compaction - } else { - self.ui_writer.print_context_status( - "āš ļø Thinning insufficient. Proceeding with compaction...\n", - ); - } - } - - // Only proceed with compaction if still needed after thinning - if self.context_window.should_compact() { - use crate::compaction::{CompactionConfig, perform_compaction}; - - // Notify user about compaction - self.ui_writer.print_context_status(&format!( - "\nšŸ—œļø Context window reaching capacity ({}%). Compacting...", - self.context_window.percentage_used() as u32 - )); - - let provider = self.providers.get(None)?; - let provider_name = provider.name().to_string(); - let _ = provider; // Release borrow early - - // Extract the latest user message from the request (not context_window) - let latest_user_msg = request - .messages - .iter() - .rev() - .find(|m| matches!(m.role, MessageRole::User)) - .map(|m| m.content.clone()); - - let compaction_config = CompactionConfig { - provider_name: &provider_name, - latest_user_msg, - }; - - let result = perform_compaction( - &self.providers, - &mut self.context_window, - &self.config, - compaction_config, - &self.ui_writer, - &mut self.thinning_events, - ).await?; - - if result.success { - self.ui_writer.print_context_status( - "āœ… Context compacted successfully. Continuing...\n", - ); - self.compaction_events.push(result.chars_saved); - - // Update the request with new context - request.messages = self.context_window.conversation_history.clone(); - } else { - self.ui_writer.print_context_status("āš ļø Unable to compact context. Consider starting a new session if you continue to see errors.\n"); - // Don't continue with the original request if compaction failed - // as we're likely at token limit - return Err(anyhow::anyhow!("Context window at capacity and compaction failed. Please start a new session.")); - } - } - } + self.ensure_context_capacity(&mut request).await?; // --- Phase 2: Main Streaming Loop --- loop { @@ -1868,7 +1860,7 @@ Skip if nothing new. Be brief."#; let mut tool_executed = false; let mut chunks_received = 0; let mut raw_chunks: Vec = Vec::new(); // Store raw chunks for debugging - let mut _last_error: Option = None; + let mut accumulated_usage: Option = None; let mut stream_stop_reason: Option = None; // Track why the stream stopped @@ -1935,83 +1927,48 @@ Skip if nothing new. Be brief."#; for (tool_call, duplicate_type) in deduplicated_tools { debug!("Processing completed tool call: {:?}", tool_call); - // If it's a duplicate, log it and skip - don't set tool_executed! - // Setting tool_executed for duplicates would trigger auto-continue - // even when no actual tool execution occurred. + // Skip duplicates (don't set tool_executed - would trigger spurious auto-continue) if let Some(dup_type) = &duplicate_type { - // Log the duplicate with red prefix - let prefixed_tool_name = - format!("🟄 {} {}", tool_call.tool, dup_type); - let warning_msg = format!( - "āš ļø Duplicate tool call detected ({}): Skipping execution of {} with args {}", + debug!( + "Skipping duplicate tool call ({}): {} with args {}", dup_type, tool_call.tool, serde_json::to_string(&tool_call.args).unwrap_or_else(|_| "".to_string()) ); - - // Log to tool log with red prefix - let mut modified_tool_call = tool_call.clone(); - modified_tool_call.tool = prefixed_tool_name; - debug!("{}", warning_msg); - - // NOTE: Do NOT call parser.reset() here! - // Resetting the parser clears the entire text buffer, which would - // lose any subsequent (non-duplicate) tool calls that haven't been - // processed yet. - continue; // Skip execution of duplicate + continue; } - // Check if we should auto-compact at 90% BEFORE executing the tool - // We need to do this before any borrows of self + // Flag for post-turn compaction if at 90% capacity if self.auto_compact && self.context_window.percentage_used() >= 90.0 { - // Set flag to trigger compaction after this turn completes - // We can't do it now due to borrow checker constraints self.pending_90_compaction = true; } - // Check if we should thin the context BEFORE executing the tool + // Thin context if needed before tool execution if self.context_window.should_thin() { let thin_summary = self.do_thin_context(); - // Print the thinning summary self.ui_writer.print_context_thinning(&thin_summary); } - // Track what we've already displayed before getting new text - // This prevents re-displaying old content after tool execution + // Calculate new content to display (skip already-shown text) let already_displayed_chars = current_response.chars().count(); - - // Get the text content accumulated so far let text_content = parser.get_text_content(); - - // Clean the content let clean_content = streaming::clean_llm_tokens(&text_content); - - // Store the raw content BEFORE filtering for the context window log let raw_content_for_log = clean_content.clone(); - - // Filter out JSON tool calls from the display - let filtered_content = - self.ui_writer.filter_json_tool_calls(&clean_content); + let filtered_content = self.ui_writer.filter_json_tool_calls(&clean_content); let final_display_content = filtered_content.trim(); - // Display any new content before tool execution - // We need to skip what was already shown (tracked in current_response) - // but also account for the fact that parser.text_buffer accumulates - // across iterations and is never cleared until reset() - let new_content = - if current_response.len() <= final_display_content.len() { - // Only show content that hasn't been displayed yet - final_display_content - .chars() - .skip(already_displayed_chars) - .collect::() - } else { - // Nothing new to display - String::new() - }; + // Extract only the new (undisplayed) portion + let new_content = if current_response.len() <= final_display_content.len() { + final_display_content + .chars() + .skip(already_displayed_chars) + .collect::() + } else { + String::new() + }; - // Display any new text content - if !new_content.trim().is_empty() { + // Display new text before tool execution + if !new_content.trim().is_empty() { #[allow(unused_assignments)] if !response_started { self.ui_writer.print_agent_prompt(); @@ -2019,19 +1976,14 @@ Skip if nothing new. Be brief."#; } self.ui_writer.print_agent_response(&new_content); self.ui_writer.flush(); - // Update current_response to track what we've displayed current_response.push_str(&new_content); } - // Execute the tool with formatted output - - // Finish streaming markdown before showing tool output self.ui_writer.finish_streaming_markdown(); - // Check if this is a TODO tool (they handle their own output) let is_todo_tool = tool_call.tool == "todo_read" || tool_call.tool == "todo_write"; - // Tool call header (skip for TODO tools - they print their own compact header) + // Tool call header (TODO tools print their own) if !is_todo_tool { self.ui_writer.print_tool_header(&tool_call.tool, Some(&tool_call.args)); if let Some(args_obj) = tool_call.args.as_object() { @@ -2084,75 +2036,46 @@ Skip if nothing new. Be brief."#; // Display tool execution result with proper indentation let compact_summary = { let output_lines: Vec<&str> = tool_result.lines().collect(); - - // Check if UI wants full output (machine mode) or truncated (human mode) let wants_full = self.ui_writer.wants_full_output(); - const MAX_LINES: usize = 5; const MAX_LINE_WIDTH: usize = 80; let output_len = output_lines.len(); - // Skip printing content for todo tools - they already print their content - let is_todo_tool = - tool_call.tool == "todo_read" || tool_call.tool == "todo_write"; - - if is_compact_tool { - // For failed compact tools, show truncated error message + // Determine output format based on tool type + if is_todo_tool { + // TODO tools handle their own output + None + } else if is_compact_tool { + // Compact tools: show one-line summary if !tool_success { - let error_msg = streaming::truncate_for_display(&tool_result, 60); - Some(error_msg) + Some(streaming::truncate_for_display(&tool_result, 60)) } else { - // Generate appropriate summary based on tool type match tool_call.tool.as_str() { "read_file" => Some(streaming::format_read_file_summary(output_len, tool_result.len())), - "write_file" => { - // The tool result already contains the formatted summary - // Format: "āœ… wrote N lines | M chars" - Some(streaming::format_write_file_result(&tool_result)) - } + "write_file" => Some(streaming::format_write_file_result(&tool_result)), "str_replace" => { - // Parse insertions/deletions from result - // Result format: "āœ… +N insertions | -M deletions" let (ins, del) = parse_diff_stats(&tool_result); Some(streaming::format_str_replace_summary(ins, del)) } - "remember" => { - // Extract size from result like "Memory updated. Size: 1.2k" - Some(streaming::format_remember_summary(&tool_result)) - } - "screenshot" => { - // Extract path from result - Some(streaming::format_screenshot_summary(&tool_result)) - } - "coverage" => { - // Show coverage summary - Some(streaming::format_coverage_summary(&tool_result)) - } - "rehydrate" => { - // Show fragment info - Some(streaming::format_rehydrate_summary(&tool_result)) - } - "code_search" => { - // Show search summary (matches and files) - Some(streaming::format_code_search_summary(&tool_result)) - } - _ => Some(format!("āœ… completed")) + "remember" => Some(streaming::format_remember_summary(&tool_result)), + "screenshot" => Some(streaming::format_screenshot_summary(&tool_result)), + "coverage" => Some(streaming::format_coverage_summary(&tool_result)), + "rehydrate" => Some(streaming::format_rehydrate_summary(&tool_result)), + "code_search" => Some(streaming::format_code_search_summary(&tool_result)), + _ => Some("āœ… completed".to_string()), } } - } else if is_todo_tool { - // Skip - todo tools print their own content - None } else { + // Regular tools: show truncated output lines let max_lines_to_show = if wants_full { output_len } else { MAX_LINES }; - for (idx, line) in output_lines.iter().enumerate() { if !wants_full && idx >= max_lines_to_show { break; } - let clipped_line = streaming::truncate_line(line, MAX_LINE_WIDTH, !wants_full); - self.ui_writer.update_tool_output_line(&clipped_line); + self.ui_writer.update_tool_output_line( + &streaming::truncate_line(line, MAX_LINE_WIDTH, !wants_full) + ); } - if !wants_full && output_len > MAX_LINES { self.ui_writer.print_tool_output_summary(output_len); } @@ -2292,7 +2215,7 @@ Skip if nothing new. Be brief."#; // Clear current_response for next iteration to prevent buffered text // from being incorrectly displayed after tool execution current_response.clear(); - // Reset response_started flag for next iteration + // Reset for next iteration (value read in next loop pass) response_started = false; // Continue processing - don't break mid-stream @@ -2443,8 +2366,6 @@ Skip if nothing new. Be brief."#; error!("Parser state at error: text_buffer_len={}, has_incomplete={}, message_stopped={}", parser.text_buffer_len(), parser.has_incomplete_tool_call(), parser.is_message_stopped()); - // Store the error for potential logging later - _last_error = Some(error_details.clone()); // Check if this is a recoverable connection error let is_connection_error = streaming::is_connection_error(&error_msg);