diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index 9f10458..60e7d52 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -278,6 +278,7 @@ pub struct ContextWindow {
     pub total_tokens: u32,
     pub cumulative_tokens: u32, // Track cumulative tokens across all interactions
     pub conversation_history: Vec<Message>,
+    pub last_thinning_percentage: u32, // Usage threshold (multiple of 10) recorded by the last thin_context call; 0 until then
 }
 
 impl ContextWindow {
@@ -287,6 +288,7 @@ impl ContextWindow {
             total_tokens,
             cumulative_tokens: 0,
             conversation_history: Vec::new(),
+            last_thinning_percentage: 0,
         }
     }
 
@@ -416,6 +418,126 @@ Format this as a detailed but concise summary that can be used to resume the con
             });
         }
     }
+
+    /// Reports whether usage has crossed a 10% threshold that has not been thinned yet.
+    ///
+    /// Usage is rounded down to a multiple of ten; the result is `true` only when that
+    /// value is at least 50, at most 80, and greater than `last_thinning_percentage`.
+    /// From 90% upward this always returns `false`.
+    pub fn should_thin(&self) -> bool {
+        let current_percentage = self.percentage_used() as u32;
+        let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10%
+
+        (50..=80).contains(&current_threshold)
+            && current_threshold > self.last_thinning_percentage
+    }
+
+    /// Replaces large tool results in the first third of the conversation with file pointers.
+    ///
+    /// Records the current usage, rounded down to a multiple of ten, in
+    /// `last_thinning_percentage`, then scans the first third of `conversation_history`
+    /// (at least the first message). Every `User` message that starts with `"Tool result:"`
+    /// and is longer than 1000 bytes is written to a file under `~/tmp` and replaced by a
+    /// one-line note naming that file. Token usage is recalculated afterwards.
+    ///
+    /// Returns a human-readable summary. If `~/tmp` cannot be created, a failure message
+    /// is returned and no message is modified; the threshold is still recorded, so the
+    /// same threshold is not retried. A message whose file write fails is left untouched.
+    ///
+    /// NOTE(review): nothing in this change resets `last_thinning_percentage` after the
+    /// context is compacted; confirm the reset path does so, otherwise thinning cannot
+    /// trigger again once 80% has been handled.
+    pub fn thin_context(&mut self) -> String {
+        let current_percentage = self.percentage_used() as u32;
+        let current_threshold = (current_percentage / 10) * 10;
+
+        // Recorded before any I/O so that a failing ~/tmp does not re-trigger thinning
+        // (and its warning) on every subsequent tool call.
+        self.last_thinning_percentage = current_threshold;
+
+        // Calculate the first third of the conversation
+        let first_third_end = (self.conversation_history.len() / 3).max(1);
+
+        // Create ~/tmp directory if it doesn't exist
+        let tmp_dir = shellexpand::tilde("~/tmp").to_string();
+        if let Err(e) = std::fs::create_dir_all(&tmp_dir) {
+            warn!("Failed to create ~/tmp directory: {}", e);
+            return "⚠️ Context thinning failed: could not create ~/tmp directory".to_string();
+        }
+
+        let mut leaned_count = 0;
+        let mut chars_saved = 0;
+
+        // Scan the first third of messages
+        for (i, message) in self
+            .conversation_history
+            .iter_mut()
+            .enumerate()
+            .take(first_third_end)
+        {
+            // Only process User messages that look like tool results
+            let is_tool_result = matches!(message.role, MessageRole::User)
+                && message.content.starts_with("Tool result:");
+            // Only thin if the content is greater than 1000 bytes
+            let original_len = message.content.len();
+            if !is_tool_result || original_len <= 1000 {
+                continue;
+            }
+
+            // Nanosecond timestamp plus index keeps file names distinct across thinning
+            // passes (and across processes sharing ~/tmp) that run within the same second.
+            let timestamp = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_default()
+                .as_nanos();
+            let filename = format!("leaned_tool_result_{}_{}.txt", timestamp, i);
+            let file_path = format!("{}/{}", tmp_dir, filename);
+
+            // Write the content to file; on failure keep the message as it is
+            if let Err(e) = std::fs::write(&file_path, &message.content) {
+                warn!("Failed to write thinned content to {}: {}", file_path, e);
+                continue;
+            }
+
+            // Replace the message content with a note
+            message.content = format!("Tool result saved to {}", file_path);
+
+            leaned_count += 1;
+            // saturating_sub: the note could exceed the original if the path is very long
+            chars_saved += original_len.saturating_sub(message.content.len());
+
+            debug!("Thinned tool result {} ({} chars) to {}", i, original_len, file_path);
+        }
+
+        // Recalculate token usage after thinning
+        self.recalculate_tokens();
+
+        if leaned_count > 0 {
+            format!(
+                "🥒 Context thinned at {}%: {} tool results, ~{} chars saved",
+                current_threshold, leaned_count, chars_saved
+            )
+        } else {
+            format!(
+                "ℹ Context thinning triggered at {}% but no large tool results found in first third",
+                current_threshold
+            )
+        }
+    }
+
+    /// Recalculate token usage based on current conversation history
+    ///
+    /// Sums `Self::estimate_tokens` over every message's content and stores the total
+    /// in `used_tokens`.
+    fn recalculate_tokens(&mut self) {
+        let mut total = 0;
+        for message in &self.conversation_history {
+            total += Self::estimate_tokens(&message.content);
+        }
+        self.used_tokens = total;
+
+        debug!("Recalculated tokens after thinning: {} tokens", total);
+    }
 }
 
 pub struct Agent {
@@ -1431,7 +1553,7 @@ Template:
 
         // Notify user about summarization
         self.ui_writer.print_context_status(&format!(
-            "\n📊 Context window reaching capacity ({}%). Creating summary...",
+            "\n🗜️ Context window reaching capacity ({}%). Creating summary...",
             self.context_window.percentage_used() as u32
         ));
 
@@ -1497,7 +1619,7 @@ Template:
             }
         };
 
-        info!(
+        debug!(
             "Requesting summary with max_tokens: {:?} (current usage: {} tokens)",
             summary_max_tokens, self.context_window.used_tokens
         );
@@ -1514,7 +1636,7 @@ Template:
         match provider.complete(summary_request).await {
             Ok(summary_response) => {
                 self.ui_writer.print_context_status(
-                    "✅ Summary created successfully. Resetting context window...\n",
+                    "✅ Context compacted successfully. Continuing...\n",
                 );
 
                 // Extract the latest user message from the request
@@ -1531,11 +1653,7 @@ Template:
 
                 // Update the request with new context
                 request.messages = self.context_window.conversation_history.clone();
-
-                self.ui_writer.print_context_status(
-                    "🔄 Context reset complete. Continuing with your request...\n",
-                );
             }
             Err(e) => {
                 error!("Failed to create summary: {}", e);
                 self.ui_writer.print_context_status("⚠️ Unable to create summary. Consider starting a new session if you continue to see errors.\n");
@@ -1677,6 +1795,14 @@ Template:
             // Handle completed tool calls
             for tool_call in completed_tools {
                 debug!("Processing completed tool call: {:?}", tool_call);
+
+                // Check if we should thin the context BEFORE executing the tool
+                if self.context_window.should_thin() {
+                    let thin_summary = self.context_window.thin_context();
+                    // Print the thinning summary to the user
+                    self.ui_writer.println("");
+                    self.ui_writer.print_context_status(&format!("{}\n", thin_summary));
+                }
 
                 // Track what we've already displayed before getting new text
                 // This prevents re-displaying old content after tool execution
diff --git a/crates/g3-core/tests/test_context_thinning.rs b/crates/g3-core/tests/test_context_thinning.rs
new file mode 100644
index 0000000..760524f
--- /dev/null
+++ b/crates/g3-core/tests/test_context_thinning.rs
@@ -0,0 +1,157 @@
+use g3_core::ContextWindow;
+use g3_providers::{Message, MessageRole};
+
+#[test]
+fn test_thinning_thresholds() {
+    let mut context = ContextWindow::new(10000);
+
+    // At 0%, should not thin
+    assert!(!context.should_thin());
+
+    // Simulate reaching 50% usage
+    context.used_tokens = 5000;
+    assert!(context.should_thin());
+
+    // After thinning at 50%, should not thin again until next threshold
+    context.last_thinning_percentage = 50;
+    assert!(!context.should_thin());
+
+    // At 60%, should thin again
+    context.used_tokens = 6000;
+    assert!(context.should_thin());
+
+    // After thinning at 60%, should not thin
+    context.last_thinning_percentage = 60;
+    assert!(!context.should_thin());
+
+    // At 70%, should thin
+    context.used_tokens = 7000;
+    assert!(context.should_thin());
+
+    // At 80%, should thin
+    context.last_thinning_percentage = 70;
+    context.used_tokens = 8000;
+    assert!(context.should_thin());
+
+    // After 80%, should not thin (compaction takes over)
+    context.last_thinning_percentage = 80;
+    context.used_tokens = 8500;
+    assert!(!context.should_thin());
+}
+
+#[test]
+fn test_thin_context_basic() {
+    let mut context = ContextWindow::new(10000);
+
+    // Add some messages to the first third
+    for i in 0..9 {
+        if i % 2 == 0 {
+            context.add_message(Message {
+                role: MessageRole::Assistant,
+                content: format!("Assistant message {}", i),
+            });
+        } else {
+            // Add tool results with varying sizes
+            let content = if i == 1 {
+                // Large tool result (> 1000 chars)
+                format!("Tool result: {}", "x".repeat(1500))
+            } else if i == 3 {
+                // Another large tool result
+                format!("Tool result: {}", "y".repeat(2000))
+            } else {
+                // Small tool result (< 1000 chars)
+                format!("Tool result: small result {}", i)
+            };
+
+            context.add_message(Message {
+                role: MessageRole::User,
+                content,
+            });
+        }
+    }
+
+    // Trigger thinning at 50%
+    context.used_tokens = 5000;
+    let summary = context.thin_context();
+
+    println!("Thinning summary: {}", summary);
+
+    // Exactly 1 large tool result is thinned: index 1 (index 3 is outside the first third of 9)
+    assert!(summary.contains("1 tool result"), "Summary was: {}", summary);
+    assert!(summary.contains("50%"));
+
+    // Check that the large tool results were replaced
+    let first_third_end = context.conversation_history.len() / 3;
+    for i in 0..first_third_end {
+        if let Some(msg) = context.conversation_history.get(i) {
+            if matches!(msg.role, MessageRole::User) && msg.content.starts_with("Tool result:") {
+                if msg.content.len() > 1000 {
+                    panic!("Found un-thinned large tool result at index {}", i);
+                }
+            }
+        }
+    }
+}
+
+#[test]
+fn test_thin_context_no_large_results() {
+    let mut context = ContextWindow::new(10000);
+
+    // Add only small messages
+    for i in 0..9 {
+        context.add_message(Message {
+            role: MessageRole::User,
+            content: format!("Tool result: small {}", i),
+        });
+    }
+
+    context.used_tokens = 5000;
+    let summary = context.thin_context();
+
+    // Should report no large results found
+    assert!(summary.contains("no large tool results found"));
+}
+
+#[test]
+fn test_thin_context_only_affects_first_third() {
+    let mut context = ContextWindow::new(10000);
+
+    // Add 12 messages (first third = 4 messages)
+    for i in 0..12 {
+        let content = if i % 2 == 1 {
+            // All odd indices are large tool results
+            format!("Tool result: {}", "x".repeat(1500))
+        } else {
+            format!("Assistant message {}", i)
+        };
+
+        let role = if i % 2 == 1 {
+            MessageRole::User
+        } else {
+            MessageRole::Assistant
+        };
+
+        context.add_message(Message { role, content });
+    }
+
+    context.used_tokens = 5000;
+    let summary = context.thin_context();
+
+    // First third is 4 messages (indices 0-3), so only indices 1 and 3 should be thinned
+    // That's 2 tool results
+    assert!(summary.contains("2 tool results"));
+
+    // Check that messages after the first third are NOT thinned
+    let first_third_end = context.conversation_history.len() / 3;
+    for i in first_third_end..context.conversation_history.len() {
+        if let Some(msg) = context.conversation_history.get(i) {
+            if matches!(msg.role, MessageRole::User) && msg.content.starts_with("Tool result:") {
+                // These should still be large (not thinned)
+                if i % 2 == 1 {
+                    assert!(msg.content.len() > 1000,
+                        "Message at index {} should not have been thinned", i);
+                }
+            }
+        }
+    }
+}