progressive context thinning

This commit is contained in:
Dhanji Prasanna
2025-10-20 15:29:44 +11:00
parent 2488cc54d5
commit 3afad3d61f
2 changed files with 269 additions and 8 deletions

View File

@@ -278,6 +278,7 @@ pub struct ContextWindow {
pub total_tokens: u32, pub total_tokens: u32,
pub cumulative_tokens: u32, // Track cumulative tokens across all interactions pub cumulative_tokens: u32, // Track cumulative tokens across all interactions
pub conversation_history: Vec<Message>, pub conversation_history: Vec<Message>,
pub last_thinning_percentage: u32, // Track the last percentage at which we thinned
} }
impl ContextWindow { impl ContextWindow {
@@ -287,6 +288,7 @@ impl ContextWindow {
total_tokens, total_tokens,
cumulative_tokens: 0, cumulative_tokens: 0,
conversation_history: Vec::new(), conversation_history: Vec::new(),
last_thinning_percentage: 0,
} }
} }
@@ -416,6 +418,104 @@ Format this as a detailed but concise summary that can be used to resume the con
}); });
} }
} }
/// Check if we should trigger context thinning.
/// Triggers once per 10% bucket at the 50%, 60%, 70%, and 80% thresholds.
pub fn should_thin(&self) -> bool {
    let used = self.percentage_used() as u32;

    // Below the 50% floor there is nothing to do.
    if used < 50 {
        return false;
    }

    // Round down to the nearest 10% bucket. Fire only if this bucket is new
    // (higher than the last one we thinned at) and does not exceed 80% —
    // beyond that, context compaction takes over.
    let bucket = used - used % 10;
    bucket > self.last_thinning_percentage && bucket <= 80
}
/// Perform context thinning: scan the first third of the conversation and
/// replace large tool results with a pointer to a file under ~/tmp that
/// holds the original content.
///
/// Returns a user-facing summary message describing what was thinned.
pub fn thin_context(&mut self) -> String {
    let current_percentage = self.percentage_used() as u32;
    let current_threshold = (current_percentage / 10) * 10; // round down to nearest 10%

    // Record the bucket so should_thin() does not fire again until the next one.
    self.last_thinning_percentage = current_threshold;

    // Only the first third of the conversation is eligible: the oldest tool
    // results are the least likely to still be needed verbatim.
    let total_messages = self.conversation_history.len();
    let first_third_end = (total_messages / 3).max(1);

    let mut leaned_count = 0;
    let mut chars_saved = 0;

    // Spilled tool results are written under ~/tmp; ensure it exists up front.
    let tmp_dir = shellexpand::tilde("~/tmp").to_string();
    if let Err(e) = std::fs::create_dir_all(&tmp_dir) {
        warn!("Failed to create ~/tmp directory: {}", e);
        // No placeholders here, so a plain owned string beats format!
        // (clippy::useless_format).
        return "⚠️ Context thinning failed: could not create ~/tmp directory".to_string();
    }

    for i in 0..first_third_end {
        if let Some(message) = self.conversation_history.get_mut(i) {
            // Only User messages carrying tool output are candidates.
            if matches!(message.role, MessageRole::User)
                && message.content.starts_with("Tool result:")
            {
                let content_len = message.content.len();
                // Small results are cheap to keep inline; only spill large ones.
                if content_len > 1000 {
                    // Timestamp + message index keeps filenames unique within a
                    // single thinning pass (same-second passes could reuse a
                    // name for the same index, overwriting the older spill).
                    let timestamp = std::time::SystemTime::now()
                        .duration_since(std::time::UNIX_EPOCH)
                        .unwrap_or_default()
                        .as_secs();
                    let file_path =
                        format!("{}/leaned_tool_result_{}_{}.txt", tmp_dir, timestamp, i);

                    // Persist the full content before discarding it from context.
                    if let Err(e) = std::fs::write(&file_path, &message.content) {
                        warn!("Failed to write thinned content to {}: {}", file_path, e);
                        continue;
                    }

                    // Replace the in-context content with a pointer to the file.
                    message.content = format!("Tool result saved to {}", file_path);
                    leaned_count += 1;
                    // content_len > 1000 and the replacement is short, so this
                    // subtraction cannot underflow.
                    chars_saved += content_len - message.content.len();
                    debug!("Thinned tool result {} ({} chars) to {}", i, content_len, file_path);
                }
            }
        }
    }

    // The token estimate must reflect the (now smaller) history.
    self.recalculate_tokens();

    if leaned_count > 0 {
        format!(
            "🥒 Context thinned at {}%: {} tool results, ~{} chars saved",
            current_threshold, leaned_count, chars_saved
        )
    } else {
        // NOTE(review): the leading space looks like a stripped emoji —
        // preserved byte-for-byte; confirm intended prefix.
        format!(
            " Context thinning triggered at {}% but no large tool results found in first third",
            current_threshold
        )
    }
}
/// Recalculate token usage from the current conversation history.
/// Sums the per-message token estimates and stores the result in used_tokens.
fn recalculate_tokens(&mut self) {
    let total = self
        .conversation_history
        .iter()
        .map(|message| Self::estimate_tokens(&message.content))
        .sum();
    self.used_tokens = total;
    debug!("Recalculated tokens after thinning: {} tokens", total);
}
} }
pub struct Agent<W: UiWriter> { pub struct Agent<W: UiWriter> {
@@ -1431,7 +1531,7 @@ Template:
// Notify user about summarization // Notify user about summarization
self.ui_writer.print_context_status(&format!( self.ui_writer.print_context_status(&format!(
"\n📊 Context window reaching capacity ({}%). Creating summary...", "\n🗜️ Context window reaching capacity ({}%). Creating summary...",
self.context_window.percentage_used() as u32 self.context_window.percentage_used() as u32
)); ));
@@ -1497,7 +1597,7 @@ Template:
} }
}; };
info!( debug!(
"Requesting summary with max_tokens: {:?} (current usage: {} tokens)", "Requesting summary with max_tokens: {:?} (current usage: {} tokens)",
summary_max_tokens, self.context_window.used_tokens summary_max_tokens, self.context_window.used_tokens
); );
@@ -1514,7 +1614,7 @@ Template:
match provider.complete(summary_request).await { match provider.complete(summary_request).await {
Ok(summary_response) => { Ok(summary_response) => {
self.ui_writer.print_context_status( self.ui_writer.print_context_status(
"Summary created successfully. Resetting context window...\n", "Context compacted successfully. Continuing...\n",
); );
// Extract the latest user message from the request // Extract the latest user message from the request
@@ -1531,11 +1631,7 @@ Template:
// Update the request with new context // Update the request with new context
request.messages = self.context_window.conversation_history.clone(); request.messages = self.context_window.conversation_history.clone();
}
self.ui_writer.print_context_status(
"🔄 Context reset complete. Continuing with your request...\n",
);
}
Err(e) => { Err(e) => {
error!("Failed to create summary: {}", e); error!("Failed to create summary: {}", e);
self.ui_writer.print_context_status("⚠️ Unable to create summary. Consider starting a new session if you continue to see errors.\n"); self.ui_writer.print_context_status("⚠️ Unable to create summary. Consider starting a new session if you continue to see errors.\n");
@@ -1678,6 +1774,14 @@ Template:
for tool_call in completed_tools { for tool_call in completed_tools {
debug!("Processing completed tool call: {:?}", tool_call); debug!("Processing completed tool call: {:?}", tool_call);
// Check if we should thin the context BEFORE executing the tool
if self.context_window.should_thin() {
let thin_summary = self.context_window.thin_context();
// Print the thinning summary to the user
self.ui_writer.println("");
self.ui_writer.print_context_status(&format!("{}\n", thin_summary));
}
// Track what we've already displayed before getting new text // Track what we've already displayed before getting new text
// This prevents re-displaying old content after tool execution // This prevents re-displaying old content after tool execution
let already_displayed_chars = current_response.chars().count(); let already_displayed_chars = current_response.chars().count();

View File

@@ -0,0 +1,157 @@
use g3_core::ContextWindow;
use g3_providers::{Message, MessageRole};
#[test]
fn test_thinning_thresholds() {
    let mut context = ContextWindow::new(10000);

    // Table of (used_tokens, last_thinning_percentage, expected should_thin).
    // Walks the same progression as the feature: first trigger at 50%, one
    // trigger per 10% bucket up to 80%, then compaction takes over.
    let cases = [
        (0, 0, false),      // 0% — below the 50% floor
        (5000, 0, true),    // 50% — first threshold crossed
        (5000, 50, false),  // already thinned at 50%
        (6000, 50, true),   // 60% — next bucket
        (6000, 60, false),  // already thinned at 60%
        (7000, 60, true),   // 70%
        (8000, 70, true),   // 80% — last thinning bucket
        (8500, 80, false),  // past 80% — compaction, not thinning
    ];

    for (used, last, expected) in cases {
        context.used_tokens = used;
        context.last_thinning_percentage = last;
        assert_eq!(
            context.should_thin(),
            expected,
            "used_tokens={} last_thinning_percentage={}",
            used,
            last
        );
    }
}
#[test]
fn test_thin_context_basic() {
    let mut context = ContextWindow::new(10000);

    // Interleave assistant messages (even indices) with tool results (odd
    // indices). Indices 1 and 3 carry oversized payloads (> 1000 chars);
    // the other odd indices stay small.
    for i in 0..9 {
        let message = if i % 2 == 0 {
            Message {
                role: MessageRole::Assistant,
                content: format!("Assistant message {}", i),
            }
        } else {
            let content = match i {
                1 => format!("Tool result: {}", "x".repeat(1500)),
                3 => format!("Tool result: {}", "y".repeat(2000)),
                _ => format!("Tool result: small result {}", i),
            };
            Message {
                role: MessageRole::User,
                content,
            }
        };
        context.add_message(message);
    }

    // Force 50% usage and trigger thinning directly.
    context.used_tokens = 5000;
    let summary = context.thin_context();
    println!("Thinning summary: {}", summary);

    // First third = indices 0..3, which holds exactly one large tool result
    // (index 1); the one at index 3 lies outside the thinned region.
    assert!(summary.contains("1 tool result"), "Summary was: {}", summary);
    assert!(summary.contains("50%"));

    // No large tool result may survive inside the first third.
    let first_third_end = context.conversation_history.len() / 3;
    for i in 0..first_third_end {
        if let Some(msg) = context.conversation_history.get(i) {
            let is_tool_result = matches!(msg.role, MessageRole::User)
                && msg.content.starts_with("Tool result:");
            assert!(
                !(is_tool_result && msg.content.len() > 1000),
                "Found un-thinned large tool result at index {}",
                i
            );
        }
    }
}
#[test]
fn test_thin_context_no_large_results() {
    let mut context = ContextWindow::new(10000);

    // Nine small tool results — none crosses the 1000-char spill threshold.
    for i in 0..9 {
        let message = Message {
            role: MessageRole::User,
            content: format!("Tool result: small {}", i),
        };
        context.add_message(message);
    }

    // Trigger thinning at 50% usage.
    context.used_tokens = 5000;
    let summary = context.thin_context();

    // Thinning ran but found nothing worth spilling to disk.
    assert!(summary.contains("no large tool results found"));
}
#[test]
fn test_thin_context_only_affects_first_third() {
    let mut context = ContextWindow::new(10000);

    // 12 messages: every odd index is an oversized tool result, every even
    // index a small assistant message. First third = indices 0..4.
    for i in 0..12 {
        let message = if i % 2 == 1 {
            Message {
                role: MessageRole::User,
                content: format!("Tool result: {}", "x".repeat(1500)),
            }
        } else {
            Message {
                role: MessageRole::Assistant,
                content: format!("Assistant message {}", i),
            }
        };
        context.add_message(message);
    }

    // Trigger thinning at 50% usage.
    context.used_tokens = 5000;
    let summary = context.thin_context();

    // Indices 1 and 3 fall in the first third, so exactly two results spill.
    assert!(summary.contains("2 tool results"));

    // Everything past the first third must be left untouched (still large).
    let first_third_end = context.conversation_history.len() / 3;
    for i in first_third_end..context.conversation_history.len() {
        if let Some(msg) = context.conversation_history.get(i) {
            if i % 2 == 1
                && matches!(msg.role, MessageRole::User)
                && msg.content.starts_with("Tool result:")
            {
                assert!(
                    msg.content.len() > 1000,
                    "Message at index {} should not have been thinned",
                    i
                );
            }
        }
    }
}