progressive context thinning
This commit is contained in:
@@ -278,6 +278,7 @@ pub struct ContextWindow {
|
|||||||
pub total_tokens: u32,
|
pub total_tokens: u32,
|
||||||
pub cumulative_tokens: u32, // Track cumulative tokens across all interactions
|
pub cumulative_tokens: u32, // Track cumulative tokens across all interactions
|
||||||
pub conversation_history: Vec<Message>,
|
pub conversation_history: Vec<Message>,
|
||||||
|
pub last_thinning_percentage: u32, // Track the last percentage at which we thinned
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ContextWindow {
|
impl ContextWindow {
|
||||||
@@ -287,6 +288,7 @@ impl ContextWindow {
|
|||||||
total_tokens,
|
total_tokens,
|
||||||
cumulative_tokens: 0,
|
cumulative_tokens: 0,
|
||||||
conversation_history: Vec::new(),
|
conversation_history: Vec::new(),
|
||||||
|
last_thinning_percentage: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -416,6 +418,104 @@ Format this as a detailed but concise summary that can be used to resume the con
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if we should trigger context thinning
|
||||||
|
/// Triggers at 50%, 60%, 70%, and 80% thresholds
|
||||||
|
pub fn should_thin(&self) -> bool {
|
||||||
|
let current_percentage = self.percentage_used() as u32;
|
||||||
|
|
||||||
|
// Check if we've crossed a new 10% threshold starting at 50%
|
||||||
|
if current_percentage >= 50 {
|
||||||
|
let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10%
|
||||||
|
if current_threshold > self.last_thinning_percentage && current_threshold <= 80 {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Perform context thinning: scan first third of conversation and replace large tool results
|
||||||
|
/// Returns a summary message about what was thinned
|
||||||
|
pub fn thin_context(&mut self) -> String {
|
||||||
|
let current_percentage = self.percentage_used() as u32;
|
||||||
|
let current_threshold = (current_percentage / 10) * 10;
|
||||||
|
|
||||||
|
// Update the last thinning percentage
|
||||||
|
self.last_thinning_percentage = current_threshold;
|
||||||
|
|
||||||
|
// Calculate the first third of the conversation
|
||||||
|
let total_messages = self.conversation_history.len();
|
||||||
|
let first_third_end = (total_messages / 3).max(1);
|
||||||
|
|
||||||
|
let mut leaned_count = 0;
|
||||||
|
let mut chars_saved = 0;
|
||||||
|
|
||||||
|
// Create ~/tmp directory if it doesn't exist
|
||||||
|
let tmp_dir = shellexpand::tilde("~/tmp").to_string();
|
||||||
|
if let Err(e) = std::fs::create_dir_all(&tmp_dir) {
|
||||||
|
warn!("Failed to create ~/tmp directory: {}", e);
|
||||||
|
return format!("⚠️ Context thinning failed: could not create ~/tmp directory");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scan the first third of messages
|
||||||
|
for i in 0..first_third_end {
|
||||||
|
if let Some(message) = self.conversation_history.get_mut(i) {
|
||||||
|
// Only process User messages that look like tool results
|
||||||
|
if matches!(message.role, MessageRole::User) && message.content.starts_with("Tool result:") {
|
||||||
|
let content_len = message.content.len();
|
||||||
|
|
||||||
|
// Only thin if the content is greater than 1000 chars
|
||||||
|
if content_len > 1000 {
|
||||||
|
// Generate a unique filename based on timestamp and index
|
||||||
|
let timestamp = std::time::SystemTime::now()
|
||||||
|
.duration_since(std::time::UNIX_EPOCH)
|
||||||
|
.unwrap_or_default()
|
||||||
|
.as_secs();
|
||||||
|
let filename = format!("leaned_tool_result_{}_{}.txt", timestamp, i);
|
||||||
|
let file_path = format!("{}/{}", tmp_dir, filename);
|
||||||
|
|
||||||
|
// Write the content to file
|
||||||
|
if let Err(e) = std::fs::write(&file_path, &message.content) {
|
||||||
|
warn!("Failed to write thinned content to {}: {}", file_path, e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Replace the message content with a note
|
||||||
|
let original_len = message.content.len();
|
||||||
|
message.content = format!("Tool result saved to {}", file_path);
|
||||||
|
|
||||||
|
leaned_count += 1;
|
||||||
|
chars_saved += original_len - message.content.len();
|
||||||
|
|
||||||
|
debug!("Thinned tool result {} ({} chars) to {}", i, original_len, file_path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recalculate token usage after thinning
|
||||||
|
self.recalculate_tokens();
|
||||||
|
|
||||||
|
if leaned_count > 0 {
|
||||||
|
format!("🥒 Context thinned at {}%: {} tool results, ~{} chars saved",
|
||||||
|
current_threshold, leaned_count, chars_saved)
|
||||||
|
} else {
|
||||||
|
format!("ℹ Context thinning triggered at {}% but no large tool results found in first third",
|
||||||
|
current_threshold)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recalculate token usage based on current conversation history
|
||||||
|
fn recalculate_tokens(&mut self) {
|
||||||
|
let mut total = 0;
|
||||||
|
for message in &self.conversation_history {
|
||||||
|
total += Self::estimate_tokens(&message.content);
|
||||||
|
}
|
||||||
|
self.used_tokens = total;
|
||||||
|
|
||||||
|
debug!("Recalculated tokens after thinning: {} tokens", total);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Agent<W: UiWriter> {
|
pub struct Agent<W: UiWriter> {
|
||||||
@@ -1431,7 +1531,7 @@ Template:
|
|||||||
|
|
||||||
// Notify user about summarization
|
// Notify user about summarization
|
||||||
self.ui_writer.print_context_status(&format!(
|
self.ui_writer.print_context_status(&format!(
|
||||||
"\n📊 Context window reaching capacity ({}%). Creating summary...",
|
"\n🗜️ Context window reaching capacity ({}%). Creating summary...",
|
||||||
self.context_window.percentage_used() as u32
|
self.context_window.percentage_used() as u32
|
||||||
));
|
));
|
||||||
|
|
||||||
@@ -1497,7 +1597,7 @@ Template:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
info!(
|
debug!(
|
||||||
"Requesting summary with max_tokens: {:?} (current usage: {} tokens)",
|
"Requesting summary with max_tokens: {:?} (current usage: {} tokens)",
|
||||||
summary_max_tokens, self.context_window.used_tokens
|
summary_max_tokens, self.context_window.used_tokens
|
||||||
);
|
);
|
||||||
@@ -1514,7 +1614,7 @@ Template:
|
|||||||
match provider.complete(summary_request).await {
|
match provider.complete(summary_request).await {
|
||||||
Ok(summary_response) => {
|
Ok(summary_response) => {
|
||||||
self.ui_writer.print_context_status(
|
self.ui_writer.print_context_status(
|
||||||
"✅ Summary created successfully. Resetting context window...\n",
|
"✅ Context compacted successfully. Continuing...\n",
|
||||||
);
|
);
|
||||||
|
|
||||||
// Extract the latest user message from the request
|
// Extract the latest user message from the request
|
||||||
@@ -1531,11 +1631,7 @@ Template:
|
|||||||
|
|
||||||
// Update the request with new context
|
// Update the request with new context
|
||||||
request.messages = self.context_window.conversation_history.clone();
|
request.messages = self.context_window.conversation_history.clone();
|
||||||
|
}
|
||||||
self.ui_writer.print_context_status(
|
|
||||||
"🔄 Context reset complete. Continuing with your request...\n",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
error!("Failed to create summary: {}", e);
|
error!("Failed to create summary: {}", e);
|
||||||
self.ui_writer.print_context_status("⚠️ Unable to create summary. Consider starting a new session if you continue to see errors.\n");
|
self.ui_writer.print_context_status("⚠️ Unable to create summary. Consider starting a new session if you continue to see errors.\n");
|
||||||
@@ -1677,6 +1773,14 @@ Template:
|
|||||||
// Handle completed tool calls
|
// Handle completed tool calls
|
||||||
for tool_call in completed_tools {
|
for tool_call in completed_tools {
|
||||||
debug!("Processing completed tool call: {:?}", tool_call);
|
debug!("Processing completed tool call: {:?}", tool_call);
|
||||||
|
|
||||||
|
// Check if we should thin the context BEFORE executing the tool
|
||||||
|
if self.context_window.should_thin() {
|
||||||
|
let thin_summary = self.context_window.thin_context();
|
||||||
|
// Print the thinning summary to the user
|
||||||
|
self.ui_writer.println("");
|
||||||
|
self.ui_writer.print_context_status(&format!("{}\n", thin_summary));
|
||||||
|
}
|
||||||
|
|
||||||
// Track what we've already displayed before getting new text
|
// Track what we've already displayed before getting new text
|
||||||
// This prevents re-displaying old content after tool execution
|
// This prevents re-displaying old content after tool execution
|
||||||
|
|||||||
157
crates/g3-core/tests/test_context_thinning.rs
Normal file
157
crates/g3-core/tests/test_context_thinning.rs
Normal file
@@ -0,0 +1,157 @@
|
|||||||
|
use g3_core::ContextWindow;
|
||||||
|
use g3_providers::{Message, MessageRole};
|
||||||
|
|
||||||
|
/// Walks `should_thin()` through the 50/60/70/80% thresholds and checks that
/// each threshold fires once and that nothing fires past 80%.
#[test]
fn test_thinning_thresholds() {
    let mut context = ContextWindow::new(10000);

    // A freshly created window must not ask for thinning.
    assert!(!context.should_thin());

    // (threshold recorded by the last thinning, tokens used, expected result)
    let steps = [
        (0, 5000, true),   // reaching 50% triggers
        (50, 5000, false), // already thinned at 50%
        (50, 6000, true),  // reaching 60% triggers again
        (60, 6000, false), // already thinned at 60%
        (60, 7000, true),  // reaching 70% triggers
        (70, 8000, true),  // reaching 80% triggers
        (80, 8500, false), // past 80%: compaction takes over
    ];

    for &(last_thinned, used, expected) in &steps {
        context.last_thinning_percentage = last_thinned;
        context.used_tokens = used;
        assert_eq!(
            context.should_thin(),
            expected,
            "unexpected should_thin() result with last threshold {}% at {} used tokens",
            last_thinned,
            used
        );
    }
}
|
||||||
|
|
||||||
|
/// Thinning a 9-message history must replace the single large tool result
/// that falls inside the first third (indices 0..3).
#[test]
fn test_thin_context_basic() {
    let mut context = ContextWindow::new(10000);

    // Even indices are assistant messages; odd indices are tool results, of
    // which only indices 1 and 3 exceed 1000 chars.
    for i in 0..9 {
        let (role, content) = match i {
            1 => (
                MessageRole::User,
                format!("Tool result: {}", "x".repeat(1500)),
            ),
            3 => (
                MessageRole::User,
                format!("Tool result: {}", "y".repeat(2000)),
            ),
            n if n % 2 == 1 => (
                MessageRole::User,
                format!("Tool result: small result {}", n),
            ),
            n => (MessageRole::Assistant, format!("Assistant message {}", n)),
        };
        context.add_message(Message { role, content });
    }

    // Pretend we are at 50% usage and run a thinning pass.
    context.used_tokens = 5000;
    let summary = context.thin_context();

    println!("Thinning summary: {}", summary);

    // Only index 1 is both large and inside the first third.
    assert!(summary.contains("1 tool result"), "Summary was: {}", summary);
    assert!(summary.contains("50%"));

    // No large tool result may remain in the first third.
    let first_third_end = context.conversation_history.len() / 3;
    for (i, msg) in context
        .conversation_history
        .iter()
        .enumerate()
        .take(first_third_end)
    {
        let is_tool_result = matches!(msg.role, MessageRole::User)
            && msg.content.starts_with("Tool result:");
        if is_tool_result && msg.content.len() > 1000 {
            panic!("Found un-thinned large tool result at index {}", i);
        }
    }
}
|
||||||
|
|
||||||
|
/// When every tool result is small, thinning must report that it found
/// nothing to replace.
#[test]
fn test_thin_context_no_large_results() {
    let mut context = ContextWindow::new(10000);

    // Nine short tool results, none above the 1000-char cut-off.
    let small_results = (0..9).map(|i| Message {
        role: MessageRole::User,
        content: format!("Tool result: small {}", i),
    });
    for message in small_results {
        context.add_message(message);
    }

    context.used_tokens = 5000;
    let summary = context.thin_context();

    // Should report no large results found
    assert!(summary.contains("no large tool results found"));
}
|
||||||
|
|
||||||
|
/// Thinning must touch only the first third of the history: with 12
/// messages, the large tool results at indices 1 and 3 are replaced and
/// every later one is left intact.
#[test]
fn test_thin_context_only_affects_first_third() {
    let mut context = ContextWindow::new(10000);

    // 12 messages: odd indices are large tool results, even indices are
    // assistant messages.
    for i in 0..12 {
        let message = if i % 2 == 1 {
            Message {
                role: MessageRole::User,
                content: format!("Tool result: {}", "x".repeat(1500)),
            }
        } else {
            Message {
                role: MessageRole::Assistant,
                content: format!("Assistant message {}", i),
            }
        };
        context.add_message(message);
    }

    context.used_tokens = 5000;
    let summary = context.thin_context();

    // First third is indices 0-3, so exactly indices 1 and 3 are thinned.
    assert!(summary.contains("2 tool results"));

    // Everything after the first third must still carry its full content.
    let first_third_end = context.conversation_history.len() / 3;
    let untouched = context
        .conversation_history
        .iter()
        .enumerate()
        .skip(first_third_end);
    for (i, msg) in untouched {
        let is_tool_result = matches!(msg.role, MessageRole::User)
            && msg.content.starts_with("Tool result:");
        if is_tool_result && i % 2 == 1 {
            assert!(
                msg.content.len() > 1000,
                "Message at index {} should not have been thinned",
                i
            );
        }
    }
}
|
||||||
Reference in New Issue
Block a user