Fix ACD turn summary loss and add /dump command

ACD (Aggressive Context Dehydration) fixes:
- Fixed dehydrate_context() to extract turn summary from context window
  instead of using the passed-in final_response (which contained only
  the timing footer, not the actual LLM response)
- Removed final_response parameter from dehydrate_context() since it
  now self-extracts the last assistant message as the summary
- This ensures the actual turn summary is preserved after dehydration,
  not just the timing footer

New /dump command:
- Added /dump command to dump entire context window to tmp/ for debugging
- Shows message index, role, kind, content length, and full content
- Available in both console and machine modes

UTF-8 safety:
- Fixed truncate_to_word_boundary() to use character indices instead of
  byte indices, preventing panics on multi-byte UTF-8 characters
- Added UTF-8 string slicing guidance to AGENTS.md

Agent: g3
This commit is contained in:
Dhanji R. Prasanna
2026-01-12 05:13:02 +05:30
parent ac17b95b24
commit f415dbb84b
14 changed files with 1771 additions and 27 deletions

View File

@@ -1,3 +1,4 @@
pub mod acd;
pub mod context_window;
pub mod background_process;
pub mod compaction;
@@ -127,6 +128,8 @@ pub struct Agent<W: UiWriter> {
agent_name: Option<String>,
/// Whether auto-memory reminders are enabled (--auto-memory flag)
auto_memory: bool,
/// Whether aggressive context dehydration is enabled (--acd flag)
acd_enabled: bool,
}
impl<W: UiWriter> Agent<W> {
@@ -296,6 +299,7 @@ impl<W: UiWriter> Agent<W> {
is_agent_mode: false,
agent_name: None,
auto_memory: false,
acd_enabled: false,
})
}
@@ -1369,6 +1373,130 @@ impl<W: UiWriter> Agent<W> {
debug!("Auto-memory reminders: {}", if enabled { "enabled" } else { "disabled" });
}
/// Enable or disable aggressive context dehydration (ACD)
pub fn set_acd_enabled(&mut self, enabled: bool) {
self.acd_enabled = enabled;
debug!("ACD (aggressive context dehydration): {}", if enabled { "enabled" } else { "disabled" });
}
/// Perform ACD dehydration - save current conversation state to a fragment.
/// Called at the end of each turn when ACD is enabled.
///
/// This saves all non-system messages (except the final assistant response)
/// to a fragment, then replaces them with a compact stub. The final assistant
/// response is preserved as the turn summary after the stub.
///
/// in the context with a compact stub. The agent's final response (summary)
/// is preserved after the stub.
fn dehydrate_context(&mut self) {
if !self.acd_enabled {
return;
}
let session_id = match &self.session_id {
Some(id) => id.clone(),
None => {
debug!("ACD: No session_id, skipping dehydration");
return;
}
};
// Find the index of the last dehydration stub (marks the end of previously dehydrated content)
// We only want to dehydrate messages AFTER the last stub+summary pair
let last_stub_index = self.context_window
.conversation_history
.iter()
.rposition(|m| m.is_dehydrated_stub());
// Start index for messages to dehydrate:
// - If there's a previous stub, start after the stub AND its following summary (stub + 2)
// - Otherwise, start from the beginning (index 0)
let dehydrate_start = match last_stub_index {
Some(idx) => idx + 2, // Skip the stub and the summary that follows it
None => 0,
};
// Get the preceding fragment ID (if any)
let preceding_id = crate::acd::get_latest_fragment_id(&session_id).ok().flatten();
// Extract only NEW non-system messages to dehydrate (after the last stub+summary)
let messages_to_dehydrate: Vec<_> = self.context_window
.conversation_history
.iter()
.enumerate()
.filter(|(idx, m)| *idx >= dehydrate_start && !matches!(m.role, g3_providers::MessageRole::System))
.map(|(_, m)| m.clone())
.collect();
if messages_to_dehydrate.is_empty() {
return;
}
// Extract the last assistant message as the turn summary
// This is the actual LLM response, not the timing footer passed in final_response
let turn_summary: Option<String> = messages_to_dehydrate
.iter()
.rev()
.find(|m| matches!(m.role, g3_providers::MessageRole::Assistant))
.map(|m| m.content.clone());
// Use extracted summary, falling back to final_response only if no assistant message found
let summary_content = turn_summary.unwrap_or_default();
// Create the fragment and generate stub
let fragment = crate::acd::Fragment::new(messages_to_dehydrate, preceding_id);
let stub = fragment.generate_stub();
if let Err(e) = fragment.save(&session_id) {
warn!("Failed to save ACD fragment: {}", e);
return; // Don't modify context if save failed
}
println!("💾 Dehydrated {} messages to fragment {}", fragment.message_count, fragment.fragment_id);
// Now replace the context: keep system messages + previous stubs/summaries, add new stub, add new summary
// Extract messages to keep: system messages + everything up to (but not including) dehydrate_start
let messages_to_keep: Vec<_> = self.context_window
.conversation_history
.iter()
.enumerate()
.filter(|(idx, m)| {
// Keep all system messages OR keep previous stub+summary pairs
matches!(m.role, g3_providers::MessageRole::System) || *idx < dehydrate_start
})
.map(|(_, m)| m.clone())
.collect();
// Clear and rebuild context
self.context_window.conversation_history.clear();
// Add back kept messages (system + previous stubs/summaries)
for msg in messages_to_keep {
self.context_window.conversation_history.push(msg);
}
// Add the stub as a user message (so LLM sees it as context)
let stub_msg = g3_providers::Message::with_kind(
g3_providers::MessageRole::User,
stub,
g3_providers::MessageKind::DehydratedStub,
);
self.context_window.conversation_history.push(stub_msg);
// Add the final response as assistant message (the summary)
if !summary_content.trim().is_empty() {
let summary_msg = g3_providers::Message::with_kind(
g3_providers::MessageRole::Assistant,
summary_content,
g3_providers::MessageKind::Summary,
);
self.context_window.conversation_history.push(summary_msg);
}
// Recalculate token usage
self.context_window.recalculate_tokens();
}
/// Send an auto-memory reminder to the LLM if tools were called during the turn.
/// This prompts the LLM to call the `remember` tool if it discovered any key code locations.
/// Returns true if a reminder was sent and processed.
@@ -1503,6 +1631,7 @@ impl<W: UiWriter> Agent<W> {
id: String::new(),
images: Vec::new(),
content: content.to_string(),
kind: g3_providers::MessageKind::Regular,
cache_control: None,
});
}
@@ -1529,6 +1658,7 @@ impl<W: UiWriter> Agent<W> {
id: String::new(),
images: Vec::new(),
content: format!("[Session Resumed]\n\n{}", context_msg),
kind: g3_providers::MessageKind::Regular,
cache_control: None,
});
}
@@ -2077,11 +2207,26 @@ impl<W: UiWriter> Agent<W> {
const MAX_LINE_WIDTH: usize = 80;
let output_len = output_lines.len();
// Skip printing for todo tools - they already print their content
// Skip printing content for todo tools - they already print their content
let is_todo_tool =
tool_call.tool == "todo_read" || tool_call.tool == "todo_write";
if !is_todo_tool {
// For read_file, show a summary instead of file contents
let is_read_file = tool_call.tool == "read_file";
if is_read_file && tool_success {
// Calculate summary: lines and chars
let char_count = tool_result.len();
let char_display = if char_count >= 1000 {
format!("{:.1}k", char_count as f64 / 1000.0)
} else {
format!("{}", char_count)
};
let summary = format!("✅ read {} lines | {} chars", output_len, char_display);
self.ui_writer.update_tool_output_line(&summary);
} else if is_todo_tool {
// Skip - todo tools print their own content
} else {
let max_lines_to_show = if wants_full { output_len } else { MAX_LINES };
for (idx, line) in output_lines.iter().enumerate() {
@@ -2356,11 +2501,8 @@ impl<W: UiWriter> Agent<W> {
break;
}
// Set full_response to current_response (don't append)
// current_response already contains everything that was displayed
// Don't set full_response here - it would duplicate the output
// The text was already displayed during streaming
// Return empty string to avoid duplication
// Set full_response to empty to avoid duplication in return value
// (content was already displayed during streaming)
full_response = String::new();
// Finish the streaming markdown formatter before returning
@@ -2389,6 +2531,9 @@ impl<W: UiWriter> Agent<W> {
full_response
};
// Dehydrate context - the function extracts the summary from context itself
self.dehydrate_context();
return Ok(TaskResult::new(
final_response,
self.context_window.clone(),
@@ -2618,9 +2763,11 @@ impl<W: UiWriter> Agent<W> {
let _ttft = first_token_time.unwrap_or_else(|| stream_start.elapsed());
// Add the RAW unfiltered response to context window before returning
// This ensures the log contains the true raw content including any JSON
if !full_response.trim().is_empty() {
// Add the RAW unfiltered response to context window before returning.
// This ensures the log contains the true raw content including any JSON.
// Note: We check current_response, not full_response, because full_response
// may be empty to avoid display duplication (content was already streamed).
if !current_response.trim().is_empty() {
// Get the raw text from the parser (before filtering)
let raw_text = parser.get_text_content();
let raw_clean = streaming::clean_llm_tokens(&raw_text);
@@ -2652,6 +2799,9 @@ impl<W: UiWriter> Agent<W> {
full_response
};
// Dehydrate context - the function extracts the summary from context itself
self.dehydrate_context();
return Ok(TaskResult::new(final_response, self.context_window.clone()));
}
@@ -2679,6 +2829,9 @@ impl<W: UiWriter> Agent<W> {
full_response
};
// Dehydrate context - the function extracts the summary from context itself
self.dehydrate_context();
Ok(TaskResult::new(final_response, self.context_window.clone()))
}
@@ -2771,19 +2924,26 @@ pub use utils::apply_unified_diff_to_string;
/// Truncate a string to approximately max_len characters, ending at a word boundary
fn truncate_to_word_boundary(s: &str, max_len: usize) -> String {
if s.len() <= max_len {
let char_count = s.chars().count();
if char_count <= max_len {
return s.to_string();
}
// Find the last space before max_len
let truncated = &s[..max_len];
if let Some(last_space) = truncated.rfind(' ') {
if last_space > max_len / 2 {
// Only use word boundary if it's not too short
return format!("{}...", &s[..last_space]);
// Get the byte index of the max_len-th character
let byte_index: usize = s.char_indices()
.nth(max_len)
.map(|(i, _)| i)
.unwrap_or(s.len());
// Find the last space before the character limit
let truncated = &s[..byte_index];
if let Some(last_space_byte) = truncated.rfind(' ') {
if truncated[..last_space_byte].chars().count() > max_len / 2 {
// Only use word boundary if it's not too short (in characters)
return format!("{}...", &s[..last_space_byte]);
}
}
// Fall back to character truncation
// Fall back to truncation at character boundary
format!("{}...", truncated)
}