g3/crates/g3-core/src/compaction.rs

commit 182f5f98fe
Author: Dhanji R. Prasanna
Date:   2026-01-20 09:50:55 +05:30

Centralize g3 status message formatting

Extract a new g3_status module in g3-cli that provides consistent formatting
for all 'g3:' prefixed system status messages.

Key changes:
- Add G3Status struct with methods for progress, done, failed, error, etc.
- Add Status enum with Done, Failed, Error, Resolved, Insufficient, NoChanges
- Add ThinResult struct in g3-core for semantic thinning data
- Update UiWriter trait with print_thin_result() method
- Refactor context thinning to return ThinResult instead of formatted strings
- Update all callers to use the new centralized formatting
- Session resume/decline messages now use G3Status
- Compaction status messages now use G3Status

This maintains clean separation of concerns: g3-core emits semantic data,
g3-cli handles all terminal formatting and colors.


//! Context compaction logic.
//!
//! This module provides unified compaction functionality used by both
//! manual compaction (`/compact` command) and automatic compaction
//! (when context window reaches capacity during streaming).

use anyhow::Result;
use g3_providers::{CompletionRequest, Message, MessageRole, ProviderRegistry};
use tracing::{debug, error, warn};

use crate::context_window::ContextWindow;
use crate::provider_config;
use crate::ui_writer::UiWriter;

/// Minimum tokens for summary requests to avoid API errors when context is nearly full.
pub const SUMMARY_MIN_TOKENS: u32 = 1000;

/// Result of a compaction operation.
#[derive(Debug)]
pub struct CompactionResult {
    /// Whether compaction succeeded
    pub success: bool,
    /// Characters saved by compaction (if successful)
    pub chars_saved: usize,
    /// Error message (if failed)
    pub error: Option<String>,
}

impl CompactionResult {
    pub fn success(chars_saved: usize) -> Self {
        Self {
            success: true,
            chars_saved,
            error: None,
        }
    }

    pub fn failure(error: String) -> Self {
        Self {
            success: false,
            chars_saved: 0,
            error: Some(error),
        }
    }
}

/// Configuration for a compaction operation.
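///
/// A minimal construction sketch (field values are hypothetical):
///
/// ```ignore
/// let compaction_config = CompactionConfig {
///     provider_name: "anthropic",
///     latest_user_msg: Some("latest user prompt".to_string()),
/// };
/// ```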
#[derive(Debug, Clone)]
pub struct CompactionConfig<'a> {
    /// Provider name (e.g., "anthropic", "openai")
    pub provider_name: &'a str,
    /// Latest user message to preserve after compaction
    pub latest_user_msg: Option<String>,
}

/// Calculate the summary max_tokens with provider-specific caps applied.
///
/// This is the canonical implementation - both manual and auto compaction use this.
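///
/// For example, with a default config (which, as the tests below assume,
/// configures no thinking budget), an `anthropic` provider caps at 10_000:
///
/// ```ignore
/// let config = g3_config::Config::default();
/// assert_eq!(calculate_capped_summary_tokens(&config, "anthropic", 20_000), 10_000);
/// ```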
pub fn calculate_capped_summary_tokens(
    config: &g3_config::Config,
    provider_name: &str,
    base_max_tokens: u32,
) -> u32 {
    // Apply provider-specific caps.
    // For Anthropic with thinking enabled, we need max_tokens > thinking.budget_tokens,
    // so we set a higher cap when thinking is configured.
    let anthropic_cap = match provider_config::get_thinking_budget_tokens(config, provider_name) {
        Some(budget) => (budget + 2000).max(10_000), // At least budget + 2000 for the response
        None => 10_000,
    };
    let capped = match provider_name {
        name if name.starts_with("anthropic") => base_max_tokens.min(anthropic_cap),
        name if name.starts_with("databricks") => base_max_tokens.min(10_000),
        name if name.starts_with("embedded") => base_max_tokens.min(3000),
        _ => base_max_tokens.min(5000),
    };
    // Ensure minimum floor as defense-in-depth
    capped.max(SUMMARY_MIN_TOKENS)
}

/// Check if thinking mode should be disabled for a summary request.
///
/// Anthropic requires: max_tokens > thinking.budget_tokens + 1024
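///
/// Worked example with a hypothetical budget of 8_000 tokens: the minimum
/// viable `max_tokens` is 8_000 + 1024 = 9_024, so a summary cap of 9_000
/// disables thinking while a cap of 10_000 keeps it enabled.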
pub fn should_disable_thinking(
    config: &g3_config::Config,
    provider_name: &str,
    summary_max_tokens: u32,
) -> bool {
    provider_config::get_thinking_budget_tokens(config, provider_name).map_or(false, |budget| {
        let minimum_for_thinking = budget + 1024;
        let should_disable = summary_max_tokens <= minimum_for_thinking;
        if should_disable {
            warn!(
                "Disabling thinking mode for summary: max_tokens ({}) <= minimum_for_thinking ({})",
                summary_max_tokens, minimum_for_thinking
            );
        }
        should_disable
    })
}

/// Build the summary request messages from conversation history.
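///
/// The result is always a two-message request; a sketch of its shape
/// (`context_window` is a hypothetical binding, content abbreviated):
///
/// ```ignore
/// let msgs = build_summary_messages(&context_window);
/// // msgs[0] (System): "You are a helpful assistant that creates concise summaries."
/// // msgs[1] (User):   "Based on this conversation history, <summary prompt>\n\n
/// //                    Conversation:\n<Role: content pairs joined by blank lines>"
/// ```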
pub fn build_summary_messages(context_window: &ContextWindow) -> Vec<Message> {
    let summary_prompt = context_window.create_summary_prompt();
    let conversation_text = context_window
        .conversation_history
        .iter()
        .map(|m| format!("{:?}: {}", m.role, m.content))
        .collect::<Vec<_>>()
        .join("\n\n");
    vec![
        Message::new(
            MessageRole::System,
            "You are a helpful assistant that creates concise summaries.".to_string(),
        ),
        Message::new(
            MessageRole::User,
            format!(
                "Based on this conversation history, {}\n\nConversation:\n{}",
                summary_prompt, conversation_text
            ),
        ),
    ]
}

/// Perform context compaction by summarizing conversation history.
///
/// This is the unified implementation used by both:
/// - `force_compact()` - manual compaction via `/compact` command
/// - `stream_completion_with_tools()` - automatic compaction when context is full
///
/// # Arguments
/// * `providers` - Provider registry to get the LLM provider
/// * `context_window` - Context window to compact
/// * `config` - Application config for provider settings
/// * `compaction_config` - Configuration for this compaction operation
/// * `ui_writer` - UI writer for status messages
///
/// # Returns
/// `CompactionResult` indicating success/failure and chars saved
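///
/// A hedged call-site sketch (the surrounding setup is hypothetical):
///
/// ```ignore
/// let compaction_config = CompactionConfig {
///     provider_name: "anthropic",
///     latest_user_msg: Some(last_user_message.clone()),
/// };
/// let mut thinning_events = Vec::new();
/// let result = perform_compaction(
///     &providers,
///     &mut context_window,
///     &config,
///     compaction_config,
///     &ui_writer,
///     &mut thinning_events,
/// )
/// .await?;
/// if result.success {
///     debug!("Compaction saved {} chars", result.chars_saved);
/// }
/// ```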
pub async fn perform_compaction<W: UiWriter>(
    providers: &ProviderRegistry,
    context_window: &mut ContextWindow,
    config: &g3_config::Config,
    compaction_config: CompactionConfig<'_>,
    ui_writer: &W,
    thinning_events: &mut Vec<usize>,
) -> Result<CompactionResult> {
    let provider_name = compaction_config.provider_name;

    // Apply fallback sequence: thinnify -> skinnify -> hard-coded 5000
    let base_max_tokens = apply_summary_fallback_sequence(
        context_window,
        config,
        provider_name,
        ui_writer,
        thinning_events,
    );
    let summary_max_tokens =
        calculate_capped_summary_tokens(config, provider_name, base_max_tokens);
    debug!(
        "Requesting summary with max_tokens: {} (current usage: {} tokens)",
        summary_max_tokens, context_window.used_tokens
    );

    // Build summary request
    let summary_messages = build_summary_messages(context_window);
    let provider = providers.get(None)?;
    let disable_thinking = should_disable_thinking(config, provider.name(), summary_max_tokens);
    debug!(
        "Creating summary request: max_tokens={}, disable_thinking={}",
        summary_max_tokens, disable_thinking
    );
    let summary_request = CompletionRequest {
        messages: summary_messages,
        max_tokens: Some(summary_max_tokens),
        temperature: Some(provider_config::resolve_temperature(config, provider.name())),
        stream: false,
        tools: None,
        disable_thinking,
    };

    // Execute summary request
    match provider.complete(summary_request).await {
        Ok(summary_response) => {
            // Note: ACD dehydration now happens at the end of each turn in
            // Agent::dehydrate_context(). Compaction just does lossy
            // summarization of the existing stubs + summaries.
            let chars_saved = context_window.reset_with_summary(
                summary_response.content,
                compaction_config.latest_user_msg,
            );
            Ok(CompactionResult::success(chars_saved))
        }
        Err(e) => {
            error!("Failed to create summary: {}", e);
            Ok(CompactionResult::failure(e.to_string()))
        }
    }
}

/// Apply the fallback sequence for summary requests to free up context space.
///
/// Sequence: thinnify -> skinnify -> hard-coded minimum
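///
/// After each step the summary budget is recalculated via
/// `provider_config::calculate_summary_max_tokens`; the first step that clears
/// the `needs_reduction` flag short-circuits, otherwise the hard floor applies.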
fn apply_summary_fallback_sequence<W: UiWriter>(
    context_window: &mut ContextWindow,
    config: &g3_config::Config,
    provider_name: &str,
    ui_writer: &W,
    thinning_events: &mut Vec<usize>,
) -> u32 {
    // Initial validation
    let (mut max_tokens, needs_reduction) = provider_config::calculate_summary_max_tokens(
        config,
        provider_name,
        context_window.total_tokens,
        context_window.used_tokens,
    );
    if !needs_reduction {
        return max_tokens;
    }
    ui_writer.print_context_status(
        "⚠️ Context window too full for thinking budget. Applying fallback sequence...\n",
    );

    // Step 1: Try thinnify (first third of context)
    ui_writer.print_context_status("🥒 Step 1: Trying thinnify...\n");
    let thin_result = context_window.thin_context(None);
    thinning_events.push(thin_result.chars_saved);
    ui_writer.print_thin_result(&thin_result);

    // Recalculate after thinnify
    let (new_max, still_needs_reduction) = provider_config::calculate_summary_max_tokens(
        config,
        provider_name,
        context_window.total_tokens,
        context_window.used_tokens,
    );
    max_tokens = new_max;
    if !still_needs_reduction {
        ui_writer.print_context_status("✅ Thinnify resolved capacity issue. Continuing...\n");
        return max_tokens;
    }

    // Step 2: Try skinnify (entire context)
    ui_writer.print_context_status("🦴 Step 2: Trying skinnify...\n");
    let skinny_result = context_window.thin_context_all(None);
    thinning_events.push(skinny_result.chars_saved);
    ui_writer.print_thin_result(&skinny_result);

    // Recalculate after skinnify
    let (final_max, final_needs_reduction) = provider_config::calculate_summary_max_tokens(
        config,
        provider_name,
        context_window.total_tokens,
        context_window.used_tokens,
    );
    if !final_needs_reduction {
        ui_writer.print_context_status("✅ Skinnify resolved capacity issue. Continuing...\n");
        return final_max;
    }

    // Step 3: Nothing worked, use the hard-coded minimum
    const HARD_CODED_MINIMUM: u32 = 5000;
    ui_writer.print_context_status(&format!(
        "⚠️ Step 3: Context reduction insufficient. Using hard-coded max_tokens={} as last resort...\n",
        HARD_CODED_MINIMUM
    ));
    HARD_CODED_MINIMUM
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_capped_summary_tokens_anthropic() {
        let config = g3_config::Config::default();
        // Without thinking budget, should cap at 10_000
        let result = calculate_capped_summary_tokens(&config, "anthropic", 20_000);
        assert_eq!(result, 10_000);
    }

    #[test]
    fn test_calculate_capped_summary_tokens_databricks() {
        let config = g3_config::Config::default();
        let result = calculate_capped_summary_tokens(&config, "databricks", 20_000);
        assert_eq!(result, 10_000);
    }

    #[test]
    fn test_calculate_capped_summary_tokens_embedded() {
        let config = g3_config::Config::default();
        let result = calculate_capped_summary_tokens(&config, "embedded", 20_000);
        assert_eq!(result, 3000);
    }

    #[test]
    fn test_calculate_capped_summary_tokens_minimum_floor() {
        let config = g3_config::Config::default();
        // Even with very low input, should return at least SUMMARY_MIN_TOKENS
        let result = calculate_capped_summary_tokens(&config, "embedded", 100);
        assert_eq!(result, SUMMARY_MIN_TOKENS);
    }

    #[test]
    fn test_compaction_result_success() {
        let result = CompactionResult::success(5000);
        assert!(result.success);
        assert_eq!(result.chars_saved, 5000);
        assert!(result.error.is_none());
    }

    #[test]
    fn test_compaction_result_failure() {
        let result = CompactionResult::failure("test error".to_string());
        assert!(!result.success);
        assert_eq!(result.chars_saved, 0);
        assert_eq!(result.error, Some("test error".to_string()));
    }
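
    #[test]
    fn test_should_disable_thinking_without_budget() {
        // Assumes, like the cap tests above, that the default config sets no
        // thinking budget; with no budget there is nothing to disable.
        let config = g3_config::Config::default();
        assert!(!should_disable_thinking(&config, "anthropic", SUMMARY_MIN_TOKENS));
    }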
}