Add status line showing loaded context in agent mode

Shows checkmarks for README, AGENTS.md, and Memory if loaded,
or dots if not found. Displayed below the working directory line.
Dhanji R. Prasanna
2026-01-11 17:13:32 +05:30
parent 2b87a89617
commit f83ae7fd39
3 changed files with 342 additions and 6 deletions
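
The status line itself lives in the other changed files; the file shown below adds the compaction module. As a rough sketch of the behavior the commit message describes (hypothetical names, not code from this commit):

```rust
// Hypothetical sketch of the status line described above: a check mark for each
// context source that loaded, and a dot for each that was not found.
fn context_status_line(readme: bool, agents_md: bool, memory: bool) -> String {
    let mark = |loaded: bool, label: &str| {
        if loaded { format!("✓ {label}") } else { format!("· {label}") }
    };
    format!(
        "{}  {}  {}",
        mark(readme, "README"),
        mark(agents_md, "AGENTS.md"),
        mark(memory, "Memory")
    )
}
```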

@@ -0,0 +1,328 @@
//! Context compaction logic.
//!
//! This module provides unified compaction functionality used by both
//! manual compaction (`/compact` command) and automatic compaction
//! (when the context window reaches capacity during streaming).

use anyhow::Result;
use g3_providers::{CompletionRequest, Message, MessageRole, ProviderRegistry};
use tracing::{debug, error, warn};

use crate::context_window::ContextWindow;
use crate::provider_config;
use crate::ui_writer::UiWriter;

/// Minimum tokens for summary requests to avoid API errors when context is nearly full.
pub const SUMMARY_MIN_TOKENS: u32 = 1000;

/// Result of a compaction operation.
#[derive(Debug)]
pub struct CompactionResult {
    /// Whether compaction succeeded
    pub success: bool,
    /// Characters saved by compaction (if successful)
    pub chars_saved: usize,
    /// Error message (if failed)
    pub error: Option<String>,
}

impl CompactionResult {
    pub fn success(chars_saved: usize) -> Self {
        Self {
            success: true,
            chars_saved,
            error: None,
        }
    }

    pub fn failure(error: String) -> Self {
        Self {
            success: false,
            chars_saved: 0,
            error: Some(error),
        }
    }
}

/// Configuration for a compaction operation.
#[derive(Debug, Clone)]
pub struct CompactionConfig<'a> {
    /// Provider name (e.g., "anthropic", "openai")
    pub provider_name: &'a str,
    /// Latest user message to preserve after compaction
    pub latest_user_msg: Option<String>,
}

/// Calculate the summary max_tokens with provider-specific caps applied.
///
/// This is the canonical implementation; both manual and auto compaction use it.
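///
/// For example (illustrative; mirrors the unit tests at the bottom of this file):
///
/// ```ignore
/// let config = g3_config::Config::default();
/// assert_eq!(calculate_capped_summary_tokens(&config, "databricks", 20_000), 10_000);
/// assert_eq!(calculate_capped_summary_tokens(&config, "embedded", 20_000), 3000);
/// ```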
pub fn calculate_capped_summary_tokens(
    config: &g3_config::Config,
    provider_name: &str,
    base_max_tokens: u32,
) -> u32 {
    // Apply provider-specific caps
    // For Anthropic with thinking enabled, we need max_tokens > thinking.budget_tokens
    // So we set a higher cap when thinking is configured
    let anthropic_cap = match provider_config::get_thinking_budget_tokens(config, provider_name) {
        Some(budget) => (budget + 2000).max(10_000), // At least budget + 2000 for response
        None => 10_000,
    };

    let capped = match provider_name {
        name if name.starts_with("anthropic") => base_max_tokens.min(anthropic_cap),
        name if name.starts_with("databricks") => base_max_tokens.min(10_000),
        name if name.starts_with("embedded") => base_max_tokens.min(3000),
        _ => base_max_tokens.min(5000),
    };

    // Ensure minimum floor as defense-in-depth
    capped.max(SUMMARY_MIN_TOKENS)
}

/// Check if thinking mode should be disabled for a summary request.
///
/// Anthropic requires: max_tokens > thinking.budget_tokens + 1024
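///
/// For example (illustrative numbers): with a configured thinking budget of 8_000
/// tokens the minimum is 9_024, so a summary request capped at `max_tokens = 9_000`
/// would have thinking disabled, while `max_tokens = 10_000` would keep it enabled.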
pub fn should_disable_thinking(
    config: &g3_config::Config,
    provider_name: &str,
    summary_max_tokens: u32,
) -> bool {
    provider_config::get_thinking_budget_tokens(config, provider_name).map_or(false, |budget| {
        let minimum_for_thinking = budget + 1024;
        let should_disable = summary_max_tokens <= minimum_for_thinking;
        if should_disable {
            warn!(
                "Disabling thinking mode for summary: max_tokens ({}) <= minimum_for_thinking ({})",
                summary_max_tokens, minimum_for_thinking
            );
        }
        should_disable
    })
}

/// Build the summary request messages from conversation history.
pub fn build_summary_messages(context_window: &ContextWindow) -> Vec<Message> {
    let summary_prompt = context_window.create_summary_prompt();
    let conversation_text = context_window
        .conversation_history
        .iter()
        .map(|m| format!("{:?}: {}", m.role, m.content))
        .collect::<Vec<_>>()
        .join("\n\n");

    vec![
        Message::new(
            MessageRole::System,
            "You are a helpful assistant that creates concise summaries.".to_string(),
        ),
        Message::new(
            MessageRole::User,
            format!(
                "Based on this conversation history, {}\n\nConversation:\n{}",
                summary_prompt, conversation_text
            ),
        ),
    ]
}

/// Perform context compaction by summarizing conversation history.
///
/// This is the unified implementation used by both:
/// - `force_compact()` - manual compaction via `/compact` command
/// - `stream_completion_with_tools()` - automatic compaction when context is full
///
/// # Arguments
/// * `providers` - Provider registry to get the LLM provider
/// * `context_window` - Context window to compact
/// * `config` - Application config for provider settings
/// * `compaction_config` - Configuration for this compaction operation
/// * `ui_writer` - UI writer for status messages
/// * `thinning_events` - Accumulates the character counts saved by any context-thinning steps
///
/// # Returns
/// `CompactionResult` indicating success/failure and chars saved
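///
/// # Example
///
/// A minimal call sketch (assumes an already-configured registry, context window,
/// config, and UI writer; not compiled as a doc test):
///
/// ```ignore
/// let mut thinning_events = Vec::new();
/// let result = perform_compaction(
///     &providers,
///     &mut context_window,
///     &config,
///     CompactionConfig { provider_name: "anthropic", latest_user_msg: None },
///     &ui_writer,
///     &mut thinning_events,
/// )
/// .await?;
/// if result.success {
///     println!("Compaction saved {} characters", result.chars_saved);
/// }
/// ```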
pub async fn perform_compaction<W: UiWriter>(
    providers: &ProviderRegistry,
    context_window: &mut ContextWindow,
    config: &g3_config::Config,
    compaction_config: CompactionConfig<'_>,
    ui_writer: &W,
    thinning_events: &mut Vec<usize>,
) -> Result<CompactionResult> {
    let provider_name = compaction_config.provider_name;

    // Apply fallback sequence: thinnify -> skinnify -> hard-coded 5000
    let base_max_tokens = apply_summary_fallback_sequence(
        context_window,
        config,
        provider_name,
        ui_writer,
        thinning_events,
    );

    let summary_max_tokens = calculate_capped_summary_tokens(config, provider_name, base_max_tokens);

    debug!(
        "Requesting summary with max_tokens: {} (current usage: {} tokens)",
        summary_max_tokens, context_window.used_tokens
    );

    // Build summary request
    let summary_messages = build_summary_messages(context_window);
    let provider = providers.get(None)?;
    let disable_thinking = should_disable_thinking(config, provider.name(), summary_max_tokens);

    debug!(
        "Creating summary request: max_tokens={}, disable_thinking={}",
        summary_max_tokens, disable_thinking
    );

    let summary_request = CompletionRequest {
        messages: summary_messages,
        max_tokens: Some(summary_max_tokens),
        temperature: Some(provider_config::resolve_temperature(config, provider.name())),
        stream: false,
        tools: None,
        disable_thinking,
    };

    // Execute summary request
    match provider.complete(summary_request).await {
        Ok(summary_response) => {
            let chars_saved = context_window.reset_with_summary(
                summary_response.content,
                compaction_config.latest_user_msg,
            );
            Ok(CompactionResult::success(chars_saved))
        }
        Err(e) => {
            error!("Failed to create summary: {}", e);
            Ok(CompactionResult::failure(e.to_string()))
        }
    }
}

/// Apply the fallback sequence for summary requests to free up context space.
///
/// Sequence: thinnify -> skinnify -> hard-coded minimum
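///
/// As the steps below note, thinnify works on the first third of the context,
/// skinnify works on the entire context, and the final fallback pins `max_tokens`
/// to a hard-coded 5000.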
fn apply_summary_fallback_sequence<W: UiWriter>(
    context_window: &mut ContextWindow,
    config: &g3_config::Config,
    provider_name: &str,
    ui_writer: &W,
    thinning_events: &mut Vec<usize>,
) -> u32 {
    // Initial validation
    let (mut max_tokens, needs_reduction) = provider_config::calculate_summary_max_tokens(
        config,
        provider_name,
        context_window.total_tokens,
        context_window.used_tokens,
    );

    if !needs_reduction {
        return max_tokens;
    }

    ui_writer.print_context_status(
        "⚠️ Context window too full for thinking budget. Applying fallback sequence...\n",
    );

    // Step 1: Try thinnify (first third of context)
    ui_writer.print_context_status("🥒 Step 1: Trying thinnify...\n");
    let (thin_msg, chars_saved) = context_window.thin_context(None);
    thinning_events.push(chars_saved);
    ui_writer.print_context_thinning(&thin_msg);

    // Recalculate after thinnify
    let (new_max, still_needs_reduction) = provider_config::calculate_summary_max_tokens(
        config,
        provider_name,
        context_window.total_tokens,
        context_window.used_tokens,
    );
    max_tokens = new_max;

    if !still_needs_reduction {
        ui_writer.print_context_status("✅ Thinnify resolved capacity issue. Continuing...\n");
        return max_tokens;
    }

    // Step 2: Try skinnify (entire context)
    ui_writer.print_context_status("🦴 Step 2: Trying skinnify...\n");
    let (skinny_msg, chars_saved) = context_window.thin_context_all(None);
    thinning_events.push(chars_saved);
    ui_writer.print_context_thinning(&skinny_msg);

    // Recalculate after skinnify
    let (final_max, final_needs_reduction) = provider_config::calculate_summary_max_tokens(
        config,
        provider_name,
        context_window.total_tokens,
        context_window.used_tokens,
    );

    if !final_needs_reduction {
        ui_writer.print_context_status("✅ Skinnify resolved capacity issue. Continuing...\n");
        return final_max;
    }

    // Step 3: Nothing worked, use hard-coded minimum
    const HARD_CODED_MINIMUM: u32 = 5000;
    ui_writer.print_context_status(&format!(
        "⚠️ Step 3: Context reduction insufficient. Using hard-coded max_tokens={} as last resort...\n",
        HARD_CODED_MINIMUM
    ));
    HARD_CODED_MINIMUM
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_calculate_capped_summary_tokens_anthropic() {
        let config = g3_config::Config::default();
        // Without thinking budget, should cap at 10_000
        let result = calculate_capped_summary_tokens(&config, "anthropic", 20_000);
        assert_eq!(result, 10_000);
    }

    #[test]
    fn test_calculate_capped_summary_tokens_databricks() {
        let config = g3_config::Config::default();
        let result = calculate_capped_summary_tokens(&config, "databricks", 20_000);
        assert_eq!(result, 10_000);
    }

    #[test]
    fn test_calculate_capped_summary_tokens_embedded() {
        let config = g3_config::Config::default();
        let result = calculate_capped_summary_tokens(&config, "embedded", 20_000);
        assert_eq!(result, 3000);
    }

    #[test]
    fn test_calculate_capped_summary_tokens_minimum_floor() {
        let config = g3_config::Config::default();
        // Even with very low input, should return at least SUMMARY_MIN_TOKENS
        let result = calculate_capped_summary_tokens(&config, "embedded", 100);
        assert_eq!(result, SUMMARY_MIN_TOKENS);
    }

    #[test]
    fn test_compaction_result_success() {
        let result = CompactionResult::success(5000);
        assert!(result.success);
        assert_eq!(result.chars_saved, 5000);
        assert!(result.error.is_none());
    }

    #[test]
    fn test_compaction_result_failure() {
        let result = CompactionResult::failure("test error".to_string());
        assert!(!result.success);
        assert_eq!(result.chars_saved, 0);
        assert_eq!(result.error, Some("test error".to_string()));
    }
}