diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs
index e9fa0fd..6331507 100644
--- a/crates/g3-cli/src/lib.rs
+++ b/crates/g3-cli/src/lib.rs
@@ -562,20 +562,27 @@ async fn run_agent_mode(
     let system_prompt = get_agent_system_prompt(&agent_prompt, true);
 
     // Load AGENTS.md, README, and memory - same as normal mode
-    let agents_content = read_agents_config(&workspace_dir);
-    let readme_content = read_project_readme(&workspace_dir);
-    let memory_content = read_project_memory(&workspace_dir);
+    let agents_content_opt = read_agents_config(&workspace_dir);
+    let readme_content_opt = read_project_readme(&workspace_dir);
+    let memory_content_opt = read_project_memory(&workspace_dir);
+
+    // Show what was loaded
+    let readme_status = if readme_content_opt.is_some() { "✓" } else { "·" };
+    let agents_status = if agents_content_opt.is_some() { "✓" } else { "·" };
+    let memory_status = if memory_content_opt.is_some() { "✓" } else { "·" };
+    output.print(&format!("  {} README | {} AGENTS.md | {} Memory",
+        readme_status, agents_status, memory_status));
 
     // Combine all content for the agent's context
     let combined_content = {
         let mut parts = Vec::new();
-        if let Some(agents) = agents_content {
+        if let Some(agents) = agents_content_opt {
             parts.push(agents);
         }
-        if let Some(readme) = readme_content {
+        if let Some(readme) = readme_content_opt {
             parts.push(readme);
         }
-        if let Some(memory) = memory_content {
+        if let Some(memory) = memory_content_opt {
             parts.push(memory);
         }
         if parts.is_empty() {
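Note: the three status markers above repeat the same `is_some()` check; a possible follow-up would factor it out. A minimal sketch (hypothetical; `status_mark` is not part of this diff):

```rust
/// Map an optionally loaded context source to the ✓/· marker printed above.
fn status_mark<T>(content: &Option<T>) -> &'static str {
    if content.is_some() { "✓" } else { "·" }
}

// Usage: output.print(&format!("  {} README", status_mark(&readme_content_opt)));
```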
diff --git a/crates/g3-core/src/compaction.rs b/crates/g3-core/src/compaction.rs
new file mode 100644
index 0000000..a998b8c
--- /dev/null
+++ b/crates/g3-core/src/compaction.rs
@@ -0,0 +1,328 @@
+//! Context compaction logic.
+//!
+//! This module provides unified compaction functionality used by both
+//! manual compaction (the `/compact` command) and automatic compaction
+//! (when the context window reaches capacity during streaming).
+
+use anyhow::Result;
+use g3_providers::{CompletionRequest, Message, MessageRole, ProviderRegistry};
+use tracing::{debug, error, warn};
+
+use crate::context_window::ContextWindow;
+use crate::provider_config;
+use crate::ui_writer::UiWriter;
+
+/// Minimum tokens for summary requests to avoid API errors when context is nearly full.
+pub const SUMMARY_MIN_TOKENS: u32 = 1000;
+
+/// Result of a compaction operation.
+#[derive(Debug)]
+pub struct CompactionResult {
+    /// Whether compaction succeeded
+    pub success: bool,
+    /// Characters saved by compaction (if successful)
+    pub chars_saved: usize,
+    /// Error message (if failed)
+    pub error: Option<String>,
+}
+
+impl CompactionResult {
+    pub fn success(chars_saved: usize) -> Self {
+        Self {
+            success: true,
+            chars_saved,
+            error: None,
+        }
+    }
+
+    pub fn failure(error: String) -> Self {
+        Self {
+            success: false,
+            chars_saved: 0,
+            error: Some(error),
+        }
+    }
+}
+
+/// Configuration for a compaction operation.
+#[derive(Debug, Clone)]
+pub struct CompactionConfig<'a> {
+    /// Provider name (e.g., "anthropic", "openai")
+    pub provider_name: &'a str,
+    /// Latest user message to preserve after compaction
+    pub latest_user_msg: Option<String>,
+}
+
+/// Calculate the summary max_tokens with provider-specific caps applied.
+///
+/// This is the canonical implementation - both manual and auto compaction use this.
+pub fn calculate_capped_summary_tokens(
+    config: &g3_config::Config,
+    provider_name: &str,
+    base_max_tokens: u32,
+) -> u32 {
+    // Apply provider-specific caps.
+    // For Anthropic with thinking enabled, we need max_tokens > thinking.budget_tokens,
+    // so we set a higher cap when thinking is configured.
+    let anthropic_cap = match provider_config::get_thinking_budget_tokens(config, provider_name) {
+        Some(budget) => (budget + 2000).max(10_000), // at least budget + 2000 for the response
+        None => 10_000,
+    };
+
+    let capped = match provider_name {
+        name if name.starts_with("anthropic") => base_max_tokens.min(anthropic_cap),
+        name if name.starts_with("databricks") => base_max_tokens.min(10_000),
+        name if name.starts_with("embedded") => base_max_tokens.min(3000),
+        _ => base_max_tokens.min(5000),
+    };
+
+    // Ensure the minimum floor as defense-in-depth
+    capped.max(SUMMARY_MIN_TOKENS)
+}
+
+/// Check if thinking mode should be disabled for a summary request.
+///
+/// Anthropic requires: max_tokens > thinking.budget_tokens + 1024
+pub fn should_disable_thinking(
+    config: &g3_config::Config,
+    provider_name: &str,
+    summary_max_tokens: u32,
+) -> bool {
+    provider_config::get_thinking_budget_tokens(config, provider_name).map_or(false, |budget| {
+        let minimum_for_thinking = budget + 1024;
+        let should_disable = summary_max_tokens <= minimum_for_thinking;
+        if should_disable {
+            warn!(
+                "Disabling thinking mode for summary: max_tokens ({}) <= minimum_for_thinking ({})",
+                summary_max_tokens, minimum_for_thinking
+            );
+        }
+        should_disable
+    })
+}
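A worked example of the cap and thinking-mode arithmetic, written as a free-standing test; the 8,000-token thinking budget is an assumed value, not a g3 default:

```rust
#[test]
fn summary_cap_arithmetic_with_assumed_budget() {
    // Assumed thinking budget (hypothetical value).
    let budget: u32 = 8_000;

    // calculate_capped_summary_tokens: the Anthropic cap is at least
    // budget + 2000, with a 10_000 floor.
    let anthropic_cap = (budget + 2000).max(10_000); // = 10_000
    assert_eq!(20_000u32.min(anthropic_cap), 10_000); // a 20_000 request is capped

    // should_disable_thinking: thinking needs max_tokens > budget + 1024.
    let minimum_for_thinking = budget + 1024; // = 9_024
    assert!(10_000 > minimum_for_thinking); // 10_000 keeps thinking enabled
    assert!(9_000 <= minimum_for_thinking); // 9_000 would disable thinking
}
```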
+/// Build the summary request messages from conversation history.
+pub fn build_summary_messages(context_window: &ContextWindow) -> Vec<Message> {
+    let summary_prompt = context_window.create_summary_prompt();
+
+    let conversation_text = context_window
+        .conversation_history
+        .iter()
+        .map(|m| format!("{:?}: {}", m.role, m.content))
+        .collect::<Vec<_>>()
+        .join("\n\n");
+
+    vec![
+        Message::new(
+            MessageRole::System,
+            "You are a helpful assistant that creates concise summaries.".to_string(),
+        ),
+        Message::new(
+            MessageRole::User,
+            format!(
+                "Based on this conversation history, {}\n\nConversation:\n{}",
+                summary_prompt, conversation_text
+            ),
+        ),
+    ]
+}
+
+/// Perform context compaction by summarizing conversation history.
+///
+/// This is the unified implementation used by both:
+/// - `force_compact()` - manual compaction via the `/compact` command
+/// - `stream_completion_with_tools()` - automatic compaction when the context is full
+///
+/// # Arguments
+/// * `providers` - Provider registry to get the LLM provider
+/// * `context_window` - Context window to compact
+/// * `config` - Application config for provider settings
+/// * `compaction_config` - Configuration for this compaction operation
+/// * `ui_writer` - UI writer for status messages
+/// * `thinning_events` - Accumulates character counts saved by context thinning
+///
+/// # Returns
+/// `CompactionResult` indicating success/failure and chars saved
+pub async fn perform_compaction<W: UiWriter>(
+    providers: &ProviderRegistry,
+    context_window: &mut ContextWindow,
+    config: &g3_config::Config,
+    compaction_config: CompactionConfig<'_>,
+    ui_writer: &W,
+    thinning_events: &mut Vec<usize>,
+) -> Result<CompactionResult> {
+    let provider_name = compaction_config.provider_name;
+
+    // Apply the fallback sequence: thinnify -> skinnify -> hard-coded 5000
+    let base_max_tokens = apply_summary_fallback_sequence(
+        context_window,
+        config,
+        provider_name,
+        ui_writer,
+        thinning_events,
+    );
+
+    let summary_max_tokens =
+        calculate_capped_summary_tokens(config, provider_name, base_max_tokens);
+
+    debug!(
+        "Requesting summary with max_tokens: {} (current usage: {} tokens)",
+        summary_max_tokens, context_window.used_tokens
+    );
+
+    // Build the summary request
+    let summary_messages = build_summary_messages(context_window);
+    let provider = providers.get(None)?;
+
+    let disable_thinking = should_disable_thinking(config, provider.name(), summary_max_tokens);
+
+    debug!(
+        "Creating summary request: max_tokens={}, disable_thinking={}",
+        summary_max_tokens, disable_thinking
+    );
+
+    let summary_request = CompletionRequest {
+        messages: summary_messages,
+        max_tokens: Some(summary_max_tokens),
+        temperature: Some(provider_config::resolve_temperature(config, provider.name())),
+        stream: false,
+        tools: None,
+        disable_thinking,
+    };
+
+    // Execute the summary request
+    match provider.complete(summary_request).await {
+        Ok(summary_response) => {
+            let chars_saved = context_window.reset_with_summary(
+                summary_response.content,
+                compaction_config.latest_user_msg,
+            );
+            Ok(CompactionResult::success(chars_saved))
+        }
+        Err(e) => {
+            error!("Failed to create summary: {}", e);
+            Ok(CompactionResult::failure(e.to_string()))
+        }
+    }
+}
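A minimal sketch of a call site, such as the `/compact` handler (assumed to run inside an async fn returning `anyhow::Result`, with `providers`, `context_window`, `config`, and `ui` in scope):

```rust
let mut thinning_events: Vec<usize> = Vec::new();
let result = compaction::perform_compaction(
    &providers,
    &mut context_window,
    &config,
    compaction::CompactionConfig {
        provider_name: "anthropic", // example value; pass the active provider's name
        latest_user_msg: None,      // or Some(msg) to re-append the latest user message
    },
    &ui,
    &mut thinning_events,
)
.await?;

if result.success {
    ui.print_context_status(&format!("Compacted; saved {} chars\n", result.chars_saved));
} else if let Some(err) = result.error {
    ui.print_context_status(&format!("Compaction failed: {}\n", err));
}
```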
+/// Apply the fallback sequence for summary requests to free up context space.
+///
+/// Sequence: thinnify -> skinnify -> hard-coded minimum
+fn apply_summary_fallback_sequence<W: UiWriter>(
+    context_window: &mut ContextWindow,
+    config: &g3_config::Config,
+    provider_name: &str,
+    ui_writer: &W,
+    thinning_events: &mut Vec<usize>,
+) -> u32 {
+    // Initial validation
+    let (mut max_tokens, needs_reduction) = provider_config::calculate_summary_max_tokens(
+        config,
+        provider_name,
+        context_window.total_tokens,
+        context_window.used_tokens,
+    );
+
+    if !needs_reduction {
+        return max_tokens;
+    }
+
+    ui_writer.print_context_status(
+        "⚠️ Context window too full for thinking budget. Applying fallback sequence...\n",
+    );
+
+    // Step 1: Try thinnify (first third of context)
+    ui_writer.print_context_status("🥒 Step 1: Trying thinnify...\n");
+    let (thin_msg, chars_saved) = context_window.thin_context(None);
+    thinning_events.push(chars_saved);
+    ui_writer.print_context_thinning(&thin_msg);
+
+    // Recalculate after thinnify
+    let (new_max, still_needs_reduction) = provider_config::calculate_summary_max_tokens(
+        config,
+        provider_name,
+        context_window.total_tokens,
+        context_window.used_tokens,
+    );
+    max_tokens = new_max;
+    if !still_needs_reduction {
+        ui_writer.print_context_status("✅ Thinnify resolved capacity issue. Continuing...\n");
+        return max_tokens;
+    }
+
+    // Step 2: Try skinnify (entire context)
+    ui_writer.print_context_status("🦴 Step 2: Trying skinnify...\n");
+    let (skinny_msg, chars_saved) = context_window.thin_context_all(None);
+    thinning_events.push(chars_saved);
+    ui_writer.print_context_thinning(&skinny_msg);
+
+    // Recalculate after skinnify
+    let (final_max, final_needs_reduction) = provider_config::calculate_summary_max_tokens(
+        config,
+        provider_name,
+        context_window.total_tokens,
+        context_window.used_tokens,
+    );
+    if !final_needs_reduction {
+        ui_writer.print_context_status("✅ Skinnify resolved capacity issue. Continuing...\n");
+        return final_max;
+    }
+
+    // Step 3: Nothing worked; use the hard-coded minimum
+    const HARD_CODED_MINIMUM: u32 = 5000;
+    ui_writer.print_context_status(&format!(
+        "⚠️ Step 3: Context reduction insufficient. Using hard-coded max_tokens={} as a last resort...\n",
+        HARD_CODED_MINIMUM
+    ));
+    HARD_CODED_MINIMUM
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_calculate_capped_summary_tokens_anthropic() {
+        let config = g3_config::Config::default();
+        // Without a thinking budget, this should cap at 10_000
+        let result = calculate_capped_summary_tokens(&config, "anthropic", 20_000);
+        assert_eq!(result, 10_000);
+    }
+
+    #[test]
+    fn test_calculate_capped_summary_tokens_databricks() {
+        let config = g3_config::Config::default();
+        let result = calculate_capped_summary_tokens(&config, "databricks", 20_000);
+        assert_eq!(result, 10_000);
+    }
+
+    #[test]
+    fn test_calculate_capped_summary_tokens_embedded() {
+        let config = g3_config::Config::default();
+        let result = calculate_capped_summary_tokens(&config, "embedded", 20_000);
+        assert_eq!(result, 3000);
+    }
+
+    #[test]
+    fn test_calculate_capped_summary_tokens_minimum_floor() {
+        let config = g3_config::Config::default();
+        // Even with very low input, this should return at least SUMMARY_MIN_TOKENS
+        let result = calculate_capped_summary_tokens(&config, "embedded", 100);
+        assert_eq!(result, SUMMARY_MIN_TOKENS);
+    }
+
+    #[test]
+    fn test_compaction_result_success() {
+        let result = CompactionResult::success(5000);
+        assert!(result.success);
+        assert_eq!(result.chars_saved, 5000);
+        assert!(result.error.is_none());
+    }
+
+    #[test]
+    fn test_compaction_result_failure() {
+        let result = CompactionResult::failure("test error".to_string());
+        assert!(!result.success);
+        assert_eq!(result.chars_saved, 0);
+        assert_eq!(result.error, Some("test error".to_string()));
+    }
+}
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index 428e03d..8aa97b6 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -1,5 +1,6 @@
 pub mod context_window;
 pub mod background_process;
+pub mod compaction;
 pub mod code_search;
 pub mod error_handling;
 pub mod feedback_extraction;
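Finally, a worked walkthrough of the early-exit logic in `apply_summary_fallback_sequence`, using assumed numbers; the exact free-space formula lives in `provider_config::calculate_summary_max_tokens`, which this sketch only approximates:

```rust
#[test]
fn fallback_sequence_walkthrough_with_assumed_numbers() {
    // Assumed: a 200k-token window that is 97.5% full, and an 8_000-token
    // thinking budget (hypothetical values, not g3 defaults).
    let total_tokens: u32 = 200_000;
    let mut used_tokens: u32 = 195_000;
    // Rough space needed: thinking budget + Anthropic's 1024 headroom
    // + the 1_000-token summary floor.
    let needed: u32 = 8_000 + 1_024 + 1_000;

    // Step 0: only 5_000 tokens free, so a reduction is needed.
    assert!(total_tokens - used_tokens < needed);

    // Step 1: thinnify drops roughly the first third of the context;
    // suppose usage falls to 150_000, freeing 50_000 tokens.
    used_tokens = 150_000;
    assert!(total_tokens - used_tokens >= needed); // capacity restored, exit early

    // Steps 2 and 3 (skinnify, then max_tokens = 5_000) are only reached
    // when the earlier reductions are insufficient.
}
```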