g3/crates/g3-core/tests/compaction_test.rs

//! Integration tests for context compaction
//!
//! These tests verify that compaction correctly preserves important messages
//! when summarizing conversation history.

use g3_core::ui_writer::NullUiWriter;
use g3_core::Agent;
use g3_providers::mock::{MockProvider, MockResponse};
use g3_providers::{Message, MessageRole, ProviderRegistry};
use tempfile::TempDir;

/// Helper to create an agent with a mock provider
async fn create_agent_with_mock(provider: MockProvider) -> (Agent<NullUiWriter>, TempDir) {
    let temp_dir = TempDir::new().unwrap();

    // Create a provider registry with the mock provider
    let mut registry = ProviderRegistry::new();
    registry.register(provider);

    // Create a minimal config
    let config = g3_config::Config::default();

    let agent = Agent::new_for_test(config, NullUiWriter, registry)
        .await
        .expect("Failed to create agent");

    (agent, temp_dir)
}

/// Helper to find the last assistant message in history
fn find_last_assistant_message(history: &[Message]) -> Option<&Message> {
    history
        .iter()
        .rev()
        .find(|m| matches!(m.role, MessageRole::Assistant))
}

/// Helper to check if a message contains a substring
fn message_contains(history: &[Message], role: MessageRole, substring: &str) -> bool {
    history.iter().any(|m| {
        std::mem::discriminant(&m.role) == std::mem::discriminant(&role)
            && m.content.contains(substring)
    })
}

// =============================================================================
// Compaction Tests
// =============================================================================

/// Test: Last assistant message is preserved after compaction
///
/// This is the main feature test. After compaction:
/// 1. System prompt is preserved
/// 2. Summary is added as a USER message
/// 3. Last assistant message is preserved as ASSISTANT message
/// 4. Latest user message is preserved
///
/// The order should be:
/// [System] -> [Summary as User] -> [Last Assistant] -> [Latest User]
#[tokio::test]
async fn test_compaction_preserves_last_assistant_message() {
    // Create a provider that will:
    // 1. Respond to initial conversation
    // 2. Provide a summary when compaction is triggered
    let provider = MockProvider::new()
        // Response 1: Initial assistant response
        .with_response(MockResponse::text(
            "I understand you want to build a web server. Let me help you with that.",
        ))
        // Response 2: Second assistant response (this should be preserved after compaction)
        .with_response(MockResponse::text(
            "Here's the implementation plan:\n1. Create main.rs\n2. Add dependencies\n3. Implement routes",
        ))
        // Response 3: This will be the summary response during compaction
        .with_response(MockResponse::text(
            "Summary: User wants to build a web server. We discussed implementation plan with 3 steps.",
        ))
        // Response 4: Post-compaction response
        .with_response(MockResponse::text(
            "Continuing from where we left off...",
        ));

    let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;

    // Build up conversation history
    agent
        .execute_task("I want to build a web server in Rust", None, false)
        .await
        .unwrap();

    agent
        .execute_task("What's the implementation plan?", None, false)
        .await
        .unwrap();

    // Verify the last assistant message before compaction
    let history_before = agent.get_context_window().conversation_history.clone();
    let last_assistant_before = find_last_assistant_message(&history_before)
        .expect("Should have assistant message before compaction");
    assert!(
        last_assistant_before.content.contains("implementation plan"),
        "Last assistant message should contain 'implementation plan', got: {}",
        last_assistant_before.content
    );

    // Trigger manual compaction
    let compaction_result = agent.force_compact().await;
    assert!(
        compaction_result.is_ok(),
        "Compaction should succeed: {:?}",
        compaction_result.err()
    );
    assert!(
        compaction_result.unwrap(),
        "Compaction should return true on success"
    );

    // Verify the context after compaction
    let history_after = &agent.get_context_window().conversation_history;

    // Debug: Print the history after compaction
    eprintln!("\n=== History after compaction ===");
    for (i, msg) in history_after.iter().enumerate() {
        eprintln!(
            "  {}: {:?} - {}...",
            i,
            msg.role,
            msg.content.chars().take(80).collect::<String>()
        );
    }

    // 1. Should have a summary message as USER role
    assert!(
        message_contains(history_after, MessageRole::User, "Summary:"),
        "Should have summary message as User role after compaction"
    );

    // 2. Should preserve the last assistant message as ASSISTANT role
    assert!(
        message_contains(history_after, MessageRole::Assistant, "implementation plan"),
        "Should preserve last assistant message with 'implementation plan' after compaction.\n\
         History: {:?}",
        history_after
            .iter()
            .map(|m| format!("{:?}: {}...", m.role, m.content.chars().take(50).collect::<String>()))
            .collect::<Vec<_>>()
    );

    // 3. Should preserve the latest user message
    assert!(
        message_contains(history_after, MessageRole::User, "implementation plan"),
        "Should preserve latest user message after compaction"
    );
}

/// Test: Compaction with no assistant messages doesn't crash
///
/// Edge case: If there are no assistant messages (e.g., fresh session),
/// compaction should still work without errors.
#[tokio::test]
async fn test_compaction_no_assistant_message() {
    // Provider that returns a summary
    let provider = MockProvider::new()
        .with_response(MockResponse::text("Summary: Empty conversation."));

    let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;

    // Add a user message directly to context without getting a response.
    // This simulates a state where we have user input but no assistant response yet.
    use g3_providers::Message;
    agent.add_message_to_context(Message::new(MessageRole::User, "Hello".to_string()));

    // Trigger compaction - should not crash
    let result = agent.force_compact().await;
    assert!(result.is_ok(), "Compaction should succeed even with no assistant messages");
}

/// Test: Compaction preserves tool-call-only assistant message
///
/// Even if the last assistant message is just a tool call (no prose),
/// it should still be preserved as it contains important context.
#[tokio::test]
async fn test_compaction_preserves_tool_call_only_message() {
    let provider = MockProvider::new()
        // Response 1: Text response
        .with_response(MockResponse::text("Let me check that file."))
        // Response 2: Tool call response (this is the last assistant message)
        .with_response(MockResponse::text_with_json_tool(
            "",  // No prose, just tool call
            "read_file",
            serde_json::json!({"file_path": "important.rs"}),
        ))
        // Response 3: Summary
        .with_response(MockResponse::text("Summary: User asked to check a file."))
        // Response 4: Post-compaction
        .with_response(MockResponse::text("Continuing..."));

    let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;

    // Build conversation
    agent.execute_task("Check the file", None, false).await.unwrap();
    agent.execute_task("Read important.rs", None, false).await.unwrap();

    // Get the last assistant message before compaction
    let history_before = agent.get_context_window().conversation_history.clone();
    let last_assistant_before = find_last_assistant_message(&history_before);

    // Verify we have an assistant message (might contain tool call JSON)
    assert!(
        last_assistant_before.is_some(),
        "Should have an assistant message before compaction"
    );

    // Trigger compaction
    let result = agent.force_compact().await;
    assert!(result.is_ok(), "Compaction should succeed");

    // The assistant message should be preserved as a separate Assistant message
    let history_after = &agent.get_context_window().conversation_history;
    let has_assistant = history_after
        .iter()
        .any(|m| matches!(m.role, MessageRole::Assistant));

    assert!(
        has_assistant,
        "Should have at least one assistant message after compaction (the preserved last one)"
    );
}

/// Test: Compaction with multiple assistant messages preserves only the last one
///
/// When there are multiple assistant messages, only the most recent one
/// should be preserved (in addition to the summary).
#[tokio::test]
async fn test_compaction_preserves_only_last_assistant() {
    let provider = MockProvider::new()
        // Response 1: First assistant response
        .with_response(MockResponse::text("FIRST_RESPONSE: Hello!"))
        // Response 2: Second assistant response
        .with_response(MockResponse::text("SECOND_RESPONSE: How can I help?"))
        // Response 3: Third assistant response (this should be preserved)
        .with_response(MockResponse::text("THIRD_RESPONSE: Let me assist you."))
        // Response 4: Summary
        .with_response(MockResponse::text("Summary: Greeted user three times."))
        // Response 5: Post-compaction
        .with_response(MockResponse::text("Continuing..."));

    let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;

    // Build conversation with multiple exchanges
    agent.execute_task("Hi", None, false).await.unwrap();
    agent.execute_task("What can you do?", None, false).await.unwrap();
    agent.execute_task("Help me", None, false).await.unwrap();

    // Trigger compaction
    let result = agent.force_compact().await;
    assert!(result.is_ok(), "Compaction should succeed");

    let history_after = &agent.get_context_window().conversation_history;

    // Debug output
    eprintln!("\n=== History after compaction ===");
    for (i, msg) in history_after.iter().enumerate() {
        eprintln!(
            "  {}: {:?} - {}",
            i,
            msg.role,
            msg.content.chars().take(100).collect::<String>()
        );
    }

    // Should have THIRD_RESPONSE (the last one) as an Assistant message
    assert!(
        message_contains(history_after, MessageRole::Assistant, "THIRD_RESPONSE"),
        "Should preserve the LAST assistant message (THIRD_RESPONSE)"
    );

    // Should NOT have FIRST_RESPONSE or SECOND_RESPONSE as separate messages
    // (they might be mentioned in the summary, but not as standalone assistant messages)
    let assistant_messages: Vec<_> = history_after
        .iter()
        .filter(|m| matches!(m.role, MessageRole::Assistant))
        .collect();

    // Should have exactly one assistant message (the preserved last one)
    assert_eq!(
        assistant_messages.len(),
        1,
        "Should have exactly one assistant message after compaction (the last one), got {}: {:?}",
        assistant_messages.len(),
        assistant_messages.iter().map(|m| &m.content).collect::<Vec<_>>()
    );
}

/// Test: Compaction without a trailing user message
///
/// Edge case: The last message in history is from the assistant (no user follow-up).
/// The assistant message should still be preserved.
#[tokio::test]
async fn test_compaction_no_trailing_user_message() {
    let provider = MockProvider::new()
        // Response 1: Assistant response (will be the last message)
        .with_response(MockResponse::text("LAST_ASSISTANT_MESSAGE: Here's your answer."))
        // Response 2: Summary
        .with_response(MockResponse::text("Summary: Provided an answer."));

    let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;

    // Single exchange - assistant response is the last message
    agent.execute_task("What's the answer?", None, false).await.unwrap();

    // Verify last message is from assistant
    let history_before = agent.get_context_window().conversation_history.clone();
    let last_msg = history_before.last().expect("Should have messages");
    assert!(
        matches!(last_msg.role, MessageRole::Assistant),
        "Last message should be from assistant before compaction"
    );

    // Trigger compaction
    let result = agent.force_compact().await;
    assert!(result.is_ok(), "Compaction should succeed");

    let history_after = &agent.get_context_window().conversation_history;

    // Should preserve the last assistant message
    assert!(
        message_contains(history_after, MessageRole::Assistant, "LAST_ASSISTANT_MESSAGE"),
        "Should preserve last assistant message even without trailing user message"
    );
}

/// Test: Message order after compaction is correct
///
/// The order should be:
/// [System Prompt] -> [README if present] -> [Summary as User] -> [Last Assistant] -> [Latest User]
#[tokio::test]
async fn test_compaction_message_order() {
    let provider = MockProvider::new()
        .with_response(MockResponse::text("ASSISTANT_TO_PRESERVE: I'll help you."))
        .with_response(MockResponse::text("SUMMARY_CONTENT: User asked for help."));

    let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;

    agent.execute_task("USER_MESSAGE_TO_PRESERVE: Help me", None, false).await.unwrap();

    // Trigger compaction
    agent.force_compact().await.unwrap();

    let history = &agent.get_context_window().conversation_history;

    // Debug output
    eprintln!("\n=== Message order after compaction ===");
    for (i, msg) in history.iter().enumerate() {
        eprintln!(
            "  {}: {:?} - {}",
            i,
            msg.role,
            msg.content.chars().take(60).collect::<String>()
        );
    }

    // Find indices of key messages
    let summary_idx = history
        .iter()
        .position(|m| m.content.contains("SUMMARY_CONTENT"))
        .expect("Should have summary");

    // Summary should be a User message
    assert!(
        matches!(history[summary_idx].role, MessageRole::User),
        "Summary should be a User message, got {:?}",
        history[summary_idx].role
    );

    let assistant_idx = history
        .iter()
        .position(|m| matches!(m.role, MessageRole::Assistant) && m.content.contains("ASSISTANT_TO_PRESERVE"))
        .expect("Should have preserved assistant message");

    let user_idx = history
        .iter()
        .position(|m| matches!(m.role, MessageRole::User) && m.content.contains("USER_MESSAGE_TO_PRESERVE"));

    // Summary should come before assistant message
    assert!(
        summary_idx < assistant_idx,
        "Summary (idx {}) should come before assistant message (idx {})",
        summary_idx,
        assistant_idx
    );

    // If there's a latest user message, it should come after assistant message
    if let Some(user_idx) = user_idx {
        assert!(
            assistant_idx < user_idx,
            "Assistant message (idx {}) should come before latest user message (idx {})",
            assistant_idx,
            user_idx
        );
    }

    // The last message should be the latest user message
    let last_msg = history.last().expect("Should have messages");
    assert!(
        matches!(last_msg.role, MessageRole::User),
        "Last message should be User (the latest user message), got {:?}",
        last_msg.role
    );
}

/// Test: Second compaction doesn't bloat system messages
///
/// After multiple compactions, we should always have the same clean structure:
/// [System Prompt] -> [Summary as User] -> [Last Assistant] -> [Latest User]
///
/// The summary should be replaced, not accumulated.
#[tokio::test]
async fn test_second_compaction_no_bloat() {
    let provider = MockProvider::new()
        // First conversation
        .with_response(MockResponse::text("FIRST_ASSISTANT: I'll help with task 1."))
        .with_response(MockResponse::text("SECOND_ASSISTANT: Done with task 1."))
        // First compaction summary
        .with_response(MockResponse::text("FIRST_SUMMARY: Completed task 1."))
        // Second conversation (after first compaction)
        .with_response(MockResponse::text("THIRD_ASSISTANT: Starting task 2."))
        .with_response(MockResponse::text("FOURTH_ASSISTANT: Done with task 2."))
        // Second compaction summary
        .with_response(MockResponse::text("SECOND_SUMMARY: Completed tasks 1 and 2."));

    let (mut agent, _temp_dir) = create_agent_with_mock(provider).await;

    // === First conversation ===
    agent.execute_task("Start task 1", None, false).await.unwrap();
    agent.execute_task("Finish task 1", None, false).await.unwrap();

    // Count system messages before first compaction
    let system_count_before_first = agent
        .get_context_window()
        .conversation_history
        .iter()
        .filter(|m| matches!(m.role, MessageRole::System))
        .count();
    eprintln!("System messages before first compaction: {}", system_count_before_first);

    // === First compaction ===
    agent.force_compact().await.unwrap();

    let history_after_first = &agent.get_context_window().conversation_history;
    let system_count_after_first = history_after_first
        .iter()
        .filter(|m| matches!(m.role, MessageRole::System))
        .count();

    eprintln!("\n=== After FIRST compaction ===");
    for (i, msg) in history_after_first.iter().enumerate() {
        eprintln!(
            "  {}: {:?} - {}",
            i,
            msg.role,
            msg.content.chars().take(60).collect::<String>()
        );
    }
    eprintln!("System messages after first compaction: {}", system_count_after_first);

    // Verify first compaction structure
    assert!(
        message_contains(history_after_first, MessageRole::User, "FIRST_SUMMARY"),
        "Should have first summary as User message"
    );
    assert!(
        message_contains(history_after_first, MessageRole::Assistant, "SECOND_ASSISTANT"),
        "Should have last assistant from first conversation"
    );

    // === Second conversation (after first compaction) ===
    agent.execute_task("Start task 2", None, false).await.unwrap();
    agent.execute_task("Finish task 2", None, false).await.unwrap();

    // === Second compaction ===
    agent.force_compact().await.unwrap();

    let history_after_second = &agent.get_context_window().conversation_history;
    let system_count_after_second = history_after_second
        .iter()
        .filter(|m| matches!(m.role, MessageRole::System))
        .count();

    eprintln!("\n=== After SECOND compaction ===");
    for (i, msg) in history_after_second.iter().enumerate() {
        eprintln!(
            "  {}: {:?} - {}",
            i,
            msg.role,
            msg.content.chars().take(60).collect::<String>()
        );
    }
    eprintln!("System messages after second compaction: {}", system_count_after_second);

    // === KEY ASSERTIONS ===

    // 1. System message count should NOT increase after second compaction
    assert_eq!(
        system_count_after_first, system_count_after_second,
        "System message count should stay the same after second compaction (no bloat). First: {}, Second: {}",
        system_count_after_first, system_count_after_second
    );

    // 2. Should have the NEW summary (SECOND_SUMMARY), not the old one
    assert!(
        message_contains(history_after_second, MessageRole::User, "SECOND_SUMMARY"),
        "Should have second summary as User message"
    );

    // 3. Should NOT have the old summary anymore
    assert!(
        !message_contains(history_after_second, MessageRole::User, "FIRST_SUMMARY"),
        "Should NOT have first summary anymore - it should be replaced"
    );

    // 4. Should have the last assistant from second conversation
    assert!(
        message_contains(history_after_second, MessageRole::Assistant, "FOURTH_ASSISTANT"),
        "Should have last assistant from second conversation"
    );

    // 5. Should NOT have old assistant messages
    assert!(
        !message_contains(history_after_second, MessageRole::Assistant, "SECOND_ASSISTANT"),
        "Should NOT have assistant from first conversation"
    );

    // 6. Verify the clean structure: System... -> User (summary) -> Assistant -> User
    let non_system_messages: Vec<_> = history_after_second
        .iter()
        .filter(|m| !matches!(m.role, MessageRole::System))
        .collect();

    assert!(
        non_system_messages.len() >= 3,
        "Should have at least 3 non-system messages (summary, assistant, user)"
    );

    // First non-system should be User (summary)
    assert!(
        matches!(non_system_messages[0].role, MessageRole::User),
        "First non-system message should be User (summary), got {:?}",
        non_system_messages[0].role
    );

    // Second non-system should be Assistant
    assert!(
        matches!(non_system_messages[1].role, MessageRole::Assistant),
        "Second non-system message should be Assistant, got {:?}",
        non_system_messages[1].role
    );

    // Third non-system should be User (latest)
    assert!(
        matches!(non_system_messages[2].role, MessageRole::User),
        "Third non-system message should be User (latest), got {:?}",
        non_system_messages[2].role
    );

    eprintln!("\n✅ Second compaction maintains clean structure without bloat!");
}

/// Test: Compaction strips structured tool_calls from preserved assistant message
///
/// Reproduces the exact bug from the h3 session:
/// 1. Agent executes a task that triggers a native tool call (read_file)
/// 2. The assistant message is stored with structured `tool_calls` field
/// 3. Compaction preserves the last assistant message
/// 4. The tool_result message is summarized away
/// 5. Next API call would fail with "tool_use ids were found without tool_result blocks"
///
/// After the fix, compaction strips tool_calls from the preserved assistant message.
#[tokio::test]
async fn test_compaction_strips_structured_tool_calls() {
    use g3_providers::MessageToolCall;

    let provider = MockProvider::new()
        .with_native_tool_calling(true)
        // Response 1: Summary for compaction
        .with_response(MockResponse::text(
            "Summary: User asked to read a file. Assistant read test_file.txt which contained a greeting.",
        ))
        // Response 2: Post-compaction response (this would fail with 400 if tool_calls leaked)
        .with_response(MockResponse::text(
            "Continuing after compaction. What would you like to do next?",
        ));

    let (mut agent, _agent_temp) = create_agent_with_mock(provider).await;

    // Directly build the exact conversation state that triggers the bug:
    // The last assistant message has structured tool_calls, followed by a tool_result,
    // but the LAST message in the conversation is the assistant with tool_calls
    // (simulating the case where compaction happens mid-tool-execution or the
    // last assistant response was a tool call).

    // User asks to read a file
    agent.add_message_to_context(Message::new(
        MessageRole::User,
        "Please read the recognize.rs file".to_string(),
    ));

    // Assistant responds with text + structured tool_call (this will be the LAST assistant message)
    let mut assistant_with_tool = Message::new(
        MessageRole::Assistant,
        "You're right — the recognizer should serve the corpus. Let me research what it takes.".to_string(),
    );
    assistant_with_tool.tool_calls.push(MessageToolCall {
        id: "toolu_01QRFL8vGKDjZZkfHR586Srb".to_string(),
        name: "read_file".to_string(),
        input: serde_json::json!({"file_path": "/tmp/recognize.rs"}),
    });
    agent.add_message_to_context(assistant_with_tool);

    // Tool result follows
    let mut tool_result = Message::new(
        MessageRole::User,
        "Tool result: pub fn recognize(lexemes: &[Lexeme]) -> Result<RecognizedStream> { ... }".to_string(),
    );
    tool_result.tool_result_id = Some("toolu_01QRFL8vGKDjZZkfHR586Srb".to_string());
    agent.add_message_to_context(tool_result);

    // Verify the pre-compaction state
    let history_before = agent.get_context_window().conversation_history.clone();
    eprintln!("\n=== Before compaction ===");
    for (i, msg) in history_before.iter().enumerate() {
        eprintln!(
            "  {}: {:?} tool_calls={} tool_result_id={:?} content={}...",
            i,
            msg.role,
            msg.tool_calls.len(),
            msg.tool_result_id,
            msg.content.chars().take(60).collect::<String>()
        );
    }

    // Verify: last assistant message has tool_calls
    let last_assistant = history_before.iter().rev()
        .find(|m| matches!(m.role, MessageRole::Assistant))
        .expect("Should have assistant message");
    assert_eq!(last_assistant.tool_calls.len(), 1, "Last assistant should have 1 tool_call");
    assert_eq!(last_assistant.tool_calls[0].id, "toolu_01QRFL8vGKDjZZkfHR586Srb");

    // Trigger compaction
    let compact_result = agent.force_compact().await;
    assert!(compact_result.is_ok(), "Compaction should succeed: {:?}", compact_result.err());

    // Verify: no assistant messages with tool_calls after compaction
    let history_after = &agent.get_context_window().conversation_history;

    eprintln!("\n=== After compaction ===");
    for (i, msg) in history_after.iter().enumerate() {
        eprintln!(
            "  {}: {:?} tool_calls={} tool_result_id={:?} content={}...",
            i,
            msg.role,
            msg.tool_calls.len(),
            msg.tool_result_id,
            msg.content.chars().take(60).collect::<String>()
        );
    }

    let orphaned_tool_calls: Vec<_> = history_after
        .iter()
        .enumerate()
        .filter(|(_, m)| matches!(m.role, MessageRole::Assistant) && !m.tool_calls.is_empty())
        .collect();

    assert!(
        orphaned_tool_calls.is_empty(),
        "After compaction, no assistant messages should have tool_calls. Found {} orphaned: {:?}",
        orphaned_tool_calls.len(),
        orphaned_tool_calls.iter().map(|(i, m)| {
            format!("msg[{}]: {} tool_calls", i, m.tool_calls.len())
        }).collect::<Vec<_>>()
    );

    // Verify the preserved assistant message has text content but no tool_calls
    let preserved_assistant = history_after.iter()
        .find(|m| matches!(m.role, MessageRole::Assistant))
        .expect("Should have preserved assistant message after compaction");
    assert!(preserved_assistant.tool_calls.is_empty(),
        "Preserved assistant message should have tool_calls stripped");
    assert!(preserved_assistant.content.contains("recognizer should serve the corpus"),
        "Preserved assistant message should retain text content");

    // Execute another task post-compaction to verify the conversation is valid
    // (this would fail with Anthropic 400 error if tool_calls leaked through)
    let post_compact_result = agent.execute_task("What should we do next?", None, false).await;
    assert!(
        post_compact_result.is_ok(),
        "Post-compaction task should succeed (no orphaned tool_use blocks): {:?}",
        post_compact_result.err()
    );

    eprintln!("\n✅ Compaction correctly strips structured tool_calls - no orphaned tool_use blocks!");
}