//! Integration tests for context compaction //! //! These tests verify that compaction correctly preserves important messages //! when summarizing conversation history. use g3_core::ui_writer::NullUiWriter; use g3_core::Agent; use g3_providers::mock::{MockProvider, MockResponse}; use g3_providers::{Message, MessageRole, ProviderRegistry}; use tempfile::TempDir; /// Helper to create an agent with a mock provider async fn create_agent_with_mock(provider: MockProvider) -> (Agent, TempDir) { let temp_dir = TempDir::new().unwrap(); // Create a provider registry with the mock provider let mut registry = ProviderRegistry::new(); registry.register(provider); // Create a minimal config let config = g3_config::Config::default(); let agent = Agent::new_for_test(config, NullUiWriter, registry) .await .expect("Failed to create agent"); (agent, temp_dir) } /// Helper to find the last assistant message in history fn find_last_assistant_message(history: &[Message]) -> Option<&Message> { history .iter() .rev() .find(|m| matches!(m.role, MessageRole::Assistant)) } /// Helper to check if a message contains a substring fn message_contains(history: &[Message], role: MessageRole, substring: &str) -> bool { history.iter().any(|m| { std::mem::discriminant(&m.role) == std::mem::discriminant(&role) && m.content.contains(substring) }) } // ============================================================================= // Compaction Tests // ============================================================================= /// Test: Last assistant message is preserved after compaction /// /// This is the main feature test. After compaction: /// 1. System prompt is preserved /// 2. Summary is added as a USER message /// 3. Last assistant message is preserved as ASSISTANT message /// 4. Latest user message is preserved /// /// The order should be: /// [System] -> [Summary as User] -> [Last Assistant] -> [Latest User] #[tokio::test] async fn test_compaction_preserves_last_assistant_message() { // Create a provider that will: // 1. Respond to initial conversation // 2. Provide a summary when compaction is triggered let provider = MockProvider::new() // Response 1: Initial assistant response .with_response(MockResponse::text( "I understand you want to build a web server. Let me help you with that.", )) // Response 2: Second assistant response (this should be preserved after compaction) .with_response(MockResponse::text( "Here's the implementation plan:\n1. Create main.rs\n2. Add dependencies\n3. Implement routes", )) // Response 3: This will be the summary response during compaction .with_response(MockResponse::text( "Summary: User wants to build a web server. We discussed implementation plan with 3 steps.", )) // Response 4: Post-compaction response .with_response(MockResponse::text( "Continuing from where we left off...", )); let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; // Build up conversation history agent .execute_task("I want to build a web server in Rust", None, false) .await .unwrap(); agent .execute_task("What's the implementation plan?", None, false) .await .unwrap(); // Verify the last assistant message before compaction let history_before = agent.get_context_window().conversation_history.clone(); let last_assistant_before = find_last_assistant_message(&history_before) .expect("Should have assistant message before compaction"); assert!( last_assistant_before.content.contains("implementation plan"), "Last assistant message should contain 'implementation plan', got: {}", last_assistant_before.content ); // Trigger manual compaction let compaction_result = agent.force_compact().await; assert!( compaction_result.is_ok(), "Compaction should succeed: {:?}", compaction_result.err() ); assert!( compaction_result.unwrap(), "Compaction should return true on success" ); // Verify the context after compaction let history_after = &agent.get_context_window().conversation_history; // Debug: Print the history after compaction eprintln!("\n=== History after compaction ==="); for (i, msg) in history_after.iter().enumerate() { eprintln!( " {}: {:?} - {}...", i, msg.role, msg.content.chars().take(80).collect::() ); } // 1. Should have a summary message as USER role assert!( message_contains(history_after, MessageRole::User, "Summary:"), "Should have summary message as User role after compaction" ); // 2. Should preserve the last assistant message as ASSISTANT role assert!( message_contains(history_after, MessageRole::Assistant, "implementation plan"), "Should preserve last assistant message with 'implementation plan' after compaction.\n\ History: {:?}", history_after .iter() .map(|m| format!("{:?}: {}...", m.role, m.content.chars().take(50).collect::())) .collect::>() ); // 3. Should preserve the latest user message assert!( message_contains(history_after, MessageRole::User, "implementation plan"), "Should preserve latest user message after compaction" ); } /// Test: Compaction with no assistant messages doesn't crash /// /// Edge case: If there are no assistant messages (e.g., fresh session), /// compaction should still work without errors. #[tokio::test] async fn test_compaction_no_assistant_message() { // Provider that returns a summary let provider = MockProvider::new() .with_response(MockResponse::text("Summary: Empty conversation.")); let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; // Add a user message directly to context without getting a response. // This simulates a state where we have user input but no assistant response yet. use g3_providers::Message; agent.add_message_to_context(Message::new(MessageRole::User, "Hello".to_string())); // Trigger compaction - should not crash let result = agent.force_compact().await; assert!(result.is_ok(), "Compaction should succeed even with no assistant messages"); } /// Test: Compaction preserves tool-call-only assistant message /// /// Even if the last assistant message is just a tool call (no prose), /// it should still be preserved as it contains important context. #[tokio::test] async fn test_compaction_preserves_tool_call_only_message() { let provider = MockProvider::new() // Response 1: Text response .with_response(MockResponse::text("Let me check that file.")) // Response 2: Tool call response (this is the last assistant message) .with_response(MockResponse::text_with_json_tool( "", // No prose, just tool call "read_file", serde_json::json!({"file_path": "important.rs"}), )) // Response 3: Summary .with_response(MockResponse::text("Summary: User asked to check a file.")) // Response 4: Post-compaction .with_response(MockResponse::text("Continuing...")); let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; // Build conversation agent.execute_task("Check the file", None, false).await.unwrap(); agent.execute_task("Read important.rs", None, false).await.unwrap(); // Get the last assistant message before compaction let history_before = agent.get_context_window().conversation_history.clone(); let last_assistant_before = find_last_assistant_message(&history_before); // Verify we have an assistant message (might contain tool call JSON) assert!( last_assistant_before.is_some(), "Should have an assistant message before compaction" ); // Trigger compaction let result = agent.force_compact().await; assert!(result.is_ok(), "Compaction should succeed"); // The assistant message should be preserved as a separate Assistant message let history_after = &agent.get_context_window().conversation_history; let has_assistant = history_after .iter() .any(|m| matches!(m.role, MessageRole::Assistant)); assert!( has_assistant, "Should have at least one assistant message after compaction (the preserved last one)" ); } /// Test: Compaction with multiple assistant messages preserves only the last one /// /// When there are multiple assistant messages, only the most recent one /// should be preserved (in addition to the summary). #[tokio::test] async fn test_compaction_preserves_only_last_assistant() { let provider = MockProvider::new() // Response 1: First assistant response .with_response(MockResponse::text("FIRST_RESPONSE: Hello!")) // Response 2: Second assistant response .with_response(MockResponse::text("SECOND_RESPONSE: How can I help?")) // Response 3: Third assistant response (this should be preserved) .with_response(MockResponse::text("THIRD_RESPONSE: Let me assist you.")) // Response 4: Summary .with_response(MockResponse::text("Summary: Greeted user three times.")) // Response 5: Post-compaction .with_response(MockResponse::text("Continuing...")); let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; // Build conversation with multiple exchanges agent.execute_task("Hi", None, false).await.unwrap(); agent.execute_task("What can you do?", None, false).await.unwrap(); agent.execute_task("Help me", None, false).await.unwrap(); // Trigger compaction let result = agent.force_compact().await; assert!(result.is_ok(), "Compaction should succeed"); let history_after = &agent.get_context_window().conversation_history; // Debug output eprintln!("\n=== History after compaction ==="); for (i, msg) in history_after.iter().enumerate() { eprintln!( " {}: {:?} - {}", i, msg.role, msg.content.chars().take(100).collect::() ); } // Should have THIRD_RESPONSE (the last one) as an Assistant message assert!( message_contains(history_after, MessageRole::Assistant, "THIRD_RESPONSE"), "Should preserve the LAST assistant message (THIRD_RESPONSE)" ); // Should NOT have FIRST_RESPONSE or SECOND_RESPONSE as separate messages // (they might be mentioned in the summary, but not as standalone assistant messages) let assistant_messages: Vec<_> = history_after .iter() .filter(|m| matches!(m.role, MessageRole::Assistant)) .collect(); // Should have exactly one assistant message (the preserved last one) assert_eq!( assistant_messages.len(), 1, "Should have exactly one assistant message after compaction (the last one), got {}: {:?}", assistant_messages.len(), assistant_messages.iter().map(|m| &m.content).collect::>() ); } /// Test: Compaction without a trailing user message /// /// Edge case: The last message in history is from the assistant (no user follow-up). /// The assistant message should still be preserved. #[tokio::test] async fn test_compaction_no_trailing_user_message() { let provider = MockProvider::new() // Response 1: Assistant response (will be the last message) .with_response(MockResponse::text("LAST_ASSISTANT_MESSAGE: Here's your answer.")) // Response 2: Summary .with_response(MockResponse::text("Summary: Provided an answer.")); let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; // Single exchange - assistant response is the last message agent.execute_task("What's the answer?", None, false).await.unwrap(); // Verify last message is from assistant let history_before = agent.get_context_window().conversation_history.clone(); let last_msg = history_before.last().expect("Should have messages"); assert!( matches!(last_msg.role, MessageRole::Assistant), "Last message should be from assistant before compaction" ); // Trigger compaction let result = agent.force_compact().await; assert!(result.is_ok(), "Compaction should succeed"); let history_after = &agent.get_context_window().conversation_history; // Should preserve the last assistant message assert!( message_contains(history_after, MessageRole::Assistant, "LAST_ASSISTANT_MESSAGE"), "Should preserve last assistant message even without trailing user message" ); } /// Test: Message order after compaction is correct /// /// The order should be: /// [System Prompt] -> [README if present] -> [Summary as User] -> [Last Assistant] -> [Latest User] #[tokio::test] async fn test_compaction_message_order() { let provider = MockProvider::new() .with_response(MockResponse::text("ASSISTANT_TO_PRESERVE: I'll help you.")) .with_response(MockResponse::text("SUMMARY_CONTENT: User asked for help.")); let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; agent.execute_task("USER_MESSAGE_TO_PRESERVE: Help me", None, false).await.unwrap(); // Trigger compaction agent.force_compact().await.unwrap(); let history = &agent.get_context_window().conversation_history; // Debug output eprintln!("\n=== Message order after compaction ==="); for (i, msg) in history.iter().enumerate() { eprintln!( " {}: {:?} - {}", i, msg.role, msg.content.chars().take(60).collect::() ); } // Find indices of key messages let summary_idx = history .iter() .position(|m| m.content.contains("SUMMARY_CONTENT")) .expect("Should have summary"); // Summary should be a User message assert!( matches!(history[summary_idx].role, MessageRole::User), "Summary should be a User message, got {:?}", history[summary_idx].role ); let assistant_idx = history .iter() .position(|m| matches!(m.role, MessageRole::Assistant) && m.content.contains("ASSISTANT_TO_PRESERVE")) .expect("Should have preserved assistant message"); let user_idx = history .iter() .position(|m| matches!(m.role, MessageRole::User) && m.content.contains("USER_MESSAGE_TO_PRESERVE")); // Summary should come before assistant message assert!( summary_idx < assistant_idx, "Summary (idx {}) should come before assistant message (idx {})", summary_idx, assistant_idx ); // If there's a latest user message, it should come after assistant message if let Some(user_idx) = user_idx { assert!( assistant_idx < user_idx, "Assistant message (idx {}) should come before latest user message (idx {})", assistant_idx, user_idx ); } // The last message should be the latest user message let last_msg = history.last().expect("Should have messages"); assert!( matches!(last_msg.role, MessageRole::User), "Last message should be User (the latest user message), got {:?}", last_msg.role ); } /// Test: Second compaction doesn't bloat system messages /// /// After multiple compactions, we should always have the same clean structure: /// [System Prompt] -> [Summary as User] -> [Last Assistant] -> [Latest User] /// /// The summary should be replaced, not accumulated. #[tokio::test] async fn test_second_compaction_no_bloat() { let provider = MockProvider::new() // First conversation .with_response(MockResponse::text("FIRST_ASSISTANT: I'll help with task 1.")) .with_response(MockResponse::text("SECOND_ASSISTANT: Done with task 1.")) // First compaction summary .with_response(MockResponse::text("FIRST_SUMMARY: Completed task 1.")) // Second conversation (after first compaction) .with_response(MockResponse::text("THIRD_ASSISTANT: Starting task 2.")) .with_response(MockResponse::text("FOURTH_ASSISTANT: Done with task 2.")) // Second compaction summary .with_response(MockResponse::text("SECOND_SUMMARY: Completed tasks 1 and 2.")); let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; // === First conversation === agent.execute_task("Start task 1", None, false).await.unwrap(); agent.execute_task("Finish task 1", None, false).await.unwrap(); // Count system messages before first compaction let system_count_before_first = agent .get_context_window() .conversation_history .iter() .filter(|m| matches!(m.role, MessageRole::System)) .count(); eprintln!("System messages before first compaction: {}", system_count_before_first); // === First compaction === agent.force_compact().await.unwrap(); let history_after_first = &agent.get_context_window().conversation_history; let system_count_after_first = history_after_first .iter() .filter(|m| matches!(m.role, MessageRole::System)) .count(); eprintln!("\n=== After FIRST compaction ==="); for (i, msg) in history_after_first.iter().enumerate() { eprintln!( " {}: {:?} - {}", i, msg.role, msg.content.chars().take(60).collect::() ); } eprintln!("System messages after first compaction: {}", system_count_after_first); // Verify first compaction structure assert!( message_contains(history_after_first, MessageRole::User, "FIRST_SUMMARY"), "Should have first summary as User message" ); assert!( message_contains(history_after_first, MessageRole::Assistant, "SECOND_ASSISTANT"), "Should have last assistant from first conversation" ); // === Second conversation (after first compaction) === agent.execute_task("Start task 2", None, false).await.unwrap(); agent.execute_task("Finish task 2", None, false).await.unwrap(); // === Second compaction === agent.force_compact().await.unwrap(); let history_after_second = &agent.get_context_window().conversation_history; let system_count_after_second = history_after_second .iter() .filter(|m| matches!(m.role, MessageRole::System)) .count(); eprintln!("\n=== After SECOND compaction ==="); for (i, msg) in history_after_second.iter().enumerate() { eprintln!( " {}: {:?} - {}", i, msg.role, msg.content.chars().take(60).collect::() ); } eprintln!("System messages after second compaction: {}", system_count_after_second); // === KEY ASSERTIONS === // 1. System message count should NOT increase after second compaction assert_eq!( system_count_after_first, system_count_after_second, "System message count should stay the same after second compaction (no bloat). First: {}, Second: {}", system_count_after_first, system_count_after_second ); // 2. Should have the NEW summary (SECOND_SUMMARY), not the old one assert!( message_contains(history_after_second, MessageRole::User, "SECOND_SUMMARY"), "Should have second summary as User message" ); // 3. Should NOT have the old summary anymore assert!( !message_contains(history_after_second, MessageRole::User, "FIRST_SUMMARY"), "Should NOT have first summary anymore - it should be replaced" ); // 4. Should have the last assistant from second conversation assert!( message_contains(history_after_second, MessageRole::Assistant, "FOURTH_ASSISTANT"), "Should have last assistant from second conversation" ); // 5. Should NOT have old assistant messages assert!( !message_contains(history_after_second, MessageRole::Assistant, "SECOND_ASSISTANT"), "Should NOT have assistant from first conversation" ); // 6. Verify the clean structure: System... -> User (summary) -> Assistant -> User let non_system_messages: Vec<_> = history_after_second .iter() .filter(|m| !matches!(m.role, MessageRole::System)) .collect(); assert!( non_system_messages.len() >= 3, "Should have at least 3 non-system messages (summary, assistant, user)" ); // First non-system should be User (summary) assert!( matches!(non_system_messages[0].role, MessageRole::User), "First non-system message should be User (summary), got {:?}", non_system_messages[0].role ); // Second non-system should be Assistant assert!( matches!(non_system_messages[1].role, MessageRole::Assistant), "Second non-system message should be Assistant, got {:?}", non_system_messages[1].role ); // Third non-system should be User (latest) assert!( matches!(non_system_messages[2].role, MessageRole::User), "Third non-system message should be User (latest), got {:?}", non_system_messages[2].role ); eprintln!("\nāœ… Second compaction maintains clean structure without bloat!"); } /// Test: Compaction strips structured tool_calls from preserved assistant message /// /// Reproduces the exact bug from the h3 session: /// 1. Agent executes a task that triggers a native tool call (read_file) /// 2. The assistant message is stored with structured `tool_calls` field /// 3. Compaction preserves the last assistant message /// 4. The tool_result message is summarized away /// 5. Next API call would fail with "tool_use ids were found without tool_result blocks" /// /// After the fix, compaction strips tool_calls from the preserved assistant message. #[tokio::test] async fn test_compaction_strips_structured_tool_calls() { use g3_providers::MessageToolCall; let provider = MockProvider::new() .with_native_tool_calling(true) // Response 1: Summary for compaction .with_response(MockResponse::text( "Summary: User asked to read a file. Assistant read test_file.txt which contained a greeting.", )) // Response 2: Post-compaction response (this would fail with 400 if tool_calls leaked) .with_response(MockResponse::text( "Continuing after compaction. What would you like to do next?", )); let (mut agent, _agent_temp) = create_agent_with_mock(provider).await; // Directly build the exact conversation state that triggers the bug: // The last assistant message has structured tool_calls, followed by a tool_result, // but the LAST message in the conversation is the assistant with tool_calls // (simulating the case where compaction happens mid-tool-execution or the // last assistant response was a tool call). // User asks to read a file agent.add_message_to_context(Message::new( MessageRole::User, "Please read the recognize.rs file".to_string(), )); // Assistant responds with text + structured tool_call (this will be the LAST assistant message) let mut assistant_with_tool = Message::new( MessageRole::Assistant, "You're right — the recognizer should serve the corpus. Let me research what it takes.".to_string(), ); assistant_with_tool.tool_calls.push(MessageToolCall { id: "toolu_01QRFL8vGKDjZZkfHR586Srb".to_string(), name: "read_file".to_string(), input: serde_json::json!({"file_path": "/tmp/recognize.rs"}), }); agent.add_message_to_context(assistant_with_tool); // Tool result follows let mut tool_result = Message::new( MessageRole::User, "Tool result: pub fn recognize(lexemes: &[Lexeme]) -> Result { ... }".to_string(), ); tool_result.tool_result_id = Some("toolu_01QRFL8vGKDjZZkfHR586Srb".to_string()); agent.add_message_to_context(tool_result); // Verify the pre-compaction state let history_before = agent.get_context_window().conversation_history.clone(); eprintln!("\n=== Before compaction ==="); for (i, msg) in history_before.iter().enumerate() { eprintln!( " {}: {:?} tool_calls={} tool_result_id={:?} content={}...", i, msg.role, msg.tool_calls.len(), msg.tool_result_id, msg.content.chars().take(60).collect::() ); } // Verify: last assistant message has tool_calls let last_assistant = history_before.iter().rev() .find(|m| matches!(m.role, MessageRole::Assistant)) .expect("Should have assistant message"); assert_eq!(last_assistant.tool_calls.len(), 1, "Last assistant should have 1 tool_call"); assert_eq!(last_assistant.tool_calls[0].id, "toolu_01QRFL8vGKDjZZkfHR586Srb"); // Trigger compaction let compact_result = agent.force_compact().await; assert!(compact_result.is_ok(), "Compaction should succeed: {:?}", compact_result.err()); // Verify: no assistant messages with tool_calls after compaction let history_after = &agent.get_context_window().conversation_history; eprintln!("\n=== After compaction ==="); for (i, msg) in history_after.iter().enumerate() { eprintln!( " {}: {:?} tool_calls={} tool_result_id={:?} content={}...", i, msg.role, msg.tool_calls.len(), msg.tool_result_id, msg.content.chars().take(60).collect::() ); } let orphaned_tool_calls: Vec<_> = history_after .iter() .enumerate() .filter(|(_, m)| matches!(m.role, MessageRole::Assistant) && !m.tool_calls.is_empty()) .collect(); assert!( orphaned_tool_calls.is_empty(), "After compaction, no assistant messages should have tool_calls. Found {} orphaned: {:?}", orphaned_tool_calls.len(), orphaned_tool_calls.iter().map(|(i, m)| { format!("msg[{}]: {} tool_calls", i, m.tool_calls.len()) }).collect::>() ); // Verify the preserved assistant message has text content but no tool_calls let preserved_assistant = history_after.iter() .find(|m| matches!(m.role, MessageRole::Assistant)) .expect("Should have preserved assistant message after compaction"); assert!(preserved_assistant.tool_calls.is_empty(), "Preserved assistant message should have tool_calls stripped"); assert!(preserved_assistant.content.contains("recognizer should serve the corpus"), "Preserved assistant message should retain text content"); // Execute another task post-compaction to verify the conversation is valid // (this would fail with Anthropic 400 error if tool_calls leaked through) let post_compact_result = agent.execute_task("What should we do next?", None, false).await; assert!( post_compact_result.is_ok(), "Post-compaction task should succeed (no orphaned tool_use blocks): {:?}", post_compact_result.err() ); eprintln!("\nāœ… Compaction correctly strips structured tool_calls - no orphaned tool_use blocks!"); }