From 6a5ce11e7b02b74efc6b94e5eb0819af0e7e1cb1 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Wed, 21 Jan 2026 10:27:07 +0530 Subject: [PATCH] Consolidate redundant assistant message test files Deleted 4 redundant test files (~956 lines): - assistant_message_dedup_test.rs (416 lines, 12 tests) - consecutive_assistant_message_test.rs (248 lines, 6 tests) - missing_assistant_message_test.rs (100 lines, 4 tests) - early_return_path_test.rs (192 lines, 5 tests) - whitebox test Created consolidated assistant_message_test.rs (369 lines, 14 tests): - Helper function tests for consecutive message detection - ContextWindow unit tests for normal and tool execution flows - Bug demonstration tests documenting what bugs looked like - Invariant tests for user/assistant alternation - Missing assistant message fallback logic tests The early_return_path_test was removed because it: - Referenced specific line numbers in production code (brittle) - Reimplemented internal logic (whitebox anti-pattern) - Duplicated coverage from mock_provider_integration_test.rs All 729 g3-core tests pass. --- ...edup_test.rs => assistant_message_test.rs} | 355 ++++++++---------- .../consecutive_assistant_message_test.rs | 248 ------------ .../g3-core/tests/early_return_path_test.rs | 192 ---------- .../tests/missing_assistant_message_test.rs | 100 ----- 4 files changed, 154 insertions(+), 741 deletions(-) rename crates/g3-core/tests/{assistant_message_dedup_test.rs => assistant_message_test.rs} (59%) delete mode 100644 crates/g3-core/tests/consecutive_assistant_message_test.rs delete mode 100644 crates/g3-core/tests/early_return_path_test.rs delete mode 100644 crates/g3-core/tests/missing_assistant_message_test.rs diff --git a/crates/g3-core/tests/assistant_message_dedup_test.rs b/crates/g3-core/tests/assistant_message_test.rs similarity index 59% rename from crates/g3-core/tests/assistant_message_dedup_test.rs rename to crates/g3-core/tests/assistant_message_test.rs index 2e0138a..353fee2 100644 --- a/crates/g3-core/tests/assistant_message_dedup_test.rs +++ b/crates/g3-core/tests/assistant_message_test.rs @@ -1,16 +1,20 @@ -//! Tests for Assistant Message Deduplication +//! Tests for Assistant Message Handling //! -//! These tests verify that the conversation history does NOT have -//! consecutive assistant messages, which was a bug that caused -//! compounding duplication in LLM responses. +//! These tests verify correct handling of assistant messages in conversation history: +//! 1. No consecutive assistant messages (deduplication bug) +//! 2. Proper user/assistant alternation +//! 3. Missing assistant message fallback logic //! -//! Bug fixed: When tools were executed in previous iterations (any_tool_executed=true) -//! and the current iteration finished without executing a tool (tool_executed=false), -//! the assistant message was being added twice: -//! 1. At the break point when any_tool_executed is true -//! 2. At the return point when !tool_executed +//! The original bugs: +//! - Consecutive assistant messages: When tools were executed in previous iterations +//! and the current iteration finished without executing a tool, the assistant +//! message was being added twice. +//! - Missing assistant messages: When the LLM responded with text-only (no tool calls), +//! the assistant message was sometimes not saved because the code checked +//! `raw_clean.trim().is_empty()` after already confirming `current_response` had content. //! -//! The fix uses an `assistant_message_added` flag to ensure only one add occurs. +//! These bugs are now tested through the public API in mock_provider_integration_test.rs. +//! This file contains unit tests for the helper functions and invariants. use g3_core::context_window::ContextWindow; use g3_providers::{Message, MessageRole}; @@ -19,7 +23,8 @@ use g3_providers::{Message, MessageRole}; // Helper Functions // ============================================================================= -/// Check if conversation history has consecutive assistant messages +/// Check if conversation history has consecutive assistant messages. +/// Returns the indices of the first consecutive pair found, if any. fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> { for i in 0..history.len().saturating_sub(1) { if matches!(history[i].role, MessageRole::Assistant) @@ -40,43 +45,48 @@ fn count_assistant_messages(history: &[Message]) -> usize { } // ============================================================================= -// Unit Tests for Helper Functions +// Helper Function Tests // ============================================================================= #[test] -fn test_has_consecutive_assistant_messages_none() { +fn test_consecutive_detection_no_consecutive() { let history = vec![ - Message::new(MessageRole::System, "System".to_string()), Message::new(MessageRole::User, "Hi".to_string()), Message::new(MessageRole::Assistant, "Hello".to_string()), Message::new(MessageRole::User, "Bye".to_string()), - Message::new(MessageRole::Assistant, "Goodbye".to_string()), ]; assert!(has_consecutive_assistant_messages(&history).is_none()); } #[test] -fn test_has_consecutive_assistant_messages_found_middle() { +fn test_consecutive_detection_at_start() { let history = vec![ - Message::new(MessageRole::System, "System".to_string()), - Message::new(MessageRole::User, "Hi".to_string()), Message::new(MessageRole::Assistant, "Hello".to_string()), - Message::new(MessageRole::Assistant, "Hello again".to_string()), // BUG! - Message::new(MessageRole::User, "Bye".to_string()), + Message::new(MessageRole::Assistant, "Hello again".to_string()), + Message::new(MessageRole::User, "Hi".to_string()), ]; - assert_eq!(has_consecutive_assistant_messages(&history), Some((2, 3))); + assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1))); } #[test] -fn test_has_consecutive_assistant_messages_found_end() { +fn test_consecutive_detection_in_middle() { let history = vec![ Message::new(MessageRole::User, "Hi".to_string()), Message::new(MessageRole::Assistant, "Hello".to_string()), + Message::new(MessageRole::Assistant, "Hello again".to_string()), Message::new(MessageRole::User, "Bye".to_string()), - Message::new(MessageRole::Assistant, "Goodbye".to_string()), - Message::new(MessageRole::Assistant, "Goodbye again".to_string()), // BUG! ]; - assert_eq!(has_consecutive_assistant_messages(&history), Some((3, 4))); + assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2))); +} + +#[test] +fn test_consecutive_detection_at_end() { + let history = vec![ + Message::new(MessageRole::User, "Hi".to_string()), + Message::new(MessageRole::Assistant, "Hello".to_string()), + Message::new(MessageRole::Assistant, "Hello again".to_string()), + ]; + assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2))); } #[test] @@ -95,15 +105,11 @@ fn test_count_assistant_messages() { // ContextWindow Unit Tests // ============================================================================= -/// Test that ContextWindow correctly tracks messages in normal flow #[test] -fn test_context_window_normal_flow() { +fn test_normal_conversation_flow() { let mut context = ContextWindow::new(200_000); - context.add_message(Message::new( - MessageRole::System, - "You are helpful.".to_string(), - )); + context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string())); context.add_message(Message::new(MessageRole::User, "Hello".to_string())); context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string())); context.add_message(Message::new(MessageRole::User, "How are you?".to_string())); @@ -117,16 +123,11 @@ fn test_context_window_normal_flow() { ); } -/// Test that simulates the correct flow after tool execution #[test] -fn test_context_window_tool_execution_correct_flow() { +fn test_tool_execution_correct_flow() { let mut context = ContextWindow::new(200_000); - // Setup - context.add_message(Message::new( - MessageRole::System, - "You are helpful.".to_string(), - )); + context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string())); context.add_message(Message::new(MessageRole::User, "Run a command".to_string())); // Tool call (assistant message with tool JSON) @@ -134,7 +135,7 @@ fn test_context_window_tool_execution_correct_flow() { MessageRole::Assistant, r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(), )); - + // Tool result (user message) context.add_message(Message::new( MessageRole::User, @@ -147,7 +148,6 @@ fn test_context_window_tool_execution_correct_flow() { "Here are the files: file1.txt, file2.txt".to_string(), )); - // Verify structure assert_eq!(context.conversation_history.len(), 5); assert_eq!(count_assistant_messages(&context.conversation_history), 2); assert!( @@ -156,58 +156,11 @@ fn test_context_window_tool_execution_correct_flow() { ); } -/// Test that demonstrates the bug scenario (what the bug looked like) #[test] -fn test_context_window_bug_scenario_demonstration() { +fn test_multiple_tools_correct_flow() { let mut context = ContextWindow::new(200_000); - // Setup - context.add_message(Message::new( - MessageRole::System, - "You are helpful.".to_string(), - )); - context.add_message(Message::new(MessageRole::User, "Run a command".to_string())); - - // Tool call - context.add_message(Message::new( - MessageRole::Assistant, - r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(), - )); - context.add_message(Message::new( - MessageRole::User, - "Tool result: file1.txt".to_string(), - )); - - // THE BUG: Summary added TWICE - let summary = "Here are the files: file1.txt".to_string(); - context.add_message(Message::new(MessageRole::Assistant, summary.clone())); - context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG! - - // This demonstrates what the bug looked like - let consecutive = has_consecutive_assistant_messages(&context.conversation_history); - assert!( - consecutive.is_some(), - "Bug scenario should have consecutive assistant messages" - ); - assert_eq!(consecutive, Some((4, 5))); - - // The content is duplicated - assert_eq!( - context.conversation_history[4].content, - context.conversation_history[5].content, - "Bug: consecutive messages have identical content" - ); -} - -/// Test multiple tool executions followed by summary (correct flow) -#[test] -fn test_context_window_multiple_tools_correct_flow() { - let mut context = ContextWindow::new(200_000); - - context.add_message(Message::new( - MessageRole::System, - "You are helpful.".to_string(), - )); + context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string())); context.add_message(Message::new( MessageRole::User, "List files and show current directory".to_string(), @@ -239,7 +192,6 @@ fn test_context_window_multiple_tools_correct_flow() { "Files: file1.txt, file2.txt. Current directory: /home/user".to_string(), )); - // Verify structure // System + User + (Assistant + User) * 2 + Assistant = 1 + 1 + 4 + 1 = 7 assert_eq!(context.conversation_history.len(), 7); assert_eq!(count_assistant_messages(&context.conversation_history), 3); @@ -249,72 +201,17 @@ fn test_context_window_multiple_tools_correct_flow() { ); } -/// Test the exact session log pattern from the original bug report -#[test] -fn test_session_log_pattern_from_bug_report() { - // This test recreates the exact pattern seen in the butler session log: - // - Index N: assistant message with content - // - Index N+1: assistant message with same content (consecutive!) - - let mut context = ContextWindow::new(200_000); - - // Simulate a conversation with many messages - context.add_message(Message::new(MessageRole::System, "You are butler.".to_string())); - - // Add some conversation history - for i in 0..10 { - context.add_message(Message::new(MessageRole::User, format!("Task {}", i))); - context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i))); - } - - // Now simulate the buggy pattern - context.add_message(Message::new( - MessageRole::User, - "Task: move appa_music into appa".to_string(), - )); - - // Tool call - context.add_message(Message::new( - MessageRole::Assistant, - r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(), - )); - context.add_message(Message::new( - MessageRole::User, - "Tool result: ✓ Moved".to_string(), - )); - - // The bug: two consecutive assistant messages with duplicated content - let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?"; - - // First add (correct) - context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string())); - // Second add (THE BUG) - context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string())); - - // Verify the bug pattern exists - let consecutive = has_consecutive_assistant_messages(&context.conversation_history); - assert!( - consecutive.is_some(), - "Should detect consecutive assistant messages from bug pattern" - ); - - // The consecutive messages should be the last two - let (idx1, idx2) = consecutive.unwrap(); - let history_len = context.conversation_history.len(); - assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last"); - assert_eq!(idx2, history_len - 1, "Second consecutive should be last"); -} +// ============================================================================= +// Bug Demonstration Tests (document what the bugs looked like) +// ============================================================================= -/// Test that the fix prevents consecutive assistant messages -/// This simulates what the fixed code does: check before adding #[test] -fn test_fix_prevents_consecutive_assistant_messages() { +fn test_bug_demonstration_consecutive_messages() { + // This test demonstrates what the consecutive message bug looked like. + // The bug is now fixed and tested through mock_provider_integration_test.rs. let mut context = ContextWindow::new(200_000); - context.add_message(Message::new( - MessageRole::System, - "You are helpful.".to_string(), - )); + context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string())); context.add_message(Message::new(MessageRole::User, "Run a command".to_string())); context.add_message(Message::new( MessageRole::Assistant, @@ -325,64 +222,31 @@ fn test_fix_prevents_consecutive_assistant_messages() { "Tool result: file1.txt".to_string(), )); - // Simulate the fix: use a flag to track if message was added - let mut assistant_message_added = false; + // THE BUG: Summary added TWICE let summary = "Here are the files: file1.txt".to_string(); + context.add_message(Message::new(MessageRole::Assistant, summary.clone())); + context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG! - // First add location (simulating line ~2529 in the fix) - if !assistant_message_added { - context.add_message(Message::new(MessageRole::Assistant, summary.clone())); - assistant_message_added = true; - } + // Verify the bug pattern + let consecutive = has_consecutive_assistant_messages(&context.conversation_history); + assert!(consecutive.is_some(), "Bug scenario should have consecutive assistant messages"); + assert_eq!(consecutive, Some((4, 5))); - // Second add location (simulating line ~2772 in the fix) - // This would have added a duplicate before the fix - if !assistant_message_added { - context.add_message(Message::new(MessageRole::Assistant, summary.clone())); - // assistant_message_added = true; // Not needed since we return after - } - - // Verify the fix works - assert!( - has_consecutive_assistant_messages(&context.conversation_history).is_none(), - "Fix should prevent consecutive assistant messages" - ); + // The content is duplicated assert_eq!( - count_assistant_messages(&context.conversation_history), - 2, - "Should have exactly 2 assistant messages (tool call + summary)" + context.conversation_history[4].content, + context.conversation_history[5].content, + "Bug: consecutive messages have identical content" ); } -/// Test edge case: empty response should not add message -#[test] -fn test_empty_response_not_added() { - let mut context = ContextWindow::new(200_000); +// ============================================================================= +// Invariant Tests +// ============================================================================= - context.add_message(Message::new( - MessageRole::System, - "You are helpful.".to_string(), - )); - context.add_message(Message::new(MessageRole::User, "Hello".to_string())); - context.add_message(Message::new(MessageRole::Assistant, "Hi!".to_string())); - - // Simulate the check: don't add empty/whitespace responses - let empty_response = " ".to_string(); - if !empty_response.trim().is_empty() { - context.add_message(Message::new(MessageRole::Assistant, empty_response)); - } - - // Should still have only 3 messages - assert_eq!(context.conversation_history.len(), 3); - assert!( - has_consecutive_assistant_messages(&context.conversation_history).is_none(), - "Empty response should not create consecutive messages" - ); -} - -/// Test the invariant: conversation should always alternate user/assistant after system #[test] fn test_alternating_pattern_invariant() { + // After the system message, conversation should alternate user/assistant let history = vec![ Message::new(MessageRole::System, "System".to_string()), Message::new(MessageRole::User, "Q1".to_string()), @@ -393,7 +257,6 @@ fn test_alternating_pattern_invariant() { Message::new(MessageRole::Assistant, "A3".to_string()), ]; - // Verify alternating pattern for i in 1..history.len() - 1 { let current = &history[i].role; let next = &history[i + 1].role; @@ -414,3 +277,93 @@ fn test_alternating_pattern_invariant() { } } } + +#[test] +fn test_fix_prevents_consecutive_messages() { + // This simulates what the fixed code does: use a flag to track if message was added + let mut context = ContextWindow::new(200_000); + + context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string())); + context.add_message(Message::new(MessageRole::User, "Run a command".to_string())); + context.add_message(Message::new( + MessageRole::Assistant, + r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(), + )); + context.add_message(Message::new( + MessageRole::User, + "Tool result: file1.txt".to_string(), + )); + + // Simulate the fix: use a flag to track if message was added + let mut assistant_message_added = false; + let summary = "Here are the files: file1.txt".to_string(); + + // First add location + if !assistant_message_added { + context.add_message(Message::new(MessageRole::Assistant, summary.clone())); + assistant_message_added = true; + } + + // Second add location (would have added duplicate before the fix) + if !assistant_message_added { + context.add_message(Message::new(MessageRole::Assistant, summary.clone())); + } + + assert!( + has_consecutive_assistant_messages(&context.conversation_history).is_none(), + "Fix should prevent consecutive assistant messages" + ); + assert_eq!( + count_assistant_messages(&context.conversation_history), + 2, + "Should have exactly 2 assistant messages (tool call + summary)" + ); +} + +// ============================================================================= +// Missing Assistant Message Fallback Tests +// ============================================================================= + +#[test] +fn test_fallback_to_current_response_when_raw_empty() { + // Simulate the scenario where raw_clean is empty but current_response has content + let current_response = "Here's my helpful response!"; + let raw_clean = ""; + + let content_to_save = if !raw_clean.trim().is_empty() { + raw_clean.to_string() + } else { + current_response.to_string() + }; + + assert_eq!(content_to_save, current_response); + assert!(!content_to_save.is_empty()); +} + +#[test] +fn test_prefer_raw_clean_when_available() { + let current_response = "Filtered response"; + let raw_clean = "Raw response with tool JSON"; + + let content_to_save = if !raw_clean.trim().is_empty() { + raw_clean.to_string() + } else { + current_response.to_string() + }; + + assert_eq!(content_to_save, raw_clean); +} + +#[test] +fn test_whitespace_raw_clean_triggers_fallback() { + let current_response = "Actual content"; + let raw_clean = " \n\t "; + + let content_to_save = if !raw_clean.trim().is_empty() { + raw_clean.to_string() + } else { + current_response.to_string() + }; + + assert_eq!(content_to_save, current_response); +} diff --git a/crates/g3-core/tests/consecutive_assistant_message_test.rs b/crates/g3-core/tests/consecutive_assistant_message_test.rs deleted file mode 100644 index d6616dd..0000000 --- a/crates/g3-core/tests/consecutive_assistant_message_test.rs +++ /dev/null @@ -1,248 +0,0 @@ -//! Tests for consecutive assistant message bug -//! -//! This test verifies that the streaming completion logic does not add -//! consecutive assistant messages to the conversation history. -//! -//! Bug description: -//! When tools are executed in previous iterations (any_tool_executed=true) -//! and the current iteration finishes without executing a tool (tool_executed=false), -//! the assistant message was being added twice: -//! 1. At line ~2529 when breaking to auto-continue (if any_tool_executed && !current_response.is_empty()) -//! 2. At line ~2772 when returning (if !current_response.is_empty()) -//! -//! This creates consecutive assistant messages in the conversation history, -//! which causes the LLM to see duplicated content and mimic the pattern, -//! leading to compounding duplication over time. - -use g3_core::context_window::ContextWindow; -use g3_providers::{Message, MessageRole}; - -/// Helper to check if conversation history has consecutive assistant messages -fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> { - for i in 0..history.len().saturating_sub(1) { - if matches!(history[i].role, MessageRole::Assistant) - && matches!(history[i + 1].role, MessageRole::Assistant) - { - return Some((i, i + 1)); - } - } - None -} - -/// Test that a well-formed conversation has no consecutive assistant messages -#[test] -fn test_no_consecutive_assistant_messages_in_normal_flow() { - let mut context = ContextWindow::new(200_000); - - // Simulate a normal conversation flow - context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string())); - context.add_message(Message::new(MessageRole::User, "Task: Hello".to_string())); - context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string())); - context.add_message(Message::new(MessageRole::User, "Task: How are you?".to_string())); - context.add_message(Message::new(MessageRole::Assistant, "I'm doing well!".to_string())); - - assert!( - has_consecutive_assistant_messages(&context.conversation_history).is_none(), - "Normal conversation should not have consecutive assistant messages" - ); -} - -/// Test that detects the bug: consecutive assistant messages -/// This test SHOULD FAIL until the bug is fixed -#[test] -fn test_detect_consecutive_assistant_messages_bug() { - let mut context = ContextWindow::new(200_000); - - // Simulate the buggy flow: - // 1. System message - context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string())); - - // 2. User asks a question - context.add_message(Message::new(MessageRole::User, "Task: List files".to_string())); - - // 3. Assistant responds with tool call (tool execution adds this) - context.add_message(Message::new( - MessageRole::Assistant, - r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(), - )); - - // 4. Tool result - context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string())); - - // 5. Assistant provides summary - THIS IS WHERE THE BUG OCCURS - // The bug adds this message TWICE - let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string(); - - // Simulate the bug: message added at line 2529 (when breaking to auto-continue) - context.add_message(Message::new(MessageRole::Assistant, summary.clone())); - - // Simulate the bug: message added AGAIN at line 2772 (when returning) - // This is the bug - the same message is added twice! - context.add_message(Message::new(MessageRole::Assistant, summary.clone())); - - // This assertion verifies the bug exists - let consecutive = has_consecutive_assistant_messages(&context.conversation_history); - assert!( - consecutive.is_some(), - "Bug reproduction: should have consecutive assistant messages at indices {:?}", - consecutive - ); - - // Verify the indices - let (idx1, idx2) = consecutive.unwrap(); - assert_eq!(idx1, 4, "First consecutive assistant message should be at index 4"); - assert_eq!(idx2, 5, "Second consecutive assistant message should be at index 5"); - - // Verify the content is duplicated - assert_eq!( - context.conversation_history[idx1].content, - context.conversation_history[idx2].content, - "Consecutive assistant messages should have identical content (the bug)" - ); -} - -/// Test that validates the invariant: no consecutive assistant messages should exist -/// This is the test that should PASS after the bug is fixed -#[test] -fn test_invariant_no_consecutive_assistant_messages() { - // This test documents the invariant that should hold - // After the bug is fixed, this test should pass - - let mut context = ContextWindow::new(200_000); - - // Build a conversation that would trigger the bug - context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string())); - context.add_message(Message::new(MessageRole::User, "Task: List files".to_string())); - context.add_message(Message::new( - MessageRole::Assistant, - r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(), - )); - context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string())); - - // After the fix, only ONE assistant message should be added - let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string(); - context.add_message(Message::new(MessageRole::Assistant, summary)); - - // Invariant: no consecutive assistant messages - assert!( - has_consecutive_assistant_messages(&context.conversation_history).is_none(), - "Invariant: conversation history should never have consecutive assistant messages" - ); -} - -/// Test helper function works correctly -#[test] -fn test_consecutive_detection_helper() { - // Test with no consecutive - let history = vec![ - Message::new(MessageRole::User, "Hi".to_string()), - Message::new(MessageRole::Assistant, "Hello".to_string()), - Message::new(MessageRole::User, "Bye".to_string()), - ]; - assert!(has_consecutive_assistant_messages(&history).is_none()); - - // Test with consecutive at start - let history = vec![ - Message::new(MessageRole::Assistant, "Hello".to_string()), - Message::new(MessageRole::Assistant, "Hello again".to_string()), - Message::new(MessageRole::User, "Hi".to_string()), - ]; - assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1))); - - // Test with consecutive in middle - let history = vec![ - Message::new(MessageRole::User, "Hi".to_string()), - Message::new(MessageRole::Assistant, "Hello".to_string()), - Message::new(MessageRole::Assistant, "Hello again".to_string()), - Message::new(MessageRole::User, "Bye".to_string()), - ]; - assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2))); - - // Test with consecutive at end - let history = vec![ - Message::new(MessageRole::User, "Hi".to_string()), - Message::new(MessageRole::Assistant, "Hello".to_string()), - Message::new(MessageRole::Assistant, "Hello again".to_string()), - ]; - assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2))); -} - -/// Test that simulates the exact session log pattern from the bug report -#[test] -fn test_session_log_pattern_from_bug_report() { - // This test recreates the exact pattern seen in the butler session log: - // - Index 368: assistant message with duplicated content - // - Index 369: assistant message with same duplicated content (consecutive!) - - let mut context = ContextWindow::new(200_000); - - // Simulate the conversation leading up to the bug - context.add_message(Message::new(MessageRole::System, "You are butler.".to_string())); - - // ... many messages ... - for i in 0..50 { - context.add_message(Message::new(MessageRole::User, format!("Task {}", i))); - context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i))); - } - - // Now simulate the buggy pattern - context.add_message(Message::new(MessageRole::User, "Task: move appa_music into appa".to_string())); - - // Tool call - context.add_message(Message::new( - MessageRole::Assistant, - r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(), - )); - context.add_message(Message::new(MessageRole::User, "Tool result: ✓ Moved".to_string())); - - // The bug: two consecutive assistant messages with duplicated content - let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?\n\nDone! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?"; - - // First add (line 2529) - context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string())); - // Second add (line 2772) - THE BUG - context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string())); - - // Verify the bug pattern exists - let consecutive = has_consecutive_assistant_messages(&context.conversation_history); - assert!( - consecutive.is_some(), - "Should detect consecutive assistant messages from bug pattern" - ); - - // The consecutive messages should be the last two - let (idx1, idx2) = consecutive.unwrap(); - let history_len = context.conversation_history.len(); - assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last"); - assert_eq!(idx2, history_len - 1, "Second consecutive should be last"); -} - -/// Test that the context window could have a guard against consecutive assistant messages -/// This is a proposed fix validation test -#[test] -fn test_proposed_fix_guard_against_consecutive() { - let mut context = ContextWindow::new(200_000); - - context.add_message(Message::new(MessageRole::System, "System".to_string())); - context.add_message(Message::new(MessageRole::User, "User".to_string())); - context.add_message(Message::new(MessageRole::Assistant, "First response".to_string())); - - // Proposed fix: before adding an assistant message, check if the last message - // is also an assistant message. If so, either: - // 1. Skip adding (if content is identical) - // 2. Merge the content (if content is different) - // 3. Log a warning and skip (safest option) - - let last_message = context.conversation_history.last(); - let is_last_assistant = last_message - .map(|m| matches!(m.role, MessageRole::Assistant)) - .unwrap_or(false); - - assert!( - is_last_assistant, - "Last message should be assistant for this test" - ); - - // The fix would check this condition before adding - // and skip adding if true (to prevent consecutive assistant messages) -} diff --git a/crates/g3-core/tests/early_return_path_test.rs b/crates/g3-core/tests/early_return_path_test.rs deleted file mode 100644 index 286fd7a..0000000 --- a/crates/g3-core/tests/early_return_path_test.rs +++ /dev/null @@ -1,192 +0,0 @@ -//! Tests for the early return path in stream_completion_with_tools -//! -//! This test documents the bug fix for missing assistant messages when: -//! - The LLM responds with text only (no tool calls) -//! - any_tool_executed = false (no tools in previous iterations) -//! - The stream finishes and hits the early return path -//! -//! The fix adds assistant message saving before the early return at ~line 2535. - -use g3_core::context_window::ContextWindow; -use g3_providers::{Message, MessageRole}; - -/// Simulates the early return path logic -/// This mirrors the code at lines 2535-2555 in lib.rs -fn simulate_early_return_path( - context_window: &mut ContextWindow, - current_response: &str, - assistant_message_added: bool, - any_tool_executed: bool, -) -> bool { - // This simulates the code path when stream finishes - - if any_tool_executed { - // If tools were executed in previous iterations, break to finalize - if !current_response.trim().is_empty() && !assistant_message_added { - let assistant_msg = Message::new( - MessageRole::Assistant, - current_response.to_string(), - ); - context_window.add_message(assistant_msg); - return true; // assistant_message_added = true - } - return assistant_message_added; - } - - // THE FIX: Save assistant message before returning (no tools were executed) - if !current_response.trim().is_empty() && !assistant_message_added { - let assistant_msg = Message::new( - MessageRole::Assistant, - current_response.to_string(), - ); - context_window.add_message(assistant_msg); - return true; - } - - assistant_message_added -} - -/// Test: Text-only response with no previous tool execution -/// This is the exact bug scenario from butler session -#[test] -fn test_text_only_response_no_tools() { - let mut context = ContextWindow::new(200_000); - - // Add initial user message - context.add_message(Message::new(MessageRole::User, "Hello".to_string())); - - let current_response = "Phew! 😅 Glad it's back. Sorry about that..."; - let assistant_message_added = false; - let any_tool_executed = false; - - let was_added = simulate_early_return_path( - &mut context, - current_response, - assistant_message_added, - any_tool_executed, - ); - - assert!(was_added, "Assistant message should be added"); - - let history = context.conversation_history.clone(); - assert_eq!(history.len(), 2, "Should have user + assistant messages"); - assert!(matches!(history[1].role, MessageRole::Assistant)); - assert!(history[1].content.contains("Phew!")); -} - -/// Test: Text-only response with previous tool execution -#[test] -fn test_text_only_response_with_previous_tools() { - let mut context = ContextWindow::new(200_000); - - context.add_message(Message::new(MessageRole::User, "Run ls".to_string())); - - let current_response = "Here are the files..."; - let assistant_message_added = false; - let any_tool_executed = true; // Tools were executed in previous iterations - - let was_added = simulate_early_return_path( - &mut context, - current_response, - assistant_message_added, - any_tool_executed, - ); - - assert!(was_added, "Assistant message should be added"); - - let history = context.conversation_history.clone(); - assert_eq!(history.len(), 2); - assert!(matches!(history[1].role, MessageRole::Assistant)); -} - -/// Test: Empty response should not add message -#[test] -fn test_empty_response_not_added() { - let mut context = ContextWindow::new(200_000); - - context.add_message(Message::new(MessageRole::User, "Hello".to_string())); - - let current_response = " "; // Whitespace only - let assistant_message_added = false; - let any_tool_executed = false; - - let was_added = simulate_early_return_path( - &mut context, - current_response, - assistant_message_added, - any_tool_executed, - ); - - assert!(!was_added, "Empty response should not be added"); - - let history = context.conversation_history.clone(); - assert_eq!(history.len(), 1, "Should only have user message"); -} - -/// Test: Already added flag prevents duplication -#[test] -fn test_already_added_prevents_duplication() { - let mut context = ContextWindow::new(200_000); - - context.add_message(Message::new(MessageRole::User, "Hello".to_string())); - context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string())); - - let current_response = "Hi there!"; - let assistant_message_added = true; // Already added - let any_tool_executed = false; - - let was_added = simulate_early_return_path( - &mut context, - current_response, - assistant_message_added, - any_tool_executed, - ); - - assert!(was_added, "Flag should remain true"); - - let history = context.conversation_history.clone(); - assert_eq!(history.len(), 2, "Should not add duplicate"); -} - -/// Test: Consecutive user messages scenario (the bug) -/// Before the fix, this would result in consecutive user messages -#[test] -fn test_bug_scenario_consecutive_user_messages() { - let mut context = ContextWindow::new(200_000); - - // Simulate the butler session bug: - // Message 80: Tool result (user) - // Message 81: User input (user) - SHOULD have assistant between! - // Message 82: User input (user) - - context.add_message(Message::new(MessageRole::User, "Tool result: Self care".to_string())); - - // Simulate assistant response that was displayed but not saved (THE BUG) - let current_response = "Phew! 😅 Glad it's back..."; - let assistant_message_added = false; - let any_tool_executed = false; - - // THE FIX: This should now save the assistant message - simulate_early_return_path( - &mut context, - current_response, - assistant_message_added, - any_tool_executed, - ); - - // Now add the next user message - context.add_message(Message::new(MessageRole::User, "Task: Ok it's back...".to_string())); - - let history = context.conversation_history.clone(); - - // Verify no consecutive user messages - for i in 0..history.len().saturating_sub(1) { - let current_is_user = matches!(history[i].role, MessageRole::User); - let next_is_user = matches!(history[i + 1].role, MessageRole::User); - assert!( - !(current_is_user && next_is_user), - "Found consecutive user messages at positions {} and {}", - i, i + 1 - ); - } -} diff --git a/crates/g3-core/tests/missing_assistant_message_test.rs b/crates/g3-core/tests/missing_assistant_message_test.rs deleted file mode 100644 index 3aa7a59..0000000 --- a/crates/g3-core/tests/missing_assistant_message_test.rs +++ /dev/null @@ -1,100 +0,0 @@ -//! Tests for the missing assistant message bug fix. -//! -//! Bug: When the LLM responds with text-only (no tool calls), the assistant message -//! was sometimes not saved to the context window because the code checked -//! `raw_clean.trim().is_empty()` after already confirming `current_response` had content. -//! If the parser buffer was empty/different from current_response, no message was added. -//! -//! Fix: Use current_response as fallback when raw_clean is empty. - -/// Test that the fix ensures assistant messages are always added when there's content. -/// This simulates the fixed behavior where current_response is used as fallback. -#[test] -fn test_fallback_to_current_response_when_raw_empty() { - // Simulate the scenario: - // - current_response has content (what was displayed) - // - raw_clean is empty (parser buffer was cleared) - // - The fix should use current_response as fallback - - let current_response = "Here's my helpful response!"; - let raw_clean = ""; // Parser buffer is empty - - // The fix logic: - let content_to_save = if !raw_clean.trim().is_empty() { - raw_clean.to_string() - } else { - current_response.to_string() - }; - - // Verify fallback works - assert_eq!(content_to_save, current_response); - assert!(!content_to_save.is_empty()); -} - -/// Test that raw_clean is preferred when available. -#[test] -fn test_prefer_raw_clean_when_available() { - let current_response = "Filtered response"; // What was displayed (filtered) - let raw_clean = "Raw response with {\"tool\": ...} JSON"; // Raw content - - // The fix logic: - let content_to_save = if !raw_clean.trim().is_empty() { - raw_clean.to_string() - } else { - current_response.to_string() - }; - - // Verify raw_clean is preferred - assert_eq!(content_to_save, raw_clean); -} - -/// Test that whitespace-only raw_clean triggers fallback. -#[test] -fn test_whitespace_raw_clean_triggers_fallback() { - let current_response = "Actual content"; - let raw_clean = " \n\t "; // Whitespace only - - // The fix logic: - let content_to_save = if !raw_clean.trim().is_empty() { - raw_clean.to_string() - } else { - current_response.to_string() - }; - - // Verify fallback to current_response - assert_eq!(content_to_save, current_response); -} - -/// Test that the fix logic handles various edge cases. -#[test] -fn test_fix_logic_edge_cases() { - // Test case 1: Both have content - prefer raw - let current = "displayed"; - let raw = "raw content"; - let result = if !raw.trim().is_empty() { raw } else { current }; - assert_eq!(result, "raw content"); - - // Test case 2: Raw is empty - use current - let current = "displayed"; - let raw = ""; - let result = if !raw.trim().is_empty() { raw } else { current }; - assert_eq!(result, "displayed"); - - // Test case 3: Raw is whitespace - use current - let current = "displayed"; - let raw = " \n "; - let result = if !raw.trim().is_empty() { raw } else { current }; - assert_eq!(result, "displayed"); - - // Test case 4: Both empty - still returns current (empty) - let current = ""; - let raw = ""; - let result = if !raw.trim().is_empty() { raw } else { current }; - assert_eq!(result, ""); - - // Test case 5: Current has content, raw has only newlines - let current = "Hello world!"; - let raw = "\n\n\n"; - let result = if !raw.trim().is_empty() { raw } else { current }; - assert_eq!(result, "Hello world!"); -}