Consolidate redundant assistant message test files

Deleted 4 redundant test files (~956 lines): - assistant_message_dedup_test.rs (416 lines, 12 tests) - consecutive_assistant_message_test.rs (248 lines, 6 tests) - missing_assistant_message_test.rs (100 lines, 4 tests) - early_return_path_test.rs (192 lines, 5 tests) - whitebox test Created consolidated assistant_message_test.rs (369 lines, 14 tests): - Helper function tests for consecutive message detection - ContextWindow unit tests for normal and tool execution flows - Bug demonstration tests documenting what bugs looked like - Invariant tests for user/assistant alternation - Missing assistant message fallback logic tests The early_return_path_test was removed because it: - Referenced specific line numbers in production code (brittle) - Reimplemented internal logic (whitebox anti-pattern) - Duplicated coverage from mock_provider_integration_test.rs All 729 g3-core tests pass.
2026-01-21 10:27:07 +05:30
parent c5d549c211
commit 6a5ce11e7b
4 changed files with 154 additions and 741 deletions
--- a/crates/g3-core/tests/assistant_message_dedup_test.rs
+++ b/crates/g3-core/tests/assistant_message_dedup_test.rs
@@ -1,16 +1,20 @@
-//! Tests for Assistant Message Deduplication
+//! Tests for Assistant Message Handling
 //!
-//! These tests verify that the conversation history does NOT have
+//! These tests verify correct handling of assistant messages in conversation history:
-//! consecutive assistant messages, which was a bug that caused
+//! 1. No consecutive assistant messages (deduplication bug)
-//! compounding duplication in LLM responses.
+//! 2. Proper user/assistant alternation
 //! 3. Missing assistant message fallback logic
 //!
-//! Bug fixed: When tools were executed in previous iterations (any_tool_executed=true)
+//! The original bugs:
-//! and the current iteration finished without executing a tool (tool_executed=false),
+//! - Consecutive assistant messages: When tools were executed in previous iterations
-//! the assistant message was being added twice:
+//!   and the current iteration finished without executing a tool, the assistant
-//! 1. At the break point when any_tool_executed is true
+//!   message was being added twice.
-//! 2. At the return point when !tool_executed
+//! - Missing assistant messages: When the LLM responded with text-only (no tool calls),
 //!   the assistant message was sometimes not saved because the code checked
 //!   `raw_clean.trim().is_empty()` after already confirming `current_response` had content.
 //!
-//! The fix uses an `assistant_message_added` flag to ensure only one add occurs.
+//! These bugs are now tested through the public API in mock_provider_integration_test.rs.
 //! This file contains unit tests for the helper functions and invariants.
 use g3_core::context_window::ContextWindow;
 use g3_providers::{Message, MessageRole};
@@ -19,7 +23,8 @@ use g3_providers::{Message, MessageRole};
 // Helper Functions
 // =============================================================================
-/// Check if conversation history has consecutive assistant messages
+/// Check if conversation history has consecutive assistant messages.
 /// Returns the indices of the first consecutive pair found, if any.
 fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> {
    for i in 0..history.len().saturating_sub(1) {
        if matches!(history[i].role, MessageRole::Assistant)
@@ -40,43 +45,48 @@ fn count_assistant_messages(history: &[Message]) -> usize {
 }
 // =============================================================================
-// Unit Tests for Helper Functions
+// Helper Function Tests
 // =============================================================================
 #[test]
-fn test_has_consecutive_assistant_messages_none() {
+fn test_consecutive_detection_no_consecutive() {
    let history = vec![
        Message::new(MessageRole::System, "System".to_string()),
        Message::new(MessageRole::User, "Hi".to_string()),
        Message::new(MessageRole::Assistant, "Hello".to_string()),
        Message::new(MessageRole::User, "Bye".to_string()),
        Message::new(MessageRole::Assistant, "Goodbye".to_string()),
    ];
    assert!(has_consecutive_assistant_messages(&history).is_none());
 }
 #[test]
-fn test_has_consecutive_assistant_messages_found_middle() {
+fn test_consecutive_detection_at_start() {
    let history = vec![
        Message::new(MessageRole::System, "System".to_string()),
        Message::new(MessageRole::User, "Hi".to_string()),
        Message::new(MessageRole::Assistant, "Hello".to_string()),
-        Message::new(MessageRole::Assistant, "Hello again".to_string()), // BUG!
+        Message::new(MessageRole::Assistant, "Hello again".to_string()),
-        Message::new(MessageRole::User, "Bye".to_string()),
+        Message::new(MessageRole::User, "Hi".to_string()),
    ];
-    assert_eq!(has_consecutive_assistant_messages(&history), Some((2, 3)));
+    assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1)));
 }
 #[test]
-fn test_has_consecutive_assistant_messages_found_end() {
+fn test_consecutive_detection_in_middle() {
    let history = vec![
        Message::new(MessageRole::User, "Hi".to_string()),
        Message::new(MessageRole::Assistant, "Hello".to_string()),
        Message::new(MessageRole::Assistant, "Hello again".to_string()),
        Message::new(MessageRole::User, "Bye".to_string()),
        Message::new(MessageRole::Assistant, "Goodbye".to_string()),
        Message::new(MessageRole::Assistant, "Goodbye again".to_string()), // BUG!
    ];
-    assert_eq!(has_consecutive_assistant_messages(&history), Some((3, 4)));
+    assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
 }
 #[test]
 fn test_consecutive_detection_at_end() {
    let history = vec![
        Message::new(MessageRole::User, "Hi".to_string()),
        Message::new(MessageRole::Assistant, "Hello".to_string()),
        Message::new(MessageRole::Assistant, "Hello again".to_string()),
    ];
    assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
 }
 #[test]
@@ -95,15 +105,11 @@ fn test_count_assistant_messages() {
 // ContextWindow Unit Tests
 // =============================================================================
 /// Test that ContextWindow correctly tracks messages in normal flow
 #[test]
-fn test_context_window_normal_flow() {
+fn test_normal_conversation_flow() {
    let mut context = ContextWindow::new(200_000);
-    context.add_message(Message::new(
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
        MessageRole::System,
        "You are helpful.".to_string(),
    ));
    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
    context.add_message(Message::new(MessageRole::User, "How are you?".to_string()));
@@ -117,16 +123,11 @@ fn test_context_window_normal_flow() {
    );
 }
 /// Test that simulates the correct flow after tool execution
 #[test]
-fn test_context_window_tool_execution_correct_flow() {
+fn test_tool_execution_correct_flow() {
    let mut context = ContextWindow::new(200_000);
-    // Setup
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
    context.add_message(Message::new(
        MessageRole::System,
        "You are helpful.".to_string(),
    ));
    context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
    // Tool call (assistant message with tool JSON)
@@ -134,7 +135,7 @@ fn test_context_window_tool_execution_correct_flow() {
        MessageRole::Assistant,
        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
    ));
-    
+
    // Tool result (user message)
    context.add_message(Message::new(
        MessageRole::User,
@@ -147,7 +148,6 @@ fn test_context_window_tool_execution_correct_flow() {
        "Here are the files: file1.txt, file2.txt".to_string(),
    ));
    // Verify structure
    assert_eq!(context.conversation_history.len(), 5);
    assert_eq!(count_assistant_messages(&context.conversation_history), 2);
    assert!(
@@ -156,58 +156,11 @@ fn test_context_window_tool_execution_correct_flow() {
    );
 }
 /// Test that demonstrates the bug scenario (what the bug looked like)
 #[test]
-fn test_context_window_bug_scenario_demonstration() {
+fn test_multiple_tools_correct_flow() {
    let mut context = ContextWindow::new(200_000);
-    // Setup
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
    context.add_message(Message::new(
        MessageRole::System,
        "You are helpful.".to_string(),
    ));
    context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
    // Tool call
    context.add_message(Message::new(
        MessageRole::Assistant,
        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
    ));
    context.add_message(Message::new(
        MessageRole::User,
        "Tool result: file1.txt".to_string(),
    ));
    // THE BUG: Summary added TWICE
    let summary = "Here are the files: file1.txt".to_string();
    context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
    context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG!
    // This demonstrates what the bug looked like
    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
    assert!(
        consecutive.is_some(),
        "Bug scenario should have consecutive assistant messages"
    );
    assert_eq!(consecutive, Some((4, 5)));
    // The content is duplicated
    assert_eq!(
        context.conversation_history[4].content,
        context.conversation_history[5].content,
        "Bug: consecutive messages have identical content"
    );
 }
 /// Test multiple tool executions followed by summary (correct flow)
 #[test]
 fn test_context_window_multiple_tools_correct_flow() {
    let mut context = ContextWindow::new(200_000);
    context.add_message(Message::new(
        MessageRole::System,
        "You are helpful.".to_string(),
    ));
    context.add_message(Message::new(
        MessageRole::User,
        "List files and show current directory".to_string(),
@@ -239,7 +192,6 @@ fn test_context_window_multiple_tools_correct_flow() {
        "Files: file1.txt, file2.txt. Current directory: /home/user".to_string(),
    ));
    // Verify structure
    // System + User + (Assistant + User) * 2 + Assistant = 1 + 1 + 4 + 1 = 7
    assert_eq!(context.conversation_history.len(), 7);
    assert_eq!(count_assistant_messages(&context.conversation_history), 3);
@@ -249,72 +201,17 @@ fn test_context_window_multiple_tools_correct_flow() {
    );
 }
-/// Test the exact session log pattern from the original bug report
+// =============================================================================
-#[test]
+// Bug Demonstration Tests (document what the bugs looked like)
-fn test_session_log_pattern_from_bug_report() {
+// =============================================================================
    // This test recreates the exact pattern seen in the butler session log:
    // - Index N: assistant message with content
    // - Index N+1: assistant message with same content (consecutive!)
    let mut context = ContextWindow::new(200_000);
    // Simulate a conversation with many messages
    context.add_message(Message::new(MessageRole::System, "You are butler.".to_string()));
    // Add some conversation history
    for i in 0..10 {
        context.add_message(Message::new(MessageRole::User, format!("Task {}", i)));
        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)));
    }
    // Now simulate the buggy pattern
    context.add_message(Message::new(
        MessageRole::User,
        "Task: move appa_music into appa".to_string(),
    ));
    // Tool call
    context.add_message(Message::new(
        MessageRole::Assistant,
        r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(),
    ));
    context.add_message(Message::new(
        MessageRole::User,
        "Tool result: ✓ Moved".to_string(),
    ));
    // The bug: two consecutive assistant messages with duplicated content
    let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?";
    // First add (correct)
    context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
    // Second add (THE BUG)
    context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
    // Verify the bug pattern exists
    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
    assert!(
        consecutive.is_some(),
        "Should detect consecutive assistant messages from bug pattern"
    );
    // The consecutive messages should be the last two
    let (idx1, idx2) = consecutive.unwrap();
    let history_len = context.conversation_history.len();
    assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last");
    assert_eq!(idx2, history_len - 1, "Second consecutive should be last");
 }
 /// Test that the fix prevents consecutive assistant messages
 /// This simulates what the fixed code does: check before adding
 #[test]
-fn test_fix_prevents_consecutive_assistant_messages() {
+fn test_bug_demonstration_consecutive_messages() {
    // This test demonstrates what the consecutive message bug looked like.
    // The bug is now fixed and tested through mock_provider_integration_test.rs.
    let mut context = ContextWindow::new(200_000);
-    context.add_message(Message::new(
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
        MessageRole::System,
        "You are helpful.".to_string(),
    ));
    context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
    context.add_message(Message::new(
        MessageRole::Assistant,
@@ -325,64 +222,31 @@ fn test_fix_prevents_consecutive_assistant_messages() {
        "Tool result: file1.txt".to_string(),
    ));
-    // Simulate the fix: use a flag to track if message was added
+    // THE BUG: Summary added TWICE
    let mut assistant_message_added = false;
    let summary = "Here are the files: file1.txt".to_string();
    context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
    context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG!
-    // First add location (simulating line ~2529 in the fix)
+    // Verify the bug pattern
-    if !assistant_message_added {
+    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
-        context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
+    assert!(consecutive.is_some(), "Bug scenario should have consecutive assistant messages");
-        assistant_message_added = true;
+    assert_eq!(consecutive, Some((4, 5)));
    }
-    // Second add location (simulating line ~2772 in the fix)
+    // The content is duplicated
    // This would have added a duplicate before the fix
    if !assistant_message_added {
        context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
        // assistant_message_added = true; // Not needed since we return after
    }
    // Verify the fix works
    assert!(
        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
        "Fix should prevent consecutive assistant messages"
    );
    assert_eq!(
-        count_assistant_messages(&context.conversation_history),
+        context.conversation_history[4].content,
-        2,
+        context.conversation_history[5].content,
-        "Should have exactly 2 assistant messages (tool call + summary)"
+        "Bug: consecutive messages have identical content"
    );
 }
-/// Test edge case: empty response should not add message
+// =============================================================================
-#[test]
+// Invariant Tests
-fn test_empty_response_not_added() {
+// =============================================================================
    let mut context = ContextWindow::new(200_000);
    context.add_message(Message::new(
        MessageRole::System,
        "You are helpful.".to_string(),
    ));
    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "Hi!".to_string()));
    // Simulate the check: don't add empty/whitespace responses
    let empty_response = "   ".to_string();
    if !empty_response.trim().is_empty() {
        context.add_message(Message::new(MessageRole::Assistant, empty_response));
    }
    // Should still have only 3 messages
    assert_eq!(context.conversation_history.len(), 3);
    assert!(
        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
        "Empty response should not create consecutive messages"
    );
 }
 /// Test the invariant: conversation should always alternate user/assistant after system
 #[test]
 fn test_alternating_pattern_invariant() {
    // After the system message, conversation should alternate user/assistant
    let history = vec![
        Message::new(MessageRole::System, "System".to_string()),
        Message::new(MessageRole::User, "Q1".to_string()),
@@ -393,7 +257,6 @@ fn test_alternating_pattern_invariant() {
        Message::new(MessageRole::Assistant, "A3".to_string()),
    ];
    // Verify alternating pattern
    for i in 1..history.len() - 1 {
        let current = &history[i].role;
        let next = &history[i + 1].role;
@@ -414,3 +277,93 @@ fn test_alternating_pattern_invariant() {
        }
    }
 }
 #[test]
 fn test_fix_prevents_consecutive_messages() {
    // This simulates what the fixed code does: use a flag to track if message was added
    let mut context = ContextWindow::new(200_000);
    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
    context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
    context.add_message(Message::new(
        MessageRole::Assistant,
        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
    ));
    context.add_message(Message::new(
        MessageRole::User,
        "Tool result: file1.txt".to_string(),
    ));
    // Simulate the fix: use a flag to track if message was added
    let mut assistant_message_added = false;
    let summary = "Here are the files: file1.txt".to_string();
    // First add location
    if !assistant_message_added {
        context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
        assistant_message_added = true;
    }
    // Second add location (would have added duplicate before the fix)
    if !assistant_message_added {
        context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
    }
    assert!(
        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
        "Fix should prevent consecutive assistant messages"
    );
    assert_eq!(
        count_assistant_messages(&context.conversation_history),
        2,
        "Should have exactly 2 assistant messages (tool call + summary)"
    );
 }
 // =============================================================================
 // Missing Assistant Message Fallback Tests
 // =============================================================================
 #[test]
 fn test_fallback_to_current_response_when_raw_empty() {
    // Simulate the scenario where raw_clean is empty but current_response has content
    let current_response = "Here's my helpful response!";
    let raw_clean = "";
    let content_to_save = if !raw_clean.trim().is_empty() {
        raw_clean.to_string()
    } else {
        current_response.to_string()
    };
    assert_eq!(content_to_save, current_response);
    assert!(!content_to_save.is_empty());
 }
 #[test]
 fn test_prefer_raw_clean_when_available() {
    let current_response = "Filtered response";
    let raw_clean = "Raw response with tool JSON";
    let content_to_save = if !raw_clean.trim().is_empty() {
        raw_clean.to_string()
    } else {
        current_response.to_string()
    };
    assert_eq!(content_to_save, raw_clean);
 }
 #[test]
 fn test_whitespace_raw_clean_triggers_fallback() {
    let current_response = "Actual content";
    let raw_clean = "   \n\t  ";
    let content_to_save = if !raw_clean.trim().is_empty() {
        raw_clean.to_string()
    } else {
        current_response.to_string()
    };
    assert_eq!(content_to_save, current_response);
 }
--- a/crates/g3-core/tests/consecutive_assistant_message_test.rs
+++ b/crates/g3-core/tests/consecutive_assistant_message_test.rs
@@ -1,248 +0,0 @@
 //! Tests for consecutive assistant message bug
 //!
 //! This test verifies that the streaming completion logic does not add
 //! consecutive assistant messages to the conversation history.
 //!
 //! Bug description:
 //! When tools are executed in previous iterations (any_tool_executed=true)
 //! and the current iteration finishes without executing a tool (tool_executed=false),
 //! the assistant message was being added twice:
 //! 1. At line ~2529 when breaking to auto-continue (if any_tool_executed && !current_response.is_empty())
 //! 2. At line ~2772 when returning (if !current_response.is_empty())
 //!
 //! This creates consecutive assistant messages in the conversation history,
 //! which causes the LLM to see duplicated content and mimic the pattern,
 //! leading to compounding duplication over time.
 use g3_core::context_window::ContextWindow;
 use g3_providers::{Message, MessageRole};
 /// Helper to check if conversation history has consecutive assistant messages
 fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> {
    for i in 0..history.len().saturating_sub(1) {
        if matches!(history[i].role, MessageRole::Assistant)
            && matches!(history[i + 1].role, MessageRole::Assistant)
        {
            return Some((i, i + 1));
        }
    }
    None
 }
 /// Test that a well-formed conversation has no consecutive assistant messages
 #[test]
 fn test_no_consecutive_assistant_messages_in_normal_flow() {
    let mut context = ContextWindow::new(200_000);
    // Simulate a normal conversation flow
    context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
    context.add_message(Message::new(MessageRole::User, "Task: Hello".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
    context.add_message(Message::new(MessageRole::User, "Task: How are you?".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "I'm doing well!".to_string()));
    assert!(
        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
        "Normal conversation should not have consecutive assistant messages"
    );
 }
 /// Test that detects the bug: consecutive assistant messages
 /// This test SHOULD FAIL until the bug is fixed
 #[test]
 fn test_detect_consecutive_assistant_messages_bug() {
    let mut context = ContextWindow::new(200_000);
    // Simulate the buggy flow:
    // 1. System message
    context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
    // 2. User asks a question
    context.add_message(Message::new(MessageRole::User, "Task: List files".to_string()));
    // 3. Assistant responds with tool call (tool execution adds this)
    context.add_message(Message::new(
        MessageRole::Assistant,
        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
    ));
    // 4. Tool result
    context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string()));
    // 5. Assistant provides summary - THIS IS WHERE THE BUG OCCURS
    // The bug adds this message TWICE
    let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string();
    // Simulate the bug: message added at line 2529 (when breaking to auto-continue)
    context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
    // Simulate the bug: message added AGAIN at line 2772 (when returning)
    // This is the bug - the same message is added twice!
    context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
    // This assertion verifies the bug exists
    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
    assert!(
        consecutive.is_some(),
        "Bug reproduction: should have consecutive assistant messages at indices {:?}",
        consecutive
    );
    // Verify the indices
    let (idx1, idx2) = consecutive.unwrap();
    assert_eq!(idx1, 4, "First consecutive assistant message should be at index 4");
    assert_eq!(idx2, 5, "Second consecutive assistant message should be at index 5");
    // Verify the content is duplicated
    assert_eq!(
        context.conversation_history[idx1].content,
        context.conversation_history[idx2].content,
        "Consecutive assistant messages should have identical content (the bug)"
    );
 }
 /// Test that validates the invariant: no consecutive assistant messages should exist
 /// This is the test that should PASS after the bug is fixed
 #[test]
 fn test_invariant_no_consecutive_assistant_messages() {
    // This test documents the invariant that should hold
    // After the bug is fixed, this test should pass
    let mut context = ContextWindow::new(200_000);
    // Build a conversation that would trigger the bug
    context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
    context.add_message(Message::new(MessageRole::User, "Task: List files".to_string()));
    context.add_message(Message::new(
        MessageRole::Assistant,
        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
    ));
    context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string()));
    // After the fix, only ONE assistant message should be added
    let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string();
    context.add_message(Message::new(MessageRole::Assistant, summary));
    // Invariant: no consecutive assistant messages
    assert!(
        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
        "Invariant: conversation history should never have consecutive assistant messages"
    );
 }
 /// Test helper function works correctly
 #[test]
 fn test_consecutive_detection_helper() {
    // Test with no consecutive
    let history = vec![
        Message::new(MessageRole::User, "Hi".to_string()),
        Message::new(MessageRole::Assistant, "Hello".to_string()),
        Message::new(MessageRole::User, "Bye".to_string()),
    ];
    assert!(has_consecutive_assistant_messages(&history).is_none());
    // Test with consecutive at start
    let history = vec![
        Message::new(MessageRole::Assistant, "Hello".to_string()),
        Message::new(MessageRole::Assistant, "Hello again".to_string()),
        Message::new(MessageRole::User, "Hi".to_string()),
    ];
    assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1)));
    // Test with consecutive in middle
    let history = vec![
        Message::new(MessageRole::User, "Hi".to_string()),
        Message::new(MessageRole::Assistant, "Hello".to_string()),
        Message::new(MessageRole::Assistant, "Hello again".to_string()),
        Message::new(MessageRole::User, "Bye".to_string()),
    ];
    assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
    // Test with consecutive at end
    let history = vec![
        Message::new(MessageRole::User, "Hi".to_string()),
        Message::new(MessageRole::Assistant, "Hello".to_string()),
        Message::new(MessageRole::Assistant, "Hello again".to_string()),
    ];
    assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
 }
 /// Test that simulates the exact session log pattern from the bug report
 #[test]
 fn test_session_log_pattern_from_bug_report() {
    // This test recreates the exact pattern seen in the butler session log:
    // - Index 368: assistant message with duplicated content
    // - Index 369: assistant message with same duplicated content (consecutive!)
    let mut context = ContextWindow::new(200_000);
    // Simulate the conversation leading up to the bug
    context.add_message(Message::new(MessageRole::System, "You are butler.".to_string()));
    // ... many messages ...
    for i in 0..50 {
        context.add_message(Message::new(MessageRole::User, format!("Task {}", i)));
        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)));
    }
    // Now simulate the buggy pattern
    context.add_message(Message::new(MessageRole::User, "Task: move appa_music into appa".to_string()));
    // Tool call
    context.add_message(Message::new(
        MessageRole::Assistant,
        r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(),
    ));
    context.add_message(Message::new(MessageRole::User, "Tool result: ✓ Moved".to_string()));
    // The bug: two consecutive assistant messages with duplicated content
    let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?\n\nDone! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?";
    // First add (line 2529)
    context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
    // Second add (line 2772) - THE BUG
    context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
    // Verify the bug pattern exists
    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
    assert!(
        consecutive.is_some(),
        "Should detect consecutive assistant messages from bug pattern"
    );
    // The consecutive messages should be the last two
    let (idx1, idx2) = consecutive.unwrap();
    let history_len = context.conversation_history.len();
    assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last");
    assert_eq!(idx2, history_len - 1, "Second consecutive should be last");
 }
 /// Test that the context window could have a guard against consecutive assistant messages
 /// This is a proposed fix validation test
 #[test]
 fn test_proposed_fix_guard_against_consecutive() {
    let mut context = ContextWindow::new(200_000);
    context.add_message(Message::new(MessageRole::System, "System".to_string()));
    context.add_message(Message::new(MessageRole::User, "User".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "First response".to_string()));
    // Proposed fix: before adding an assistant message, check if the last message
    // is also an assistant message. If so, either:
    // 1. Skip adding (if content is identical)
    // 2. Merge the content (if content is different)
    // 3. Log a warning and skip (safest option)
    let last_message = context.conversation_history.last();
    let is_last_assistant = last_message
        .map(|m| matches!(m.role, MessageRole::Assistant))
        .unwrap_or(false);
    assert!(
        is_last_assistant,
        "Last message should be assistant for this test"
    );
    // The fix would check this condition before adding
    // and skip adding if true (to prevent consecutive assistant messages)
 }
--- a/crates/g3-core/tests/early_return_path_test.rs
+++ b/crates/g3-core/tests/early_return_path_test.rs
@@ -1,192 +0,0 @@
 //! Tests for the early return path in stream_completion_with_tools
 //!
 //! This test documents the bug fix for missing assistant messages when:
 //! - The LLM responds with text only (no tool calls)
 //! - any_tool_executed = false (no tools in previous iterations)
 //! - The stream finishes and hits the early return path
 //!
 //! The fix adds assistant message saving before the early return at ~line 2535.
 use g3_core::context_window::ContextWindow;
 use g3_providers::{Message, MessageRole};
 /// Simulates the early return path logic
 /// This mirrors the code at lines 2535-2555 in lib.rs
 fn simulate_early_return_path(
    context_window: &mut ContextWindow,
    current_response: &str,
    assistant_message_added: bool,
    any_tool_executed: bool,
 ) -> bool {
    // This simulates the code path when stream finishes
    if any_tool_executed {
        // If tools were executed in previous iterations, break to finalize
        if !current_response.trim().is_empty() && !assistant_message_added {
            let assistant_msg = Message::new(
                MessageRole::Assistant,
                current_response.to_string(),
            );
            context_window.add_message(assistant_msg);
            return true; // assistant_message_added = true
        }
        return assistant_message_added;
    }
    // THE FIX: Save assistant message before returning (no tools were executed)
    if !current_response.trim().is_empty() && !assistant_message_added {
        let assistant_msg = Message::new(
            MessageRole::Assistant,
            current_response.to_string(),
        );
        context_window.add_message(assistant_msg);
        return true;
    }
    assistant_message_added
 }
 /// Test: Text-only response with no previous tool execution
 /// This is the exact bug scenario from butler session
 #[test]
 fn test_text_only_response_no_tools() {
    let mut context = ContextWindow::new(200_000);
    // Add initial user message
    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
    let current_response = "Phew! 😅 Glad it's back. Sorry about that...";
    let assistant_message_added = false;
    let any_tool_executed = false;
    let was_added = simulate_early_return_path(
        &mut context,
        current_response,
        assistant_message_added,
        any_tool_executed,
    );
    assert!(was_added, "Assistant message should be added");
    let history = context.conversation_history.clone();
    assert_eq!(history.len(), 2, "Should have user + assistant messages");
    assert!(matches!(history[1].role, MessageRole::Assistant));
    assert!(history[1].content.contains("Phew!"));
 }
 /// Test: Text-only response with previous tool execution
 #[test]
 fn test_text_only_response_with_previous_tools() {
    let mut context = ContextWindow::new(200_000);
    context.add_message(Message::new(MessageRole::User, "Run ls".to_string()));
    let current_response = "Here are the files...";
    let assistant_message_added = false;
    let any_tool_executed = true; // Tools were executed in previous iterations
    let was_added = simulate_early_return_path(
        &mut context,
        current_response,
        assistant_message_added,
        any_tool_executed,
    );
    assert!(was_added, "Assistant message should be added");
    let history = context.conversation_history.clone();
    assert_eq!(history.len(), 2);
    assert!(matches!(history[1].role, MessageRole::Assistant));
 }
 /// Test: Empty response should not add message
 #[test]
 fn test_empty_response_not_added() {
    let mut context = ContextWindow::new(200_000);
    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
    let current_response = "   "; // Whitespace only
    let assistant_message_added = false;
    let any_tool_executed = false;
    let was_added = simulate_early_return_path(
        &mut context,
        current_response,
        assistant_message_added,
        any_tool_executed,
    );
    assert!(!was_added, "Empty response should not be added");
    let history = context.conversation_history.clone();
    assert_eq!(history.len(), 1, "Should only have user message");
 }
 /// Test: Already added flag prevents duplication
 #[test]
 fn test_already_added_prevents_duplication() {
    let mut context = ContextWindow::new(200_000);
    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
    context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
    let current_response = "Hi there!";
    let assistant_message_added = true; // Already added
    let any_tool_executed = false;
    let was_added = simulate_early_return_path(
        &mut context,
        current_response,
        assistant_message_added,
        any_tool_executed,
    );
    assert!(was_added, "Flag should remain true");
    let history = context.conversation_history.clone();
    assert_eq!(history.len(), 2, "Should not add duplicate");
 }
 /// Test: Consecutive user messages scenario (the bug)
 /// Before the fix, this would result in consecutive user messages
 #[test]
 fn test_bug_scenario_consecutive_user_messages() {
    let mut context = ContextWindow::new(200_000);
    // Simulate the butler session bug:
    // Message 80: Tool result (user)
    // Message 81: User input (user) - SHOULD have assistant between!
    // Message 82: User input (user)
    context.add_message(Message::new(MessageRole::User, "Tool result: Self care".to_string()));
    // Simulate assistant response that was displayed but not saved (THE BUG)
    let current_response = "Phew! 😅 Glad it's back...";
    let assistant_message_added = false;
    let any_tool_executed = false;
    // THE FIX: This should now save the assistant message
    simulate_early_return_path(
        &mut context,
        current_response,
        assistant_message_added,
        any_tool_executed,
    );
    // Now add the next user message
    context.add_message(Message::new(MessageRole::User, "Task: Ok it's back...".to_string()));
    let history = context.conversation_history.clone();
    // Verify no consecutive user messages
    for i in 0..history.len().saturating_sub(1) {
        let current_is_user = matches!(history[i].role, MessageRole::User);
        let next_is_user = matches!(history[i + 1].role, MessageRole::User);
        assert!(
            !(current_is_user && next_is_user),
            "Found consecutive user messages at positions {} and {}",
            i, i + 1
        );
    }
 }
--- a/crates/g3-core/tests/missing_assistant_message_test.rs
+++ b/crates/g3-core/tests/missing_assistant_message_test.rs
@@ -1,100 +0,0 @@
 //! Tests for the missing assistant message bug fix.
 //!
 //! Bug: When the LLM responds with text-only (no tool calls), the assistant message
 //! was sometimes not saved to the context window because the code checked
 //! `raw_clean.trim().is_empty()` after already confirming `current_response` had content.
 //! If the parser buffer was empty/different from current_response, no message was added.
 //!
 //! Fix: Use current_response as fallback when raw_clean is empty.
 /// Test that the fix ensures assistant messages are always added when there's content.
 /// This simulates the fixed behavior where current_response is used as fallback.
 #[test]
 fn test_fallback_to_current_response_when_raw_empty() {
    // Simulate the scenario:
    // - current_response has content (what was displayed)
    // - raw_clean is empty (parser buffer was cleared)
    // - The fix should use current_response as fallback
    let current_response = "Here's my helpful response!";
    let raw_clean = ""; // Parser buffer is empty
    // The fix logic:
    let content_to_save = if !raw_clean.trim().is_empty() {
        raw_clean.to_string()
    } else {
        current_response.to_string()
    };
    // Verify fallback works
    assert_eq!(content_to_save, current_response);
    assert!(!content_to_save.is_empty());
 }
 /// Test that raw_clean is preferred when available.
 #[test]
 fn test_prefer_raw_clean_when_available() {
    let current_response = "Filtered response"; // What was displayed (filtered)
    let raw_clean = "Raw response with {\"tool\": ...} JSON"; // Raw content
    // The fix logic:
    let content_to_save = if !raw_clean.trim().is_empty() {
        raw_clean.to_string()
    } else {
        current_response.to_string()
    };
    // Verify raw_clean is preferred
    assert_eq!(content_to_save, raw_clean);
 }
 /// Test that whitespace-only raw_clean triggers fallback.
 #[test]
 fn test_whitespace_raw_clean_triggers_fallback() {
    let current_response = "Actual content";
    let raw_clean = "   \n\t  "; // Whitespace only
    // The fix logic:
    let content_to_save = if !raw_clean.trim().is_empty() {
        raw_clean.to_string()
    } else {
        current_response.to_string()
    };
    // Verify fallback to current_response
    assert_eq!(content_to_save, current_response);
 }
 /// Test that the fix logic handles various edge cases.
 #[test]
 fn test_fix_logic_edge_cases() {
    // Test case 1: Both have content - prefer raw
    let current = "displayed";
    let raw = "raw content";
    let result = if !raw.trim().is_empty() { raw } else { current };
    assert_eq!(result, "raw content");
    // Test case 2: Raw is empty - use current
    let current = "displayed";
    let raw = "";
    let result = if !raw.trim().is_empty() { raw } else { current };
    assert_eq!(result, "displayed");
    // Test case 3: Raw is whitespace - use current
    let current = "displayed";
    let raw = "  \n  ";
    let result = if !raw.trim().is_empty() { raw } else { current };
    assert_eq!(result, "displayed");
    // Test case 4: Both empty - still returns current (empty)
    let current = "";
    let raw = "";
    let result = if !raw.trim().is_empty() { raw } else { current };
    assert_eq!(result, "");
    // Test case 5: Current has content, raw has only newlines
    let current = "Hello world!";
    let raw = "\n\n\n";
    let result = if !raw.trim().is_empty() { raw } else { current };
    assert_eq!(result, "Hello world!");
 }