From 6a5ce11e7b02b74efc6b94e5eb0819af0e7e1cb1 Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna" <d@wideplay.com>
Date: Wed, 21 Jan 2026 10:27:07 +0530
Subject: [PATCH] Consolidate redundant assistant message test files

Deleted 4 redundant test files (~956 lines):
- assistant_message_dedup_test.rs (416 lines, 12 tests)
- consecutive_assistant_message_test.rs (248 lines, 6 tests)
- missing_assistant_message_test.rs (100 lines, 4 tests)
- early_return_path_test.rs (192 lines, 5 tests) - whitebox test

Created consolidated assistant_message_test.rs (369 lines, 14 tests):
- Helper function tests for consecutive message detection
- ContextWindow unit tests for normal and tool execution flows
- Bug demonstration tests documenting what bugs looked like
- Invariant tests for user/assistant alternation
- Missing assistant message fallback logic tests

The early_return_path_test was removed because it:
- Referenced specific line numbers in production code (brittle)
- Reimplemented internal logic (whitebox anti-pattern)
- Duplicated coverage from mock_provider_integration_test.rs

All 729 g3-core tests pass.
---
 ...edup_test.rs => assistant_message_test.rs} | 355 ++++++++----------
 .../consecutive_assistant_message_test.rs     | 248 ------------
 .../g3-core/tests/early_return_path_test.rs   | 192 ----------
 .../tests/missing_assistant_message_test.rs   | 100 -----
 4 files changed, 154 insertions(+), 741 deletions(-)
 rename crates/g3-core/tests/{assistant_message_dedup_test.rs => assistant_message_test.rs} (59%)
 delete mode 100644 crates/g3-core/tests/consecutive_assistant_message_test.rs
 delete mode 100644 crates/g3-core/tests/early_return_path_test.rs
 delete mode 100644 crates/g3-core/tests/missing_assistant_message_test.rs

diff --git a/crates/g3-core/tests/assistant_message_dedup_test.rs b/crates/g3-core/tests/assistant_message_test.rs
similarity index 59%
rename from crates/g3-core/tests/assistant_message_dedup_test.rs
rename to crates/g3-core/tests/assistant_message_test.rs
index 2e0138a..353fee2 100644
--- a/crates/g3-core/tests/assistant_message_dedup_test.rs
+++ b/crates/g3-core/tests/assistant_message_test.rs
@@ -1,16 +1,20 @@
-//! Tests for Assistant Message Deduplication
+//! Tests for Assistant Message Handling
 //!
-//! These tests verify that the conversation history does NOT have
-//! consecutive assistant messages, which was a bug that caused
-//! compounding duplication in LLM responses.
+//! These tests verify correct handling of assistant messages in conversation history:
+//! 1. No consecutive assistant messages (deduplication bug)
+//! 2. Proper user/assistant alternation
+//! 3. Missing assistant message fallback logic
 //!
-//! Bug fixed: When tools were executed in previous iterations (any_tool_executed=true)
-//! and the current iteration finished without executing a tool (tool_executed=false),
-//! the assistant message was being added twice:
-//! 1. At the break point when any_tool_executed is true
-//! 2. At the return point when !tool_executed
+//! The original bugs:
+//! - Consecutive assistant messages: When tools were executed in previous iterations
+//!   and the current iteration finished without executing a tool, the assistant
+//!   message was being added twice.
+//! - Missing assistant messages: When the LLM responded with text-only (no tool calls),
+//!   the assistant message was sometimes not saved because the code checked
+//!   `raw_clean.trim().is_empty()` after already confirming `current_response` had content.
 //!
-//! The fix uses an `assistant_message_added` flag to ensure only one add occurs.
+//! These bugs are now tested through the public API in mock_provider_integration_test.rs.
+//! This file contains unit tests for the helper functions and invariants.
 
 use g3_core::context_window::ContextWindow;
 use g3_providers::{Message, MessageRole};
@@ -19,7 +23,8 @@ use g3_providers::{Message, MessageRole};
 // Helper Functions
 // =============================================================================
 
-/// Check if conversation history has consecutive assistant messages
+/// Check if conversation history has consecutive assistant messages.
+/// Returns the indices of the first consecutive pair found, if any.
 fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> {
     for i in 0..history.len().saturating_sub(1) {
         if matches!(history[i].role, MessageRole::Assistant)
@@ -40,43 +45,48 @@ fn count_assistant_messages(history: &[Message]) -> usize {
 }
 
 // =============================================================================
-// Unit Tests for Helper Functions
+// Helper Function Tests
 // =============================================================================
 
 #[test]
-fn test_has_consecutive_assistant_messages_none() {
+fn test_consecutive_detection_no_consecutive() {
     let history = vec![
-        Message::new(MessageRole::System, "System".to_string()),
         Message::new(MessageRole::User, "Hi".to_string()),
         Message::new(MessageRole::Assistant, "Hello".to_string()),
         Message::new(MessageRole::User, "Bye".to_string()),
-        Message::new(MessageRole::Assistant, "Goodbye".to_string()),
     ];
     assert!(has_consecutive_assistant_messages(&history).is_none());
 }
 
 #[test]
-fn test_has_consecutive_assistant_messages_found_middle() {
+fn test_consecutive_detection_at_start() {
     let history = vec![
-        Message::new(MessageRole::System, "System".to_string()),
-        Message::new(MessageRole::User, "Hi".to_string()),
         Message::new(MessageRole::Assistant, "Hello".to_string()),
-        Message::new(MessageRole::Assistant, "Hello again".to_string()), // BUG!
-        Message::new(MessageRole::User, "Bye".to_string()),
+        Message::new(MessageRole::Assistant, "Hello again".to_string()),
+        Message::new(MessageRole::User, "Hi".to_string()),
     ];
-    assert_eq!(has_consecutive_assistant_messages(&history), Some((2, 3)));
+    assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1)));
 }
 
 #[test]
-fn test_has_consecutive_assistant_messages_found_end() {
+fn test_consecutive_detection_in_middle() {
     let history = vec![
         Message::new(MessageRole::User, "Hi".to_string()),
         Message::new(MessageRole::Assistant, "Hello".to_string()),
+        Message::new(MessageRole::Assistant, "Hello again".to_string()),
         Message::new(MessageRole::User, "Bye".to_string()),
-        Message::new(MessageRole::Assistant, "Goodbye".to_string()),
-        Message::new(MessageRole::Assistant, "Goodbye again".to_string()), // BUG!
     ];
-    assert_eq!(has_consecutive_assistant_messages(&history), Some((3, 4)));
+    assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
+}
+
+#[test]
+fn test_consecutive_detection_at_end() {
+    let history = vec![
+        Message::new(MessageRole::User, "Hi".to_string()),
+        Message::new(MessageRole::Assistant, "Hello".to_string()),
+        Message::new(MessageRole::Assistant, "Hello again".to_string()),
+    ];
+    assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
 }
 
 #[test]
@@ -95,15 +105,11 @@ fn test_count_assistant_messages() {
 // ContextWindow Unit Tests
 // =============================================================================
 
-/// Test that ContextWindow correctly tracks messages in normal flow
 #[test]
-fn test_context_window_normal_flow() {
+fn test_normal_conversation_flow() {
     let mut context = ContextWindow::new(200_000);
 
-    context.add_message(Message::new(
-        MessageRole::System,
-        "You are helpful.".to_string(),
-    ));
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
     context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
     context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
     context.add_message(Message::new(MessageRole::User, "How are you?".to_string()));
@@ -117,16 +123,11 @@ fn test_context_window_normal_flow() {
     );
 }
 
-/// Test that simulates the correct flow after tool execution
 #[test]
-fn test_context_window_tool_execution_correct_flow() {
+fn test_tool_execution_correct_flow() {
     let mut context = ContextWindow::new(200_000);
 
-    // Setup
-    context.add_message(Message::new(
-        MessageRole::System,
-        "You are helpful.".to_string(),
-    ));
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
     context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
 
     // Tool call (assistant message with tool JSON)
@@ -134,7 +135,7 @@ fn test_context_window_tool_execution_correct_flow() {
         MessageRole::Assistant,
         r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
     ));
-    
+
     // Tool result (user message)
     context.add_message(Message::new(
         MessageRole::User,
@@ -147,7 +148,6 @@ fn test_context_window_tool_execution_correct_flow() {
         "Here are the files: file1.txt, file2.txt".to_string(),
     ));
 
-    // Verify structure
     assert_eq!(context.conversation_history.len(), 5);
     assert_eq!(count_assistant_messages(&context.conversation_history), 2);
     assert!(
@@ -156,58 +156,11 @@ fn test_context_window_tool_execution_correct_flow() {
     );
 }
 
-/// Test that demonstrates the bug scenario (what the bug looked like)
 #[test]
-fn test_context_window_bug_scenario_demonstration() {
+fn test_multiple_tools_correct_flow() {
     let mut context = ContextWindow::new(200_000);
 
-    // Setup
-    context.add_message(Message::new(
-        MessageRole::System,
-        "You are helpful.".to_string(),
-    ));
-    context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
-
-    // Tool call
-    context.add_message(Message::new(
-        MessageRole::Assistant,
-        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
-    ));
-    context.add_message(Message::new(
-        MessageRole::User,
-        "Tool result: file1.txt".to_string(),
-    ));
-
-    // THE BUG: Summary added TWICE
-    let summary = "Here are the files: file1.txt".to_string();
-    context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
-    context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG!
-
-    // This demonstrates what the bug looked like
-    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
-    assert!(
-        consecutive.is_some(),
-        "Bug scenario should have consecutive assistant messages"
-    );
-    assert_eq!(consecutive, Some((4, 5)));
-    
-    // The content is duplicated
-    assert_eq!(
-        context.conversation_history[4].content,
-        context.conversation_history[5].content,
-        "Bug: consecutive messages have identical content"
-    );
-}
-
-/// Test multiple tool executions followed by summary (correct flow)
-#[test]
-fn test_context_window_multiple_tools_correct_flow() {
-    let mut context = ContextWindow::new(200_000);
-
-    context.add_message(Message::new(
-        MessageRole::System,
-        "You are helpful.".to_string(),
-    ));
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
     context.add_message(Message::new(
         MessageRole::User,
         "List files and show current directory".to_string(),
@@ -239,7 +192,6 @@ fn test_context_window_multiple_tools_correct_flow() {
         "Files: file1.txt, file2.txt. Current directory: /home/user".to_string(),
     ));
 
-    // Verify structure
     // System + User + (Assistant + User) * 2 + Assistant = 1 + 1 + 4 + 1 = 7
     assert_eq!(context.conversation_history.len(), 7);
     assert_eq!(count_assistant_messages(&context.conversation_history), 3);
@@ -249,72 +201,17 @@ fn test_context_window_multiple_tools_correct_flow() {
     );
 }
 
-/// Test the exact session log pattern from the original bug report
-#[test]
-fn test_session_log_pattern_from_bug_report() {
-    // This test recreates the exact pattern seen in the butler session log:
-    // - Index N: assistant message with content
-    // - Index N+1: assistant message with same content (consecutive!)
-    
-    let mut context = ContextWindow::new(200_000);
-    
-    // Simulate a conversation with many messages
-    context.add_message(Message::new(MessageRole::System, "You are butler.".to_string()));
-    
-    // Add some conversation history
-    for i in 0..10 {
-        context.add_message(Message::new(MessageRole::User, format!("Task {}", i)));
-        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)));
-    }
-    
-    // Now simulate the buggy pattern
-    context.add_message(Message::new(
-        MessageRole::User,
-        "Task: move appa_music into appa".to_string(),
-    ));
-    
-    // Tool call
-    context.add_message(Message::new(
-        MessageRole::Assistant,
-        r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(),
-    ));
-    context.add_message(Message::new(
-        MessageRole::User,
-        "Tool result: ✓ Moved".to_string(),
-    ));
-    
-    // The bug: two consecutive assistant messages with duplicated content
-    let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?";
-    
-    // First add (correct)
-    context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
-    // Second add (THE BUG)
-    context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
-    
-    // Verify the bug pattern exists
-    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
-    assert!(
-        consecutive.is_some(),
-        "Should detect consecutive assistant messages from bug pattern"
-    );
-    
-    // The consecutive messages should be the last two
-    let (idx1, idx2) = consecutive.unwrap();
-    let history_len = context.conversation_history.len();
-    assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last");
-    assert_eq!(idx2, history_len - 1, "Second consecutive should be last");
-}
+// =============================================================================
+// Bug Demonstration Tests (document what the bugs looked like)
+// =============================================================================
 
-/// Test that the fix prevents consecutive assistant messages
-/// This simulates what the fixed code does: check before adding
 #[test]
-fn test_fix_prevents_consecutive_assistant_messages() {
+fn test_bug_demonstration_consecutive_messages() {
+    // This test demonstrates what the consecutive message bug looked like.
+    // The bug is now fixed and tested through mock_provider_integration_test.rs.
     let mut context = ContextWindow::new(200_000);
 
-    context.add_message(Message::new(
-        MessageRole::System,
-        "You are helpful.".to_string(),
-    ));
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
     context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
     context.add_message(Message::new(
         MessageRole::Assistant,
@@ -325,64 +222,31 @@ fn test_fix_prevents_consecutive_assistant_messages() {
         "Tool result: file1.txt".to_string(),
     ));
 
-    // Simulate the fix: use a flag to track if message was added
-    let mut assistant_message_added = false;
+    // THE BUG: Summary added TWICE
     let summary = "Here are the files: file1.txt".to_string();
+    context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
+    context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG!
 
-    // First add location (simulating line ~2529 in the fix)
-    if !assistant_message_added {
-        context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
-        assistant_message_added = true;
-    }
+    // Verify the bug pattern
+    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
+    assert!(consecutive.is_some(), "Bug scenario should have consecutive assistant messages");
+    assert_eq!(consecutive, Some((4, 5)));
 
-    // Second add location (simulating line ~2772 in the fix)
-    // This would have added a duplicate before the fix
-    if !assistant_message_added {
-        context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
-        // assistant_message_added = true; // Not needed since we return after
-    }
-
-    // Verify the fix works
-    assert!(
-        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
-        "Fix should prevent consecutive assistant messages"
-    );
+    // The content is duplicated
     assert_eq!(
-        count_assistant_messages(&context.conversation_history),
-        2,
-        "Should have exactly 2 assistant messages (tool call + summary)"
+        context.conversation_history[4].content,
+        context.conversation_history[5].content,
+        "Bug: consecutive messages have identical content"
     );
 }
 
-/// Test edge case: empty response should not add message
-#[test]
-fn test_empty_response_not_added() {
-    let mut context = ContextWindow::new(200_000);
+// =============================================================================
+// Invariant Tests
+// =============================================================================
 
-    context.add_message(Message::new(
-        MessageRole::System,
-        "You are helpful.".to_string(),
-    ));
-    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
-    context.add_message(Message::new(MessageRole::Assistant, "Hi!".to_string()));
-
-    // Simulate the check: don't add empty/whitespace responses
-    let empty_response = "   ".to_string();
-    if !empty_response.trim().is_empty() {
-        context.add_message(Message::new(MessageRole::Assistant, empty_response));
-    }
-
-    // Should still have only 3 messages
-    assert_eq!(context.conversation_history.len(), 3);
-    assert!(
-        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
-        "Empty response should not create consecutive messages"
-    );
-}
-
-/// Test the invariant: conversation should always alternate user/assistant after system
 #[test]
 fn test_alternating_pattern_invariant() {
+    // After the system message, conversation should alternate user/assistant
     let history = vec![
         Message::new(MessageRole::System, "System".to_string()),
         Message::new(MessageRole::User, "Q1".to_string()),
@@ -393,7 +257,6 @@ fn test_alternating_pattern_invariant() {
         Message::new(MessageRole::Assistant, "A3".to_string()),
     ];
 
-    // Verify alternating pattern
     for i in 1..history.len() - 1 {
         let current = &history[i].role;
         let next = &history[i + 1].role;
@@ -414,3 +277,93 @@ fn test_alternating_pattern_invariant() {
         }
     }
 }
+
+#[test]
+fn test_fix_prevents_consecutive_messages() {
+    // This simulates what the fixed code does: use a flag to track if message was added
+    let mut context = ContextWindow::new(200_000);
+
+    context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
+    context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
+    context.add_message(Message::new(
+        MessageRole::Assistant,
+        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
+    ));
+    context.add_message(Message::new(
+        MessageRole::User,
+        "Tool result: file1.txt".to_string(),
+    ));
+
+    // Simulate the fix: use a flag to track if message was added
+    let mut assistant_message_added = false;
+    let summary = "Here are the files: file1.txt".to_string();
+
+    // First add location
+    if !assistant_message_added {
+        context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
+        assistant_message_added = true;
+    }
+
+    // Second add location (would have added duplicate before the fix)
+    if !assistant_message_added {
+        context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
+    }
+
+    assert!(
+        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
+        "Fix should prevent consecutive assistant messages"
+    );
+    assert_eq!(
+        count_assistant_messages(&context.conversation_history),
+        2,
+        "Should have exactly 2 assistant messages (tool call + summary)"
+    );
+}
+
+// =============================================================================
+// Missing Assistant Message Fallback Tests
+// =============================================================================
+
+#[test]
+fn test_fallback_to_current_response_when_raw_empty() {
+    // Simulate the scenario where raw_clean is empty but current_response has content
+    let current_response = "Here's my helpful response!";
+    let raw_clean = "";
+
+    let content_to_save = if !raw_clean.trim().is_empty() {
+        raw_clean.to_string()
+    } else {
+        current_response.to_string()
+    };
+
+    assert_eq!(content_to_save, current_response);
+    assert!(!content_to_save.is_empty());
+}
+
+#[test]
+fn test_prefer_raw_clean_when_available() {
+    let current_response = "Filtered response";
+    let raw_clean = "Raw response with tool JSON";
+
+    let content_to_save = if !raw_clean.trim().is_empty() {
+        raw_clean.to_string()
+    } else {
+        current_response.to_string()
+    };
+
+    assert_eq!(content_to_save, raw_clean);
+}
+
+#[test]
+fn test_whitespace_raw_clean_triggers_fallback() {
+    let current_response = "Actual content";
+    let raw_clean = "   \n\t  ";
+
+    let content_to_save = if !raw_clean.trim().is_empty() {
+        raw_clean.to_string()
+    } else {
+        current_response.to_string()
+    };
+
+    assert_eq!(content_to_save, current_response);
+}
diff --git a/crates/g3-core/tests/consecutive_assistant_message_test.rs b/crates/g3-core/tests/consecutive_assistant_message_test.rs
deleted file mode 100644
index d6616dd..0000000
--- a/crates/g3-core/tests/consecutive_assistant_message_test.rs
+++ /dev/null
@@ -1,248 +0,0 @@
-//! Tests for consecutive assistant message bug
-//!
-//! This test verifies that the streaming completion logic does not add
-//! consecutive assistant messages to the conversation history.
-//!
-//! Bug description:
-//! When tools are executed in previous iterations (any_tool_executed=true)
-//! and the current iteration finishes without executing a tool (tool_executed=false),
-//! the assistant message was being added twice:
-//! 1. At line ~2529 when breaking to auto-continue (if any_tool_executed && !current_response.is_empty())
-//! 2. At line ~2772 when returning (if !current_response.is_empty())
-//!
-//! This creates consecutive assistant messages in the conversation history,
-//! which causes the LLM to see duplicated content and mimic the pattern,
-//! leading to compounding duplication over time.
-
-use g3_core::context_window::ContextWindow;
-use g3_providers::{Message, MessageRole};
-
-/// Helper to check if conversation history has consecutive assistant messages
-fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> {
-    for i in 0..history.len().saturating_sub(1) {
-        if matches!(history[i].role, MessageRole::Assistant)
-            && matches!(history[i + 1].role, MessageRole::Assistant)
-        {
-            return Some((i, i + 1));
-        }
-    }
-    None
-}
-
-/// Test that a well-formed conversation has no consecutive assistant messages
-#[test]
-fn test_no_consecutive_assistant_messages_in_normal_flow() {
-    let mut context = ContextWindow::new(200_000);
-
-    // Simulate a normal conversation flow
-    context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
-    context.add_message(Message::new(MessageRole::User, "Task: Hello".to_string()));
-    context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
-    context.add_message(Message::new(MessageRole::User, "Task: How are you?".to_string()));
-    context.add_message(Message::new(MessageRole::Assistant, "I'm doing well!".to_string()));
-
-    assert!(
-        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
-        "Normal conversation should not have consecutive assistant messages"
-    );
-}
-
-/// Test that detects the bug: consecutive assistant messages
-/// This test SHOULD FAIL until the bug is fixed
-#[test]
-fn test_detect_consecutive_assistant_messages_bug() {
-    let mut context = ContextWindow::new(200_000);
-
-    // Simulate the buggy flow:
-    // 1. System message
-    context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
-    
-    // 2. User asks a question
-    context.add_message(Message::new(MessageRole::User, "Task: List files".to_string()));
-    
-    // 3. Assistant responds with tool call (tool execution adds this)
-    context.add_message(Message::new(
-        MessageRole::Assistant,
-        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
-    ));
-    
-    // 4. Tool result
-    context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string()));
-    
-    // 5. Assistant provides summary - THIS IS WHERE THE BUG OCCURS
-    // The bug adds this message TWICE
-    let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string();
-    
-    // Simulate the bug: message added at line 2529 (when breaking to auto-continue)
-    context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
-    
-    // Simulate the bug: message added AGAIN at line 2772 (when returning)
-    // This is the bug - the same message is added twice!
-    context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
-
-    // This assertion verifies the bug exists
-    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
-    assert!(
-        consecutive.is_some(),
-        "Bug reproduction: should have consecutive assistant messages at indices {:?}",
-        consecutive
-    );
-    
-    // Verify the indices
-    let (idx1, idx2) = consecutive.unwrap();
-    assert_eq!(idx1, 4, "First consecutive assistant message should be at index 4");
-    assert_eq!(idx2, 5, "Second consecutive assistant message should be at index 5");
-    
-    // Verify the content is duplicated
-    assert_eq!(
-        context.conversation_history[idx1].content,
-        context.conversation_history[idx2].content,
-        "Consecutive assistant messages should have identical content (the bug)"
-    );
-}
-
-/// Test that validates the invariant: no consecutive assistant messages should exist
-/// This is the test that should PASS after the bug is fixed
-#[test]
-fn test_invariant_no_consecutive_assistant_messages() {
-    // This test documents the invariant that should hold
-    // After the bug is fixed, this test should pass
-    
-    let mut context = ContextWindow::new(200_000);
-
-    // Build a conversation that would trigger the bug
-    context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
-    context.add_message(Message::new(MessageRole::User, "Task: List files".to_string()));
-    context.add_message(Message::new(
-        MessageRole::Assistant,
-        r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
-    ));
-    context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string()));
-    
-    // After the fix, only ONE assistant message should be added
-    let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string();
-    context.add_message(Message::new(MessageRole::Assistant, summary));
-    
-    // Invariant: no consecutive assistant messages
-    assert!(
-        has_consecutive_assistant_messages(&context.conversation_history).is_none(),
-        "Invariant: conversation history should never have consecutive assistant messages"
-    );
-}
-
-/// Test helper function works correctly
-#[test]
-fn test_consecutive_detection_helper() {
-    // Test with no consecutive
-    let history = vec![
-        Message::new(MessageRole::User, "Hi".to_string()),
-        Message::new(MessageRole::Assistant, "Hello".to_string()),
-        Message::new(MessageRole::User, "Bye".to_string()),
-    ];
-    assert!(has_consecutive_assistant_messages(&history).is_none());
-
-    // Test with consecutive at start
-    let history = vec![
-        Message::new(MessageRole::Assistant, "Hello".to_string()),
-        Message::new(MessageRole::Assistant, "Hello again".to_string()),
-        Message::new(MessageRole::User, "Hi".to_string()),
-    ];
-    assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1)));
-
-    // Test with consecutive in middle
-    let history = vec![
-        Message::new(MessageRole::User, "Hi".to_string()),
-        Message::new(MessageRole::Assistant, "Hello".to_string()),
-        Message::new(MessageRole::Assistant, "Hello again".to_string()),
-        Message::new(MessageRole::User, "Bye".to_string()),
-    ];
-    assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
-
-    // Test with consecutive at end
-    let history = vec![
-        Message::new(MessageRole::User, "Hi".to_string()),
-        Message::new(MessageRole::Assistant, "Hello".to_string()),
-        Message::new(MessageRole::Assistant, "Hello again".to_string()),
-    ];
-    assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
-}
-
-/// Test that simulates the exact session log pattern from the bug report
-#[test]
-fn test_session_log_pattern_from_bug_report() {
-    // This test recreates the exact pattern seen in the butler session log:
-    // - Index 368: assistant message with duplicated content
-    // - Index 369: assistant message with same duplicated content (consecutive!)
-    
-    let mut context = ContextWindow::new(200_000);
-    
-    // Simulate the conversation leading up to the bug
-    context.add_message(Message::new(MessageRole::System, "You are butler.".to_string()));
-    
-    // ... many messages ...
-    for i in 0..50 {
-        context.add_message(Message::new(MessageRole::User, format!("Task {}", i)));
-        context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)));
-    }
-    
-    // Now simulate the buggy pattern
-    context.add_message(Message::new(MessageRole::User, "Task: move appa_music into appa".to_string()));
-    
-    // Tool call
-    context.add_message(Message::new(
-        MessageRole::Assistant,
-        r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(),
-    ));
-    context.add_message(Message::new(MessageRole::User, "Tool result: ✓ Moved".to_string()));
-    
-    // The bug: two consecutive assistant messages with duplicated content
-    let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?\n\nDone! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?";
-    
-    // First add (line 2529)
-    context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
-    // Second add (line 2772) - THE BUG
-    context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
-    
-    // Verify the bug pattern exists
-    let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
-    assert!(
-        consecutive.is_some(),
-        "Should detect consecutive assistant messages from bug pattern"
-    );
-    
-    // The consecutive messages should be the last two
-    let (idx1, idx2) = consecutive.unwrap();
-    let history_len = context.conversation_history.len();
-    assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last");
-    assert_eq!(idx2, history_len - 1, "Second consecutive should be last");
-}
-
-/// Test that the context window could have a guard against consecutive assistant messages
-/// This is a proposed fix validation test
-#[test]
-fn test_proposed_fix_guard_against_consecutive() {
-    let mut context = ContextWindow::new(200_000);
-    
-    context.add_message(Message::new(MessageRole::System, "System".to_string()));
-    context.add_message(Message::new(MessageRole::User, "User".to_string()));
-    context.add_message(Message::new(MessageRole::Assistant, "First response".to_string()));
-    
-    // Proposed fix: before adding an assistant message, check if the last message
-    // is also an assistant message. If so, either:
-    // 1. Skip adding (if content is identical)
-    // 2. Merge the content (if content is different)
-    // 3. Log a warning and skip (safest option)
-    
-    let last_message = context.conversation_history.last();
-    let is_last_assistant = last_message
-        .map(|m| matches!(m.role, MessageRole::Assistant))
-        .unwrap_or(false);
-    
-    assert!(
-        is_last_assistant,
-        "Last message should be assistant for this test"
-    );
-    
-    // The fix would check this condition before adding
-    // and skip adding if true (to prevent consecutive assistant messages)
-}
diff --git a/crates/g3-core/tests/early_return_path_test.rs b/crates/g3-core/tests/early_return_path_test.rs
deleted file mode 100644
index 286fd7a..0000000
--- a/crates/g3-core/tests/early_return_path_test.rs
+++ /dev/null
@@ -1,192 +0,0 @@
-//! Tests for the early return path in stream_completion_with_tools
-//!
-//! This test documents the bug fix for missing assistant messages when:
-//! - The LLM responds with text only (no tool calls)
-//! - any_tool_executed = false (no tools in previous iterations)
-//! - The stream finishes and hits the early return path
-//!
-//! The fix adds assistant message saving before the early return at ~line 2535.
-
-use g3_core::context_window::ContextWindow;
-use g3_providers::{Message, MessageRole};
-
-/// Simulates the early return path logic
-/// This mirrors the code at lines 2535-2555 in lib.rs
-fn simulate_early_return_path(
-    context_window: &mut ContextWindow,
-    current_response: &str,
-    assistant_message_added: bool,
-    any_tool_executed: bool,
-) -> bool {
-    // This simulates the code path when stream finishes
-    
-    if any_tool_executed {
-        // If tools were executed in previous iterations, break to finalize
-        if !current_response.trim().is_empty() && !assistant_message_added {
-            let assistant_msg = Message::new(
-                MessageRole::Assistant,
-                current_response.to_string(),
-            );
-            context_window.add_message(assistant_msg);
-            return true; // assistant_message_added = true
-        }
-        return assistant_message_added;
-    }
-    
-    // THE FIX: Save assistant message before returning (no tools were executed)
-    if !current_response.trim().is_empty() && !assistant_message_added {
-        let assistant_msg = Message::new(
-            MessageRole::Assistant,
-            current_response.to_string(),
-        );
-        context_window.add_message(assistant_msg);
-        return true;
-    }
-    
-    assistant_message_added
-}
-
-/// Test: Text-only response with no previous tool execution
-/// This is the exact bug scenario from butler session
-#[test]
-fn test_text_only_response_no_tools() {
-    let mut context = ContextWindow::new(200_000);
-    
-    // Add initial user message
-    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
-    
-    let current_response = "Phew! 😅 Glad it's back. Sorry about that...";
-    let assistant_message_added = false;
-    let any_tool_executed = false;
-    
-    let was_added = simulate_early_return_path(
-        &mut context,
-        current_response,
-        assistant_message_added,
-        any_tool_executed,
-    );
-    
-    assert!(was_added, "Assistant message should be added");
-    
-    let history = context.conversation_history.clone();
-    assert_eq!(history.len(), 2, "Should have user + assistant messages");
-    assert!(matches!(history[1].role, MessageRole::Assistant));
-    assert!(history[1].content.contains("Phew!"));
-}
-
-/// Test: Text-only response with previous tool execution
-#[test]
-fn test_text_only_response_with_previous_tools() {
-    let mut context = ContextWindow::new(200_000);
-    
-    context.add_message(Message::new(MessageRole::User, "Run ls".to_string()));
-    
-    let current_response = "Here are the files...";
-    let assistant_message_added = false;
-    let any_tool_executed = true; // Tools were executed in previous iterations
-    
-    let was_added = simulate_early_return_path(
-        &mut context,
-        current_response,
-        assistant_message_added,
-        any_tool_executed,
-    );
-    
-    assert!(was_added, "Assistant message should be added");
-    
-    let history = context.conversation_history.clone();
-    assert_eq!(history.len(), 2);
-    assert!(matches!(history[1].role, MessageRole::Assistant));
-}
-
-/// Test: Empty response should not add message
-#[test]
-fn test_empty_response_not_added() {
-    let mut context = ContextWindow::new(200_000);
-    
-    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
-    
-    let current_response = "   "; // Whitespace only
-    let assistant_message_added = false;
-    let any_tool_executed = false;
-    
-    let was_added = simulate_early_return_path(
-        &mut context,
-        current_response,
-        assistant_message_added,
-        any_tool_executed,
-    );
-    
-    assert!(!was_added, "Empty response should not be added");
-    
-    let history = context.conversation_history.clone();
-    assert_eq!(history.len(), 1, "Should only have user message");
-}
-
-/// Test: Already added flag prevents duplication
-#[test]
-fn test_already_added_prevents_duplication() {
-    let mut context = ContextWindow::new(200_000);
-    
-    context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
-    context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
-    
-    let current_response = "Hi there!";
-    let assistant_message_added = true; // Already added
-    let any_tool_executed = false;
-    
-    let was_added = simulate_early_return_path(
-        &mut context,
-        current_response,
-        assistant_message_added,
-        any_tool_executed,
-    );
-    
-    assert!(was_added, "Flag should remain true");
-    
-    let history = context.conversation_history.clone();
-    assert_eq!(history.len(), 2, "Should not add duplicate");
-}
-
-/// Test: Consecutive user messages scenario (the bug)
-/// Before the fix, this would result in consecutive user messages
-#[test]
-fn test_bug_scenario_consecutive_user_messages() {
-    let mut context = ContextWindow::new(200_000);
-    
-    // Simulate the butler session bug:
-    // Message 80: Tool result (user)
-    // Message 81: User input (user) - SHOULD have assistant between!
-    // Message 82: User input (user)
-    
-    context.add_message(Message::new(MessageRole::User, "Tool result: Self care".to_string()));
-    
-    // Simulate assistant response that was displayed but not saved (THE BUG)
-    let current_response = "Phew! 😅 Glad it's back...";
-    let assistant_message_added = false;
-    let any_tool_executed = false;
-    
-    // THE FIX: This should now save the assistant message
-    simulate_early_return_path(
-        &mut context,
-        current_response,
-        assistant_message_added,
-        any_tool_executed,
-    );
-    
-    // Now add the next user message
-    context.add_message(Message::new(MessageRole::User, "Task: Ok it's back...".to_string()));
-    
-    let history = context.conversation_history.clone();
-    
-    // Verify no consecutive user messages
-    for i in 0..history.len().saturating_sub(1) {
-        let current_is_user = matches!(history[i].role, MessageRole::User);
-        let next_is_user = matches!(history[i + 1].role, MessageRole::User);
-        assert!(
-            !(current_is_user && next_is_user),
-            "Found consecutive user messages at positions {} and {}",
-            i, i + 1
-        );
-    }
-}
diff --git a/crates/g3-core/tests/missing_assistant_message_test.rs b/crates/g3-core/tests/missing_assistant_message_test.rs
deleted file mode 100644
index 3aa7a59..0000000
--- a/crates/g3-core/tests/missing_assistant_message_test.rs
+++ /dev/null
@@ -1,100 +0,0 @@
-//! Tests for the missing assistant message bug fix.
-//!
-//! Bug: When the LLM responds with text-only (no tool calls), the assistant message
-//! was sometimes not saved to the context window because the code checked
-//! `raw_clean.trim().is_empty()` after already confirming `current_response` had content.
-//! If the parser buffer was empty/different from current_response, no message was added.
-//!
-//! Fix: Use current_response as fallback when raw_clean is empty.
-
-/// Test that the fix ensures assistant messages are always added when there's content.
-/// This simulates the fixed behavior where current_response is used as fallback.
-#[test]
-fn test_fallback_to_current_response_when_raw_empty() {
-    // Simulate the scenario:
-    // - current_response has content (what was displayed)
-    // - raw_clean is empty (parser buffer was cleared)
-    // - The fix should use current_response as fallback
-    
-    let current_response = "Here's my helpful response!";
-    let raw_clean = ""; // Parser buffer is empty
-    
-    // The fix logic:
-    let content_to_save = if !raw_clean.trim().is_empty() {
-        raw_clean.to_string()
-    } else {
-        current_response.to_string()
-    };
-    
-    // Verify fallback works
-    assert_eq!(content_to_save, current_response);
-    assert!(!content_to_save.is_empty());
-}
-
-/// Test that raw_clean is preferred when available.
-#[test]
-fn test_prefer_raw_clean_when_available() {
-    let current_response = "Filtered response"; // What was displayed (filtered)
-    let raw_clean = "Raw response with {\"tool\": ...} JSON"; // Raw content
-    
-    // The fix logic:
-    let content_to_save = if !raw_clean.trim().is_empty() {
-        raw_clean.to_string()
-    } else {
-        current_response.to_string()
-    };
-    
-    // Verify raw_clean is preferred
-    assert_eq!(content_to_save, raw_clean);
-}
-
-/// Test that whitespace-only raw_clean triggers fallback.
-#[test]
-fn test_whitespace_raw_clean_triggers_fallback() {
-    let current_response = "Actual content";
-    let raw_clean = "   \n\t  "; // Whitespace only
-    
-    // The fix logic:
-    let content_to_save = if !raw_clean.trim().is_empty() {
-        raw_clean.to_string()
-    } else {
-        current_response.to_string()
-    };
-    
-    // Verify fallback to current_response
-    assert_eq!(content_to_save, current_response);
-}
-
-/// Test that the fix logic handles various edge cases.
-#[test]
-fn test_fix_logic_edge_cases() {
-    // Test case 1: Both have content - prefer raw
-    let current = "displayed";
-    let raw = "raw content";
-    let result = if !raw.trim().is_empty() { raw } else { current };
-    assert_eq!(result, "raw content");
-    
-    // Test case 2: Raw is empty - use current
-    let current = "displayed";
-    let raw = "";
-    let result = if !raw.trim().is_empty() { raw } else { current };
-    assert_eq!(result, "displayed");
-    
-    // Test case 3: Raw is whitespace - use current
-    let current = "displayed";
-    let raw = "  \n  ";
-    let result = if !raw.trim().is_empty() { raw } else { current };
-    assert_eq!(result, "displayed");
-    
-    // Test case 4: Both empty - still returns current (empty)
-    let current = "";
-    let raw = "";
-    let result = if !raw.trim().is_empty() { raw } else { current };
-    assert_eq!(result, "");
-    
-    // Test case 5: Current has content, raw has only newlines
-    let current = "Hello world!";
-    let raw = "\n\n\n";
-    let result = if !raw.trim().is_empty() { raw } else { current };
-    assert_eq!(result, "Hello world!");
-}