Consolidate redundant assistant message test files

Deleted 4 redundant test files (~956 lines):
- assistant_message_dedup_test.rs (416 lines, 12 tests)
- consecutive_assistant_message_test.rs (248 lines, 6 tests)
- missing_assistant_message_test.rs (100 lines, 4 tests)
- early_return_path_test.rs (192 lines, 5 tests) - whitebox test

Created consolidated assistant_message_test.rs (369 lines, 14 tests):
- Helper function tests for consecutive message detection
- ContextWindow unit tests for normal and tool execution flows
- Bug demonstration tests documenting what bugs looked like
- Invariant tests for user/assistant alternation
- Missing assistant message fallback logic tests

The early_return_path_test was removed because it:
- Referenced specific line numbers in production code (brittle)
- Reimplemented internal logic (whitebox anti-pattern)
- Duplicated coverage from mock_provider_integration_test.rs

All 729 g3-core tests pass.
This commit is contained in:
Dhanji R. Prasanna
2026-01-21 10:27:07 +05:30
parent c5d549c211
commit 6a5ce11e7b
4 changed files with 154 additions and 741 deletions

View File

@@ -1,16 +1,20 @@
//! Tests for Assistant Message Deduplication //! Tests for Assistant Message Handling
//! //!
//! These tests verify that the conversation history does NOT have //! These tests verify correct handling of assistant messages in conversation history:
//! consecutive assistant messages, which was a bug that caused //! 1. No consecutive assistant messages (deduplication bug)
//! compounding duplication in LLM responses. //! 2. Proper user/assistant alternation
//! 3. Missing assistant message fallback logic
//! //!
//! Bug fixed: When tools were executed in previous iterations (any_tool_executed=true) //! The original bugs:
//! and the current iteration finished without executing a tool (tool_executed=false), //! - Consecutive assistant messages: When tools were executed in previous iterations
//! the assistant message was being added twice: //! and the current iteration finished without executing a tool, the assistant
//! 1. At the break point when any_tool_executed is true //! message was being added twice.
//! 2. At the return point when !tool_executed //! - Missing assistant messages: When the LLM responded with text-only (no tool calls),
//! the assistant message was sometimes not saved because the code checked
//! `raw_clean.trim().is_empty()` after already confirming `current_response` had content.
//! //!
//! The fix uses an `assistant_message_added` flag to ensure only one add occurs. //! These bugs are now tested through the public API in mock_provider_integration_test.rs.
//! This file contains unit tests for the helper functions and invariants.
use g3_core::context_window::ContextWindow; use g3_core::context_window::ContextWindow;
use g3_providers::{Message, MessageRole}; use g3_providers::{Message, MessageRole};
@@ -19,7 +23,8 @@ use g3_providers::{Message, MessageRole};
// Helper Functions // Helper Functions
// ============================================================================= // =============================================================================
/// Check if conversation history has consecutive assistant messages /// Check if conversation history has consecutive assistant messages.
/// Returns the indices of the first consecutive pair found, if any.
fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> { fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> {
for i in 0..history.len().saturating_sub(1) { for i in 0..history.len().saturating_sub(1) {
if matches!(history[i].role, MessageRole::Assistant) if matches!(history[i].role, MessageRole::Assistant)
@@ -40,43 +45,48 @@ fn count_assistant_messages(history: &[Message]) -> usize {
} }
// ============================================================================= // =============================================================================
// Unit Tests for Helper Functions // Helper Function Tests
// ============================================================================= // =============================================================================
#[test] #[test]
fn test_has_consecutive_assistant_messages_none() { fn test_consecutive_detection_no_consecutive() {
let history = vec![ let history = vec![
Message::new(MessageRole::System, "System".to_string()),
Message::new(MessageRole::User, "Hi".to_string()), Message::new(MessageRole::User, "Hi".to_string()),
Message::new(MessageRole::Assistant, "Hello".to_string()), Message::new(MessageRole::Assistant, "Hello".to_string()),
Message::new(MessageRole::User, "Bye".to_string()), Message::new(MessageRole::User, "Bye".to_string()),
Message::new(MessageRole::Assistant, "Goodbye".to_string()),
]; ];
assert!(has_consecutive_assistant_messages(&history).is_none()); assert!(has_consecutive_assistant_messages(&history).is_none());
} }
#[test] #[test]
fn test_has_consecutive_assistant_messages_found_middle() { fn test_consecutive_detection_at_start() {
let history = vec![ let history = vec![
Message::new(MessageRole::System, "System".to_string()),
Message::new(MessageRole::User, "Hi".to_string()),
Message::new(MessageRole::Assistant, "Hello".to_string()), Message::new(MessageRole::Assistant, "Hello".to_string()),
Message::new(MessageRole::Assistant, "Hello again".to_string()), // BUG! Message::new(MessageRole::Assistant, "Hello again".to_string()),
Message::new(MessageRole::User, "Bye".to_string()), Message::new(MessageRole::User, "Hi".to_string()),
]; ];
assert_eq!(has_consecutive_assistant_messages(&history), Some((2, 3))); assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1)));
} }
#[test] #[test]
fn test_has_consecutive_assistant_messages_found_end() { fn test_consecutive_detection_in_middle() {
let history = vec![ let history = vec![
Message::new(MessageRole::User, "Hi".to_string()), Message::new(MessageRole::User, "Hi".to_string()),
Message::new(MessageRole::Assistant, "Hello".to_string()), Message::new(MessageRole::Assistant, "Hello".to_string()),
Message::new(MessageRole::Assistant, "Hello again".to_string()),
Message::new(MessageRole::User, "Bye".to_string()), Message::new(MessageRole::User, "Bye".to_string()),
Message::new(MessageRole::Assistant, "Goodbye".to_string()),
Message::new(MessageRole::Assistant, "Goodbye again".to_string()), // BUG!
]; ];
assert_eq!(has_consecutive_assistant_messages(&history), Some((3, 4))); assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
}
#[test]
fn test_consecutive_detection_at_end() {
let history = vec![
Message::new(MessageRole::User, "Hi".to_string()),
Message::new(MessageRole::Assistant, "Hello".to_string()),
Message::new(MessageRole::Assistant, "Hello again".to_string()),
];
assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
} }
#[test] #[test]
@@ -95,15 +105,11 @@ fn test_count_assistant_messages() {
// ContextWindow Unit Tests // ContextWindow Unit Tests
// ============================================================================= // =============================================================================
/// Test that ContextWindow correctly tracks messages in normal flow
#[test] #[test]
fn test_context_window_normal_flow() { fn test_normal_conversation_flow() {
let mut context = ContextWindow::new(200_000); let mut context = ContextWindow::new(200_000);
context.add_message(Message::new( context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
MessageRole::System,
"You are helpful.".to_string(),
));
context.add_message(Message::new(MessageRole::User, "Hello".to_string())); context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string())); context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
context.add_message(Message::new(MessageRole::User, "How are you?".to_string())); context.add_message(Message::new(MessageRole::User, "How are you?".to_string()));
@@ -117,16 +123,11 @@ fn test_context_window_normal_flow() {
); );
} }
/// Test that simulates the correct flow after tool execution
#[test] #[test]
fn test_context_window_tool_execution_correct_flow() { fn test_tool_execution_correct_flow() {
let mut context = ContextWindow::new(200_000); let mut context = ContextWindow::new(200_000);
// Setup context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
context.add_message(Message::new(
MessageRole::System,
"You are helpful.".to_string(),
));
context.add_message(Message::new(MessageRole::User, "Run a command".to_string())); context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
// Tool call (assistant message with tool JSON) // Tool call (assistant message with tool JSON)
@@ -134,7 +135,7 @@ fn test_context_window_tool_execution_correct_flow() {
MessageRole::Assistant, MessageRole::Assistant,
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(), r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
)); ));
// Tool result (user message) // Tool result (user message)
context.add_message(Message::new( context.add_message(Message::new(
MessageRole::User, MessageRole::User,
@@ -147,7 +148,6 @@ fn test_context_window_tool_execution_correct_flow() {
"Here are the files: file1.txt, file2.txt".to_string(), "Here are the files: file1.txt, file2.txt".to_string(),
)); ));
// Verify structure
assert_eq!(context.conversation_history.len(), 5); assert_eq!(context.conversation_history.len(), 5);
assert_eq!(count_assistant_messages(&context.conversation_history), 2); assert_eq!(count_assistant_messages(&context.conversation_history), 2);
assert!( assert!(
@@ -156,58 +156,11 @@ fn test_context_window_tool_execution_correct_flow() {
); );
} }
/// Test that demonstrates the bug scenario (what the bug looked like)
#[test] #[test]
fn test_context_window_bug_scenario_demonstration() { fn test_multiple_tools_correct_flow() {
let mut context = ContextWindow::new(200_000); let mut context = ContextWindow::new(200_000);
// Setup context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
context.add_message(Message::new(
MessageRole::System,
"You are helpful.".to_string(),
));
context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
// Tool call
context.add_message(Message::new(
MessageRole::Assistant,
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
));
context.add_message(Message::new(
MessageRole::User,
"Tool result: file1.txt".to_string(),
));
// THE BUG: Summary added TWICE
let summary = "Here are the files: file1.txt".to_string();
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG!
// This demonstrates what the bug looked like
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
assert!(
consecutive.is_some(),
"Bug scenario should have consecutive assistant messages"
);
assert_eq!(consecutive, Some((4, 5)));
// The content is duplicated
assert_eq!(
context.conversation_history[4].content,
context.conversation_history[5].content,
"Bug: consecutive messages have identical content"
);
}
/// Test multiple tool executions followed by summary (correct flow)
#[test]
fn test_context_window_multiple_tools_correct_flow() {
let mut context = ContextWindow::new(200_000);
context.add_message(Message::new(
MessageRole::System,
"You are helpful.".to_string(),
));
context.add_message(Message::new( context.add_message(Message::new(
MessageRole::User, MessageRole::User,
"List files and show current directory".to_string(), "List files and show current directory".to_string(),
@@ -239,7 +192,6 @@ fn test_context_window_multiple_tools_correct_flow() {
"Files: file1.txt, file2.txt. Current directory: /home/user".to_string(), "Files: file1.txt, file2.txt. Current directory: /home/user".to_string(),
)); ));
// Verify structure
// System + User + (Assistant + User) * 2 + Assistant = 1 + 1 + 4 + 1 = 7 // System + User + (Assistant + User) * 2 + Assistant = 1 + 1 + 4 + 1 = 7
assert_eq!(context.conversation_history.len(), 7); assert_eq!(context.conversation_history.len(), 7);
assert_eq!(count_assistant_messages(&context.conversation_history), 3); assert_eq!(count_assistant_messages(&context.conversation_history), 3);
@@ -249,72 +201,17 @@ fn test_context_window_multiple_tools_correct_flow() {
); );
} }
/// Test the exact session log pattern from the original bug report // =============================================================================
#[test] // Bug Demonstration Tests (document what the bugs looked like)
fn test_session_log_pattern_from_bug_report() { // =============================================================================
// This test recreates the exact pattern seen in the butler session log:
// - Index N: assistant message with content
// - Index N+1: assistant message with same content (consecutive!)
let mut context = ContextWindow::new(200_000);
// Simulate a conversation with many messages
context.add_message(Message::new(MessageRole::System, "You are butler.".to_string()));
// Add some conversation history
for i in 0..10 {
context.add_message(Message::new(MessageRole::User, format!("Task {}", i)));
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)));
}
// Now simulate the buggy pattern
context.add_message(Message::new(
MessageRole::User,
"Task: move appa_music into appa".to_string(),
));
// Tool call
context.add_message(Message::new(
MessageRole::Assistant,
r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(),
));
context.add_message(Message::new(
MessageRole::User,
"Tool result: ✓ Moved".to_string(),
));
// The bug: two consecutive assistant messages with duplicated content
let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?";
// First add (correct)
context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
// Second add (THE BUG)
context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
// Verify the bug pattern exists
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
assert!(
consecutive.is_some(),
"Should detect consecutive assistant messages from bug pattern"
);
// The consecutive messages should be the last two
let (idx1, idx2) = consecutive.unwrap();
let history_len = context.conversation_history.len();
assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last");
assert_eq!(idx2, history_len - 1, "Second consecutive should be last");
}
/// Test that the fix prevents consecutive assistant messages
/// This simulates what the fixed code does: check before adding
#[test] #[test]
fn test_fix_prevents_consecutive_assistant_messages() { fn test_bug_demonstration_consecutive_messages() {
// This test demonstrates what the consecutive message bug looked like.
// The bug is now fixed and tested through mock_provider_integration_test.rs.
let mut context = ContextWindow::new(200_000); let mut context = ContextWindow::new(200_000);
context.add_message(Message::new( context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
MessageRole::System,
"You are helpful.".to_string(),
));
context.add_message(Message::new(MessageRole::User, "Run a command".to_string())); context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
context.add_message(Message::new( context.add_message(Message::new(
MessageRole::Assistant, MessageRole::Assistant,
@@ -325,64 +222,31 @@ fn test_fix_prevents_consecutive_assistant_messages() {
"Tool result: file1.txt".to_string(), "Tool result: file1.txt".to_string(),
)); ));
// Simulate the fix: use a flag to track if message was added // THE BUG: Summary added TWICE
let mut assistant_message_added = false;
let summary = "Here are the files: file1.txt".to_string(); let summary = "Here are the files: file1.txt".to_string();
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG!
// First add location (simulating line ~2529 in the fix) // Verify the bug pattern
if !assistant_message_added { let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
context.add_message(Message::new(MessageRole::Assistant, summary.clone())); assert!(consecutive.is_some(), "Bug scenario should have consecutive assistant messages");
assistant_message_added = true; assert_eq!(consecutive, Some((4, 5)));
}
// Second add location (simulating line ~2772 in the fix) // The content is duplicated
// This would have added a duplicate before the fix
if !assistant_message_added {
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
// assistant_message_added = true; // Not needed since we return after
}
// Verify the fix works
assert!(
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
"Fix should prevent consecutive assistant messages"
);
assert_eq!( assert_eq!(
count_assistant_messages(&context.conversation_history), context.conversation_history[4].content,
2, context.conversation_history[5].content,
"Should have exactly 2 assistant messages (tool call + summary)" "Bug: consecutive messages have identical content"
); );
} }
/// Test edge case: empty response should not add message // =============================================================================
#[test] // Invariant Tests
fn test_empty_response_not_added() { // =============================================================================
let mut context = ContextWindow::new(200_000);
context.add_message(Message::new(
MessageRole::System,
"You are helpful.".to_string(),
));
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
context.add_message(Message::new(MessageRole::Assistant, "Hi!".to_string()));
// Simulate the check: don't add empty/whitespace responses
let empty_response = " ".to_string();
if !empty_response.trim().is_empty() {
context.add_message(Message::new(MessageRole::Assistant, empty_response));
}
// Should still have only 3 messages
assert_eq!(context.conversation_history.len(), 3);
assert!(
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
"Empty response should not create consecutive messages"
);
}
/// Test the invariant: conversation should always alternate user/assistant after system
#[test] #[test]
fn test_alternating_pattern_invariant() { fn test_alternating_pattern_invariant() {
// After the system message, conversation should alternate user/assistant
let history = vec![ let history = vec![
Message::new(MessageRole::System, "System".to_string()), Message::new(MessageRole::System, "System".to_string()),
Message::new(MessageRole::User, "Q1".to_string()), Message::new(MessageRole::User, "Q1".to_string()),
@@ -393,7 +257,6 @@ fn test_alternating_pattern_invariant() {
Message::new(MessageRole::Assistant, "A3".to_string()), Message::new(MessageRole::Assistant, "A3".to_string()),
]; ];
// Verify alternating pattern
for i in 1..history.len() - 1 { for i in 1..history.len() - 1 {
let current = &history[i].role; let current = &history[i].role;
let next = &history[i + 1].role; let next = &history[i + 1].role;
@@ -414,3 +277,93 @@ fn test_alternating_pattern_invariant() {
} }
} }
} }
#[test]
fn test_fix_prevents_consecutive_messages() {
// This simulates what the fixed code does: use a flag to track if message was added
let mut context = ContextWindow::new(200_000);
context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
context.add_message(Message::new(
MessageRole::Assistant,
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
));
context.add_message(Message::new(
MessageRole::User,
"Tool result: file1.txt".to_string(),
));
// Simulate the fix: use a flag to track if message was added
let mut assistant_message_added = false;
let summary = "Here are the files: file1.txt".to_string();
// First add location
if !assistant_message_added {
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
assistant_message_added = true;
}
// Second add location (would have added duplicate before the fix)
if !assistant_message_added {
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
}
assert!(
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
"Fix should prevent consecutive assistant messages"
);
assert_eq!(
count_assistant_messages(&context.conversation_history),
2,
"Should have exactly 2 assistant messages (tool call + summary)"
);
}
// =============================================================================
// Missing Assistant Message Fallback Tests
// =============================================================================
#[test]
fn test_fallback_to_current_response_when_raw_empty() {
// Simulate the scenario where raw_clean is empty but current_response has content
let current_response = "Here's my helpful response!";
let raw_clean = "";
let content_to_save = if !raw_clean.trim().is_empty() {
raw_clean.to_string()
} else {
current_response.to_string()
};
assert_eq!(content_to_save, current_response);
assert!(!content_to_save.is_empty());
}
#[test]
fn test_prefer_raw_clean_when_available() {
let current_response = "Filtered response";
let raw_clean = "Raw response with tool JSON";
let content_to_save = if !raw_clean.trim().is_empty() {
raw_clean.to_string()
} else {
current_response.to_string()
};
assert_eq!(content_to_save, raw_clean);
}
#[test]
fn test_whitespace_raw_clean_triggers_fallback() {
let current_response = "Actual content";
let raw_clean = " \n\t ";
let content_to_save = if !raw_clean.trim().is_empty() {
raw_clean.to_string()
} else {
current_response.to_string()
};
assert_eq!(content_to_save, current_response);
}

View File

@@ -1,248 +0,0 @@
//! Tests for consecutive assistant message bug
//!
//! This test verifies that the streaming completion logic does not add
//! consecutive assistant messages to the conversation history.
//!
//! Bug description:
//! When tools are executed in previous iterations (any_tool_executed=true)
//! and the current iteration finishes without executing a tool (tool_executed=false),
//! the assistant message was being added twice:
//! 1. At line ~2529 when breaking to auto-continue (if any_tool_executed && !current_response.is_empty())
//! 2. At line ~2772 when returning (if !current_response.is_empty())
//!
//! This creates consecutive assistant messages in the conversation history,
//! which causes the LLM to see duplicated content and mimic the pattern,
//! leading to compounding duplication over time.
use g3_core::context_window::ContextWindow;
use g3_providers::{Message, MessageRole};
/// Helper to check if conversation history has consecutive assistant messages
fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> {
for i in 0..history.len().saturating_sub(1) {
if matches!(history[i].role, MessageRole::Assistant)
&& matches!(history[i + 1].role, MessageRole::Assistant)
{
return Some((i, i + 1));
}
}
None
}
/// Test that a well-formed conversation has no consecutive assistant messages
#[test]
fn test_no_consecutive_assistant_messages_in_normal_flow() {
let mut context = ContextWindow::new(200_000);
// Simulate a normal conversation flow
context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
context.add_message(Message::new(MessageRole::User, "Task: Hello".to_string()));
context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
context.add_message(Message::new(MessageRole::User, "Task: How are you?".to_string()));
context.add_message(Message::new(MessageRole::Assistant, "I'm doing well!".to_string()));
assert!(
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
"Normal conversation should not have consecutive assistant messages"
);
}
/// Test that detects the bug: consecutive assistant messages
/// This test SHOULD FAIL until the bug is fixed
#[test]
fn test_detect_consecutive_assistant_messages_bug() {
let mut context = ContextWindow::new(200_000);
// Simulate the buggy flow:
// 1. System message
context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
// 2. User asks a question
context.add_message(Message::new(MessageRole::User, "Task: List files".to_string()));
// 3. Assistant responds with tool call (tool execution adds this)
context.add_message(Message::new(
MessageRole::Assistant,
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
));
// 4. Tool result
context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string()));
// 5. Assistant provides summary - THIS IS WHERE THE BUG OCCURS
// The bug adds this message TWICE
let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string();
// Simulate the bug: message added at line 2529 (when breaking to auto-continue)
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
// Simulate the bug: message added AGAIN at line 2772 (when returning)
// This is the bug - the same message is added twice!
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
// This assertion verifies the bug exists
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
assert!(
consecutive.is_some(),
"Bug reproduction: should have consecutive assistant messages at indices {:?}",
consecutive
);
// Verify the indices
let (idx1, idx2) = consecutive.unwrap();
assert_eq!(idx1, 4, "First consecutive assistant message should be at index 4");
assert_eq!(idx2, 5, "Second consecutive assistant message should be at index 5");
// Verify the content is duplicated
assert_eq!(
context.conversation_history[idx1].content,
context.conversation_history[idx2].content,
"Consecutive assistant messages should have identical content (the bug)"
);
}
/// Test that validates the invariant: no consecutive assistant messages should exist
/// This is the test that should PASS after the bug is fixed
#[test]
fn test_invariant_no_consecutive_assistant_messages() {
// This test documents the invariant that should hold
// After the bug is fixed, this test should pass
let mut context = ContextWindow::new(200_000);
// Build a conversation that would trigger the bug
context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
context.add_message(Message::new(MessageRole::User, "Task: List files".to_string()));
context.add_message(Message::new(
MessageRole::Assistant,
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
));
context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string()));
// After the fix, only ONE assistant message should be added
let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string();
context.add_message(Message::new(MessageRole::Assistant, summary));
// Invariant: no consecutive assistant messages
assert!(
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
"Invariant: conversation history should never have consecutive assistant messages"
);
}
/// Test helper function works correctly
#[test]
fn test_consecutive_detection_helper() {
// Test with no consecutive
let history = vec![
Message::new(MessageRole::User, "Hi".to_string()),
Message::new(MessageRole::Assistant, "Hello".to_string()),
Message::new(MessageRole::User, "Bye".to_string()),
];
assert!(has_consecutive_assistant_messages(&history).is_none());
// Test with consecutive at start
let history = vec![
Message::new(MessageRole::Assistant, "Hello".to_string()),
Message::new(MessageRole::Assistant, "Hello again".to_string()),
Message::new(MessageRole::User, "Hi".to_string()),
];
assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1)));
// Test with consecutive in middle
let history = vec![
Message::new(MessageRole::User, "Hi".to_string()),
Message::new(MessageRole::Assistant, "Hello".to_string()),
Message::new(MessageRole::Assistant, "Hello again".to_string()),
Message::new(MessageRole::User, "Bye".to_string()),
];
assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
// Test with consecutive at end
let history = vec![
Message::new(MessageRole::User, "Hi".to_string()),
Message::new(MessageRole::Assistant, "Hello".to_string()),
Message::new(MessageRole::Assistant, "Hello again".to_string()),
];
assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
}
/// Test that simulates the exact session log pattern from the bug report
#[test]
fn test_session_log_pattern_from_bug_report() {
// This test recreates the exact pattern seen in the butler session log:
// - Index 368: assistant message with duplicated content
// - Index 369: assistant message with same duplicated content (consecutive!)
let mut context = ContextWindow::new(200_000);
// Simulate the conversation leading up to the bug
context.add_message(Message::new(MessageRole::System, "You are butler.".to_string()));
// ... many messages ...
for i in 0..50 {
context.add_message(Message::new(MessageRole::User, format!("Task {}", i)));
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)));
}
// Now simulate the buggy pattern
context.add_message(Message::new(MessageRole::User, "Task: move appa_music into appa".to_string()));
// Tool call
context.add_message(Message::new(
MessageRole::Assistant,
r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(),
));
context.add_message(Message::new(MessageRole::User, "Tool result: ✓ Moved".to_string()));
// The bug: two consecutive assistant messages with duplicated content
let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?\n\nDone! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?";
// First add (line 2529)
context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
// Second add (line 2772) - THE BUG
context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
// Verify the bug pattern exists
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
assert!(
consecutive.is_some(),
"Should detect consecutive assistant messages from bug pattern"
);
// The consecutive messages should be the last two
let (idx1, idx2) = consecutive.unwrap();
let history_len = context.conversation_history.len();
assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last");
assert_eq!(idx2, history_len - 1, "Second consecutive should be last");
}
/// Test that the context window could have a guard against consecutive assistant messages
/// This is a proposed fix validation test
#[test]
fn test_proposed_fix_guard_against_consecutive() {
let mut context = ContextWindow::new(200_000);
context.add_message(Message::new(MessageRole::System, "System".to_string()));
context.add_message(Message::new(MessageRole::User, "User".to_string()));
context.add_message(Message::new(MessageRole::Assistant, "First response".to_string()));
// Proposed fix: before adding an assistant message, check if the last message
// is also an assistant message. If so, either:
// 1. Skip adding (if content is identical)
// 2. Merge the content (if content is different)
// 3. Log a warning and skip (safest option)
let last_message = context.conversation_history.last();
let is_last_assistant = last_message
.map(|m| matches!(m.role, MessageRole::Assistant))
.unwrap_or(false);
assert!(
is_last_assistant,
"Last message should be assistant for this test"
);
// The fix would check this condition before adding
// and skip adding if true (to prevent consecutive assistant messages)
}

View File

@@ -1,192 +0,0 @@
//! Tests for the early return path in stream_completion_with_tools
//!
//! This test documents the bug fix for missing assistant messages when:
//! - The LLM responds with text only (no tool calls)
//! - any_tool_executed = false (no tools in previous iterations)
//! - The stream finishes and hits the early return path
//!
//! The fix adds assistant message saving before the early return at ~line 2535.
use g3_core::context_window::ContextWindow;
use g3_providers::{Message, MessageRole};
/// Simulates the early return path logic
/// This mirrors the code at lines 2535-2555 in lib.rs
fn simulate_early_return_path(
context_window: &mut ContextWindow,
current_response: &str,
assistant_message_added: bool,
any_tool_executed: bool,
) -> bool {
// This simulates the code path when stream finishes
if any_tool_executed {
// If tools were executed in previous iterations, break to finalize
if !current_response.trim().is_empty() && !assistant_message_added {
let assistant_msg = Message::new(
MessageRole::Assistant,
current_response.to_string(),
);
context_window.add_message(assistant_msg);
return true; // assistant_message_added = true
}
return assistant_message_added;
}
// THE FIX: Save assistant message before returning (no tools were executed)
if !current_response.trim().is_empty() && !assistant_message_added {
let assistant_msg = Message::new(
MessageRole::Assistant,
current_response.to_string(),
);
context_window.add_message(assistant_msg);
return true;
}
assistant_message_added
}
/// Test: Text-only response with no previous tool execution
/// This is the exact bug scenario from butler session
#[test]
fn test_text_only_response_no_tools() {
let mut context = ContextWindow::new(200_000);
// Add initial user message
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
let current_response = "Phew! 😅 Glad it's back. Sorry about that...";
let assistant_message_added = false;
let any_tool_executed = false;
let was_added = simulate_early_return_path(
&mut context,
current_response,
assistant_message_added,
any_tool_executed,
);
assert!(was_added, "Assistant message should be added");
let history = context.conversation_history.clone();
assert_eq!(history.len(), 2, "Should have user + assistant messages");
assert!(matches!(history[1].role, MessageRole::Assistant));
assert!(history[1].content.contains("Phew!"));
}
/// Test: Text-only response with previous tool execution
#[test]
fn test_text_only_response_with_previous_tools() {
let mut context = ContextWindow::new(200_000);
context.add_message(Message::new(MessageRole::User, "Run ls".to_string()));
let current_response = "Here are the files...";
let assistant_message_added = false;
let any_tool_executed = true; // Tools were executed in previous iterations
let was_added = simulate_early_return_path(
&mut context,
current_response,
assistant_message_added,
any_tool_executed,
);
assert!(was_added, "Assistant message should be added");
let history = context.conversation_history.clone();
assert_eq!(history.len(), 2);
assert!(matches!(history[1].role, MessageRole::Assistant));
}
/// Test: Empty response should not add message
#[test]
fn test_empty_response_not_added() {
let mut context = ContextWindow::new(200_000);
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
let current_response = " "; // Whitespace only
let assistant_message_added = false;
let any_tool_executed = false;
let was_added = simulate_early_return_path(
&mut context,
current_response,
assistant_message_added,
any_tool_executed,
);
assert!(!was_added, "Empty response should not be added");
let history = context.conversation_history.clone();
assert_eq!(history.len(), 1, "Should only have user message");
}
/// Test: Already added flag prevents duplication
#[test]
fn test_already_added_prevents_duplication() {
let mut context = ContextWindow::new(200_000);
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
let current_response = "Hi there!";
let assistant_message_added = true; // Already added
let any_tool_executed = false;
let was_added = simulate_early_return_path(
&mut context,
current_response,
assistant_message_added,
any_tool_executed,
);
assert!(was_added, "Flag should remain true");
let history = context.conversation_history.clone();
assert_eq!(history.len(), 2, "Should not add duplicate");
}
/// Test: Consecutive user messages scenario (the bug)
/// Before the fix, this would result in consecutive user messages
#[test]
fn test_bug_scenario_consecutive_user_messages() {
let mut context = ContextWindow::new(200_000);
// Simulate the butler session bug:
// Message 80: Tool result (user)
// Message 81: User input (user) - SHOULD have assistant between!
// Message 82: User input (user)
context.add_message(Message::new(MessageRole::User, "Tool result: Self care".to_string()));
// Simulate assistant response that was displayed but not saved (THE BUG)
let current_response = "Phew! 😅 Glad it's back...";
let assistant_message_added = false;
let any_tool_executed = false;
// THE FIX: This should now save the assistant message
simulate_early_return_path(
&mut context,
current_response,
assistant_message_added,
any_tool_executed,
);
// Now add the next user message
context.add_message(Message::new(MessageRole::User, "Task: Ok it's back...".to_string()));
let history = context.conversation_history.clone();
// Verify no consecutive user messages
for i in 0..history.len().saturating_sub(1) {
let current_is_user = matches!(history[i].role, MessageRole::User);
let next_is_user = matches!(history[i + 1].role, MessageRole::User);
assert!(
!(current_is_user && next_is_user),
"Found consecutive user messages at positions {} and {}",
i, i + 1
);
}
}

View File

@@ -1,100 +0,0 @@
//! Tests for the missing assistant message bug fix.
//!
//! Bug: When the LLM responds with text-only (no tool calls), the assistant message
//! was sometimes not saved to the context window because the code checked
//! `raw_clean.trim().is_empty()` after already confirming `current_response` had content.
//! If the parser buffer was empty/different from current_response, no message was added.
//!
//! Fix: Use current_response as fallback when raw_clean is empty.
/// Test that the fix ensures assistant messages are always added when there's content.
/// This simulates the fixed behavior where current_response is used as fallback.
#[test]
fn test_fallback_to_current_response_when_raw_empty() {
// Simulate the scenario:
// - current_response has content (what was displayed)
// - raw_clean is empty (parser buffer was cleared)
// - The fix should use current_response as fallback
let current_response = "Here's my helpful response!";
let raw_clean = ""; // Parser buffer is empty
// The fix logic:
let content_to_save = if !raw_clean.trim().is_empty() {
raw_clean.to_string()
} else {
current_response.to_string()
};
// Verify fallback works
assert_eq!(content_to_save, current_response);
assert!(!content_to_save.is_empty());
}
/// Test that raw_clean is preferred when available.
#[test]
fn test_prefer_raw_clean_when_available() {
let current_response = "Filtered response"; // What was displayed (filtered)
let raw_clean = "Raw response with {\"tool\": ...} JSON"; // Raw content
// The fix logic:
let content_to_save = if !raw_clean.trim().is_empty() {
raw_clean.to_string()
} else {
current_response.to_string()
};
// Verify raw_clean is preferred
assert_eq!(content_to_save, raw_clean);
}
/// Test that whitespace-only raw_clean triggers fallback.
#[test]
fn test_whitespace_raw_clean_triggers_fallback() {
let current_response = "Actual content";
let raw_clean = " \n\t "; // Whitespace only
// The fix logic:
let content_to_save = if !raw_clean.trim().is_empty() {
raw_clean.to_string()
} else {
current_response.to_string()
};
// Verify fallback to current_response
assert_eq!(content_to_save, current_response);
}
/// Test that the fix logic handles various edge cases.
#[test]
fn test_fix_logic_edge_cases() {
// Test case 1: Both have content - prefer raw
let current = "displayed";
let raw = "raw content";
let result = if !raw.trim().is_empty() { raw } else { current };
assert_eq!(result, "raw content");
// Test case 2: Raw is empty - use current
let current = "displayed";
let raw = "";
let result = if !raw.trim().is_empty() { raw } else { current };
assert_eq!(result, "displayed");
// Test case 3: Raw is whitespace - use current
let current = "displayed";
let raw = " \n ";
let result = if !raw.trim().is_empty() { raw } else { current };
assert_eq!(result, "displayed");
// Test case 4: Both empty - still returns current (empty)
let current = "";
let raw = "";
let result = if !raw.trim().is_empty() { raw } else { current };
assert_eq!(result, "");
// Test case 5: Current has content, raw has only newlines
let current = "Hello world!";
let raw = "\n\n\n";
let result = if !raw.trim().is_empty() { raw } else { current };
assert_eq!(result, "Hello world!");
}