Consolidate redundant assistant message test files
Deleted 4 redundant test files (~956 lines): - assistant_message_dedup_test.rs (416 lines, 12 tests) - consecutive_assistant_message_test.rs (248 lines, 6 tests) - missing_assistant_message_test.rs (100 lines, 4 tests) - early_return_path_test.rs (192 lines, 5 tests) - whitebox test Created consolidated assistant_message_test.rs (369 lines, 14 tests): - Helper function tests for consecutive message detection - ContextWindow unit tests for normal and tool execution flows - Bug demonstration tests documenting what bugs looked like - Invariant tests for user/assistant alternation - Missing assistant message fallback logic tests The early_return_path_test was removed because it: - Referenced specific line numbers in production code (brittle) - Reimplemented internal logic (whitebox anti-pattern) - Duplicated coverage from mock_provider_integration_test.rs All 729 g3-core tests pass.
This commit is contained in:
@@ -1,16 +1,20 @@
|
||||
//! Tests for Assistant Message Deduplication
|
||||
//! Tests for Assistant Message Handling
|
||||
//!
|
||||
//! These tests verify that the conversation history does NOT have
|
||||
//! consecutive assistant messages, which was a bug that caused
|
||||
//! compounding duplication in LLM responses.
|
||||
//! These tests verify correct handling of assistant messages in conversation history:
|
||||
//! 1. No consecutive assistant messages (deduplication bug)
|
||||
//! 2. Proper user/assistant alternation
|
||||
//! 3. Missing assistant message fallback logic
|
||||
//!
|
||||
//! Bug fixed: When tools were executed in previous iterations (any_tool_executed=true)
|
||||
//! and the current iteration finished without executing a tool (tool_executed=false),
|
||||
//! the assistant message was being added twice:
|
||||
//! 1. At the break point when any_tool_executed is true
|
||||
//! 2. At the return point when !tool_executed
|
||||
//! The original bugs:
|
||||
//! - Consecutive assistant messages: When tools were executed in previous iterations
|
||||
//! and the current iteration finished without executing a tool, the assistant
|
||||
//! message was being added twice.
|
||||
//! - Missing assistant messages: When the LLM responded with text-only (no tool calls),
|
||||
//! the assistant message was sometimes not saved because the code checked
|
||||
//! `raw_clean.trim().is_empty()` after already confirming `current_response` had content.
|
||||
//!
|
||||
//! The fix uses an `assistant_message_added` flag to ensure only one add occurs.
|
||||
//! These bugs are now tested through the public API in mock_provider_integration_test.rs.
|
||||
//! This file contains unit tests for the helper functions and invariants.
|
||||
|
||||
use g3_core::context_window::ContextWindow;
|
||||
use g3_providers::{Message, MessageRole};
|
||||
@@ -19,7 +23,8 @@ use g3_providers::{Message, MessageRole};
|
||||
// Helper Functions
|
||||
// =============================================================================
|
||||
|
||||
/// Check if conversation history has consecutive assistant messages
|
||||
/// Check if conversation history has consecutive assistant messages.
|
||||
/// Returns the indices of the first consecutive pair found, if any.
|
||||
fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> {
|
||||
for i in 0..history.len().saturating_sub(1) {
|
||||
if matches!(history[i].role, MessageRole::Assistant)
|
||||
@@ -40,43 +45,48 @@ fn count_assistant_messages(history: &[Message]) -> usize {
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Unit Tests for Helper Functions
|
||||
// Helper Function Tests
|
||||
// =============================================================================
|
||||
|
||||
#[test]
|
||||
fn test_has_consecutive_assistant_messages_none() {
|
||||
fn test_consecutive_detection_no_consecutive() {
|
||||
let history = vec![
|
||||
Message::new(MessageRole::System, "System".to_string()),
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello".to_string()),
|
||||
Message::new(MessageRole::User, "Bye".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Goodbye".to_string()),
|
||||
];
|
||||
assert!(has_consecutive_assistant_messages(&history).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_consecutive_assistant_messages_found_middle() {
|
||||
fn test_consecutive_detection_at_start() {
|
||||
let history = vec![
|
||||
Message::new(MessageRole::System, "System".to_string()),
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello again".to_string()), // BUG!
|
||||
Message::new(MessageRole::User, "Bye".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello again".to_string()),
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
];
|
||||
assert_eq!(has_consecutive_assistant_messages(&history), Some((2, 3)));
|
||||
assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_has_consecutive_assistant_messages_found_end() {
|
||||
fn test_consecutive_detection_in_middle() {
|
||||
let history = vec![
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello again".to_string()),
|
||||
Message::new(MessageRole::User, "Bye".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Goodbye".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Goodbye again".to_string()), // BUG!
|
||||
];
|
||||
assert_eq!(has_consecutive_assistant_messages(&history), Some((3, 4)));
|
||||
assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_consecutive_detection_at_end() {
|
||||
let history = vec![
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello again".to_string()),
|
||||
];
|
||||
assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -95,15 +105,11 @@ fn test_count_assistant_messages() {
|
||||
// ContextWindow Unit Tests
|
||||
// =============================================================================
|
||||
|
||||
/// Test that ContextWindow correctly tracks messages in normal flow
|
||||
#[test]
|
||||
fn test_context_window_normal_flow() {
|
||||
fn test_normal_conversation_flow() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::System,
|
||||
"You are helpful.".to_string(),
|
||||
));
|
||||
context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
|
||||
context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "How are you?".to_string()));
|
||||
@@ -117,16 +123,11 @@ fn test_context_window_normal_flow() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Test that simulates the correct flow after tool execution
|
||||
#[test]
|
||||
fn test_context_window_tool_execution_correct_flow() {
|
||||
fn test_tool_execution_correct_flow() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Setup
|
||||
context.add_message(Message::new(
|
||||
MessageRole::System,
|
||||
"You are helpful.".to_string(),
|
||||
));
|
||||
context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
|
||||
|
||||
// Tool call (assistant message with tool JSON)
|
||||
@@ -147,7 +148,6 @@ fn test_context_window_tool_execution_correct_flow() {
|
||||
"Here are the files: file1.txt, file2.txt".to_string(),
|
||||
));
|
||||
|
||||
// Verify structure
|
||||
assert_eq!(context.conversation_history.len(), 5);
|
||||
assert_eq!(count_assistant_messages(&context.conversation_history), 2);
|
||||
assert!(
|
||||
@@ -156,58 +156,11 @@ fn test_context_window_tool_execution_correct_flow() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Test that demonstrates the bug scenario (what the bug looked like)
|
||||
#[test]
|
||||
fn test_context_window_bug_scenario_demonstration() {
|
||||
fn test_multiple_tools_correct_flow() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Setup
|
||||
context.add_message(Message::new(
|
||||
MessageRole::System,
|
||||
"You are helpful.".to_string(),
|
||||
));
|
||||
context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
|
||||
|
||||
// Tool call
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
|
||||
));
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Tool result: file1.txt".to_string(),
|
||||
));
|
||||
|
||||
// THE BUG: Summary added TWICE
|
||||
let summary = "Here are the files: file1.txt".to_string();
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG!
|
||||
|
||||
// This demonstrates what the bug looked like
|
||||
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
|
||||
assert!(
|
||||
consecutive.is_some(),
|
||||
"Bug scenario should have consecutive assistant messages"
|
||||
);
|
||||
assert_eq!(consecutive, Some((4, 5)));
|
||||
|
||||
// The content is duplicated
|
||||
assert_eq!(
|
||||
context.conversation_history[4].content,
|
||||
context.conversation_history[5].content,
|
||||
"Bug: consecutive messages have identical content"
|
||||
);
|
||||
}
|
||||
|
||||
/// Test multiple tool executions followed by summary (correct flow)
|
||||
#[test]
|
||||
fn test_context_window_multiple_tools_correct_flow() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::System,
|
||||
"You are helpful.".to_string(),
|
||||
));
|
||||
context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"List files and show current directory".to_string(),
|
||||
@@ -239,7 +192,6 @@ fn test_context_window_multiple_tools_correct_flow() {
|
||||
"Files: file1.txt, file2.txt. Current directory: /home/user".to_string(),
|
||||
));
|
||||
|
||||
// Verify structure
|
||||
// System + User + (Assistant + User) * 2 + Assistant = 1 + 1 + 4 + 1 = 7
|
||||
assert_eq!(context.conversation_history.len(), 7);
|
||||
assert_eq!(count_assistant_messages(&context.conversation_history), 3);
|
||||
@@ -249,72 +201,17 @@ fn test_context_window_multiple_tools_correct_flow() {
|
||||
);
|
||||
}
|
||||
|
||||
/// Test the exact session log pattern from the original bug report
|
||||
#[test]
|
||||
fn test_session_log_pattern_from_bug_report() {
|
||||
// This test recreates the exact pattern seen in the butler session log:
|
||||
// - Index N: assistant message with content
|
||||
// - Index N+1: assistant message with same content (consecutive!)
|
||||
// =============================================================================
|
||||
// Bug Demonstration Tests (document what the bugs looked like)
|
||||
// =============================================================================
|
||||
|
||||
#[test]
|
||||
fn test_bug_demonstration_consecutive_messages() {
|
||||
// This test demonstrates what the consecutive message bug looked like.
|
||||
// The bug is now fixed and tested through mock_provider_integration_test.rs.
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Simulate a conversation with many messages
|
||||
context.add_message(Message::new(MessageRole::System, "You are butler.".to_string()));
|
||||
|
||||
// Add some conversation history
|
||||
for i in 0..10 {
|
||||
context.add_message(Message::new(MessageRole::User, format!("Task {}", i)));
|
||||
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)));
|
||||
}
|
||||
|
||||
// Now simulate the buggy pattern
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Task: move appa_music into appa".to_string(),
|
||||
));
|
||||
|
||||
// Tool call
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(),
|
||||
));
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Tool result: ✓ Moved".to_string(),
|
||||
));
|
||||
|
||||
// The bug: two consecutive assistant messages with duplicated content
|
||||
let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?";
|
||||
|
||||
// First add (correct)
|
||||
context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
|
||||
// Second add (THE BUG)
|
||||
context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
|
||||
|
||||
// Verify the bug pattern exists
|
||||
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
|
||||
assert!(
|
||||
consecutive.is_some(),
|
||||
"Should detect consecutive assistant messages from bug pattern"
|
||||
);
|
||||
|
||||
// The consecutive messages should be the last two
|
||||
let (idx1, idx2) = consecutive.unwrap();
|
||||
let history_len = context.conversation_history.len();
|
||||
assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last");
|
||||
assert_eq!(idx2, history_len - 1, "Second consecutive should be last");
|
||||
}
|
||||
|
||||
/// Test that the fix prevents consecutive assistant messages
|
||||
/// This simulates what the fixed code does: check before adding
|
||||
#[test]
|
||||
fn test_fix_prevents_consecutive_assistant_messages() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::System,
|
||||
"You are helpful.".to_string(),
|
||||
));
|
||||
context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
@@ -325,64 +222,31 @@ fn test_fix_prevents_consecutive_assistant_messages() {
|
||||
"Tool result: file1.txt".to_string(),
|
||||
));
|
||||
|
||||
// Simulate the fix: use a flag to track if message was added
|
||||
let mut assistant_message_added = false;
|
||||
// THE BUG: Summary added TWICE
|
||||
let summary = "Here are the files: file1.txt".to_string();
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone())); // BUG!
|
||||
|
||||
// First add location (simulating line ~2529 in the fix)
|
||||
if !assistant_message_added {
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
|
||||
assistant_message_added = true;
|
||||
}
|
||||
// Verify the bug pattern
|
||||
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
|
||||
assert!(consecutive.is_some(), "Bug scenario should have consecutive assistant messages");
|
||||
assert_eq!(consecutive, Some((4, 5)));
|
||||
|
||||
// Second add location (simulating line ~2772 in the fix)
|
||||
// This would have added a duplicate before the fix
|
||||
if !assistant_message_added {
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
|
||||
// assistant_message_added = true; // Not needed since we return after
|
||||
}
|
||||
|
||||
// Verify the fix works
|
||||
assert!(
|
||||
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
|
||||
"Fix should prevent consecutive assistant messages"
|
||||
);
|
||||
// The content is duplicated
|
||||
assert_eq!(
|
||||
count_assistant_messages(&context.conversation_history),
|
||||
2,
|
||||
"Should have exactly 2 assistant messages (tool call + summary)"
|
||||
context.conversation_history[4].content,
|
||||
context.conversation_history[5].content,
|
||||
"Bug: consecutive messages have identical content"
|
||||
);
|
||||
}
|
||||
|
||||
/// Test edge case: empty response should not add message
|
||||
#[test]
|
||||
fn test_empty_response_not_added() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
// =============================================================================
|
||||
// Invariant Tests
|
||||
// =============================================================================
|
||||
|
||||
context.add_message(Message::new(
|
||||
MessageRole::System,
|
||||
"You are helpful.".to_string(),
|
||||
));
|
||||
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
|
||||
context.add_message(Message::new(MessageRole::Assistant, "Hi!".to_string()));
|
||||
|
||||
// Simulate the check: don't add empty/whitespace responses
|
||||
let empty_response = " ".to_string();
|
||||
if !empty_response.trim().is_empty() {
|
||||
context.add_message(Message::new(MessageRole::Assistant, empty_response));
|
||||
}
|
||||
|
||||
// Should still have only 3 messages
|
||||
assert_eq!(context.conversation_history.len(), 3);
|
||||
assert!(
|
||||
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
|
||||
"Empty response should not create consecutive messages"
|
||||
);
|
||||
}
|
||||
|
||||
/// Test the invariant: conversation should always alternate user/assistant after system
|
||||
#[test]
|
||||
fn test_alternating_pattern_invariant() {
|
||||
// After the system message, conversation should alternate user/assistant
|
||||
let history = vec![
|
||||
Message::new(MessageRole::System, "System".to_string()),
|
||||
Message::new(MessageRole::User, "Q1".to_string()),
|
||||
@@ -393,7 +257,6 @@ fn test_alternating_pattern_invariant() {
|
||||
Message::new(MessageRole::Assistant, "A3".to_string()),
|
||||
];
|
||||
|
||||
// Verify alternating pattern
|
||||
for i in 1..history.len() - 1 {
|
||||
let current = &history[i].role;
|
||||
let next = &history[i + 1].role;
|
||||
@@ -414,3 +277,93 @@ fn test_alternating_pattern_invariant() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fix_prevents_consecutive_messages() {
|
||||
// This simulates what the fixed code does: use a flag to track if message was added
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
context.add_message(Message::new(MessageRole::System, "You are helpful.".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "Run a command".to_string()));
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
|
||||
));
|
||||
context.add_message(Message::new(
|
||||
MessageRole::User,
|
||||
"Tool result: file1.txt".to_string(),
|
||||
));
|
||||
|
||||
// Simulate the fix: use a flag to track if message was added
|
||||
let mut assistant_message_added = false;
|
||||
let summary = "Here are the files: file1.txt".to_string();
|
||||
|
||||
// First add location
|
||||
if !assistant_message_added {
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
|
||||
assistant_message_added = true;
|
||||
}
|
||||
|
||||
// Second add location (would have added duplicate before the fix)
|
||||
if !assistant_message_added {
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
|
||||
}
|
||||
|
||||
assert!(
|
||||
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
|
||||
"Fix should prevent consecutive assistant messages"
|
||||
);
|
||||
assert_eq!(
|
||||
count_assistant_messages(&context.conversation_history),
|
||||
2,
|
||||
"Should have exactly 2 assistant messages (tool call + summary)"
|
||||
);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Missing Assistant Message Fallback Tests
|
||||
// =============================================================================
|
||||
|
||||
#[test]
|
||||
fn test_fallback_to_current_response_when_raw_empty() {
|
||||
// Simulate the scenario where raw_clean is empty but current_response has content
|
||||
let current_response = "Here's my helpful response!";
|
||||
let raw_clean = "";
|
||||
|
||||
let content_to_save = if !raw_clean.trim().is_empty() {
|
||||
raw_clean.to_string()
|
||||
} else {
|
||||
current_response.to_string()
|
||||
};
|
||||
|
||||
assert_eq!(content_to_save, current_response);
|
||||
assert!(!content_to_save.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_prefer_raw_clean_when_available() {
|
||||
let current_response = "Filtered response";
|
||||
let raw_clean = "Raw response with tool JSON";
|
||||
|
||||
let content_to_save = if !raw_clean.trim().is_empty() {
|
||||
raw_clean.to_string()
|
||||
} else {
|
||||
current_response.to_string()
|
||||
};
|
||||
|
||||
assert_eq!(content_to_save, raw_clean);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_whitespace_raw_clean_triggers_fallback() {
|
||||
let current_response = "Actual content";
|
||||
let raw_clean = " \n\t ";
|
||||
|
||||
let content_to_save = if !raw_clean.trim().is_empty() {
|
||||
raw_clean.to_string()
|
||||
} else {
|
||||
current_response.to_string()
|
||||
};
|
||||
|
||||
assert_eq!(content_to_save, current_response);
|
||||
}
|
||||
@@ -1,248 +0,0 @@
|
||||
//! Tests for consecutive assistant message bug
|
||||
//!
|
||||
//! This test verifies that the streaming completion logic does not add
|
||||
//! consecutive assistant messages to the conversation history.
|
||||
//!
|
||||
//! Bug description:
|
||||
//! When tools are executed in previous iterations (any_tool_executed=true)
|
||||
//! and the current iteration finishes without executing a tool (tool_executed=false),
|
||||
//! the assistant message was being added twice:
|
||||
//! 1. At line ~2529 when breaking to auto-continue (if any_tool_executed && !current_response.is_empty())
|
||||
//! 2. At line ~2772 when returning (if !current_response.is_empty())
|
||||
//!
|
||||
//! This creates consecutive assistant messages in the conversation history,
|
||||
//! which causes the LLM to see duplicated content and mimic the pattern,
|
||||
//! leading to compounding duplication over time.
|
||||
|
||||
use g3_core::context_window::ContextWindow;
|
||||
use g3_providers::{Message, MessageRole};
|
||||
|
||||
/// Helper to check if conversation history has consecutive assistant messages
|
||||
fn has_consecutive_assistant_messages(history: &[Message]) -> Option<(usize, usize)> {
|
||||
for i in 0..history.len().saturating_sub(1) {
|
||||
if matches!(history[i].role, MessageRole::Assistant)
|
||||
&& matches!(history[i + 1].role, MessageRole::Assistant)
|
||||
{
|
||||
return Some((i, i + 1));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Test that a well-formed conversation has no consecutive assistant messages
|
||||
#[test]
|
||||
fn test_no_consecutive_assistant_messages_in_normal_flow() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Simulate a normal conversation flow
|
||||
context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "Task: Hello".to_string()));
|
||||
context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "Task: How are you?".to_string()));
|
||||
context.add_message(Message::new(MessageRole::Assistant, "I'm doing well!".to_string()));
|
||||
|
||||
assert!(
|
||||
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
|
||||
"Normal conversation should not have consecutive assistant messages"
|
||||
);
|
||||
}
|
||||
|
||||
/// Test that detects the bug: consecutive assistant messages
|
||||
/// This test SHOULD FAIL until the bug is fixed
|
||||
#[test]
|
||||
fn test_detect_consecutive_assistant_messages_bug() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Simulate the buggy flow:
|
||||
// 1. System message
|
||||
context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
|
||||
|
||||
// 2. User asks a question
|
||||
context.add_message(Message::new(MessageRole::User, "Task: List files".to_string()));
|
||||
|
||||
// 3. Assistant responds with tool call (tool execution adds this)
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
|
||||
));
|
||||
|
||||
// 4. Tool result
|
||||
context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string()));
|
||||
|
||||
// 5. Assistant provides summary - THIS IS WHERE THE BUG OCCURS
|
||||
// The bug adds this message TWICE
|
||||
let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string();
|
||||
|
||||
// Simulate the bug: message added at line 2529 (when breaking to auto-continue)
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
|
||||
|
||||
// Simulate the bug: message added AGAIN at line 2772 (when returning)
|
||||
// This is the bug - the same message is added twice!
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary.clone()));
|
||||
|
||||
// This assertion verifies the bug exists
|
||||
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
|
||||
assert!(
|
||||
consecutive.is_some(),
|
||||
"Bug reproduction: should have consecutive assistant messages at indices {:?}",
|
||||
consecutive
|
||||
);
|
||||
|
||||
// Verify the indices
|
||||
let (idx1, idx2) = consecutive.unwrap();
|
||||
assert_eq!(idx1, 4, "First consecutive assistant message should be at index 4");
|
||||
assert_eq!(idx2, 5, "Second consecutive assistant message should be at index 5");
|
||||
|
||||
// Verify the content is duplicated
|
||||
assert_eq!(
|
||||
context.conversation_history[idx1].content,
|
||||
context.conversation_history[idx2].content,
|
||||
"Consecutive assistant messages should have identical content (the bug)"
|
||||
);
|
||||
}
|
||||
|
||||
/// Test that validates the invariant: no consecutive assistant messages should exist
|
||||
/// This is the test that should PASS after the bug is fixed
|
||||
#[test]
|
||||
fn test_invariant_no_consecutive_assistant_messages() {
|
||||
// This test documents the invariant that should hold
|
||||
// After the bug is fixed, this test should pass
|
||||
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Build a conversation that would trigger the bug
|
||||
context.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "Task: List files".to_string()));
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(),
|
||||
));
|
||||
context.add_message(Message::new(MessageRole::User, "Tool result: file1.txt file2.txt".to_string()));
|
||||
|
||||
// After the fix, only ONE assistant message should be added
|
||||
let summary = "Here are the files:\n- file1.txt\n- file2.txt".to_string();
|
||||
context.add_message(Message::new(MessageRole::Assistant, summary));
|
||||
|
||||
// Invariant: no consecutive assistant messages
|
||||
assert!(
|
||||
has_consecutive_assistant_messages(&context.conversation_history).is_none(),
|
||||
"Invariant: conversation history should never have consecutive assistant messages"
|
||||
);
|
||||
}
|
||||
|
||||
/// Test helper function works correctly
|
||||
#[test]
|
||||
fn test_consecutive_detection_helper() {
|
||||
// Test with no consecutive
|
||||
let history = vec![
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello".to_string()),
|
||||
Message::new(MessageRole::User, "Bye".to_string()),
|
||||
];
|
||||
assert!(has_consecutive_assistant_messages(&history).is_none());
|
||||
|
||||
// Test with consecutive at start
|
||||
let history = vec![
|
||||
Message::new(MessageRole::Assistant, "Hello".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello again".to_string()),
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
];
|
||||
assert_eq!(has_consecutive_assistant_messages(&history), Some((0, 1)));
|
||||
|
||||
// Test with consecutive in middle
|
||||
let history = vec![
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello again".to_string()),
|
||||
Message::new(MessageRole::User, "Bye".to_string()),
|
||||
];
|
||||
assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
|
||||
|
||||
// Test with consecutive at end
|
||||
let history = vec![
|
||||
Message::new(MessageRole::User, "Hi".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello".to_string()),
|
||||
Message::new(MessageRole::Assistant, "Hello again".to_string()),
|
||||
];
|
||||
assert_eq!(has_consecutive_assistant_messages(&history), Some((1, 2)));
|
||||
}
|
||||
|
||||
/// Test that simulates the exact session log pattern from the bug report
|
||||
#[test]
|
||||
fn test_session_log_pattern_from_bug_report() {
|
||||
// This test recreates the exact pattern seen in the butler session log:
|
||||
// - Index 368: assistant message with duplicated content
|
||||
// - Index 369: assistant message with same duplicated content (consecutive!)
|
||||
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Simulate the conversation leading up to the bug
|
||||
context.add_message(Message::new(MessageRole::System, "You are butler.".to_string()));
|
||||
|
||||
// ... many messages ...
|
||||
for i in 0..50 {
|
||||
context.add_message(Message::new(MessageRole::User, format!("Task {}", i)));
|
||||
context.add_message(Message::new(MessageRole::Assistant, format!("Response {}", i)));
|
||||
}
|
||||
|
||||
// Now simulate the buggy pattern
|
||||
context.add_message(Message::new(MessageRole::User, "Task: move appa_music into appa".to_string()));
|
||||
|
||||
// Tool call
|
||||
context.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
r#"{"tool": "shell", "args": {"command": "mv ~/Desktop/appa_music ~/icloud/appa/music"}}"#.to_string(),
|
||||
));
|
||||
context.add_message(Message::new(MessageRole::User, "Tool result: ✓ Moved".to_string()));
|
||||
|
||||
// The bug: two consecutive assistant messages with duplicated content
|
||||
let duplicated_content = "Done! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?\n\nDone! ✅\n\nappa/\n├── archive/\n├── docs/\n├── music/ 🎵 NEW\n\nAnything else?";
|
||||
|
||||
// First add (line 2529)
|
||||
context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
|
||||
// Second add (line 2772) - THE BUG
|
||||
context.add_message(Message::new(MessageRole::Assistant, duplicated_content.to_string()));
|
||||
|
||||
// Verify the bug pattern exists
|
||||
let consecutive = has_consecutive_assistant_messages(&context.conversation_history);
|
||||
assert!(
|
||||
consecutive.is_some(),
|
||||
"Should detect consecutive assistant messages from bug pattern"
|
||||
);
|
||||
|
||||
// The consecutive messages should be the last two
|
||||
let (idx1, idx2) = consecutive.unwrap();
|
||||
let history_len = context.conversation_history.len();
|
||||
assert_eq!(idx1, history_len - 2, "First consecutive should be second-to-last");
|
||||
assert_eq!(idx2, history_len - 1, "Second consecutive should be last");
|
||||
}
|
||||
|
||||
/// Test that the context window could have a guard against consecutive assistant messages
|
||||
/// This is a proposed fix validation test
|
||||
#[test]
|
||||
fn test_proposed_fix_guard_against_consecutive() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
context.add_message(Message::new(MessageRole::System, "System".to_string()));
|
||||
context.add_message(Message::new(MessageRole::User, "User".to_string()));
|
||||
context.add_message(Message::new(MessageRole::Assistant, "First response".to_string()));
|
||||
|
||||
// Proposed fix: before adding an assistant message, check if the last message
|
||||
// is also an assistant message. If so, either:
|
||||
// 1. Skip adding (if content is identical)
|
||||
// 2. Merge the content (if content is different)
|
||||
// 3. Log a warning and skip (safest option)
|
||||
|
||||
let last_message = context.conversation_history.last();
|
||||
let is_last_assistant = last_message
|
||||
.map(|m| matches!(m.role, MessageRole::Assistant))
|
||||
.unwrap_or(false);
|
||||
|
||||
assert!(
|
||||
is_last_assistant,
|
||||
"Last message should be assistant for this test"
|
||||
);
|
||||
|
||||
// The fix would check this condition before adding
|
||||
// and skip adding if true (to prevent consecutive assistant messages)
|
||||
}
|
||||
@@ -1,192 +0,0 @@
|
||||
//! Tests for the early return path in stream_completion_with_tools
|
||||
//!
|
||||
//! This test documents the bug fix for missing assistant messages when:
|
||||
//! - The LLM responds with text only (no tool calls)
|
||||
//! - any_tool_executed = false (no tools in previous iterations)
|
||||
//! - The stream finishes and hits the early return path
|
||||
//!
|
||||
//! The fix adds assistant message saving before the early return at ~line 2535.
|
||||
|
||||
use g3_core::context_window::ContextWindow;
|
||||
use g3_providers::{Message, MessageRole};
|
||||
|
||||
/// Simulates the early return path logic
|
||||
/// This mirrors the code at lines 2535-2555 in lib.rs
|
||||
fn simulate_early_return_path(
|
||||
context_window: &mut ContextWindow,
|
||||
current_response: &str,
|
||||
assistant_message_added: bool,
|
||||
any_tool_executed: bool,
|
||||
) -> bool {
|
||||
// This simulates the code path when stream finishes
|
||||
|
||||
if any_tool_executed {
|
||||
// If tools were executed in previous iterations, break to finalize
|
||||
if !current_response.trim().is_empty() && !assistant_message_added {
|
||||
let assistant_msg = Message::new(
|
||||
MessageRole::Assistant,
|
||||
current_response.to_string(),
|
||||
);
|
||||
context_window.add_message(assistant_msg);
|
||||
return true; // assistant_message_added = true
|
||||
}
|
||||
return assistant_message_added;
|
||||
}
|
||||
|
||||
// THE FIX: Save assistant message before returning (no tools were executed)
|
||||
if !current_response.trim().is_empty() && !assistant_message_added {
|
||||
let assistant_msg = Message::new(
|
||||
MessageRole::Assistant,
|
||||
current_response.to_string(),
|
||||
);
|
||||
context_window.add_message(assistant_msg);
|
||||
return true;
|
||||
}
|
||||
|
||||
assistant_message_added
|
||||
}
|
||||
|
||||
/// Test: Text-only response with no previous tool execution
|
||||
/// This is the exact bug scenario from butler session
|
||||
#[test]
|
||||
fn test_text_only_response_no_tools() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Add initial user message
|
||||
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
|
||||
|
||||
let current_response = "Phew! 😅 Glad it's back. Sorry about that...";
|
||||
let assistant_message_added = false;
|
||||
let any_tool_executed = false;
|
||||
|
||||
let was_added = simulate_early_return_path(
|
||||
&mut context,
|
||||
current_response,
|
||||
assistant_message_added,
|
||||
any_tool_executed,
|
||||
);
|
||||
|
||||
assert!(was_added, "Assistant message should be added");
|
||||
|
||||
let history = context.conversation_history.clone();
|
||||
assert_eq!(history.len(), 2, "Should have user + assistant messages");
|
||||
assert!(matches!(history[1].role, MessageRole::Assistant));
|
||||
assert!(history[1].content.contains("Phew!"));
|
||||
}
|
||||
|
||||
/// Test: Text-only response with previous tool execution
|
||||
#[test]
|
||||
fn test_text_only_response_with_previous_tools() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
context.add_message(Message::new(MessageRole::User, "Run ls".to_string()));
|
||||
|
||||
let current_response = "Here are the files...";
|
||||
let assistant_message_added = false;
|
||||
let any_tool_executed = true; // Tools were executed in previous iterations
|
||||
|
||||
let was_added = simulate_early_return_path(
|
||||
&mut context,
|
||||
current_response,
|
||||
assistant_message_added,
|
||||
any_tool_executed,
|
||||
);
|
||||
|
||||
assert!(was_added, "Assistant message should be added");
|
||||
|
||||
let history = context.conversation_history.clone();
|
||||
assert_eq!(history.len(), 2);
|
||||
assert!(matches!(history[1].role, MessageRole::Assistant));
|
||||
}
|
||||
|
||||
/// Test: Empty response should not add message
|
||||
#[test]
|
||||
fn test_empty_response_not_added() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
|
||||
|
||||
let current_response = " "; // Whitespace only
|
||||
let assistant_message_added = false;
|
||||
let any_tool_executed = false;
|
||||
|
||||
let was_added = simulate_early_return_path(
|
||||
&mut context,
|
||||
current_response,
|
||||
assistant_message_added,
|
||||
any_tool_executed,
|
||||
);
|
||||
|
||||
assert!(!was_added, "Empty response should not be added");
|
||||
|
||||
let history = context.conversation_history.clone();
|
||||
assert_eq!(history.len(), 1, "Should only have user message");
|
||||
}
|
||||
|
||||
/// Test: Already added flag prevents duplication
|
||||
#[test]
|
||||
fn test_already_added_prevents_duplication() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
context.add_message(Message::new(MessageRole::User, "Hello".to_string()));
|
||||
context.add_message(Message::new(MessageRole::Assistant, "Hi there!".to_string()));
|
||||
|
||||
let current_response = "Hi there!";
|
||||
let assistant_message_added = true; // Already added
|
||||
let any_tool_executed = false;
|
||||
|
||||
let was_added = simulate_early_return_path(
|
||||
&mut context,
|
||||
current_response,
|
||||
assistant_message_added,
|
||||
any_tool_executed,
|
||||
);
|
||||
|
||||
assert!(was_added, "Flag should remain true");
|
||||
|
||||
let history = context.conversation_history.clone();
|
||||
assert_eq!(history.len(), 2, "Should not add duplicate");
|
||||
}
|
||||
|
||||
/// Test: Consecutive user messages scenario (the bug)
|
||||
/// Before the fix, this would result in consecutive user messages
|
||||
#[test]
|
||||
fn test_bug_scenario_consecutive_user_messages() {
|
||||
let mut context = ContextWindow::new(200_000);
|
||||
|
||||
// Simulate the butler session bug:
|
||||
// Message 80: Tool result (user)
|
||||
// Message 81: User input (user) - SHOULD have assistant between!
|
||||
// Message 82: User input (user)
|
||||
|
||||
context.add_message(Message::new(MessageRole::User, "Tool result: Self care".to_string()));
|
||||
|
||||
// Simulate assistant response that was displayed but not saved (THE BUG)
|
||||
let current_response = "Phew! 😅 Glad it's back...";
|
||||
let assistant_message_added = false;
|
||||
let any_tool_executed = false;
|
||||
|
||||
// THE FIX: This should now save the assistant message
|
||||
simulate_early_return_path(
|
||||
&mut context,
|
||||
current_response,
|
||||
assistant_message_added,
|
||||
any_tool_executed,
|
||||
);
|
||||
|
||||
// Now add the next user message
|
||||
context.add_message(Message::new(MessageRole::User, "Task: Ok it's back...".to_string()));
|
||||
|
||||
let history = context.conversation_history.clone();
|
||||
|
||||
// Verify no consecutive user messages
|
||||
for i in 0..history.len().saturating_sub(1) {
|
||||
let current_is_user = matches!(history[i].role, MessageRole::User);
|
||||
let next_is_user = matches!(history[i + 1].role, MessageRole::User);
|
||||
assert!(
|
||||
!(current_is_user && next_is_user),
|
||||
"Found consecutive user messages at positions {} and {}",
|
||||
i, i + 1
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,100 +0,0 @@
|
||||
//! Tests for the missing assistant message bug fix.
|
||||
//!
|
||||
//! Bug: When the LLM responds with text-only (no tool calls), the assistant message
|
||||
//! was sometimes not saved to the context window because the code checked
|
||||
//! `raw_clean.trim().is_empty()` after already confirming `current_response` had content.
|
||||
//! If the parser buffer was empty/different from current_response, no message was added.
|
||||
//!
|
||||
//! Fix: Use current_response as fallback when raw_clean is empty.
|
||||
|
||||
/// Test that the fix ensures assistant messages are always added when there's content.
|
||||
/// This simulates the fixed behavior where current_response is used as fallback.
|
||||
#[test]
|
||||
fn test_fallback_to_current_response_when_raw_empty() {
|
||||
// Simulate the scenario:
|
||||
// - current_response has content (what was displayed)
|
||||
// - raw_clean is empty (parser buffer was cleared)
|
||||
// - The fix should use current_response as fallback
|
||||
|
||||
let current_response = "Here's my helpful response!";
|
||||
let raw_clean = ""; // Parser buffer is empty
|
||||
|
||||
// The fix logic:
|
||||
let content_to_save = if !raw_clean.trim().is_empty() {
|
||||
raw_clean.to_string()
|
||||
} else {
|
||||
current_response.to_string()
|
||||
};
|
||||
|
||||
// Verify fallback works
|
||||
assert_eq!(content_to_save, current_response);
|
||||
assert!(!content_to_save.is_empty());
|
||||
}
|
||||
|
||||
/// Test that raw_clean is preferred when available.
|
||||
#[test]
|
||||
fn test_prefer_raw_clean_when_available() {
|
||||
let current_response = "Filtered response"; // What was displayed (filtered)
|
||||
let raw_clean = "Raw response with {\"tool\": ...} JSON"; // Raw content
|
||||
|
||||
// The fix logic:
|
||||
let content_to_save = if !raw_clean.trim().is_empty() {
|
||||
raw_clean.to_string()
|
||||
} else {
|
||||
current_response.to_string()
|
||||
};
|
||||
|
||||
// Verify raw_clean is preferred
|
||||
assert_eq!(content_to_save, raw_clean);
|
||||
}
|
||||
|
||||
/// Test that whitespace-only raw_clean triggers fallback.
|
||||
#[test]
|
||||
fn test_whitespace_raw_clean_triggers_fallback() {
|
||||
let current_response = "Actual content";
|
||||
let raw_clean = " \n\t "; // Whitespace only
|
||||
|
||||
// The fix logic:
|
||||
let content_to_save = if !raw_clean.trim().is_empty() {
|
||||
raw_clean.to_string()
|
||||
} else {
|
||||
current_response.to_string()
|
||||
};
|
||||
|
||||
// Verify fallback to current_response
|
||||
assert_eq!(content_to_save, current_response);
|
||||
}
|
||||
|
||||
/// Test that the fix logic handles various edge cases.
|
||||
#[test]
|
||||
fn test_fix_logic_edge_cases() {
|
||||
// Test case 1: Both have content - prefer raw
|
||||
let current = "displayed";
|
||||
let raw = "raw content";
|
||||
let result = if !raw.trim().is_empty() { raw } else { current };
|
||||
assert_eq!(result, "raw content");
|
||||
|
||||
// Test case 2: Raw is empty - use current
|
||||
let current = "displayed";
|
||||
let raw = "";
|
||||
let result = if !raw.trim().is_empty() { raw } else { current };
|
||||
assert_eq!(result, "displayed");
|
||||
|
||||
// Test case 3: Raw is whitespace - use current
|
||||
let current = "displayed";
|
||||
let raw = " \n ";
|
||||
let result = if !raw.trim().is_empty() { raw } else { current };
|
||||
assert_eq!(result, "displayed");
|
||||
|
||||
// Test case 4: Both empty - still returns current (empty)
|
||||
let current = "";
|
||||
let raw = "";
|
||||
let result = if !raw.trim().is_empty() { raw } else { current };
|
||||
assert_eq!(result, "");
|
||||
|
||||
// Test case 5: Current has content, raw has only newlines
|
||||
let current = "Hello world!";
|
||||
let raw = "\n\n\n";
|
||||
let result = if !raw.trim().is_empty() { raw } else { current };
|
||||
assert_eq!(result, "Hello world!");
|
||||
}
|
||||
Reference in New Issue
Block a user