fix: strip orphaned tool_calls from preserved assistant message during compaction
After context compaction, the preserved last assistant message retained its structured tool_calls field, but the corresponding tool_result was summarized away. This created orphaned tool_use blocks that violated the Anthropic API constraint: 'Each tool_use block must have a corresponding tool_result block in the next message', causing 400 errors. Primary fix: clear tool_calls from the preserved assistant message in extract_preserved_messages(). The tool call was already executed and its result is captured in the summary. Defense-in-depth: added strip_orphaned_tool_use() post-processing in Anthropic convert_messages() to detect and strip any orphaned tool_use blocks before they reach the API. Added 7 tests: 3 unit tests for compaction stripping, 3 unit tests for Anthropic orphan detection, 1 integration test reproducing the exact bug scenario from the h3 session.
This commit is contained in:
@@ -334,13 +334,24 @@ Format this as a detailed but concise summary that can be used to resume the con
|
||||
}
|
||||
});
|
||||
|
||||
// Find the last assistant message in the conversation
|
||||
// Find the last assistant message in the conversation.
|
||||
// IMPORTANT: Strip tool_calls from the preserved message. After compaction,
|
||||
// the tool_result messages are summarized away, so keeping tool_calls would
|
||||
// create orphaned tool_use blocks that violate the Anthropic API constraint:
|
||||
// "Each tool_use block must have a corresponding tool_result block in the next message."
|
||||
let last_assistant_message = self
|
||||
.conversation_history
|
||||
.iter()
|
||||
.rev()
|
||||
.find(|m| matches!(m.role, MessageRole::Assistant))
|
||||
.cloned();
|
||||
.map(|m| {
|
||||
let mut msg = m.clone();
|
||||
if !msg.tool_calls.is_empty() {
|
||||
debug!("Stripping {} tool_calls from preserved assistant message during compaction", msg.tool_calls.len());
|
||||
msg.tool_calls.clear();
|
||||
}
|
||||
msg
|
||||
});
|
||||
|
||||
PreservedMessages {
|
||||
system_prompt,
|
||||
@@ -767,6 +778,7 @@ impl ThinResult {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use g3_providers::MessageToolCall;
|
||||
|
||||
#[test]
|
||||
fn test_new_context_window() {
|
||||
@@ -858,4 +870,136 @@ mod tests {
|
||||
assert_eq!(ThinScope::FirstThird.error_action(), "thinning");
|
||||
assert_eq!(ThinScope::All.error_action(), "skinnifying");
|
||||
}
|
||||
|
||||
// ====================================================================
|
||||
// Compaction: tool_call stripping tests
|
||||
// ====================================================================
|
||||
|
||||
/// Helper to create a Message with tool_calls
|
||||
fn assistant_msg_with_tool_calls(content: &str, tool_call_ids: &[&str]) -> Message {
|
||||
let mut msg = Message::new(MessageRole::Assistant, content.to_string());
|
||||
msg.tool_calls = tool_call_ids
|
||||
.iter()
|
||||
.map(|id| MessageToolCall {
|
||||
id: id.to_string(),
|
||||
name: "read_file".to_string(),
|
||||
input: serde_json::json!({"file_path": "/tmp/test.rs"}),
|
||||
})
|
||||
.collect();
|
||||
msg
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compaction_strips_tool_calls_from_last_assistant() {
|
||||
// Reproduce the exact bug: assistant message with tool_calls gets preserved
|
||||
// across compaction, creating orphaned tool_use blocks.
|
||||
let mut cw = ContextWindow::new(100_000);
|
||||
|
||||
// Build a conversation: system, user, assistant(with tool_call), user(tool_result), user(new input)
|
||||
cw.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
|
||||
cw.add_message(Message::new(MessageRole::User, "Read the file please.".to_string()));
|
||||
cw.add_message(assistant_msg_with_tool_calls(
|
||||
"Let me read that file for you.",
|
||||
&["toolu_01QRFL8vGKDjZZkfHR586Srb"],
|
||||
));
|
||||
let mut tool_result = Message::new(MessageRole::User, "Tool result: file contents here...".to_string());
|
||||
tool_result.tool_result_id = Some("toolu_01QRFL8vGKDjZZkfHR586Srb".to_string());
|
||||
cw.add_message(tool_result);
|
||||
|
||||
// Now compact
|
||||
cw.reset_with_summary(
|
||||
"Summary: user asked to read a file, assistant read it.".to_string(),
|
||||
Some("Now do something else.".to_string()),
|
||||
);
|
||||
|
||||
// Find the preserved assistant message
|
||||
let assistant_msgs: Vec<&Message> = cw
|
||||
.conversation_history
|
||||
.iter()
|
||||
.filter(|m| matches!(m.role, MessageRole::Assistant))
|
||||
.collect();
|
||||
|
||||
assert_eq!(assistant_msgs.len(), 1, "Should have exactly one assistant message");
|
||||
let preserved = assistant_msgs[0];
|
||||
|
||||
// The key assertion: tool_calls must be stripped
|
||||
assert!(
|
||||
preserved.tool_calls.is_empty(),
|
||||
"tool_calls should be stripped from preserved assistant message, but found: {:?}",
|
||||
preserved.tool_calls
|
||||
);
|
||||
|
||||
// Text content should be preserved
|
||||
assert!(preserved.content.contains("Let me read that file"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compaction_drops_assistant_with_only_tool_calls_no_text() {
|
||||
// Edge case: assistant message has tool_calls but empty content.
|
||||
// After stripping tool_calls, the message is empty and should be dropped.
|
||||
let mut cw = ContextWindow::new(100_000);
|
||||
|
||||
cw.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
|
||||
cw.add_message(Message::new(MessageRole::User, "Do something.".to_string()));
|
||||
|
||||
// Assistant message with tool_calls but empty text content
|
||||
let mut assistant = Message::new(MessageRole::Assistant, "".to_string());
|
||||
assistant.tool_calls = vec![MessageToolCall {
|
||||
id: "toolu_abc123".to_string(),
|
||||
name: "shell".to_string(),
|
||||
input: serde_json::json!({"command": "ls"}),
|
||||
}];
|
||||
// Force-add it (bypassing the empty check since it has tool_calls)
|
||||
cw.conversation_history.push(assistant);
|
||||
|
||||
let mut tool_result = Message::new(MessageRole::User, "Tool result: file1 file2".to_string());
|
||||
tool_result.tool_result_id = Some("toolu_abc123".to_string());
|
||||
cw.add_message(tool_result);
|
||||
|
||||
// Compact
|
||||
cw.reset_with_summary(
|
||||
"Summary: ran ls command.".to_string(),
|
||||
Some("What next?".to_string()),
|
||||
);
|
||||
|
||||
// The empty assistant message (after tool_call stripping) should be dropped
|
||||
let assistant_msgs: Vec<&Message> = cw
|
||||
.conversation_history
|
||||
.iter()
|
||||
.filter(|m| matches!(m.role, MessageRole::Assistant))
|
||||
.collect();
|
||||
|
||||
assert_eq!(
|
||||
assistant_msgs.len(), 0,
|
||||
"Empty assistant message (after tool_call stripping) should be dropped"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compaction_preserves_normal_assistant_message() {
|
||||
// Normal case: assistant message without tool_calls should be preserved as-is.
|
||||
let mut cw = ContextWindow::new(100_000);
|
||||
|
||||
cw.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string()));
|
||||
cw.add_message(Message::new(MessageRole::User, "Hello!".to_string()));
|
||||
cw.add_message(Message::new(
|
||||
MessageRole::Assistant,
|
||||
"Hello! How can I help you today?".to_string(),
|
||||
));
|
||||
|
||||
cw.reset_with_summary(
|
||||
"Summary: greeting exchange.".to_string(),
|
||||
Some("Tell me a joke.".to_string()),
|
||||
);
|
||||
|
||||
let assistant_msgs: Vec<&Message> = cw
|
||||
.conversation_history
|
||||
.iter()
|
||||
.filter(|m| matches!(m.role, MessageRole::Assistant))
|
||||
.collect();
|
||||
|
||||
assert_eq!(assistant_msgs.len(), 1);
|
||||
assert!(assistant_msgs[0].tool_calls.is_empty());
|
||||
assert!(assistant_msgs[0].content.contains("Hello! How can I help you today?"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -565,3 +565,138 @@ async fn test_second_compaction_no_bloat() {
|
||||
|
||||
eprintln!("\n✅ Second compaction maintains clean structure without bloat!");
|
||||
}
|
||||
|
||||
/// Test: Compaction strips structured tool_calls from preserved assistant message
|
||||
///
|
||||
/// Reproduces the exact bug from the h3 session:
|
||||
/// 1. Agent executes a task that triggers a native tool call (read_file)
|
||||
/// 2. The assistant message is stored with structured `tool_calls` field
|
||||
/// 3. Compaction preserves the last assistant message
|
||||
/// 4. The tool_result message is summarized away
|
||||
/// 5. Next API call would fail with "tool_use ids were found without tool_result blocks"
|
||||
///
|
||||
/// After the fix, compaction strips tool_calls from the preserved assistant message.
|
||||
#[tokio::test]
|
||||
async fn test_compaction_strips_structured_tool_calls() {
|
||||
use g3_providers::MessageToolCall;
|
||||
|
||||
let provider = MockProvider::new()
|
||||
.with_native_tool_calling(true)
|
||||
// Response 1: Summary for compaction
|
||||
.with_response(MockResponse::text(
|
||||
"Summary: User asked to read a file. Assistant read test_file.txt which contained a greeting.",
|
||||
))
|
||||
// Response 2: Post-compaction response (this would fail with 400 if tool_calls leaked)
|
||||
.with_response(MockResponse::text(
|
||||
"Continuing after compaction. What would you like to do next?",
|
||||
));
|
||||
|
||||
let (mut agent, _agent_temp) = create_agent_with_mock(provider).await;
|
||||
|
||||
// Directly build the exact conversation state that triggers the bug:
|
||||
// The last assistant message has structured tool_calls, followed by a tool_result,
|
||||
// but the LAST message in the conversation is the assistant with tool_calls
|
||||
// (simulating the case where compaction happens mid-tool-execution or the
|
||||
// last assistant response was a tool call).
|
||||
|
||||
// User asks to read a file
|
||||
agent.add_message_to_context(Message::new(
|
||||
MessageRole::User,
|
||||
"Please read the recognize.rs file".to_string(),
|
||||
));
|
||||
|
||||
// Assistant responds with text + structured tool_call (this will be the LAST assistant message)
|
||||
let mut assistant_with_tool = Message::new(
|
||||
MessageRole::Assistant,
|
||||
"You're right — the recognizer should serve the corpus. Let me research what it takes.".to_string(),
|
||||
);
|
||||
assistant_with_tool.tool_calls.push(MessageToolCall {
|
||||
id: "toolu_01QRFL8vGKDjZZkfHR586Srb".to_string(),
|
||||
name: "read_file".to_string(),
|
||||
input: serde_json::json!({"file_path": "/tmp/recognize.rs"}),
|
||||
});
|
||||
agent.add_message_to_context(assistant_with_tool);
|
||||
|
||||
// Tool result follows
|
||||
let mut tool_result = Message::new(
|
||||
MessageRole::User,
|
||||
"Tool result: pub fn recognize(lexemes: &[Lexeme]) -> Result<RecognizedStream> { ... }".to_string(),
|
||||
);
|
||||
tool_result.tool_result_id = Some("toolu_01QRFL8vGKDjZZkfHR586Srb".to_string());
|
||||
agent.add_message_to_context(tool_result);
|
||||
|
||||
// Verify the pre-compaction state
|
||||
let history_before = agent.get_context_window().conversation_history.clone();
|
||||
eprintln!("\n=== Before compaction ===");
|
||||
for (i, msg) in history_before.iter().enumerate() {
|
||||
eprintln!(
|
||||
" {}: {:?} tool_calls={} tool_result_id={:?} content={}...",
|
||||
i,
|
||||
msg.role,
|
||||
msg.tool_calls.len(),
|
||||
msg.tool_result_id,
|
||||
msg.content.chars().take(60).collect::<String>()
|
||||
);
|
||||
}
|
||||
|
||||
// Verify: last assistant message has tool_calls
|
||||
let last_assistant = history_before.iter().rev()
|
||||
.find(|m| matches!(m.role, MessageRole::Assistant))
|
||||
.expect("Should have assistant message");
|
||||
assert_eq!(last_assistant.tool_calls.len(), 1, "Last assistant should have 1 tool_call");
|
||||
assert_eq!(last_assistant.tool_calls[0].id, "toolu_01QRFL8vGKDjZZkfHR586Srb");
|
||||
|
||||
// Trigger compaction
|
||||
let compact_result = agent.force_compact().await;
|
||||
assert!(compact_result.is_ok(), "Compaction should succeed: {:?}", compact_result.err());
|
||||
|
||||
// Verify: no assistant messages with tool_calls after compaction
|
||||
let history_after = &agent.get_context_window().conversation_history;
|
||||
|
||||
eprintln!("\n=== After compaction ===");
|
||||
for (i, msg) in history_after.iter().enumerate() {
|
||||
eprintln!(
|
||||
" {}: {:?} tool_calls={} tool_result_id={:?} content={}...",
|
||||
i,
|
||||
msg.role,
|
||||
msg.tool_calls.len(),
|
||||
msg.tool_result_id,
|
||||
msg.content.chars().take(60).collect::<String>()
|
||||
);
|
||||
}
|
||||
|
||||
let orphaned_tool_calls: Vec<_> = history_after
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter(|(_, m)| matches!(m.role, MessageRole::Assistant) && !m.tool_calls.is_empty())
|
||||
.collect();
|
||||
|
||||
assert!(
|
||||
orphaned_tool_calls.is_empty(),
|
||||
"After compaction, no assistant messages should have tool_calls. Found {} orphaned: {:?}",
|
||||
orphaned_tool_calls.len(),
|
||||
orphaned_tool_calls.iter().map(|(i, m)| {
|
||||
format!("msg[{}]: {} tool_calls", i, m.tool_calls.len())
|
||||
}).collect::<Vec<_>>()
|
||||
);
|
||||
|
||||
// Verify the preserved assistant message has text content but no tool_calls
|
||||
let preserved_assistant = history_after.iter()
|
||||
.find(|m| matches!(m.role, MessageRole::Assistant))
|
||||
.expect("Should have preserved assistant message after compaction");
|
||||
assert!(preserved_assistant.tool_calls.is_empty(),
|
||||
"Preserved assistant message should have tool_calls stripped");
|
||||
assert!(preserved_assistant.content.contains("recognizer should serve the corpus"),
|
||||
"Preserved assistant message should retain text content");
|
||||
|
||||
// Execute another task post-compaction to verify the conversation is valid
|
||||
// (this would fail with Anthropic 400 error if tool_calls leaked through)
|
||||
let post_compact_result = agent.execute_task("What should we do next?", None, false).await;
|
||||
assert!(
|
||||
post_compact_result.is_ok(),
|
||||
"Post-compaction task should succeed (no orphaned tool_use blocks): {:?}",
|
||||
post_compact_result.err()
|
||||
);
|
||||
|
||||
eprintln!("\n✅ Compaction correctly strips structured tool_calls - no orphaned tool_use blocks!");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user