From d61be719c210c8dcafa82de4d8da74af12196de9 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Wed, 11 Feb 2026 15:22:03 +1100 Subject: [PATCH] fix: strip orphaned tool_calls from preserved assistant message during compaction After context compaction, the preserved last assistant message retained its structured tool_calls field, but the corresponding tool_result was summarized away. This created orphaned tool_use blocks that violated the Anthropic API constraint: 'Each tool_use block must have a corresponding tool_result block in the next message', causing 400 errors. Primary fix: clear tool_calls from the preserved assistant message in extract_preserved_messages(). The tool call was already executed and its result is captured in the summary. Defense-in-depth: added strip_orphaned_tool_use() post-processing in Anthropic convert_messages() to detect and strip any orphaned tool_use blocks before they reach the API. Added 7 tests: 3 unit tests for compaction stripping, 3 unit tests for Anthropic orphan detection, 1 integration test reproducing the exact bug scenario from the h3 session. --- analysis/memory.md | 14 +- crates/g3-core/src/context_window.rs | 148 +++++++++++++++- crates/g3-core/tests/compaction_test.rs | 135 +++++++++++++++ crates/g3-providers/src/anthropic.rs | 213 ++++++++++++++++++++++++ 4 files changed, 506 insertions(+), 4 deletions(-) diff --git a/analysis/memory.md b/analysis/memory.md index 52bddaf..611c2ce 100644 --- a/analysis/memory.md +++ b/analysis/memory.md @@ -1,5 +1,5 @@ # Workspace Memory -> Updated: 2026-02-10T21:08:38Z | Size: 27.8k chars +> Updated: 2026-02-11T03:39:03Z | Size: 29.0k chars ### Remember Tool Wiring - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()` @@ -442,4 +442,14 @@ Makes tool output responsive to terminal width - no line wrapping, with 4-char r - `crates/g3-core/src/context_window.rs` [107..109] - `add_message_with_tokens()` preserves messages with `tool_calls` even if content is empty - `crates/g3-core/src/streaming_parser.rs` [339] - `process_chunk()` preserves tool call `id` from provider -**Bug fixed**: Agent would stop mid-task because native tool calls were stored as inline JSON text in `Message.content`. When sent back to Anthropic API via `convert_messages()`, they went as plain text instead of structured `tool_use`/`tool_result` blocks. The model would occasionally get confused and emit text describing what it wanted to do instead of invoking the tool mechanism. \ No newline at end of file +**Bug fixed**: Agent would stop mid-task because native tool calls were stored as inline JSON text in `Message.content`. When sent back to Anthropic API via `convert_messages()`, they went as plain text instead of structured `tool_use`/`tool_result` blocks. The model would occasionally get confused and emit text describing what it wanted to do instead of invoking the tool mechanism. + +### Compaction Tool Call Stripping Fix (2026-02-11) +- `crates/g3-core/src/context_window.rs` [339..355] - `extract_preserved_messages()` now strips `tool_calls` from preserved last assistant message + - **Root cause**: After compaction, preserved assistant message retained structured `tool_calls` but the corresponding `tool_result` was summarized away → orphaned `tool_use` blocks → Anthropic 400 error + - **Fix**: Clear `msg.tool_calls` in `extract_preserved_messages()` before returning + - Messages with only tool_calls and empty content are dropped by `add_message_with_tokens()` empty check +- `crates/g3-providers/src/anthropic.rs` [369..435] - `strip_orphaned_tool_use()` defense-in-depth + - Post-processing pass in `convert_messages()` detects orphaned `tool_use` blocks (no matching `tool_result` in next message) + - Strips orphaned blocks with warning, adds placeholder text if message becomes empty +- Tests: `test_compaction_strips_tool_calls_from_last_assistant`, `test_compaction_drops_assistant_with_only_tool_calls_no_text`, `test_compaction_preserves_normal_assistant_message` (unit), `test_strip_orphaned_tool_use_*` (anthropic), `test_compaction_strips_structured_tool_calls` (integration) \ No newline at end of file diff --git a/crates/g3-core/src/context_window.rs b/crates/g3-core/src/context_window.rs index 814a90d..8009a74 100644 --- a/crates/g3-core/src/context_window.rs +++ b/crates/g3-core/src/context_window.rs @@ -334,13 +334,24 @@ Format this as a detailed but concise summary that can be used to resume the con } }); - // Find the last assistant message in the conversation + // Find the last assistant message in the conversation. + // IMPORTANT: Strip tool_calls from the preserved message. After compaction, + // the tool_result messages are summarized away, so keeping tool_calls would + // create orphaned tool_use blocks that violate the Anthropic API constraint: + // "Each tool_use block must have a corresponding tool_result block in the next message." let last_assistant_message = self .conversation_history .iter() .rev() .find(|m| matches!(m.role, MessageRole::Assistant)) - .cloned(); + .map(|m| { + let mut msg = m.clone(); + if !msg.tool_calls.is_empty() { + debug!("Stripping {} tool_calls from preserved assistant message during compaction", msg.tool_calls.len()); + msg.tool_calls.clear(); + } + msg + }); PreservedMessages { system_prompt, @@ -767,6 +778,7 @@ impl ThinResult { #[cfg(test)] mod tests { use super::*; + use g3_providers::MessageToolCall; #[test] fn test_new_context_window() { @@ -858,4 +870,136 @@ mod tests { assert_eq!(ThinScope::FirstThird.error_action(), "thinning"); assert_eq!(ThinScope::All.error_action(), "skinnifying"); } + + // ==================================================================== + // Compaction: tool_call stripping tests + // ==================================================================== + + /// Helper to create a Message with tool_calls + fn assistant_msg_with_tool_calls(content: &str, tool_call_ids: &[&str]) -> Message { + let mut msg = Message::new(MessageRole::Assistant, content.to_string()); + msg.tool_calls = tool_call_ids + .iter() + .map(|id| MessageToolCall { + id: id.to_string(), + name: "read_file".to_string(), + input: serde_json::json!({"file_path": "/tmp/test.rs"}), + }) + .collect(); + msg + } + + #[test] + fn test_compaction_strips_tool_calls_from_last_assistant() { + // Reproduce the exact bug: assistant message with tool_calls gets preserved + // across compaction, creating orphaned tool_use blocks. + let mut cw = ContextWindow::new(100_000); + + // Build a conversation: system, user, assistant(with tool_call), user(tool_result), user(new input) + cw.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string())); + cw.add_message(Message::new(MessageRole::User, "Read the file please.".to_string())); + cw.add_message(assistant_msg_with_tool_calls( + "Let me read that file for you.", + &["toolu_01QRFL8vGKDjZZkfHR586Srb"], + )); + let mut tool_result = Message::new(MessageRole::User, "Tool result: file contents here...".to_string()); + tool_result.tool_result_id = Some("toolu_01QRFL8vGKDjZZkfHR586Srb".to_string()); + cw.add_message(tool_result); + + // Now compact + cw.reset_with_summary( + "Summary: user asked to read a file, assistant read it.".to_string(), + Some("Now do something else.".to_string()), + ); + + // Find the preserved assistant message + let assistant_msgs: Vec<&Message> = cw + .conversation_history + .iter() + .filter(|m| matches!(m.role, MessageRole::Assistant)) + .collect(); + + assert_eq!(assistant_msgs.len(), 1, "Should have exactly one assistant message"); + let preserved = assistant_msgs[0]; + + // The key assertion: tool_calls must be stripped + assert!( + preserved.tool_calls.is_empty(), + "tool_calls should be stripped from preserved assistant message, but found: {:?}", + preserved.tool_calls + ); + + // Text content should be preserved + assert!(preserved.content.contains("Let me read that file")); + } + + #[test] + fn test_compaction_drops_assistant_with_only_tool_calls_no_text() { + // Edge case: assistant message has tool_calls but empty content. + // After stripping tool_calls, the message is empty and should be dropped. + let mut cw = ContextWindow::new(100_000); + + cw.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string())); + cw.add_message(Message::new(MessageRole::User, "Do something.".to_string())); + + // Assistant message with tool_calls but empty text content + let mut assistant = Message::new(MessageRole::Assistant, "".to_string()); + assistant.tool_calls = vec![MessageToolCall { + id: "toolu_abc123".to_string(), + name: "shell".to_string(), + input: serde_json::json!({"command": "ls"}), + }]; + // Force-add it (bypassing the empty check since it has tool_calls) + cw.conversation_history.push(assistant); + + let mut tool_result = Message::new(MessageRole::User, "Tool result: file1 file2".to_string()); + tool_result.tool_result_id = Some("toolu_abc123".to_string()); + cw.add_message(tool_result); + + // Compact + cw.reset_with_summary( + "Summary: ran ls command.".to_string(), + Some("What next?".to_string()), + ); + + // The empty assistant message (after tool_call stripping) should be dropped + let assistant_msgs: Vec<&Message> = cw + .conversation_history + .iter() + .filter(|m| matches!(m.role, MessageRole::Assistant)) + .collect(); + + assert_eq!( + assistant_msgs.len(), 0, + "Empty assistant message (after tool_call stripping) should be dropped" + ); + } + + #[test] + fn test_compaction_preserves_normal_assistant_message() { + // Normal case: assistant message without tool_calls should be preserved as-is. + let mut cw = ContextWindow::new(100_000); + + cw.add_message(Message::new(MessageRole::System, "You are a helpful assistant.".to_string())); + cw.add_message(Message::new(MessageRole::User, "Hello!".to_string())); + cw.add_message(Message::new( + MessageRole::Assistant, + "Hello! How can I help you today?".to_string(), + )); + + cw.reset_with_summary( + "Summary: greeting exchange.".to_string(), + Some("Tell me a joke.".to_string()), + ); + + let assistant_msgs: Vec<&Message> = cw + .conversation_history + .iter() + .filter(|m| matches!(m.role, MessageRole::Assistant)) + .collect(); + + assert_eq!(assistant_msgs.len(), 1); + assert!(assistant_msgs[0].tool_calls.is_empty()); + assert!(assistant_msgs[0].content.contains("Hello! How can I help you today?")); + } } diff --git a/crates/g3-core/tests/compaction_test.rs b/crates/g3-core/tests/compaction_test.rs index b36dbe6..7a22a1a 100644 --- a/crates/g3-core/tests/compaction_test.rs +++ b/crates/g3-core/tests/compaction_test.rs @@ -565,3 +565,138 @@ async fn test_second_compaction_no_bloat() { eprintln!("\n✅ Second compaction maintains clean structure without bloat!"); } + +/// Test: Compaction strips structured tool_calls from preserved assistant message +/// +/// Reproduces the exact bug from the h3 session: +/// 1. Agent executes a task that triggers a native tool call (read_file) +/// 2. The assistant message is stored with structured `tool_calls` field +/// 3. Compaction preserves the last assistant message +/// 4. The tool_result message is summarized away +/// 5. Next API call would fail with "tool_use ids were found without tool_result blocks" +/// +/// After the fix, compaction strips tool_calls from the preserved assistant message. +#[tokio::test] +async fn test_compaction_strips_structured_tool_calls() { + use g3_providers::MessageToolCall; + + let provider = MockProvider::new() + .with_native_tool_calling(true) + // Response 1: Summary for compaction + .with_response(MockResponse::text( + "Summary: User asked to read a file. Assistant read test_file.txt which contained a greeting.", + )) + // Response 2: Post-compaction response (this would fail with 400 if tool_calls leaked) + .with_response(MockResponse::text( + "Continuing after compaction. What would you like to do next?", + )); + + let (mut agent, _agent_temp) = create_agent_with_mock(provider).await; + + // Directly build the exact conversation state that triggers the bug: + // The last assistant message has structured tool_calls, followed by a tool_result, + // but the LAST message in the conversation is the assistant with tool_calls + // (simulating the case where compaction happens mid-tool-execution or the + // last assistant response was a tool call). + + // User asks to read a file + agent.add_message_to_context(Message::new( + MessageRole::User, + "Please read the recognize.rs file".to_string(), + )); + + // Assistant responds with text + structured tool_call (this will be the LAST assistant message) + let mut assistant_with_tool = Message::new( + MessageRole::Assistant, + "You're right — the recognizer should serve the corpus. Let me research what it takes.".to_string(), + ); + assistant_with_tool.tool_calls.push(MessageToolCall { + id: "toolu_01QRFL8vGKDjZZkfHR586Srb".to_string(), + name: "read_file".to_string(), + input: serde_json::json!({"file_path": "/tmp/recognize.rs"}), + }); + agent.add_message_to_context(assistant_with_tool); + + // Tool result follows + let mut tool_result = Message::new( + MessageRole::User, + "Tool result: pub fn recognize(lexemes: &[Lexeme]) -> Result { ... }".to_string(), + ); + tool_result.tool_result_id = Some("toolu_01QRFL8vGKDjZZkfHR586Srb".to_string()); + agent.add_message_to_context(tool_result); + + // Verify the pre-compaction state + let history_before = agent.get_context_window().conversation_history.clone(); + eprintln!("\n=== Before compaction ==="); + for (i, msg) in history_before.iter().enumerate() { + eprintln!( + " {}: {:?} tool_calls={} tool_result_id={:?} content={}...", + i, + msg.role, + msg.tool_calls.len(), + msg.tool_result_id, + msg.content.chars().take(60).collect::() + ); + } + + // Verify: last assistant message has tool_calls + let last_assistant = history_before.iter().rev() + .find(|m| matches!(m.role, MessageRole::Assistant)) + .expect("Should have assistant message"); + assert_eq!(last_assistant.tool_calls.len(), 1, "Last assistant should have 1 tool_call"); + assert_eq!(last_assistant.tool_calls[0].id, "toolu_01QRFL8vGKDjZZkfHR586Srb"); + + // Trigger compaction + let compact_result = agent.force_compact().await; + assert!(compact_result.is_ok(), "Compaction should succeed: {:?}", compact_result.err()); + + // Verify: no assistant messages with tool_calls after compaction + let history_after = &agent.get_context_window().conversation_history; + + eprintln!("\n=== After compaction ==="); + for (i, msg) in history_after.iter().enumerate() { + eprintln!( + " {}: {:?} tool_calls={} tool_result_id={:?} content={}...", + i, + msg.role, + msg.tool_calls.len(), + msg.tool_result_id, + msg.content.chars().take(60).collect::() + ); + } + + let orphaned_tool_calls: Vec<_> = history_after + .iter() + .enumerate() + .filter(|(_, m)| matches!(m.role, MessageRole::Assistant) && !m.tool_calls.is_empty()) + .collect(); + + assert!( + orphaned_tool_calls.is_empty(), + "After compaction, no assistant messages should have tool_calls. Found {} orphaned: {:?}", + orphaned_tool_calls.len(), + orphaned_tool_calls.iter().map(|(i, m)| { + format!("msg[{}]: {} tool_calls", i, m.tool_calls.len()) + }).collect::>() + ); + + // Verify the preserved assistant message has text content but no tool_calls + let preserved_assistant = history_after.iter() + .find(|m| matches!(m.role, MessageRole::Assistant)) + .expect("Should have preserved assistant message after compaction"); + assert!(preserved_assistant.tool_calls.is_empty(), + "Preserved assistant message should have tool_calls stripped"); + assert!(preserved_assistant.content.contains("recognizer should serve the corpus"), + "Preserved assistant message should retain text content"); + + // Execute another task post-compaction to verify the conversation is valid + // (this would fail with Anthropic 400 error if tool_calls leaked through) + let post_compact_result = agent.execute_task("What should we do next?", None, false).await; + assert!( + post_compact_result.is_ok(), + "Post-compaction task should succeed (no orphaned tool_use blocks): {:?}", + post_compact_result.err() + ); + + eprintln!("\n✅ Compaction correctly strips structured tool_calls - no orphaned tool_use blocks!"); +} diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs index ab97396..471c922 100644 --- a/crates/g3-providers/src/anthropic.rs +++ b/crates/g3-providers/src/anthropic.rs @@ -360,9 +360,80 @@ impl AnthropicProvider { } } + // Defense-in-depth: strip orphaned tool_use blocks that have no matching tool_result + Self::strip_orphaned_tool_use(&mut anthropic_messages); + Ok((system_message, anthropic_messages)) } + /// Strip orphaned tool_use blocks from assistant messages that have no matching + /// tool_result in the immediately following user message. + /// + /// Anthropic API requires: "Each tool_use block must have a corresponding tool_result + /// block in the next message." This can happen after context compaction when the + /// last assistant message had tool_calls but the tool_result was summarized away. + fn strip_orphaned_tool_use(messages: &mut Vec) { + // Collect tool_result IDs from each user message, indexed by position + let tool_result_ids_by_pos: Vec>> = messages + .iter() + .map(|msg| { + if msg.role == "user" { + let ids: Vec = msg + .content + .iter() + .filter_map(|c| match c { + AnthropicContent::ToolResult { tool_use_id, .. } => { + Some(tool_use_id.clone()) + } + _ => None, + }) + .collect(); + if ids.is_empty() { None } else { Some(ids) } + } else { + None + } + }) + .collect(); + + for i in 0..messages.len() { + if messages[i].role != "assistant" { + continue; + } + let has_tool_use = messages[i].content.iter().any(|c| matches!(c, AnthropicContent::ToolUse { .. })); + if !has_tool_use { + continue; + } + + // Check if next message is a user message with tool_result blocks + let next_has_results = i + 1 < messages.len() + && tool_result_ids_by_pos.get(i + 1).and_then(|v| v.as_ref()).is_some(); + + if !next_has_results { + let tool_use_ids: Vec = messages[i] + .content + .iter() + .filter_map(|c| match c { + AnthropicContent::ToolUse { id, .. } => Some(id.clone()), + _ => None, + }) + .collect(); + tracing::warn!( + "Stripping {} orphaned tool_use block(s) from assistant message {}: {:?}", + tool_use_ids.len(), i, tool_use_ids + ); + messages[i].content.retain(|c| !matches!(c, AnthropicContent::ToolUse { .. })); + + // If stripping left the message empty, add placeholder text + if messages[i].content.is_empty() { + messages[i].content.push(AnthropicContent::Text { + text: "(continued)".to_string(), + cache_control: None, + }); + } + } + } + } + fn create_request_body( &self, messages: &[Message], @@ -1310,4 +1381,146 @@ mod tests { assert_eq!(text_content.len(), 1); assert_eq!(text_content[0], "Here is my response."); } + + // ==================================================================== + // Orphaned tool_use stripping tests + // ==================================================================== + + #[test] + fn test_strip_orphaned_tool_use_removes_orphaned_blocks() { + // Simulate: assistant with tool_use, followed by regular user message (no tool_result) + let mut messages = vec![ + AnthropicMessage { + role: "user".to_string(), + content: vec![AnthropicContent::Text { + text: "Read the file".to_string(), + cache_control: None, + }], + }, + AnthropicMessage { + role: "assistant".to_string(), + content: vec![ + AnthropicContent::Text { + text: "Let me read that.".to_string(), + cache_control: None, + }, + AnthropicContent::ToolUse { + id: "toolu_orphaned".to_string(), + name: "read_file".to_string(), + input: serde_json::json!({"file_path": "test.rs"}), + }, + ], + }, + // Next message is a regular user message, NOT a tool_result + AnthropicMessage { + role: "user".to_string(), + content: vec![AnthropicContent::Text { + text: "Do something else".to_string(), + cache_control: None, + }], + }, + ]; + + AnthropicProvider::strip_orphaned_tool_use(&mut messages); + + // The tool_use should be stripped from the assistant message + let assistant = &messages[1]; + assert!( + !assistant.content.iter().any(|c| matches!(c, AnthropicContent::ToolUse { .. })), + "Orphaned tool_use should be stripped" + ); + // Text content should remain + assert!( + assistant.content.iter().any(|c| matches!(c, AnthropicContent::Text { .. })), + "Text content should be preserved" + ); + } + + #[test] + fn test_strip_orphaned_tool_use_preserves_valid_sequence() { + // Valid: assistant with tool_use, followed by user with matching tool_result + let mut messages = vec![ + AnthropicMessage { + role: "user".to_string(), + content: vec![AnthropicContent::Text { + text: "Read the file".to_string(), + cache_control: None, + }], + }, + AnthropicMessage { + role: "assistant".to_string(), + content: vec![ + AnthropicContent::Text { + text: "Reading...".to_string(), + cache_control: None, + }, + AnthropicContent::ToolUse { + id: "toolu_valid".to_string(), + name: "read_file".to_string(), + input: serde_json::json!({"file_path": "test.rs"}), + }, + ], + }, + AnthropicMessage { + role: "user".to_string(), + content: vec![AnthropicContent::ToolResult { + tool_use_id: "toolu_valid".to_string(), + content: "file contents".to_string(), + cache_control: None, + }], + }, + ]; + + AnthropicProvider::strip_orphaned_tool_use(&mut messages); + + // tool_use should NOT be stripped + let assistant = &messages[1]; + assert!( + assistant.content.iter().any(|c| matches!(c, AnthropicContent::ToolUse { .. })), + "Valid tool_use should be preserved" + ); + } + + #[test] + fn test_strip_orphaned_tool_use_adds_placeholder_for_empty_message() { + // Assistant message with ONLY a tool_use block (no text) + let mut messages = vec![ + AnthropicMessage { + role: "user".to_string(), + content: vec![AnthropicContent::Text { + text: "Do something".to_string(), + cache_control: None, + }], + }, + AnthropicMessage { + role: "assistant".to_string(), + content: vec![AnthropicContent::ToolUse { + id: "toolu_only".to_string(), + name: "shell".to_string(), + input: serde_json::json!({"command": "ls"}), + }], + }, + AnthropicMessage { + role: "user".to_string(), + content: vec![AnthropicContent::Text { + text: "Never mind".to_string(), + cache_control: None, + }], + }, + ]; + + AnthropicProvider::strip_orphaned_tool_use(&mut messages); + + // Should have placeholder text instead of empty content + let assistant = &messages[1]; + assert!(!assistant.content.is_empty(), "Should not have empty content"); + assert!( + assistant.content.iter().any(|c| matches!(c, AnthropicContent::Text { .. })), + "Should have placeholder text" + ); + assert!( + !assistant.content.iter().any(|c| matches!(c, AnthropicContent::ToolUse { .. })), + "tool_use should be stripped" + ); + } }