fix: store tool calls structurally for proper API roundtripping
The agent would stop mid-task because native tool calls were stored as inline JSON text in Message.content. When sent back to the Anthropic API via convert_messages(), they went as plain text instead of structured tool_use/tool_result blocks. The model would occasionally get confused and emit text describing what it wanted to do instead of invoking the tool mechanism. Changes: - Add MessageToolCall struct and tool_calls/tool_result_id fields to Message - Add id field to core ToolCall struct to preserve provider tool call IDs - Update Anthropic convert_messages() to emit tool_use and tool_result blocks - Add ToolResult variant to AnthropicContent enum - Store tool calls structurally in tool message construction (not inline JSON) - Fix add_message() to preserve empty-content messages with tool_calls - Fix check_duplicate_in_previous_message() to check structured tool_calls - Generate valid IDs for JSON fallback tool calls (Anthropic pattern requirement) - Update planner create_tool_message() to use structured tool calls
This commit is contained in:
@@ -103,7 +103,8 @@ impl ContextWindow {
|
||||
|
||||
/// Add a message with optional token count from the provider
|
||||
pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option<u32>) {
|
||||
if message.content.trim().is_empty() {
|
||||
// Skip truly empty messages, but keep messages that have structured tool calls or tool results
|
||||
if message.content.trim().is_empty() && message.tool_calls.is_empty() && message.tool_result_id.is_none() {
|
||||
warn!("Skipping empty message to avoid API error");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -82,6 +82,10 @@ pub use paths::{
|
||||
pub struct ToolCall {
|
||||
pub tool: String,
|
||||
pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments
|
||||
/// Unique ID for this tool call (from native tool calling providers).
|
||||
/// Used to correlate tool_use/tool_result blocks in the API.
|
||||
#[serde(default)]
|
||||
pub id: String,
|
||||
}
|
||||
|
||||
/// Cumulative cache statistics for prompt caching efficacy tracking.
|
||||
@@ -1379,6 +1383,22 @@ impl<W: UiWriter> Agent<W> {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check structured tool_calls first (native tool calling)
|
||||
if !msg.tool_calls.is_empty() {
|
||||
if let Some(last_tc) = msg.tool_calls.last() {
|
||||
let prev = ToolCall {
|
||||
tool: last_tc.name.clone(),
|
||||
args: last_tc.input.clone(),
|
||||
id: last_tc.id.clone(),
|
||||
};
|
||||
if streaming::are_tool_calls_duplicate(&prev, tool_call) {
|
||||
return Some("DUP IN MSG".to_string());
|
||||
}
|
||||
}
|
||||
// Only check the most recent assistant message
|
||||
break;
|
||||
}
|
||||
|
||||
let content = &msg.content;
|
||||
|
||||
// Look for the last occurrence of a tool call pattern
|
||||
@@ -2001,6 +2021,8 @@ Skip if nothing new. Be brief."#;
|
||||
content: content.to_string(),
|
||||
kind: g3_providers::MessageKind::Regular,
|
||||
cache_control: None,
|
||||
tool_calls: Vec::new(),
|
||||
tool_result_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2028,6 +2050,8 @@ Skip if nothing new. Be brief."#;
|
||||
content: format!("[Session Resumed]\n\n{}", context_msg),
|
||||
kind: g3_providers::MessageKind::Regular,
|
||||
cache_control: None,
|
||||
tool_calls: Vec::new(),
|
||||
tool_result_id: None,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2503,25 +2527,29 @@ Skip if nothing new. Be brief."#;
|
||||
|
||||
// Add the tool call and result to the context window using RAW unfiltered content
|
||||
// This ensures the log file contains the true raw content including JSON tool calls
|
||||
let tool_message = if !raw_content_for_log.trim().is_empty() {
|
||||
Message::new(
|
||||
let tool_message = {
|
||||
let text_content = raw_content_for_log.trim().to_string();
|
||||
let mut msg = Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!(
|
||||
"{}\n\n{{\"tool\": \"{}\", \"args\": {}}}",
|
||||
raw_content_for_log.trim(),
|
||||
tool_call.tool,
|
||||
tool_call.args
|
||||
),
|
||||
)
|
||||
} else {
|
||||
// No text content before tool call, just include the tool call
|
||||
Message::new(
|
||||
MessageRole::Assistant,
|
||||
format!(
|
||||
"{{\"tool\": \"{}\", \"args\": {}}}",
|
||||
tool_call.tool, tool_call.args
|
||||
),
|
||||
)
|
||||
text_content,
|
||||
);
|
||||
// Store the tool call structurally so that providers can
|
||||
// emit proper tool_use blocks (e.g. Anthropic API) instead
|
||||
// of inline JSON text that confuses the model.
|
||||
msg.tool_calls.push(g3_providers::MessageToolCall {
|
||||
id: if tool_call.id.is_empty() {
|
||||
// Safety net: generate an ID if none was provided.
|
||||
// Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
static FALLBACK_COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||
format!("tool_{}", FALLBACK_COUNTER.fetch_add(1, Ordering::SeqCst))
|
||||
} else {
|
||||
tool_call.id.clone()
|
||||
},
|
||||
name: tool_call.tool.clone(),
|
||||
input: tool_call.args.clone(),
|
||||
});
|
||||
msg
|
||||
};
|
||||
let mut result_message = {
|
||||
let content = format!("Tool result: {}", tool_result);
|
||||
@@ -2548,6 +2576,10 @@ Skip if nothing new. Be brief."#;
|
||||
}
|
||||
};
|
||||
|
||||
// Link the tool result to the tool_use ID so providers can
|
||||
// emit proper tool_result blocks (e.g. Anthropic API).
|
||||
result_message.tool_result_id = Some(tool_call.id.clone());
|
||||
|
||||
// Attach any pending images to the result message
|
||||
// (images loaded via read_image tool)
|
||||
if !self.pending_images.is_empty() {
|
||||
|
||||
@@ -716,8 +716,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_deduplicate_tool_calls_no_duplicates() {
|
||||
let tools = vec![
|
||||
ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
|
||||
ToolCall { tool: "read_file".to_string(), args: serde_json::json!({"path": "foo.rs"}) },
|
||||
ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
|
||||
ToolCall { tool: "read_file".to_string(), args: serde_json::json!({"path": "foo.rs"}), id: String::new() },
|
||||
];
|
||||
|
||||
let result = deduplicate_tool_calls(tools, |_| None);
|
||||
@@ -730,8 +730,8 @@ mod tests {
|
||||
#[test]
|
||||
fn test_deduplicate_tool_calls_sequential_duplicate() {
|
||||
let tools = vec![
|
||||
ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
|
||||
ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
|
||||
ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
|
||||
ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
|
||||
];
|
||||
|
||||
let result = deduplicate_tool_calls(tools, |_| None);
|
||||
@@ -744,7 +744,7 @@ mod tests {
|
||||
#[test]
|
||||
fn test_deduplicate_tool_calls_previous_message_duplicate() {
|
||||
let tools = vec![
|
||||
ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
|
||||
ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
|
||||
];
|
||||
|
||||
// Simulate finding a duplicate in previous message
|
||||
|
||||
@@ -336,6 +336,7 @@ impl StreamingToolParser {
|
||||
completed_tools.push(ToolCall {
|
||||
tool: tool_call.tool.clone(),
|
||||
args: tool_call.args.clone(),
|
||||
id: tool_call.id.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -466,13 +467,21 @@ impl StreamingToolParser {
|
||||
}
|
||||
|
||||
fn try_parse_tool_call_json(&self, json_str: &str) -> Option<ToolCall> {
|
||||
let tool_call: ToolCall = serde_json::from_str(json_str).ok()?;
|
||||
let mut tool_call: ToolCall = serde_json::from_str(json_str).ok()?;
|
||||
let args_obj = tool_call.args.as_object()?;
|
||||
|
||||
if args_contain_prose_fragments(args_obj) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Generate an ID if not provided (JSON fallback tool calls don't have IDs,
|
||||
// but the Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$)
|
||||
if tool_call.id.is_empty() {
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
static COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||
tool_call.id = format!("json_tool_{}", COUNTER.fetch_add(1, Ordering::SeqCst));
|
||||
}
|
||||
|
||||
Some(tool_call)
|
||||
}
|
||||
|
||||
|
||||
@@ -181,6 +181,7 @@ mod tests {
|
||||
let tool_call = ToolCall {
|
||||
tool: "rehydrate".to_string(),
|
||||
args: json!({}),
|
||||
id: String::new(),
|
||||
};
|
||||
|
||||
let result = execute_rehydrate(&tool_call, &mut ctx).await;
|
||||
@@ -213,6 +214,7 @@ mod tests {
|
||||
let tool_call = ToolCall {
|
||||
tool: "rehydrate".to_string(),
|
||||
args: json!({"fragment_id": "test-fragment"}),
|
||||
id: String::new(),
|
||||
};
|
||||
|
||||
let result = execute_rehydrate(&tool_call, &mut ctx).await;
|
||||
@@ -245,6 +247,7 @@ mod tests {
|
||||
let tool_call = ToolCall {
|
||||
tool: "rehydrate".to_string(),
|
||||
args: json!({"fragment_id": "nonexistent-fragment"}),
|
||||
id: String::new(),
|
||||
};
|
||||
|
||||
let result = execute_rehydrate(&tool_call, &mut ctx).await;
|
||||
|
||||
163
crates/g3-core/tests/abrupt_stop_bug_test.rs
Normal file
163
crates/g3-core/tests/abrupt_stop_bug_test.rs
Normal file
@@ -0,0 +1,163 @@
|
||||
//! Tests for the abrupt stop bug where the agent returns control to the user
|
||||
//! mid-task because tool calls are stored as text in the Message struct and
|
||||
//! sent back to the Anthropic API as plain text instead of structured
|
||||
//! tool_use/tool_result blocks.
|
||||
//!
|
||||
//! Root cause: Message struct has no tool_calls field. Native tool calls are
|
||||
//! stored as inline JSON text. convert_messages() sends them as plain text,
|
||||
//! not tool_use/tool_result blocks. The model sees its previous tool
|
||||
//! interactions as text it wrote, not as actual tool invocations, and
|
||||
//! occasionally emits text describing what it wants to do instead of
|
||||
//! invoking the tool mechanism.
|
||||
|
||||
use g3_providers::{Message, MessageRole};
|
||||
|
||||
/// Demonstrates the bug: tool calls stored as inline JSON text in assistant
|
||||
/// messages are indistinguishable from regular text when sent back to the API.
|
||||
///
|
||||
/// In the real bug, the model sees:
|
||||
/// Assistant: "Let me check.\n\n{\"tool\": \"shell\", \"args\": {...}}"
|
||||
/// User: "Tool result: ..."
|
||||
///
|
||||
/// Instead of the proper Anthropic format:
|
||||
/// Assistant: [{type: "text", text: "Let me check."}, {type: "tool_use", id: "...", name: "shell", input: {...}}]
|
||||
/// User: [{type: "tool_result", tool_use_id: "...", content: "..."}]
|
||||
#[test]
|
||||
fn test_tool_calls_stored_as_text_lack_structure() {
|
||||
// This is how tool calls are currently stored (the bug)
|
||||
let assistant_msg = Message::new(
|
||||
MessageRole::Assistant,
|
||||
"Let me check that file.\n\n{\"tool\": \"shell\", \"args\": {\"command\": \"ls\"}}".to_string(),
|
||||
);
|
||||
|
||||
// The message has no structured tool call information
|
||||
assert!(
|
||||
assistant_msg.tool_calls.is_empty(),
|
||||
"Message should now support structured tool_calls field"
|
||||
);
|
||||
}
|
||||
|
||||
/// Verifies that Message struct supports structured tool calls.
|
||||
/// After the fix, tool calls should be stored structurally.
|
||||
#[test]
|
||||
fn test_message_supports_structured_tool_calls() {
|
||||
use g3_providers::MessageToolCall;
|
||||
|
||||
let mut msg = Message::new(
|
||||
MessageRole::Assistant,
|
||||
"Let me check that file.".to_string(),
|
||||
);
|
||||
|
||||
msg.tool_calls.push(MessageToolCall {
|
||||
id: "toolu_123".to_string(),
|
||||
name: "shell".to_string(),
|
||||
input: serde_json::json!({"command": "ls"}),
|
||||
});
|
||||
|
||||
assert_eq!(msg.tool_calls.len(), 1);
|
||||
assert_eq!(msg.tool_calls[0].name, "shell");
|
||||
assert_eq!(msg.tool_calls[0].id, "toolu_123");
|
||||
}
|
||||
|
||||
/// Verifies that Message struct supports tool_result for user messages.
|
||||
/// After the fix, tool results should reference the tool_use_id.
|
||||
#[test]
|
||||
fn test_message_supports_tool_result() {
|
||||
let mut msg = Message::new(
|
||||
MessageRole::User,
|
||||
"file1.txt\nfile2.txt".to_string(),
|
||||
);
|
||||
|
||||
msg.tool_result_id = Some("toolu_123".to_string());
|
||||
|
||||
assert_eq!(msg.tool_result_id.as_deref(), Some("toolu_123"));
|
||||
}
|
||||
|
||||
/// Integration test: simulates the exact bug scenario from the h3 session.
|
||||
/// After several tool call iterations, the model stops mid-thought.
|
||||
///
|
||||
/// The fix ensures that when messages are sent back to the API, tool calls
|
||||
/// are properly structured so the model maintains its tool-calling context.
|
||||
#[test]
|
||||
fn test_tool_call_roundtrip_preserves_structure() {
|
||||
use g3_providers::MessageToolCall;
|
||||
|
||||
// Simulate a multi-turn tool-calling conversation
|
||||
let messages = vec![
|
||||
// System prompt
|
||||
Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
|
||||
// User asks something
|
||||
Message::new(MessageRole::User, "Check the files".to_string()),
|
||||
// Assistant uses a tool (properly structured)
|
||||
{
|
||||
let mut msg = Message::new(
|
||||
MessageRole::Assistant,
|
||||
"Let me check the files.".to_string(),
|
||||
);
|
||||
msg.tool_calls.push(MessageToolCall {
|
||||
id: "toolu_001".to_string(),
|
||||
name: "shell".to_string(),
|
||||
input: serde_json::json!({"command": "ls"}),
|
||||
});
|
||||
msg
|
||||
},
|
||||
// Tool result (properly structured)
|
||||
{
|
||||
let mut msg = Message::new(
|
||||
MessageRole::User,
|
||||
"file1.txt\nfile2.txt".to_string(),
|
||||
);
|
||||
msg.tool_result_id = Some("toolu_001".to_string());
|
||||
msg
|
||||
},
|
||||
// Assistant uses another tool
|
||||
{
|
||||
let mut msg = Message::new(
|
||||
MessageRole::Assistant,
|
||||
"Let me read file1.txt.".to_string(),
|
||||
);
|
||||
msg.tool_calls.push(MessageToolCall {
|
||||
id: "toolu_002".to_string(),
|
||||
name: "read_file".to_string(),
|
||||
input: serde_json::json!({"file_path": "file1.txt"}),
|
||||
});
|
||||
msg
|
||||
},
|
||||
// Tool result
|
||||
{
|
||||
let mut msg = Message::new(
|
||||
MessageRole::User,
|
||||
"Contents of file1.txt".to_string(),
|
||||
);
|
||||
msg.tool_result_id = Some("toolu_002".to_string());
|
||||
msg
|
||||
},
|
||||
];
|
||||
|
||||
// Verify all tool calls have IDs
|
||||
for msg in &messages {
|
||||
for tc in &msg.tool_calls {
|
||||
assert!(!tc.id.is_empty(), "Tool call should have an ID");
|
||||
}
|
||||
// Verify tool results reference a tool_use_id
|
||||
if msg.tool_result_id.is_some() {
|
||||
assert!(
|
||||
matches!(msg.role, MessageRole::User),
|
||||
"Tool results should be user messages"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Verify assistant messages with tool calls still have text content
|
||||
let assistant_with_tools: Vec<_> = messages
|
||||
.iter()
|
||||
.filter(|m| matches!(m.role, MessageRole::Assistant) && !m.tool_calls.is_empty())
|
||||
.collect();
|
||||
assert_eq!(assistant_with_tools.len(), 2);
|
||||
for msg in assistant_with_tools {
|
||||
assert!(
|
||||
!msg.content.is_empty(),
|
||||
"Assistant messages should have text content alongside tool calls"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -122,6 +122,7 @@ mod duplicate_detection {
|
||||
ToolCall {
|
||||
tool: tool.to_string(),
|
||||
args,
|
||||
id: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -328,6 +328,7 @@ mod duplicate_detection_characterization {
|
||||
ToolCall {
|
||||
tool: tool.to_string(),
|
||||
args: serde_json::from_str(args).unwrap(),
|
||||
id: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -525,6 +526,7 @@ mod tool_execution_integration {
|
||||
// Execute a tool directly
|
||||
let tool_call = ToolCall {
|
||||
tool: "read_file".to_string(),
|
||||
id: String::new(),
|
||||
args: serde_json::json!({ "file_path": test_file.to_string_lossy() }),
|
||||
};
|
||||
|
||||
@@ -547,6 +549,7 @@ mod tool_execution_integration {
|
||||
|
||||
let tool_call = ToolCall {
|
||||
tool: "shell".to_string(),
|
||||
id: String::new(),
|
||||
args: serde_json::json!({ "command": "echo 'test output'" }),
|
||||
};
|
||||
|
||||
@@ -570,6 +573,7 @@ mod tool_execution_integration {
|
||||
|
||||
let tool_call = ToolCall {
|
||||
tool: "write_file".to_string(),
|
||||
id: String::new(),
|
||||
args: serde_json::json!({
|
||||
"file_path": new_file.to_string_lossy(),
|
||||
"content": "New content"
|
||||
@@ -602,6 +606,7 @@ mod tool_execution_integration {
|
||||
// Write Plan
|
||||
let write_call = ToolCall {
|
||||
tool: "plan_write".to_string(),
|
||||
id: String::new(),
|
||||
args: serde_json::json!({
|
||||
"plan": r#"plan_id: test-plan
|
||||
revision: 1
|
||||
@@ -634,6 +639,7 @@ items:
|
||||
// Read Plan
|
||||
let read_call = ToolCall {
|
||||
tool: "plan_read".to_string(),
|
||||
id: String::new(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
let read_result = agent.execute_tool(&read_call).await.unwrap();
|
||||
|
||||
@@ -37,6 +37,7 @@ fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall {
|
||||
ToolCall {
|
||||
tool: tool.to_string(),
|
||||
args,
|
||||
id: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall {
|
||||
ToolCall {
|
||||
tool: tool.to_string(),
|
||||
args,
|
||||
id: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user