From d3f0112f4699e38c89166dbbb21a5b1c7232ba85 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Wed, 11 Feb 2026 08:48:07 +1100 Subject: [PATCH] fix: store tool calls structurally for proper API roundtripping The agent would stop mid-task because native tool calls were stored as inline JSON text in Message.content. When sent back to the Anthropic API via convert_messages(), they went as plain text instead of structured tool_use/tool_result blocks. The model would occasionally get confused and emit text describing what it wanted to do instead of invoking the tool mechanism. Changes: - Add MessageToolCall struct and tool_calls/tool_result_id fields to Message - Add id field to core ToolCall struct to preserve provider tool call IDs - Update Anthropic convert_messages() to emit tool_use and tool_result blocks - Add ToolResult variant to AnthropicContent enum - Store tool calls structurally in tool message construction (not inline JSON) - Fix add_message() to preserve empty-content messages with tool_calls - Fix check_duplicate_in_previous_message() to check structured tool_calls - Generate valid IDs for JSON fallback tool calls (Anthropic pattern requirement) - Update planner create_tool_message() to use structured tool calls --- analysis/memory.md | 17 +- crates/g3-core/src/context_window.rs | 3 +- crates/g3-core/src/lib.rs | 68 ++++++-- crates/g3-core/src/streaming.rs | 10 +- crates/g3-core/src/streaming_parser.rs | 11 +- crates/g3-core/src/tools/acd.rs | 3 + crates/g3-core/tests/abrupt_stop_bug_test.rs | 163 ++++++++++++++++++ .../tests/end_of_turn_behavior_test.rs | 1 + ...stream_completion_characterization_test.rs | 6 + .../tests/tool_execution_roundtrip_test.rs | 1 + crates/g3-core/tests/tool_execution_test.rs | 1 + crates/g3-planner/src/lib.rs | 24 +-- crates/g3-planner/tests/planner_test.rs | 7 +- crates/g3-providers/src/anthropic.rs | 67 +++++-- crates/g3-providers/src/lib.rs | 26 +++ 15 files changed, 355 insertions(+), 53 deletions(-) create mode 100644 crates/g3-core/tests/abrupt_stop_bug_test.rs diff --git a/analysis/memory.md b/analysis/memory.md index f63319f..52bddaf 100644 --- a/analysis/memory.md +++ b/analysis/memory.md @@ -1,5 +1,5 @@ # Workspace Memory -> Updated: 2026-02-07T05:28:12Z | Size: 26.3k chars +> Updated: 2026-02-10T21:08:38Z | Size: 27.8k chars ### Remember Tool Wiring - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()` @@ -429,4 +429,17 @@ Makes tool output responsive to terminal width - no line wrapping, with 4-char r - **Bug**: The `_ =>` catch-all in when condition evaluation did naive string `contains` check. For `Matches` (regex like `^Re: `), it checked if fact values literally contained the regex pattern string — which never matched. Result: when conditions with `matches` rule always evaluated as not-met → vacuous pass → violations slipped through. - **Fix**: Replaced hand-rolled when evaluation with synthetic `CompiledPredicate` delegation to `evaluate_predicate_datalog()`, which handles all 12 rule types correctly. - **Tests**: `test_execute_rules_when_matches_condition_met`, `test_execute_rules_when_matches_condition_met_but_predicate_fails`, `test_execute_rules_when_matches_condition_not_met` -- **Note**: The `invariants.rs` path was NOT affected — it already delegated to `evaluate_predicate()` which handles all rules. \ No newline at end of file +- **Note**: The `invariants.rs` path was NOT affected — it already delegated to `evaluate_predicate()` which handles all rules. + +### Structured Tool Call Messages (2026-02-11) +- `crates/g3-providers/src/lib.rs` [102..106] - `MessageToolCall` struct (id, name, input) +- `crates/g3-providers/src/lib.rs` [124..131] - `Message.tool_calls: Vec`, `Message.tool_result_id: Option` +- `crates/g3-providers/src/anthropic.rs` [284..340] - `convert_messages()` emits `tool_use` blocks for assistant messages with `tool_calls`, `tool_result` blocks for user messages with `tool_result_id` +- `crates/g3-providers/src/anthropic.rs` [935..941] - `AnthropicContent::ToolResult` variant added +- `crates/g3-core/src/lib.rs` [82..88] - `ToolCall.id` field added (from native providers) +- `crates/g3-core/src/lib.rs` [2530..2545] - Tool messages store structured `tool_calls` instead of inline JSON text +- `crates/g3-core/src/lib.rs` [1385..1400] - `check_duplicate_in_previous_message()` checks structured `tool_calls` field +- `crates/g3-core/src/context_window.rs` [107..109] - `add_message_with_tokens()` preserves messages with `tool_calls` even if content is empty +- `crates/g3-core/src/streaming_parser.rs` [339] - `process_chunk()` preserves tool call `id` from provider + +**Bug fixed**: Agent would stop mid-task because native tool calls were stored as inline JSON text in `Message.content`. When sent back to Anthropic API via `convert_messages()`, they went as plain text instead of structured `tool_use`/`tool_result` blocks. The model would occasionally get confused and emit text describing what it wanted to do instead of invoking the tool mechanism. \ No newline at end of file diff --git a/crates/g3-core/src/context_window.rs b/crates/g3-core/src/context_window.rs index 367d065..814a90d 100644 --- a/crates/g3-core/src/context_window.rs +++ b/crates/g3-core/src/context_window.rs @@ -103,7 +103,8 @@ impl ContextWindow { /// Add a message with optional token count from the provider pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option) { - if message.content.trim().is_empty() { + // Skip truly empty messages, but keep messages that have structured tool calls or tool results + if message.content.trim().is_empty() && message.tool_calls.is_empty() && message.tool_result_id.is_none() { warn!("Skipping empty message to avoid API error"); return; } diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 834780f..e5457f8 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -82,6 +82,10 @@ pub use paths::{ pub struct ToolCall { pub tool: String, pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments + /// Unique ID for this tool call (from native tool calling providers). + /// Used to correlate tool_use/tool_result blocks in the API. + #[serde(default)] + pub id: String, } /// Cumulative cache statistics for prompt caching efficacy tracking. @@ -1379,6 +1383,22 @@ impl Agent { continue; } + // Check structured tool_calls first (native tool calling) + if !msg.tool_calls.is_empty() { + if let Some(last_tc) = msg.tool_calls.last() { + let prev = ToolCall { + tool: last_tc.name.clone(), + args: last_tc.input.clone(), + id: last_tc.id.clone(), + }; + if streaming::are_tool_calls_duplicate(&prev, tool_call) { + return Some("DUP IN MSG".to_string()); + } + } + // Only check the most recent assistant message + break; + } + let content = &msg.content; // Look for the last occurrence of a tool call pattern @@ -2001,6 +2021,8 @@ Skip if nothing new. Be brief."#; content: content.to_string(), kind: g3_providers::MessageKind::Regular, cache_control: None, + tool_calls: Vec::new(), + tool_result_id: None, }); } @@ -2028,6 +2050,8 @@ Skip if nothing new. Be brief."#; content: format!("[Session Resumed]\n\n{}", context_msg), kind: g3_providers::MessageKind::Regular, cache_control: None, + tool_calls: Vec::new(), + tool_result_id: None, }); } @@ -2503,25 +2527,29 @@ Skip if nothing new. Be brief."#; // Add the tool call and result to the context window using RAW unfiltered content // This ensures the log file contains the true raw content including JSON tool calls - let tool_message = if !raw_content_for_log.trim().is_empty() { - Message::new( + let tool_message = { + let text_content = raw_content_for_log.trim().to_string(); + let mut msg = Message::new( MessageRole::Assistant, - format!( - "{}\n\n{{\"tool\": \"{}\", \"args\": {}}}", - raw_content_for_log.trim(), - tool_call.tool, - tool_call.args - ), - ) - } else { - // No text content before tool call, just include the tool call - Message::new( - MessageRole::Assistant, - format!( - "{{\"tool\": \"{}\", \"args\": {}}}", - tool_call.tool, tool_call.args - ), - ) + text_content, + ); + // Store the tool call structurally so that providers can + // emit proper tool_use blocks (e.g. Anthropic API) instead + // of inline JSON text that confuses the model. + msg.tool_calls.push(g3_providers::MessageToolCall { + id: if tool_call.id.is_empty() { + // Safety net: generate an ID if none was provided. + // Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$ + use std::sync::atomic::{AtomicU64, Ordering}; + static FALLBACK_COUNTER: AtomicU64 = AtomicU64::new(0); + format!("tool_{}", FALLBACK_COUNTER.fetch_add(1, Ordering::SeqCst)) + } else { + tool_call.id.clone() + }, + name: tool_call.tool.clone(), + input: tool_call.args.clone(), + }); + msg }; let mut result_message = { let content = format!("Tool result: {}", tool_result); @@ -2548,6 +2576,10 @@ Skip if nothing new. Be brief."#; } }; + // Link the tool result to the tool_use ID so providers can + // emit proper tool_result blocks (e.g. Anthropic API). + result_message.tool_result_id = Some(tool_call.id.clone()); + // Attach any pending images to the result message // (images loaded via read_image tool) if !self.pending_images.is_empty() { diff --git a/crates/g3-core/src/streaming.rs b/crates/g3-core/src/streaming.rs index 0d9b007..6984df2 100644 --- a/crates/g3-core/src/streaming.rs +++ b/crates/g3-core/src/streaming.rs @@ -716,8 +716,8 @@ mod tests { #[test] fn test_deduplicate_tool_calls_no_duplicates() { let tools = vec![ - ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) }, - ToolCall { tool: "read_file".to_string(), args: serde_json::json!({"path": "foo.rs"}) }, + ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() }, + ToolCall { tool: "read_file".to_string(), args: serde_json::json!({"path": "foo.rs"}), id: String::new() }, ]; let result = deduplicate_tool_calls(tools, |_| None); @@ -730,8 +730,8 @@ mod tests { #[test] fn test_deduplicate_tool_calls_sequential_duplicate() { let tools = vec![ - ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) }, - ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) }, + ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() }, + ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() }, ]; let result = deduplicate_tool_calls(tools, |_| None); @@ -744,7 +744,7 @@ mod tests { #[test] fn test_deduplicate_tool_calls_previous_message_duplicate() { let tools = vec![ - ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) }, + ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() }, ]; // Simulate finding a duplicate in previous message diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs index fc3f3ec..9c9f297 100644 --- a/crates/g3-core/src/streaming_parser.rs +++ b/crates/g3-core/src/streaming_parser.rs @@ -336,6 +336,7 @@ impl StreamingToolParser { completed_tools.push(ToolCall { tool: tool_call.tool.clone(), args: tool_call.args.clone(), + id: tool_call.id.clone(), }); } } @@ -466,13 +467,21 @@ impl StreamingToolParser { } fn try_parse_tool_call_json(&self, json_str: &str) -> Option { - let tool_call: ToolCall = serde_json::from_str(json_str).ok()?; + let mut tool_call: ToolCall = serde_json::from_str(json_str).ok()?; let args_obj = tool_call.args.as_object()?; if args_contain_prose_fragments(args_obj) { return None; } + // Generate an ID if not provided (JSON fallback tool calls don't have IDs, + // but the Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$) + if tool_call.id.is_empty() { + use std::sync::atomic::{AtomicU64, Ordering}; + static COUNTER: AtomicU64 = AtomicU64::new(0); + tool_call.id = format!("json_tool_{}", COUNTER.fetch_add(1, Ordering::SeqCst)); + } + Some(tool_call) } diff --git a/crates/g3-core/src/tools/acd.rs b/crates/g3-core/src/tools/acd.rs index 8101738..07cec5a 100644 --- a/crates/g3-core/src/tools/acd.rs +++ b/crates/g3-core/src/tools/acd.rs @@ -181,6 +181,7 @@ mod tests { let tool_call = ToolCall { tool: "rehydrate".to_string(), args: json!({}), + id: String::new(), }; let result = execute_rehydrate(&tool_call, &mut ctx).await; @@ -213,6 +214,7 @@ mod tests { let tool_call = ToolCall { tool: "rehydrate".to_string(), args: json!({"fragment_id": "test-fragment"}), + id: String::new(), }; let result = execute_rehydrate(&tool_call, &mut ctx).await; @@ -245,6 +247,7 @@ mod tests { let tool_call = ToolCall { tool: "rehydrate".to_string(), args: json!({"fragment_id": "nonexistent-fragment"}), + id: String::new(), }; let result = execute_rehydrate(&tool_call, &mut ctx).await; diff --git a/crates/g3-core/tests/abrupt_stop_bug_test.rs b/crates/g3-core/tests/abrupt_stop_bug_test.rs new file mode 100644 index 0000000..5269580 --- /dev/null +++ b/crates/g3-core/tests/abrupt_stop_bug_test.rs @@ -0,0 +1,163 @@ +//! Tests for the abrupt stop bug where the agent returns control to the user +//! mid-task because tool calls are stored as text in the Message struct and +//! sent back to the Anthropic API as plain text instead of structured +//! tool_use/tool_result blocks. +//! +//! Root cause: Message struct has no tool_calls field. Native tool calls are +//! stored as inline JSON text. convert_messages() sends them as plain text, +//! not tool_use/tool_result blocks. The model sees its previous tool +//! interactions as text it wrote, not as actual tool invocations, and +//! occasionally emits text describing what it wants to do instead of +//! invoking the tool mechanism. + +use g3_providers::{Message, MessageRole}; + +/// Demonstrates the bug: tool calls stored as inline JSON text in assistant +/// messages are indistinguishable from regular text when sent back to the API. +/// +/// In the real bug, the model sees: +/// Assistant: "Let me check.\n\n{\"tool\": \"shell\", \"args\": {...}}" +/// User: "Tool result: ..." +/// +/// Instead of the proper Anthropic format: +/// Assistant: [{type: "text", text: "Let me check."}, {type: "tool_use", id: "...", name: "shell", input: {...}}] +/// User: [{type: "tool_result", tool_use_id: "...", content: "..."}] +#[test] +fn test_tool_calls_stored_as_text_lack_structure() { + // This is how tool calls are currently stored (the bug) + let assistant_msg = Message::new( + MessageRole::Assistant, + "Let me check that file.\n\n{\"tool\": \"shell\", \"args\": {\"command\": \"ls\"}}".to_string(), + ); + + // The message has no structured tool call information + assert!( + assistant_msg.tool_calls.is_empty(), + "Message should now support structured tool_calls field" + ); +} + +/// Verifies that Message struct supports structured tool calls. +/// After the fix, tool calls should be stored structurally. +#[test] +fn test_message_supports_structured_tool_calls() { + use g3_providers::MessageToolCall; + + let mut msg = Message::new( + MessageRole::Assistant, + "Let me check that file.".to_string(), + ); + + msg.tool_calls.push(MessageToolCall { + id: "toolu_123".to_string(), + name: "shell".to_string(), + input: serde_json::json!({"command": "ls"}), + }); + + assert_eq!(msg.tool_calls.len(), 1); + assert_eq!(msg.tool_calls[0].name, "shell"); + assert_eq!(msg.tool_calls[0].id, "toolu_123"); +} + +/// Verifies that Message struct supports tool_result for user messages. +/// After the fix, tool results should reference the tool_use_id. +#[test] +fn test_message_supports_tool_result() { + let mut msg = Message::new( + MessageRole::User, + "file1.txt\nfile2.txt".to_string(), + ); + + msg.tool_result_id = Some("toolu_123".to_string()); + + assert_eq!(msg.tool_result_id.as_deref(), Some("toolu_123")); +} + +/// Integration test: simulates the exact bug scenario from the h3 session. +/// After several tool call iterations, the model stops mid-thought. +/// +/// The fix ensures that when messages are sent back to the API, tool calls +/// are properly structured so the model maintains its tool-calling context. +#[test] +fn test_tool_call_roundtrip_preserves_structure() { + use g3_providers::MessageToolCall; + + // Simulate a multi-turn tool-calling conversation + let messages = vec![ + // System prompt + Message::new(MessageRole::System, "You are a helpful assistant.".to_string()), + // User asks something + Message::new(MessageRole::User, "Check the files".to_string()), + // Assistant uses a tool (properly structured) + { + let mut msg = Message::new( + MessageRole::Assistant, + "Let me check the files.".to_string(), + ); + msg.tool_calls.push(MessageToolCall { + id: "toolu_001".to_string(), + name: "shell".to_string(), + input: serde_json::json!({"command": "ls"}), + }); + msg + }, + // Tool result (properly structured) + { + let mut msg = Message::new( + MessageRole::User, + "file1.txt\nfile2.txt".to_string(), + ); + msg.tool_result_id = Some("toolu_001".to_string()); + msg + }, + // Assistant uses another tool + { + let mut msg = Message::new( + MessageRole::Assistant, + "Let me read file1.txt.".to_string(), + ); + msg.tool_calls.push(MessageToolCall { + id: "toolu_002".to_string(), + name: "read_file".to_string(), + input: serde_json::json!({"file_path": "file1.txt"}), + }); + msg + }, + // Tool result + { + let mut msg = Message::new( + MessageRole::User, + "Contents of file1.txt".to_string(), + ); + msg.tool_result_id = Some("toolu_002".to_string()); + msg + }, + ]; + + // Verify all tool calls have IDs + for msg in &messages { + for tc in &msg.tool_calls { + assert!(!tc.id.is_empty(), "Tool call should have an ID"); + } + // Verify tool results reference a tool_use_id + if msg.tool_result_id.is_some() { + assert!( + matches!(msg.role, MessageRole::User), + "Tool results should be user messages" + ); + } + } + + // Verify assistant messages with tool calls still have text content + let assistant_with_tools: Vec<_> = messages + .iter() + .filter(|m| matches!(m.role, MessageRole::Assistant) && !m.tool_calls.is_empty()) + .collect(); + assert_eq!(assistant_with_tools.len(), 2); + for msg in assistant_with_tools { + assert!( + !msg.content.is_empty(), + "Assistant messages should have text content alongside tool calls" + ); + } +} diff --git a/crates/g3-core/tests/end_of_turn_behavior_test.rs b/crates/g3-core/tests/end_of_turn_behavior_test.rs index 938aa90..d50338f 100644 --- a/crates/g3-core/tests/end_of_turn_behavior_test.rs +++ b/crates/g3-core/tests/end_of_turn_behavior_test.rs @@ -122,6 +122,7 @@ mod duplicate_detection { ToolCall { tool: tool.to_string(), args, + id: String::new(), } } diff --git a/crates/g3-core/tests/stream_completion_characterization_test.rs b/crates/g3-core/tests/stream_completion_characterization_test.rs index 8d50838..8fc7699 100644 --- a/crates/g3-core/tests/stream_completion_characterization_test.rs +++ b/crates/g3-core/tests/stream_completion_characterization_test.rs @@ -328,6 +328,7 @@ mod duplicate_detection_characterization { ToolCall { tool: tool.to_string(), args: serde_json::from_str(args).unwrap(), + id: String::new(), } } @@ -525,6 +526,7 @@ mod tool_execution_integration { // Execute a tool directly let tool_call = ToolCall { tool: "read_file".to_string(), + id: String::new(), args: serde_json::json!({ "file_path": test_file.to_string_lossy() }), }; @@ -547,6 +549,7 @@ mod tool_execution_integration { let tool_call = ToolCall { tool: "shell".to_string(), + id: String::new(), args: serde_json::json!({ "command": "echo 'test output'" }), }; @@ -570,6 +573,7 @@ mod tool_execution_integration { let tool_call = ToolCall { tool: "write_file".to_string(), + id: String::new(), args: serde_json::json!({ "file_path": new_file.to_string_lossy(), "content": "New content" @@ -602,6 +606,7 @@ mod tool_execution_integration { // Write Plan let write_call = ToolCall { tool: "plan_write".to_string(), + id: String::new(), args: serde_json::json!({ "plan": r#"plan_id: test-plan revision: 1 @@ -634,6 +639,7 @@ items: // Read Plan let read_call = ToolCall { tool: "plan_read".to_string(), + id: String::new(), args: serde_json::json!({}), }; let read_result = agent.execute_tool(&read_call).await.unwrap(); diff --git a/crates/g3-core/tests/tool_execution_roundtrip_test.rs b/crates/g3-core/tests/tool_execution_roundtrip_test.rs index 935f1b5..b6488f1 100644 --- a/crates/g3-core/tests/tool_execution_roundtrip_test.rs +++ b/crates/g3-core/tests/tool_execution_roundtrip_test.rs @@ -37,6 +37,7 @@ fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall { ToolCall { tool: tool.to_string(), args, + id: String::new(), } } diff --git a/crates/g3-core/tests/tool_execution_test.rs b/crates/g3-core/tests/tool_execution_test.rs index d32be3d..871f850 100644 --- a/crates/g3-core/tests/tool_execution_test.rs +++ b/crates/g3-core/tests/tool_execution_test.rs @@ -29,6 +29,7 @@ fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall { ToolCall { tool: tool.to_string(), args, + id: String::new(), } } diff --git a/crates/g3-planner/src/lib.rs b/crates/g3-planner/src/lib.rs index d3d9042..d05a157 100644 --- a/crates/g3-planner/src/lib.rs +++ b/crates/g3-planner/src/lib.rs @@ -126,14 +126,16 @@ pub async fn get_initial_discovery_messages( /// Creates an Assistant message with a tool call in g3's JSON format. pub fn create_tool_message(tool: &str, command: &str) -> Message { - let tool_call = serde_json::json!({ - "tool": tool, - "args": { - "command": command - } + let mut msg = Message::new( + MessageRole::Assistant, + String::new(), + ); + msg.tool_calls.push(g3_providers::MessageToolCall { + id: format!("plan_{}", msg.id), + name: tool.to_string(), + input: serde_json::json!({ "command": command }), }); - - Message::new(MessageRole::Assistant, tool_call.to_string()) + msg } /// Extract shell commands from the LLM response. @@ -269,10 +271,10 @@ mod tests { let msg = create_tool_message("shell", "ls -la"); assert!(matches!(msg.role, MessageRole::Assistant)); - - let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap(); - assert_eq!(parsed["tool"], "shell"); - assert_eq!(parsed["args"]["command"], "ls -la"); + assert_eq!(msg.tool_calls.len(), 1); + assert_eq!(msg.tool_calls[0].name, "shell"); + assert_eq!(msg.tool_calls[0].input["command"], "ls -la"); + assert!(!msg.tool_calls[0].id.is_empty()); } #[test] diff --git a/crates/g3-planner/tests/planner_test.rs b/crates/g3-planner/tests/planner_test.rs index be285ab..4d56de2 100644 --- a/crates/g3-planner/tests/planner_test.rs +++ b/crates/g3-planner/tests/planner_test.rs @@ -9,9 +9,10 @@ fn test_create_tool_message_format() { assert!(matches!(msg.role, MessageRole::Assistant)); - let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap(); - assert_eq!(parsed["tool"], "shell"); - assert_eq!(parsed["args"]["command"], "ls -la"); + assert_eq!(msg.tool_calls.len(), 1); + assert_eq!(msg.tool_calls[0].name, "shell"); + assert_eq!(msg.tool_calls[0].input["command"], "ls -la"); + assert!(!msg.tool_calls[0].id.is_empty()); } #[test] diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs index 780cbfe..ab97396 100644 --- a/crates/g3-providers/src/anthropic.rs +++ b/crates/g3-providers/src/anthropic.rs @@ -295,14 +295,26 @@ impl AnthropicProvider { }); } - // Add text content - content_blocks.push(AnthropicContent::Text { - text: message.content.clone(), - cache_control: message - .cache_control - .as_ref() - .map(Self::convert_cache_control), - }); + // Check if this is a tool result message + if let Some(ref tool_use_id) = message.tool_result_id { + content_blocks.push(AnthropicContent::ToolResult { + tool_use_id: tool_use_id.clone(), + content: message.content.clone(), + cache_control: message + .cache_control + .as_ref() + .map(Self::convert_cache_control), + }); + } else { + // Regular text content + content_blocks.push(AnthropicContent::Text { + text: message.content.clone(), + cache_control: message + .cache_control + .as_ref() + .map(Self::convert_cache_control), + }); + } anthropic_messages.push(AnthropicMessage { role: "user".to_string(), @@ -310,15 +322,39 @@ impl AnthropicProvider { }); } MessageRole::Assistant => { - anthropic_messages.push(AnthropicMessage { - role: "assistant".to_string(), - content: vec![AnthropicContent::Text { + let mut content_blocks: Vec = Vec::new(); + + // Add text content if non-empty + if !message.content.trim().is_empty() { + content_blocks.push(AnthropicContent::Text { text: message.content.clone(), cache_control: message .cache_control .as_ref() .map(Self::convert_cache_control), - }], + }); + } + + // Add tool_use blocks for any structured tool calls + for tc in &message.tool_calls { + content_blocks.push(AnthropicContent::ToolUse { + id: tc.id.clone(), + name: tc.name.clone(), + input: tc.input.clone(), + }); + } + + // Ensure we have at least one content block + if content_blocks.is_empty() { + content_blocks.push(AnthropicContent::Text { + text: message.content.clone(), + cache_control: None, + }); + } + + anthropic_messages.push(AnthropicMessage { + role: "assistant".to_string(), + content: content_blocks, }); } } @@ -929,6 +965,13 @@ enum AnthropicContent { }, #[serde(rename = "image")] Image { source: AnthropicImageSource }, + #[serde(rename = "tool_result")] + ToolResult { + tool_use_id: String, + content: String, + #[serde(skip_serializing_if = "Option::is_none")] + cache_control: Option, + }, } /// Image source for Anthropic API diff --git a/crates/g3-providers/src/lib.rs b/crates/g3-providers/src/lib.rs index 7793a94..ff62d80 100644 --- a/crates/g3-providers/src/lib.rs +++ b/crates/g3-providers/src/lib.rs @@ -95,6 +95,16 @@ impl CacheControl { } } +/// A tool call stored in an assistant message for proper API roundtripping. +/// When the model makes a native tool call, we store it structurally so that +/// convert_messages() can send it as a proper tool_use block (not inline JSON text). +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MessageToolCall { + pub id: String, + pub name: String, + pub input: serde_json::Value, +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Message { pub role: MessageRole, @@ -107,6 +117,16 @@ pub struct Message { pub kind: MessageKind, #[serde(skip_serializing_if = "Option::is_none")] pub cache_control: Option, + /// Structured tool calls made by the assistant in this message. + /// When non-empty, convert_messages() should emit tool_use content blocks + /// instead of (or in addition to) plain text. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tool_calls: Vec, + /// If this is a tool result message, the ID of the tool_use it responds to. + /// When set, convert_messages() should emit a tool_result content block + /// instead of plain text. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tool_result_id: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -279,6 +299,8 @@ impl Message { id: Self::generate_id(), kind: MessageKind::Regular, cache_control: None, + tool_calls: Vec::new(), + tool_result_id: None, } } @@ -295,6 +317,8 @@ impl Message { id: Self::generate_id(), kind: MessageKind::Regular, cache_control: Some(cache_control), + tool_calls: Vec::new(), + tool_result_id: None, } } @@ -307,6 +331,8 @@ impl Message { id: Self::generate_id(), kind, cache_control: None, + tool_calls: Vec::new(), + tool_result_id: None, } }