From d3f0112f4699e38c89166dbbb21a5b1c7232ba85 Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna" <d@wideplay.com>
Date: Wed, 11 Feb 2026 08:48:07 +1100
Subject: [PATCH] fix: store tool calls structurally for proper API
 roundtripping

The agent would stop mid-task because native tool calls were stored as
inline JSON text in Message.content. When sent back to the Anthropic API
via convert_messages(), they went as plain text instead of structured
tool_use/tool_result blocks. The model would occasionally get confused
and emit text describing what it wanted to do instead of invoking the
tool mechanism.

Changes:
- Add MessageToolCall struct and tool_calls/tool_result_id fields to Message
- Add id field to core ToolCall struct to preserve provider tool call IDs
- Update Anthropic convert_messages() to emit tool_use and tool_result blocks
- Add ToolResult variant to AnthropicContent enum
- Store tool calls structurally in tool message construction (not inline JSON)
- Fix add_message() to preserve empty-content messages with tool_calls
- Fix check_duplicate_in_previous_message() to check structured tool_calls
- Generate valid IDs for JSON fallback tool calls (Anthropic pattern requirement)
- Update planner create_tool_message() to use structured tool calls
---
 analysis/memory.md                            |  17 +-
 crates/g3-core/src/context_window.rs          |   3 +-
 crates/g3-core/src/lib.rs                     |  68 ++++++--
 crates/g3-core/src/streaming.rs               |  10 +-
 crates/g3-core/src/streaming_parser.rs        |  11 +-
 crates/g3-core/src/tools/acd.rs               |   3 +
 crates/g3-core/tests/abrupt_stop_bug_test.rs  | 163 ++++++++++++++++++
 .../tests/end_of_turn_behavior_test.rs        |   1 +
 ...stream_completion_characterization_test.rs |   6 +
 .../tests/tool_execution_roundtrip_test.rs    |   1 +
 crates/g3-core/tests/tool_execution_test.rs   |   1 +
 crates/g3-planner/src/lib.rs                  |  24 +--
 crates/g3-planner/tests/planner_test.rs       |   7 +-
 crates/g3-providers/src/anthropic.rs          |  67 +++++--
 crates/g3-providers/src/lib.rs                |  26 +++
 15 files changed, 355 insertions(+), 53 deletions(-)
 create mode 100644 crates/g3-core/tests/abrupt_stop_bug_test.rs
diff --git a/analysis/memory.md b/analysis/memory.md
index f63319f..52bddaf 100644
--- a/analysis/memory.md
+++ b/analysis/memory.md
@@ -1,5 +1,5 @@
 # Workspace Memory
-> Updated: 2026-02-07T05:28:12Z | Size: 26.3k chars
+> Updated: 2026-02-10T21:08:38Z | Size: 27.8k chars
 
 ### Remember Tool Wiring
 - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
@@ -429,4 +429,17 @@ Makes tool output responsive to terminal width - no line wrapping, with 4-char r
 - **Bug**: The `_ =>` catch-all in when condition evaluation did naive string `contains` check. For `Matches` (regex like `^Re: `), it checked if fact values literally contained the regex pattern string — which never matched. Result: when conditions with `matches` rule always evaluated as not-met → vacuous pass → violations slipped through.
 - **Fix**: Replaced hand-rolled when evaluation with synthetic `CompiledPredicate` delegation to `evaluate_predicate_datalog()`, which handles all 12 rule types correctly.
 - **Tests**: `test_execute_rules_when_matches_condition_met`, `test_execute_rules_when_matches_condition_met_but_predicate_fails`, `test_execute_rules_when_matches_condition_not_met`
-- **Note**: The `invariants.rs` path was NOT affected — it already delegated to `evaluate_predicate()` which handles all rules.
\ No newline at end of file
+- **Note**: The `invariants.rs` path was NOT affected — it already delegated to `evaluate_predicate()` which handles all rules.
+
+### Structured Tool Call Messages (2026-02-11)
+- `crates/g3-providers/src/lib.rs` [102..106] - `MessageToolCall` struct (id, name, input)
+- `crates/g3-providers/src/lib.rs` [124..131] - `Message.tool_calls: Vec<MessageToolCall>`, `Message.tool_result_id: Option<String>`
+- `crates/g3-providers/src/anthropic.rs` [284..340] - `convert_messages()` emits `tool_use` blocks for assistant messages with `tool_calls`, `tool_result` blocks for user messages with `tool_result_id`
+- `crates/g3-providers/src/anthropic.rs` [935..941] - `AnthropicContent::ToolResult` variant added
+- `crates/g3-core/src/lib.rs` [82..88] - `ToolCall.id` field added (from native providers)
+- `crates/g3-core/src/lib.rs` [2530..2545] - Tool messages store structured `tool_calls` instead of inline JSON text
+- `crates/g3-core/src/lib.rs` [1385..1400] - `check_duplicate_in_previous_message()` checks structured `tool_calls` field
+- `crates/g3-core/src/context_window.rs` [107..109] - `add_message_with_tokens()` preserves messages with `tool_calls` even if content is empty
+- `crates/g3-core/src/streaming_parser.rs` [339] - `process_chunk()` preserves tool call `id` from provider
+
+**Bug fixed**: Agent would stop mid-task because native tool calls were stored as inline JSON text in `Message.content`. When sent back to Anthropic API via `convert_messages()`, they went as plain text instead of structured `tool_use`/`tool_result` blocks. The model would occasionally get confused and emit text describing what it wanted to do instead of invoking the tool mechanism.
\ No newline at end of file
diff --git a/crates/g3-core/src/context_window.rs b/crates/g3-core/src/context_window.rs
index 367d065..814a90d 100644
--- a/crates/g3-core/src/context_window.rs
+++ b/crates/g3-core/src/context_window.rs
@@ -103,7 +103,8 @@ impl ContextWindow {
 
     /// Add a message with optional token count from the provider
     pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option<u32>) {
-        if message.content.trim().is_empty() {
+        // Skip truly empty messages, but keep messages that have structured tool calls or tool results
+        if message.content.trim().is_empty() && message.tool_calls.is_empty() && message.tool_result_id.is_none() {
             warn!("Skipping empty message to avoid API error");
             return;
         }
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index 834780f..e5457f8 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -82,6 +82,10 @@ pub use paths::{
 pub struct ToolCall {
     pub tool: String,
     pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments
+    /// Unique ID for this tool call (from native tool calling providers).
+    /// Used to correlate tool_use/tool_result blocks in the API.
+    #[serde(default)]
+    pub id: String,
 }
 
 /// Cumulative cache statistics for prompt caching efficacy tracking.
@@ -1379,6 +1383,22 @@ impl<W: UiWriter> Agent<W> {
                 continue;
             }
 
+            // Check structured tool_calls first (native tool calling)
+            if !msg.tool_calls.is_empty() {
+                if let Some(last_tc) = msg.tool_calls.last() {
+                    let prev = ToolCall {
+                        tool: last_tc.name.clone(),
+                        args: last_tc.input.clone(),
+                        id: last_tc.id.clone(),
+                    };
+                    if streaming::are_tool_calls_duplicate(&prev, tool_call) {
+                        return Some("DUP IN MSG".to_string());
+                    }
+                }
+                // Only check the most recent assistant message
+                break;
+            }
+
             let content = &msg.content;
 
             // Look for the last occurrence of a tool call pattern
@@ -2001,6 +2021,8 @@ Skip if nothing new. Be brief."#;
                                 content: content.to_string(),
                                 kind: g3_providers::MessageKind::Regular,
                                 cache_control: None,
+                                tool_calls: Vec::new(),
+                                tool_result_id: None,
                             });
                         }
 
@@ -2028,6 +2050,8 @@ Skip if nothing new. Be brief."#;
                 content: format!("[Session Resumed]\n\n{}", context_msg),
                 kind: g3_providers::MessageKind::Regular,
                 cache_control: None,
+                tool_calls: Vec::new(),
+                tool_result_id: None,
             });
         }
 
@@ -2503,25 +2527,29 @@ Skip if nothing new. Be brief."#;
 
                             // Add the tool call and result to the context window using RAW unfiltered content
                             // This ensures the log file contains the true raw content including JSON tool calls
-                            let tool_message = if !raw_content_for_log.trim().is_empty() {
-                                Message::new(
+                            let tool_message = {
+                                let text_content = raw_content_for_log.trim().to_string();
+                                let mut msg = Message::new(
                                     MessageRole::Assistant,
-                                    format!(
-                                        "{}\n\n{{\"tool\": \"{}\", \"args\": {}}}",
-                                        raw_content_for_log.trim(),
-                                        tool_call.tool,
-                                        tool_call.args
-                                    ),
-                                )
-                            } else {
-                                // No text content before tool call, just include the tool call
-                                Message::new(
-                                    MessageRole::Assistant,
-                                    format!(
-                                        "{{\"tool\": \"{}\", \"args\": {}}}",
-                                        tool_call.tool, tool_call.args
-                                    ),
-                                )
+                                    text_content,
+                                );
+                                // Store the tool call structurally so that providers can
+                                // emit proper tool_use blocks (e.g. Anthropic API) instead
+                                // of inline JSON text that confuses the model.
+                                msg.tool_calls.push(g3_providers::MessageToolCall {
+                                    id: if tool_call.id.is_empty() {
+                                        // Safety net: generate an ID if none was provided.
+                                        // Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$
+                                        use std::sync::atomic::{AtomicU64, Ordering};
+                                        static FALLBACK_COUNTER: AtomicU64 = AtomicU64::new(0);
+                                        format!("tool_{}", FALLBACK_COUNTER.fetch_add(1, Ordering::SeqCst))
+                                    } else {
+                                        tool_call.id.clone()
+                                    },
+                                    name: tool_call.tool.clone(),
+                                    input: tool_call.args.clone(),
+                                });
+                                msg
                             };
                             let mut result_message = {
                                 let content = format!("Tool result: {}", tool_result);
@@ -2548,6 +2576,10 @@ Skip if nothing new. Be brief."#;
                                 }
                             };
 
+                            // Link the tool result to the tool_use ID so providers can
+                            // emit proper tool_result blocks (e.g. Anthropic API).
+                            result_message.tool_result_id = Some(tool_call.id.clone());
+
                             // Attach any pending images to the result message
                             // (images loaded via read_image tool)
                             if !self.pending_images.is_empty() {
diff --git a/crates/g3-core/src/streaming.rs b/crates/g3-core/src/streaming.rs
index 0d9b007..6984df2 100644
--- a/crates/g3-core/src/streaming.rs
+++ b/crates/g3-core/src/streaming.rs
@@ -716,8 +716,8 @@ mod tests {
     #[test]
     fn test_deduplicate_tool_calls_no_duplicates() {
         let tools = vec![
-            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
-            ToolCall { tool: "read_file".to_string(), args: serde_json::json!({"path": "foo.rs"}) },
+            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
+            ToolCall { tool: "read_file".to_string(), args: serde_json::json!({"path": "foo.rs"}), id: String::new() },
         ];
         
         let result = deduplicate_tool_calls(tools, |_| None);
@@ -730,8 +730,8 @@ mod tests {
     #[test]
     fn test_deduplicate_tool_calls_sequential_duplicate() {
         let tools = vec![
-            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
-            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
+            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
+            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
         ];
         
         let result = deduplicate_tool_calls(tools, |_| None);
@@ -744,7 +744,7 @@ mod tests {
     #[test]
     fn test_deduplicate_tool_calls_previous_message_duplicate() {
         let tools = vec![
-            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
+            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
         ];
         
         // Simulate finding a duplicate in previous message
diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs
index fc3f3ec..9c9f297 100644
--- a/crates/g3-core/src/streaming_parser.rs
+++ b/crates/g3-core/src/streaming_parser.rs
@@ -336,6 +336,7 @@ impl StreamingToolParser {
                 completed_tools.push(ToolCall {
                     tool: tool_call.tool.clone(),
                     args: tool_call.args.clone(),
+                    id: tool_call.id.clone(),
                 });
             }
         }
@@ -466,13 +467,21 @@ impl StreamingToolParser {
     }
 
     fn try_parse_tool_call_json(&self, json_str: &str) -> Option<ToolCall> {
-        let tool_call: ToolCall = serde_json::from_str(json_str).ok()?;
+        let mut tool_call: ToolCall = serde_json::from_str(json_str).ok()?;
         let args_obj = tool_call.args.as_object()?;
 
         if args_contain_prose_fragments(args_obj) {
             return None;
         }
 
+        // Generate an ID if not provided (JSON fallback tool calls don't have IDs,
+        // but the Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$)
+        if tool_call.id.is_empty() {
+            use std::sync::atomic::{AtomicU64, Ordering};
+            static COUNTER: AtomicU64 = AtomicU64::new(0);
+            tool_call.id = format!("json_tool_{}", COUNTER.fetch_add(1, Ordering::SeqCst));
+        }
+
         Some(tool_call)
     }
 
diff --git a/crates/g3-core/src/tools/acd.rs b/crates/g3-core/src/tools/acd.rs
index 8101738..07cec5a 100644
--- a/crates/g3-core/src/tools/acd.rs
+++ b/crates/g3-core/src/tools/acd.rs
@@ -181,6 +181,7 @@ mod tests {
         let tool_call = ToolCall {
             tool: "rehydrate".to_string(),
             args: json!({}),
+            id: String::new(),
         };
 
         let result = execute_rehydrate(&tool_call, &mut ctx).await;
@@ -213,6 +214,7 @@ mod tests {
         let tool_call = ToolCall {
             tool: "rehydrate".to_string(),
             args: json!({"fragment_id": "test-fragment"}),
+            id: String::new(),
         };
 
         let result = execute_rehydrate(&tool_call, &mut ctx).await;
@@ -245,6 +247,7 @@ mod tests {
         let tool_call = ToolCall {
             tool: "rehydrate".to_string(),
             args: json!({"fragment_id": "nonexistent-fragment"}),
+            id: String::new(),
         };
 
         let result = execute_rehydrate(&tool_call, &mut ctx).await;
diff --git a/crates/g3-core/tests/abrupt_stop_bug_test.rs b/crates/g3-core/tests/abrupt_stop_bug_test.rs
new file mode 100644
index 0000000..5269580
--- /dev/null
+++ b/crates/g3-core/tests/abrupt_stop_bug_test.rs
@@ -0,0 +1,163 @@
+//! Tests for the abrupt stop bug where the agent returns control to the user
+//! mid-task because tool calls are stored as text in the Message struct and
+//! sent back to the Anthropic API as plain text instead of structured
+//! tool_use/tool_result blocks.
+//!
+//! Root cause: Message struct has no tool_calls field. Native tool calls are
+//! stored as inline JSON text. convert_messages() sends them as plain text,
+//! not tool_use/tool_result blocks. The model sees its previous tool
+//! interactions as text it wrote, not as actual tool invocations, and
+//! occasionally emits text describing what it wants to do instead of
+//! invoking the tool mechanism.
+
+use g3_providers::{Message, MessageRole};
+
+/// Demonstrates the bug: tool calls stored as inline JSON text in assistant
+/// messages are indistinguishable from regular text when sent back to the API.
+///
+/// In the real bug, the model sees:
+///   Assistant: "Let me check.\n\n{\"tool\": \"shell\", \"args\": {...}}"
+///   User: "Tool result: ..."
+///
+/// Instead of the proper Anthropic format:
+///   Assistant: [{type: "text", text: "Let me check."}, {type: "tool_use", id: "...", name: "shell", input: {...}}]
+///   User: [{type: "tool_result", tool_use_id: "...", content: "..."}]
+#[test]
+fn test_tool_calls_stored_as_text_lack_structure() {
+    // This is how tool calls are currently stored (the bug)
+    let assistant_msg = Message::new(
+        MessageRole::Assistant,
+        "Let me check that file.\n\n{\"tool\": \"shell\", \"args\": {\"command\": \"ls\"}}".to_string(),
+    );
+
+    // The message has no structured tool call information
+    assert!(
+        assistant_msg.tool_calls.is_empty(),
+        "Message should now support structured tool_calls field"
+    );
+}
+
+/// Verifies that Message struct supports structured tool calls.
+/// After the fix, tool calls should be stored structurally.
+#[test]
+fn test_message_supports_structured_tool_calls() {
+    use g3_providers::MessageToolCall;
+
+    let mut msg = Message::new(
+        MessageRole::Assistant,
+        "Let me check that file.".to_string(),
+    );
+
+    msg.tool_calls.push(MessageToolCall {
+        id: "toolu_123".to_string(),
+        name: "shell".to_string(),
+        input: serde_json::json!({"command": "ls"}),
+    });
+
+    assert_eq!(msg.tool_calls.len(), 1);
+    assert_eq!(msg.tool_calls[0].name, "shell");
+    assert_eq!(msg.tool_calls[0].id, "toolu_123");
+}
+
+/// Verifies that Message struct supports tool_result for user messages.
+/// After the fix, tool results should reference the tool_use_id.
+#[test]
+fn test_message_supports_tool_result() {
+    let mut msg = Message::new(
+        MessageRole::User,
+        "file1.txt\nfile2.txt".to_string(),
+    );
+
+    msg.tool_result_id = Some("toolu_123".to_string());
+
+    assert_eq!(msg.tool_result_id.as_deref(), Some("toolu_123"));
+}
+
+/// Integration test: simulates the exact bug scenario from the h3 session.
+/// After several tool call iterations, the model stops mid-thought.
+///
+/// The fix ensures that when messages are sent back to the API, tool calls
+/// are properly structured so the model maintains its tool-calling context.
+#[test]
+fn test_tool_call_roundtrip_preserves_structure() {
+    use g3_providers::MessageToolCall;
+
+    // Simulate a multi-turn tool-calling conversation
+    let messages = vec![
+        // System prompt
+        Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
+        // User asks something
+        Message::new(MessageRole::User, "Check the files".to_string()),
+        // Assistant uses a tool (properly structured)
+        {
+            let mut msg = Message::new(
+                MessageRole::Assistant,
+                "Let me check the files.".to_string(),
+            );
+            msg.tool_calls.push(MessageToolCall {
+                id: "toolu_001".to_string(),
+                name: "shell".to_string(),
+                input: serde_json::json!({"command": "ls"}),
+            });
+            msg
+        },
+        // Tool result (properly structured)
+        {
+            let mut msg = Message::new(
+                MessageRole::User,
+                "file1.txt\nfile2.txt".to_string(),
+            );
+            msg.tool_result_id = Some("toolu_001".to_string());
+            msg
+        },
+        // Assistant uses another tool
+        {
+            let mut msg = Message::new(
+                MessageRole::Assistant,
+                "Let me read file1.txt.".to_string(),
+            );
+            msg.tool_calls.push(MessageToolCall {
+                id: "toolu_002".to_string(),
+                name: "read_file".to_string(),
+                input: serde_json::json!({"file_path": "file1.txt"}),
+            });
+            msg
+        },
+        // Tool result
+        {
+            let mut msg = Message::new(
+                MessageRole::User,
+                "Contents of file1.txt".to_string(),
+            );
+            msg.tool_result_id = Some("toolu_002".to_string());
+            msg
+        },
+    ];
+
+    // Verify all tool calls have IDs
+    for msg in &messages {
+        for tc in &msg.tool_calls {
+            assert!(!tc.id.is_empty(), "Tool call should have an ID");
+        }
+        // Verify tool results reference a tool_use_id
+        if msg.tool_result_id.is_some() {
+            assert!(
+                matches!(msg.role, MessageRole::User),
+                "Tool results should be user messages"
+            );
+        }
+    }
+
+    // Verify assistant messages with tool calls still have text content
+    let assistant_with_tools: Vec<_> = messages
+        .iter()
+        .filter(|m| matches!(m.role, MessageRole::Assistant) && !m.tool_calls.is_empty())
+        .collect();
+    assert_eq!(assistant_with_tools.len(), 2);
+    for msg in assistant_with_tools {
+        assert!(
+            !msg.content.is_empty(),
+            "Assistant messages should have text content alongside tool calls"
+        );
+    }
+}
diff --git a/crates/g3-core/tests/end_of_turn_behavior_test.rs b/crates/g3-core/tests/end_of_turn_behavior_test.rs
index 938aa90..d50338f 100644
--- a/crates/g3-core/tests/end_of_turn_behavior_test.rs
+++ b/crates/g3-core/tests/end_of_turn_behavior_test.rs
@@ -122,6 +122,7 @@ mod duplicate_detection {
         ToolCall {
             tool: tool.to_string(),
             args,
+            id: String::new(),
         }
     }
 
diff --git a/crates/g3-core/tests/stream_completion_characterization_test.rs b/crates/g3-core/tests/stream_completion_characterization_test.rs
index 8d50838..8fc7699 100644
--- a/crates/g3-core/tests/stream_completion_characterization_test.rs
+++ b/crates/g3-core/tests/stream_completion_characterization_test.rs
@@ -328,6 +328,7 @@ mod duplicate_detection_characterization {
         ToolCall {
             tool: tool.to_string(),
             args: serde_json::from_str(args).unwrap(),
+            id: String::new(),
         }
     }
 
@@ -525,6 +526,7 @@ mod tool_execution_integration {
         // Execute a tool directly
         let tool_call = ToolCall {
             tool: "read_file".to_string(),
+            id: String::new(),
             args: serde_json::json!({ "file_path": test_file.to_string_lossy() }),
         };
 
@@ -547,6 +549,7 @@ mod tool_execution_integration {
 
         let tool_call = ToolCall {
             tool: "shell".to_string(),
+            id: String::new(),
             args: serde_json::json!({ "command": "echo 'test output'" }),
         };
 
@@ -570,6 +573,7 @@ mod tool_execution_integration {
 
         let tool_call = ToolCall {
             tool: "write_file".to_string(),
+            id: String::new(),
             args: serde_json::json!({
                 "file_path": new_file.to_string_lossy(),
                 "content": "New content"
@@ -602,6 +606,7 @@ mod tool_execution_integration {
         // Write Plan
         let write_call = ToolCall {
             tool: "plan_write".to_string(),
+            id: String::new(),
             args: serde_json::json!({
                 "plan": r#"plan_id: test-plan
 revision: 1
@@ -634,6 +639,7 @@ items:
         // Read Plan
         let read_call = ToolCall {
             tool: "plan_read".to_string(),
+            id: String::new(),
             args: serde_json::json!({}),
         };
         let read_result = agent.execute_tool(&read_call).await.unwrap();
diff --git a/crates/g3-core/tests/tool_execution_roundtrip_test.rs b/crates/g3-core/tests/tool_execution_roundtrip_test.rs
index 935f1b5..b6488f1 100644
--- a/crates/g3-core/tests/tool_execution_roundtrip_test.rs
+++ b/crates/g3-core/tests/tool_execution_roundtrip_test.rs
@@ -37,6 +37,7 @@ fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall {
     ToolCall {
         tool: tool.to_string(),
         args,
+        id: String::new(),
     }
 }
 
diff --git a/crates/g3-core/tests/tool_execution_test.rs b/crates/g3-core/tests/tool_execution_test.rs
index d32be3d..871f850 100644
--- a/crates/g3-core/tests/tool_execution_test.rs
+++ b/crates/g3-core/tests/tool_execution_test.rs
@@ -29,6 +29,7 @@ fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall {
     ToolCall {
         tool: tool.to_string(),
         args,
+        id: String::new(),
     }
 }
 
diff --git a/crates/g3-planner/src/lib.rs b/crates/g3-planner/src/lib.rs
index d3d9042..d05a157 100644
--- a/crates/g3-planner/src/lib.rs
+++ b/crates/g3-planner/src/lib.rs
@@ -126,14 +126,16 @@ pub async fn get_initial_discovery_messages(
 
 /// Creates an Assistant message with a tool call in g3's JSON format.
 pub fn create_tool_message(tool: &str, command: &str) -> Message {
-    let tool_call = serde_json::json!({
-        "tool": tool,
-        "args": {
-            "command": command
-        }
+    let mut msg = Message::new(
+        MessageRole::Assistant,
+        String::new(),
+    );
+    msg.tool_calls.push(g3_providers::MessageToolCall {
+        id: format!("plan_{}", msg.id),
+        name: tool.to_string(),
+        input: serde_json::json!({ "command": command }),
     });
-
-    Message::new(MessageRole::Assistant, tool_call.to_string())
+    msg
 }
 
 /// Extract shell commands from the LLM response.
@@ -269,10 +271,10 @@ mod tests {
         let msg = create_tool_message("shell", "ls -la");
 
         assert!(matches!(msg.role, MessageRole::Assistant));
-
-        let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap();
-        assert_eq!(parsed["tool"], "shell");
-        assert_eq!(parsed["args"]["command"], "ls -la");
+        assert_eq!(msg.tool_calls.len(), 1);
+        assert_eq!(msg.tool_calls[0].name, "shell");
+        assert_eq!(msg.tool_calls[0].input["command"], "ls -la");
+        assert!(!msg.tool_calls[0].id.is_empty());
     }
 
     #[test]
diff --git a/crates/g3-planner/tests/planner_test.rs b/crates/g3-planner/tests/planner_test.rs
index be285ab..4d56de2 100644
--- a/crates/g3-planner/tests/planner_test.rs
+++ b/crates/g3-planner/tests/planner_test.rs
@@ -9,9 +9,10 @@ fn test_create_tool_message_format() {
 
     assert!(matches!(msg.role, MessageRole::Assistant));
 
-    let parsed: serde_json::Value = serde_json::from_str(&msg.content).unwrap();
-    assert_eq!(parsed["tool"], "shell");
-    assert_eq!(parsed["args"]["command"], "ls -la");
+    assert_eq!(msg.tool_calls.len(), 1);
+    assert_eq!(msg.tool_calls[0].name, "shell");
+    assert_eq!(msg.tool_calls[0].input["command"], "ls -la");
+    assert!(!msg.tool_calls[0].id.is_empty());
 }
 
 #[test]
diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs
index 780cbfe..ab97396 100644
--- a/crates/g3-providers/src/anthropic.rs
+++ b/crates/g3-providers/src/anthropic.rs
@@ -295,14 +295,26 @@ impl AnthropicProvider {
                         });
                     }
 
-                    // Add text content
-                    content_blocks.push(AnthropicContent::Text {
-                        text: message.content.clone(),
-                        cache_control: message
-                            .cache_control
-                            .as_ref()
-                            .map(Self::convert_cache_control),
-                    });
+                    // Check if this is a tool result message
+                    if let Some(ref tool_use_id) = message.tool_result_id {
+                        content_blocks.push(AnthropicContent::ToolResult {
+                            tool_use_id: tool_use_id.clone(),
+                            content: message.content.clone(),
+                            cache_control: message
+                                .cache_control
+                                .as_ref()
+                                .map(Self::convert_cache_control),
+                        });
+                    } else {
+                        // Regular text content
+                        content_blocks.push(AnthropicContent::Text {
+                            text: message.content.clone(),
+                            cache_control: message
+                                .cache_control
+                                .as_ref()
+                                .map(Self::convert_cache_control),
+                        });
+                    }
 
                     anthropic_messages.push(AnthropicMessage {
                         role: "user".to_string(),
@@ -310,15 +322,39 @@ impl AnthropicProvider {
                     });
                 }
                 MessageRole::Assistant => {
-                    anthropic_messages.push(AnthropicMessage {
-                        role: "assistant".to_string(),
-                        content: vec![AnthropicContent::Text {
+                    let mut content_blocks: Vec<AnthropicContent> = Vec::new();
+
+                    // Add text content if non-empty
+                    if !message.content.trim().is_empty() {
+                        content_blocks.push(AnthropicContent::Text {
                             text: message.content.clone(),
                             cache_control: message
                                 .cache_control
                                 .as_ref()
                                 .map(Self::convert_cache_control),
-                        }],
+                        });
+                    }
+
+                    // Add tool_use blocks for any structured tool calls
+                    for tc in &message.tool_calls {
+                        content_blocks.push(AnthropicContent::ToolUse {
+                            id: tc.id.clone(),
+                            name: tc.name.clone(),
+                            input: tc.input.clone(),
+                        });
+                    }
+
+                    // Ensure we have at least one content block
+                    if content_blocks.is_empty() {
+                        content_blocks.push(AnthropicContent::Text {
+                            text: message.content.clone(),
+                            cache_control: None,
+                        });
+                    }
+
+                    anthropic_messages.push(AnthropicMessage {
+                        role: "assistant".to_string(),
+                        content: content_blocks,
                     });
                 }
             }
@@ -929,6 +965,13 @@ enum AnthropicContent {
     },
     #[serde(rename = "image")]
     Image { source: AnthropicImageSource },
+    #[serde(rename = "tool_result")]
+    ToolResult {
+        tool_use_id: String,
+        content: String,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        cache_control: Option<crate::CacheControl>,
+    },
 }
 
 /// Image source for Anthropic API
diff --git a/crates/g3-providers/src/lib.rs b/crates/g3-providers/src/lib.rs
index 7793a94..ff62d80 100644
--- a/crates/g3-providers/src/lib.rs
+++ b/crates/g3-providers/src/lib.rs
@@ -95,6 +95,16 @@ impl CacheControl {
     }
 }
 
+/// A tool call stored in an assistant message for proper API roundtripping.
+/// When the model makes a native tool call, we store it structurally so that
+/// convert_messages() can send it as a proper tool_use block (not inline JSON text).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct MessageToolCall {
+    pub id: String,
+    pub name: String,
+    pub input: serde_json::Value,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct Message {
     pub role: MessageRole,
@@ -107,6 +117,16 @@ pub struct Message {
     pub kind: MessageKind,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub cache_control: Option<CacheControl>,
+    /// Structured tool calls made by the assistant in this message.
+    /// When non-empty, convert_messages() should emit tool_use content blocks
+    /// instead of (or in addition to) plain text.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub tool_calls: Vec<MessageToolCall>,
+    /// If this is a tool result message, the ID of the tool_use it responds to.
+    /// When set, convert_messages() should emit a tool_result content block
+    /// instead of plain text.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub tool_result_id: Option<String>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -279,6 +299,8 @@ impl Message {
             id: Self::generate_id(),
             kind: MessageKind::Regular,
             cache_control: None,
+            tool_calls: Vec::new(),
+            tool_result_id: None,
         }
     }
 
@@ -295,6 +317,8 @@ impl Message {
             id: Self::generate_id(),
             kind: MessageKind::Regular,
             cache_control: Some(cache_control),
+            tool_calls: Vec::new(),
+            tool_result_id: None,
         }
     }
 
@@ -307,6 +331,8 @@ impl Message {
             id: Self::generate_id(),
             kind,
             cache_control: None,
+            tool_calls: Vec::new(),
+            tool_result_id: None,
         }
     }