fix: store tool calls structurally for proper API roundtripping

The agent would stop mid-task because native tool calls were stored as inline JSON text in Message.content. When the conversation was sent back to the Anthropic API via convert_messages(), those calls went out as plain text instead of structured tool_use/tool_result blocks. As a result, the model would occasionally get confused and emit text describing what it wanted to do instead of invoking the tool mechanism.

Changes:
- Add MessageToolCall struct and tool_calls/tool_result_id fields to Message
- Add id field to core ToolCall struct to preserve provider tool call IDs
- Update Anthropic convert_messages() to emit tool_use and tool_result blocks
- Add ToolResult variant to AnthropicContent enum
- Store tool calls structurally in tool message construction (not inline JSON)
- Fix add_message() to preserve empty-content messages with tool_calls
- Fix check_duplicate_in_previous_message() to check structured tool_calls
- Generate valid IDs for JSON fallback tool calls (the Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$)
- Update planner create_tool_message() to use structured tool calls
2026-02-11 08:48:07 +11:00
parent 2a4cd1f4d6
commit d3f0112f46
15 changed files with 355 additions and 53 deletions
--- a/crates/g3-core/src/context_window.rs
+++ b/crates/g3-core/src/context_window.rs
@@ -103,7 +103,8 @@ impl ContextWindow {

    /// Add a message with optional token count from the provider
    pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option<u32>) {
-        if message.content.trim().is_empty() {
+        // Skip truly empty messages, but keep messages that have structured tool calls or tool results
+        if message.content.trim().is_empty() && message.tool_calls.is_empty() && message.tool_result_id.is_none() {
            warn!("Skipping empty message to avoid API error");
            return;
        }
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -82,6 +82,10 @@ pub use paths::{
 pub struct ToolCall {
    pub tool: String,
    pub args: serde_json::Value, // Should be a JSON object with tool-specific arguments
+    /// Unique ID for this tool call (from native tool calling providers).
+    /// Used to correlate tool_use/tool_result blocks in the API.
+    #[serde(default)]
+    pub id: String,
 }

 /// Cumulative cache statistics for prompt caching efficacy tracking.
@@ -1379,6 +1383,22 @@ impl<W: UiWriter> Agent<W> {
                continue;
            }

+            // Check structured tool_calls first (native tool calling)
+            if !msg.tool_calls.is_empty() {
+                if let Some(last_tc) = msg.tool_calls.last() {
+                    let prev = ToolCall {
+                        tool: last_tc.name.clone(),
+                        args: last_tc.input.clone(),
+                        id: last_tc.id.clone(),
+                    };
+                    if streaming::are_tool_calls_duplicate(&prev, tool_call) {
+                        return Some("DUP IN MSG".to_string());
+                    }
+                }
+                // Only check the most recent assistant message
+                break;
+            }
+
            let content = &msg.content;

            // Look for the last occurrence of a tool call pattern
@@ -2001,6 +2021,8 @@ Skip if nothing new. Be brief."#;
                                content: content.to_string(),
                                kind: g3_providers::MessageKind::Regular,
                                cache_control: None,
+                                tool_calls: Vec::new(),
+                                tool_result_id: None,
                            });
                        }

@@ -2028,6 +2050,8 @@ Skip if nothing new. Be brief."#;
                content: format!("[Session Resumed]\n\n{}", context_msg),
                kind: g3_providers::MessageKind::Regular,
                cache_control: None,
+                tool_calls: Vec::new(),
+                tool_result_id: None,
            });
        }

@@ -2503,25 +2527,29 @@ Skip if nothing new. Be brief."#;

                            // Add the tool call and result to the context window using RAW unfiltered content
                            // This ensures the log file contains the true raw content including JSON tool calls
-                            let tool_message = if !raw_content_for_log.trim().is_empty() {
-                                Message::new(
+                            let tool_message = {
+                                let text_content = raw_content_for_log.trim().to_string();
+                                let mut msg = Message::new(
                                    MessageRole::Assistant,
-                                    format!(
-                                        "{}\n\n{{\"tool\": \"{}\", \"args\": {}}}",
-                                        raw_content_for_log.trim(),
-                                        tool_call.tool,
-                                        tool_call.args
-                                    ),
-                                )
-                            } else {
-                                // No text content before tool call, just include the tool call
-                                Message::new(
-                                    MessageRole::Assistant,
-                                    format!(
-                                        "{{\"tool\": \"{}\", \"args\": {}}}",
-                                        tool_call.tool, tool_call.args
-                                    ),
-                                )
+                                    text_content,
+                                );
+                                // Store the tool call structurally so that providers can
+                                // emit proper tool_use blocks (e.g. Anthropic API) instead
+                                // of inline JSON text that confuses the model.
+                                msg.tool_calls.push(g3_providers::MessageToolCall {
+                                    id: if tool_call.id.is_empty() {
+                                        // Safety net: generate an ID if none was provided.
+                                        // Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$
+                                        use std::sync::atomic::{AtomicU64, Ordering};
+                                        static FALLBACK_COUNTER: AtomicU64 = AtomicU64::new(0);
+                                        format!("tool_{}", FALLBACK_COUNTER.fetch_add(1, Ordering::SeqCst))
+                                    } else {
+                                        tool_call.id.clone()
+                                    },
+                                    name: tool_call.tool.clone(),
+                                    input: tool_call.args.clone(),
+                                });
+                                msg
                            };
                            let mut result_message = {
                                let content = format!("Tool result: {}", tool_result);
@@ -2548,6 +2576,10 @@ Skip if nothing new. Be brief."#;
                                }
                            };

+                            // Link the tool result to the tool_use ID so providers can
+                            // emit proper tool_result blocks (e.g. Anthropic API).
+                            result_message.tool_result_id = Some(tool_call.id.clone());
+
                            // Attach any pending images to the result message
                            // (images loaded via read_image tool)
                            if !self.pending_images.is_empty() {
--- a/crates/g3-core/src/streaming.rs
+++ b/crates/g3-core/src/streaming.rs
@@ -716,8 +716,8 @@ mod tests {
    #[test]
    fn test_deduplicate_tool_calls_no_duplicates() {
        let tools = vec![
-            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
-            ToolCall { tool: "read_file".to_string(), args: serde_json::json!({"path": "foo.rs"}) },
+            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
+            ToolCall { tool: "read_file".to_string(), args: serde_json::json!({"path": "foo.rs"}), id: String::new() },
        ];
        
        let result = deduplicate_tool_calls(tools, |_| None);
@@ -730,8 +730,8 @@ mod tests {
    #[test]
    fn test_deduplicate_tool_calls_sequential_duplicate() {
        let tools = vec![
-            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
-            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
+            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
+            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
        ];
        
        let result = deduplicate_tool_calls(tools, |_| None);
@@ -744,7 +744,7 @@ mod tests {
    #[test]
    fn test_deduplicate_tool_calls_previous_message_duplicate() {
        let tools = vec![
-            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}) },
+            ToolCall { tool: "shell".to_string(), args: serde_json::json!({"command": "ls"}), id: String::new() },
        ];
        
        // Simulate finding a duplicate in previous message
--- a/crates/g3-core/src/streaming_parser.rs
+++ b/crates/g3-core/src/streaming_parser.rs
@@ -336,6 +336,7 @@ impl StreamingToolParser {
                completed_tools.push(ToolCall {
                    tool: tool_call.tool.clone(),
                    args: tool_call.args.clone(),
+                    id: tool_call.id.clone(),
                });
            }
        }
@@ -466,13 +467,21 @@ impl StreamingToolParser {
    }

    fn try_parse_tool_call_json(&self, json_str: &str) -> Option<ToolCall> {
-        let tool_call: ToolCall = serde_json::from_str(json_str).ok()?;
+        let mut tool_call: ToolCall = serde_json::from_str(json_str).ok()?;
        let args_obj = tool_call.args.as_object()?;

        if args_contain_prose_fragments(args_obj) {
            return None;
        }

+        // Generate an ID if not provided (JSON fallback tool calls don't have IDs,
+        // but the Anthropic API requires tool_use IDs matching ^[a-zA-Z0-9_-]+$)
+        if tool_call.id.is_empty() {
+            use std::sync::atomic::{AtomicU64, Ordering};
+            static COUNTER: AtomicU64 = AtomicU64::new(0);
+            tool_call.id = format!("json_tool_{}", COUNTER.fetch_add(1, Ordering::SeqCst));
+        }
+
        Some(tool_call)
    }

--- a/crates/g3-core/src/tools/acd.rs
+++ b/crates/g3-core/src/tools/acd.rs
@@ -181,6 +181,7 @@ mod tests {
        let tool_call = ToolCall {
            tool: "rehydrate".to_string(),
            args: json!({}),
+            id: String::new(),
        };

        let result = execute_rehydrate(&tool_call, &mut ctx).await;
@@ -213,6 +214,7 @@ mod tests {
        let tool_call = ToolCall {
            tool: "rehydrate".to_string(),
            args: json!({"fragment_id": "test-fragment"}),
+            id: String::new(),
        };

        let result = execute_rehydrate(&tool_call, &mut ctx).await;
@@ -245,6 +247,7 @@ mod tests {
        let tool_call = ToolCall {
            tool: "rehydrate".to_string(),
            args: json!({"fragment_id": "nonexistent-fragment"}),
+            id: String::new(),
        };

        let result = execute_rehydrate(&tool_call, &mut ctx).await;