fix: store tool calls structurally for proper API roundtripping

The agent would stop mid-task because native tool calls were stored as inline JSON text in Message.content. When sent back to the Anthropic API via convert_messages(), they went as plain text instead of structured tool_use/tool_result blocks. The model would occasionally get confused and emit text describing what it wanted to do instead of invoking the tool mechanism.

Changes:
- Add MessageToolCall struct and tool_calls/tool_result_id fields to Message
- Add id field to core ToolCall struct to preserve provider tool call IDs
- Update Anthropic convert_messages() to emit tool_use and tool_result blocks
- Add ToolResult variant to AnthropicContent enum
- Store tool calls structurally in tool message construction (not inline JSON)
- Fix add_message() to preserve empty-content messages with tool_calls
- Fix check_duplicate_in_previous_message() to check structured tool_calls
- Generate valid IDs for JSON fallback tool calls (Anthropic pattern requirement)
- Update planner create_tool_message() to use structured tool calls
2026-02-11 08:48:07 +11:00
parent 2a4cd1f4d6
commit d3f0112f46
15 changed files with 355 additions and 53 deletions
--- a/crates/g3-core/tests/abrupt_stop_bug_test.rs
+++ b/crates/g3-core/tests/abrupt_stop_bug_test.rs
@@ -0,0 +1,163 @@
+//! Tests for the abrupt stop bug where the agent returns control to the user
+//! mid-task because tool calls are stored as text in the Message struct and
+//! sent back to the Anthropic API as plain text instead of structured
+//! tool_use/tool_result blocks.
+//!
+//! Root cause (pre-fix): the Message struct had no tool_calls field, so native
+//! tool calls were stored as inline JSON text and convert_messages() sent them
+//! as plain text, not tool_use/tool_result blocks. The model saw its previous
+//! tool interactions as text it wrote, not as actual tool invocations, and
+//! occasionally emitted text describing what it wanted to do instead of
+//! invoking the tool mechanism.
+
+use g3_providers::{Message, MessageRole};
+
+/// Demonstrates the bug: tool calls stored as inline JSON text in assistant
+/// messages are indistinguishable from regular text when sent back to the API.
+///
+/// In the real bug, the model sees:
+///   Assistant: "Let me check.\n\n{\"tool\": \"shell\", \"args\": {...}}"
+///   User: "Tool result: ..."
+///
+/// Instead of the proper Anthropic format:
+///   Assistant: [{type: "text", text: "Let me check."}, {type: "tool_use", id: "...", name: "shell", input: {...}}]
+///   User: [{type: "tool_result", tool_use_id: "...", content: "..."}]
+#[test]
+fn test_tool_calls_stored_as_text_lack_structure() {
+    // This is how tool calls were stored before the fix (the bug)
+    let assistant_msg = Message::new(
+        MessageRole::Assistant,
+        "Let me check that file.\n\n{\"tool\": \"shell\", \"args\": {\"command\": \"ls\"}}".to_string(),
+    );
+
+    // The message has no structured tool call information
+    assert!(
+        assistant_msg.tool_calls.is_empty(),
+        "Message should now support structured tool_calls field"
+    );
+}
+
+/// Verifies that Message struct supports structured tool calls.
+/// After the fix, tool calls should be stored structurally.
+#[test]
+fn test_message_supports_structured_tool_calls() {
+    use g3_providers::MessageToolCall;
+
+    let mut msg = Message::new(
+        MessageRole::Assistant,
+        "Let me check that file.".to_string(),
+    );
+
+    msg.tool_calls.push(MessageToolCall {
+        id: "toolu_123".to_string(),
+        name: "shell".to_string(),
+        input: serde_json::json!({"command": "ls"}),
+    });
+
+    assert_eq!(msg.tool_calls.len(), 1);
+    assert_eq!(msg.tool_calls[0].name, "shell");
+    assert_eq!(msg.tool_calls[0].id, "toolu_123");
+}
+
+/// Verifies that Message struct supports tool_result_id for user messages.
+/// After the fix, tool results should reference the tool_use_id.
+#[test]
+fn test_message_supports_tool_result() {
+    let mut msg = Message::new(
+        MessageRole::User,
+        "file1.txt\nfile2.txt".to_string(),
+    );
+
+    msg.tool_result_id = Some("toolu_123".to_string());
+
+    assert_eq!(msg.tool_result_id.as_deref(), Some("toolu_123"));
+}
+
+/// Models the message history of the bug scenario from the h3 session, where
+/// the model stopped mid-thought after several tool call iterations.
+///
+/// With the fix, tool calls and results are stored structurally so they can be
+/// sent back to the API as such. This test checks only the Message fields.
+#[test]
+fn test_tool_call_roundtrip_preserves_structure() {
+    use g3_providers::MessageToolCall;
+
+    // Simulate a multi-turn tool-calling conversation
+    let messages = vec![
+        // System prompt
+        Message::new(MessageRole::System, "You are a helpful assistant.".to_string()),
+        // User asks something
+        Message::new(MessageRole::User, "Check the files".to_string()),
+        // Assistant uses a tool (properly structured)
+        {
+            let mut msg = Message::new(
+                MessageRole::Assistant,
+                "Let me check the files.".to_string(),
+            );
+            msg.tool_calls.push(MessageToolCall {
+                id: "toolu_001".to_string(),
+                name: "shell".to_string(),
+                input: serde_json::json!({"command": "ls"}),
+            });
+            msg
+        },
+        // Tool result (properly structured)
+        {
+            let mut msg = Message::new(
+                MessageRole::User,
+                "file1.txt\nfile2.txt".to_string(),
+            );
+            msg.tool_result_id = Some("toolu_001".to_string());
+            msg
+        },
+        // Assistant uses another tool
+        {
+            let mut msg = Message::new(
+                MessageRole::Assistant,
+                "Let me read file1.txt.".to_string(),
+            );
+            msg.tool_calls.push(MessageToolCall {
+                id: "toolu_002".to_string(),
+                name: "read_file".to_string(),
+                input: serde_json::json!({"file_path": "file1.txt"}),
+            });
+            msg
+        },
+        // Tool result
+        {
+            let mut msg = Message::new(
+                MessageRole::User,
+                "Contents of file1.txt".to_string(),
+            );
+            msg.tool_result_id = Some("toolu_002".to_string());
+            msg
+        },
+    ];
+
+    // Verify all tool calls have IDs
+    for msg in &messages {
+        for tc in &msg.tool_calls {
+            assert!(!tc.id.is_empty(), "Tool call should have an ID");
+        }
+        // Verify tool results reference a tool_use_id
+        if msg.tool_result_id.is_some() {
+            assert!(
+                matches!(msg.role, MessageRole::User),
+                "Tool results should be user messages"
+            );
+        }
+    }
+
+    // Verify assistant messages with tool calls still have text content
+    let assistant_with_tools: Vec<_> = messages
+        .iter()
+        .filter(|m| matches!(m.role, MessageRole::Assistant) && !m.tool_calls.is_empty())
+        .collect();
+    assert_eq!(assistant_with_tools.len(), 2);
+    for msg in assistant_with_tools {
+        assert!(
+            !msg.content.is_empty(),
+            "Assistant messages should have text content alongside tool calls"
+        );
+    }
+}
--- a/crates/g3-core/tests/end_of_turn_behavior_test.rs
+++ b/crates/g3-core/tests/end_of_turn_behavior_test.rs
@@ -122,6 +122,7 @@ mod duplicate_detection {
        ToolCall {
            tool: tool.to_string(),
            args,
+            id: String::new(),
        }
    }

--- a/crates/g3-core/tests/stream_completion_characterization_test.rs
+++ b/crates/g3-core/tests/stream_completion_characterization_test.rs
@@ -328,6 +328,7 @@ mod duplicate_detection_characterization {
        ToolCall {
            tool: tool.to_string(),
            args: serde_json::from_str(args).unwrap(),
+            id: String::new(),
        }
    }

@@ -525,6 +526,7 @@ mod tool_execution_integration {
        // Execute a tool directly
        let tool_call = ToolCall {
            tool: "read_file".to_string(),
+            id: String::new(),
            args: serde_json::json!({ "file_path": test_file.to_string_lossy() }),
        };

@@ -547,6 +549,7 @@ mod tool_execution_integration {

        let tool_call = ToolCall {
            tool: "shell".to_string(),
+            id: String::new(),
            args: serde_json::json!({ "command": "echo 'test output'" }),
        };

@@ -570,6 +573,7 @@ mod tool_execution_integration {

        let tool_call = ToolCall {
            tool: "write_file".to_string(),
+            id: String::new(),
            args: serde_json::json!({
                "file_path": new_file.to_string_lossy(),
                "content": "New content"
@@ -602,6 +606,7 @@ mod tool_execution_integration {
        // Write Plan
        let write_call = ToolCall {
            tool: "plan_write".to_string(),
+            id: String::new(),
            args: serde_json::json!({
                "plan": r#"plan_id: test-plan
 revision: 1
@@ -634,6 +639,7 @@ items:
        // Read Plan
        let read_call = ToolCall {
            tool: "plan_read".to_string(),
+            id: String::new(),
            args: serde_json::json!({}),
        };
        let read_result = agent.execute_tool(&read_call).await.unwrap();
--- a/crates/g3-core/tests/tool_execution_roundtrip_test.rs
+++ b/crates/g3-core/tests/tool_execution_roundtrip_test.rs
@@ -37,6 +37,7 @@ fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall {
    ToolCall {
        tool: tool.to_string(),
        args,
+        id: String::new(),
    }
 }

--- a/crates/g3-core/tests/tool_execution_test.rs
+++ b/crates/g3-core/tests/tool_execution_test.rs
@@ -29,6 +29,7 @@ fn make_tool_call(tool: &str, args: serde_json::Value) -> ToolCall {
    ToolCall {
        tool: tool.to_string(),
        args,
+        id: String::new(),
    }
 }