Fix multiple tool call handling and improve auto-continue logic

- Add last_consumed_position tracking to StreamingToolParser to prevent re-detecting already-executed tool calls
- Add mark_tool_calls_consumed() method to mark tool calls as processed
- Add find_first_tool_call_start() for forward scanning of tool patterns
- Replace try_parse_json_tool_call_from_buffer() with try_parse_all_json_tool_calls_from_buffer() to find ALL tool calls
- Update has_incomplete_tool_call() and has_unexecuted_tool_call() to only check unconsumed portion of buffer
- Fix tool execution loop to not reset parser when unexecuted tools remain
- Simplify should_auto_continue logic (remove redundant condition)
- Add comprehensive tests for auto-continue condition logic
2025-12-22 16:08:57 +11:00
parent a755301cf9
commit 8070147a0c
2 changed files with 235 additions and 41 deletions
--- a/crates/g3-core/tests/auto_continue_test.rs
+++ b/crates/g3-core/tests/auto_continue_test.rs
@@ -111,3 +111,124 @@ fn test_max_auto_summary_attempts_is_reasonable() {
    assert!(CURRENT_VALUE <= EXPECTED_MAX_ATTEMPTS,
        "MAX_AUTO_SUMMARY_ATTEMPTS should not exceed {} to avoid infinite loops", EXPECTED_MAX_ATTEMPTS);
 }
+
+// =============================================================================
+// Test: Auto-continue condition logic
+// =============================================================================
+
+/// Test-local stand-in for the should_auto_continue decision in lib.rs.
+///
+/// Returns `true` when a tool call is still pending (incomplete or unexecuted), or when
+/// a tool was executed and either `final_output` was never called or the response was empty.
+fn should_auto_continue(
+    any_tool_executed: bool,
+    final_output_called: bool,
+    has_incomplete_tool_call: bool,
+    has_unexecuted_tool_call: bool,
+    is_empty_response: bool,
+) -> bool {
+    // A tool call that is incomplete or not yet executed is enough on its own.
+    let tool_call_pending = has_incomplete_tool_call || has_unexecuted_tool_call;
+    // After a tool ran, keep going unless final_output was called with a non-empty response.
+    let tool_ran_without_finish =
+        any_tool_executed && (!final_output_called || is_empty_response);
+
+    tool_call_pending || tool_ran_without_finish
+}
+
+#[test]
+fn test_auto_continue_after_tool_no_final_output() {
+    // A tool ran but final_output was never called, so the loop is expected to continue.
+    let any_tool_executed = true;
+    let final_output_called = false;
+    let has_incomplete_tool_call = false;
+    let has_unexecuted_tool_call = false;
+    let is_empty_response = false;
+
+    let should_continue = should_auto_continue(
+        any_tool_executed,
+        final_output_called,
+        has_incomplete_tool_call,
+        has_unexecuted_tool_call,
+        is_empty_response,
+    );
+    assert!(should_continue);
+}
+
+#[test]
+fn test_auto_continue_with_final_output() {
+    // A tool ran and final_output was called with a non-empty response: no continuation expected.
+    let any_tool_executed = true;
+    let final_output_called = true;
+    let has_incomplete_tool_call = false;
+    let has_unexecuted_tool_call = false;
+    let is_empty_response = false;
+
+    let should_continue = should_auto_continue(
+        any_tool_executed,
+        final_output_called,
+        has_incomplete_tool_call,
+        has_unexecuted_tool_call,
+        is_empty_response,
+    );
+    assert!(!should_continue);
+}
+
+#[test]
+fn test_auto_continue_incomplete_tool_call() {
+    // An incomplete tool call should force a continue regardless of the other flags,
+    // so every combination of the remaining four flags is checked, not just one.
+    for bits in 0u8..16 {
+        let any_tool_executed = bits & 0b0001 != 0;
+        let final_output_called = bits & 0b0010 != 0;
+        let has_unexecuted_tool_call = bits & 0b0100 != 0;
+        let is_empty_response = bits & 0b1000 != 0;
+
+        assert!(
+            should_auto_continue(
+                any_tool_executed,
+                final_output_called,
+                true, // has_incomplete_tool_call
+                has_unexecuted_tool_call,
+                is_empty_response,
+            ),
+            "incomplete tool call should always continue (any_tool_executed={}, final_output_called={}, has_unexecuted_tool_call={}, is_empty_response={})",
+            any_tool_executed,
+            final_output_called,
+            has_unexecuted_tool_call,
+            is_empty_response
+        );
+    }
+}
+
+#[test]
+fn test_auto_continue_unexecuted_tool_call() {
+    // Only the unexecuted-tool-call flag is set; that alone is expected to trigger a continue.
+    let any_tool_executed = false;
+    let final_output_called = false;
+    let has_incomplete_tool_call = false;
+    let has_unexecuted_tool_call = true;
+    let is_empty_response = false;
+
+    let should_continue = should_auto_continue(
+        any_tool_executed,
+        final_output_called,
+        has_incomplete_tool_call,
+        has_unexecuted_tool_call,
+        is_empty_response,
+    );
+    assert!(should_continue);
+}
+
+#[test]
+fn test_auto_continue_empty_response_after_tool() {
+    // A tool ran and the following response was empty: the loop is expected to continue.
+    let any_tool_executed = true;
+    let final_output_called = false;
+    let has_incomplete_tool_call = false;
+    let has_unexecuted_tool_call = false;
+    let is_empty_response = true;
+
+    let should_continue = should_auto_continue(
+        any_tool_executed,
+        final_output_called,
+        has_incomplete_tool_call,
+        has_unexecuted_tool_call,
+        is_empty_response,
+    );
+    assert!(should_continue);
+}
+
+#[test]
+fn test_auto_continue_empty_response_no_tool() {
+    // The response is empty but no tool was executed. This is treated as the ordinary
+    // "LLM just didn't respond" case, so no continuation is expected.
+    let any_tool_executed = false;
+    let final_output_called = false;
+    let has_incomplete_tool_call = false;
+    let has_unexecuted_tool_call = false;
+    let is_empty_response = true;
+
+    let should_continue = should_auto_continue(
+        any_tool_executed,
+        final_output_called,
+        has_incomplete_tool_call,
+        has_unexecuted_tool_call,
+        is_empty_response,
+    );
+    assert!(!should_continue);
+}
+
+#[test]
+fn test_auto_continue_no_conditions_met() {
+    // Every flag is false (no tools, nothing pending, substantive response): no continuation.
+    let any_tool_executed = false;
+    let final_output_called = false;
+    let has_incomplete_tool_call = false;
+    let has_unexecuted_tool_call = false;
+    let is_empty_response = false;
+
+    let should_continue = should_auto_continue(
+        any_tool_executed,
+        final_output_called,
+        has_incomplete_tool_call,
+        has_unexecuted_tool_call,
+        is_empty_response,
+    );
+    assert!(!should_continue);
+}
+
+// =============================================================================
+// Test: Redundant condition detection
+// =============================================================================
+
+#[test]
+fn test_redundant_empty_response_condition() {
+    // Documents that (any_tool_executed && is_empty_response) adds nothing once
+    // (any_tool_executed && !final_output_called) already holds: toggling
+    // is_empty_response must not change the result, and the result must be "continue".
+
+    // Case: tool executed, no final_output, with and without an empty response.
+    let result_with_empty = should_auto_continue(true, false, false, false, true);
+    let result_without_empty = should_auto_continue(true, false, false, false, false);
+
+    // Check the values themselves as well as their agreement: an equality check alone
+    // would still pass if both calls returned false.
+    assert!(result_with_empty,
+        "tool executed without final_output and an empty response should auto-continue");
+    assert!(result_without_empty,
+        "tool executed without final_output and a non-empty response should auto-continue");
+    assert_eq!(result_with_empty, result_without_empty,
+        "The is_empty_response condition is redundant when any_tool_executed && !final_output_called");
+}