Fix multiple tool call handling and improve auto-continue logic
- Add last_consumed_position tracking to StreamingToolParser to prevent re-detecting already-executed tool calls
- Add mark_tool_calls_consumed() method to mark tool calls as processed
- Add find_first_tool_call_start() for forward scanning of tool patterns
- Replace try_parse_json_tool_call_from_buffer() with try_parse_all_json_tool_calls_from_buffer() to find ALL tool calls
- Update has_incomplete_tool_call() and has_unexecuted_tool_call() to only check unconsumed portion of buffer
- Fix tool execution loop to not reset parser when unexecuted tools remain
- Simplify should_auto_continue logic (remove redundant condition)
- Add comprehensive tests for auto-continue condition logic
This commit is contained in:
@@ -111,3 +111,124 @@ fn test_max_auto_summary_attempts_is_reasonable() {
|
||||
assert!(CURRENT_VALUE <= EXPECTED_MAX_ATTEMPTS,
|
||||
"MAX_AUTO_SUMMARY_ATTEMPTS should not exceed {} to avoid infinite loops", EXPECTED_MAX_ATTEMPTS);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Auto-continue condition logic
|
||||
// =============================================================================
|
||||
|
||||
/// Simulates the should_auto_continue logic from lib.rs
///
/// Returns `true` when at least one of these clauses holds:
/// - `any_tool_executed` is set and `final_output_called` is not,
/// - `has_incomplete_tool_call` is set,
/// - `has_unexecuted_tool_call` is set,
/// - `any_tool_executed` and `is_empty_response` are both set.
///
/// The last clause only changes the result when `final_output_called` is `true`;
/// when it is `false`, the first clause already yields `true` for any executed tool.
fn should_auto_continue(
    any_tool_executed: bool,
    final_output_called: bool,
    has_incomplete_tool_call: bool,
    has_unexecuted_tool_call: bool,
    is_empty_response: bool,
) -> bool {
    (any_tool_executed && !final_output_called)
        || has_incomplete_tool_call
        || has_unexecuted_tool_call
        || (any_tool_executed && is_empty_response)
}
|
||||
|
||||
#[test]
fn test_auto_continue_after_tool_no_final_output() {
    // Tool executed but no final_output - should continue
    assert!(should_auto_continue(
        true,  // any_tool_executed
        false, // final_output_called
        false, // has_incomplete_tool_call
        false, // has_unexecuted_tool_call
        false, // is_empty_response
    ));
}
|
||||
|
||||
#[test]
fn test_auto_continue_with_final_output() {
    // Tool executed AND final_output called - should NOT continue
    assert!(!should_auto_continue(
        true,  // any_tool_executed
        true,  // final_output_called
        false, // has_incomplete_tool_call
        false, // has_unexecuted_tool_call
        false, // is_empty_response
    ));
}
|
||||
|
||||
#[test]
fn test_auto_continue_incomplete_tool_call() {
    // Incomplete tool call - should continue regardless of other flags
    assert!(should_auto_continue(
        false, // any_tool_executed
        false, // final_output_called
        true,  // has_incomplete_tool_call
        false, // has_unexecuted_tool_call
        false, // is_empty_response
    ));

    // Back up the "regardless of other flags" claim by sweeping every
    // combination of the four remaining inputs.
    for bits in 0u8..16 {
        let flag = |i: u8| (bits & (1 << i)) != 0;
        assert!(
            should_auto_continue(flag(0), flag(1), true, flag(2), flag(3)),
            "incomplete tool call must force auto-continue (other flags bitmask: {:04b})",
            bits
        );
    }
}
|
||||
|
||||
#[test]
fn test_auto_continue_unexecuted_tool_call() {
    // Unexecuted tool call - should continue
    assert!(should_auto_continue(
        false, // any_tool_executed
        false, // final_output_called
        false, // has_incomplete_tool_call
        true,  // has_unexecuted_tool_call
        false, // is_empty_response
    ));
}
|
||||
|
||||
#[test]
fn test_auto_continue_empty_response_after_tool() {
    // Empty response after tool execution - should continue
    assert!(should_auto_continue(
        true,  // any_tool_executed
        false, // final_output_called
        false, // has_incomplete_tool_call
        false, // has_unexecuted_tool_call
        true,  // is_empty_response
    ));
}
|
||||
|
||||
#[test]
fn test_auto_continue_empty_response_no_tool() {
    // Empty response but no tool executed - should NOT continue
    // (This is a normal case where LLM just didn't respond)
    assert!(!should_auto_continue(
        false, // any_tool_executed
        false, // final_output_called
        false, // has_incomplete_tool_call
        false, // has_unexecuted_tool_call
        true,  // is_empty_response
    ));
}
|
||||
|
||||
#[test]
fn test_auto_continue_no_conditions_met() {
    // No tools, no incomplete calls, substantive response - should NOT continue
    assert!(!should_auto_continue(
        false, // any_tool_executed
        false, // final_output_called
        false, // has_incomplete_tool_call
        false, // has_unexecuted_tool_call
        false, // is_empty_response
    ));
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Redundant condition detection
|
||||
// =============================================================================
|
||||
|
||||
#[test]
fn test_redundant_empty_response_condition() {
    // This test documents that (any_tool_executed && is_empty_response) is redundant
    // when (any_tool_executed && !final_output_called) is already true

    // Case: tool executed, no final_output, empty response
    let result_with_empty = should_auto_continue(true, false, false, false, true);
    let result_without_empty = should_auto_continue(true, false, false, false, false);

    // Both should be true because (any_tool_executed && !final_output_called) is true.
    // Checked explicitly so that two `false` results cannot satisfy the equality below.
    assert!(
        result_with_empty,
        "expected auto-continue with an empty response after a tool ran without final_output"
    );
    assert!(
        result_without_empty,
        "expected auto-continue with a non-empty response after a tool ran without final_output"
    );
    assert_eq!(result_with_empty, result_without_empty,
        "The is_empty_response condition is redundant when any_tool_executed && !final_output_called");
}
|
||||
|
||||
Reference in New Issue
Block a user