The buffer truncation code was slicing at a raw byte offset which could land in the middle of a multi-byte character (like emojis), causing a panic. Fixed by using char_indices() to find valid character boundaries. Also added stop_reason field to CompletionChunk initializers in tests to complete the stop_reason feature addition. - Fix byte boundary panic in filter_json.rs line 327 - Add test for multi-byte character handling - Update test files with missing stop_reason field
233 lines
8.9 KiB
Rust
233 lines
8.9 KiB
Rust
//! Tests for tool call duplicate detection
|
|
//!
|
|
//! These tests ensure that duplicate detection only catches IMMEDIATELY SEQUENTIAL
|
|
//! duplicates, not legitimate re-use of tools with text between them.
|
|
|
|
use g3_core::StreamingToolParser;
|
|
use g3_providers::CompletionChunk;
|
|
|
|
// Helper to create a chunk
|
|
fn chunk(content: &str, finished: bool) -> CompletionChunk {
|
|
CompletionChunk {
|
|
content: content.to_string(),
|
|
finished,
|
|
tool_calls: None,
|
|
usage: None,
|
|
stop_reason: None,
|
|
}
|
|
}
|
|
|
|
// =============================================================================
|
|
// Test: find_complete_json_object_end helper function
|
|
// =============================================================================
|
|
|
|
#[test]
|
|
fn test_find_complete_json_object_end_simple() {
|
|
let json = r#"{"tool": "test", "args": {}}"#;
|
|
let end = StreamingToolParser::find_complete_json_object_end(json);
|
|
assert!(end.is_some(), "Should find end of complete JSON");
|
|
assert_eq!(end.unwrap(), json.len() - 1, "End should be at last character");
|
|
}
|
|
|
|
#[test]
|
|
fn test_find_complete_json_object_end_nested() {
|
|
let json = r#"{"tool": "test", "args": {"nested": {"deep": true}}}"#;
|
|
let end = StreamingToolParser::find_complete_json_object_end(json);
|
|
assert!(end.is_some(), "Should find end of nested JSON");
|
|
assert_eq!(end.unwrap(), json.len() - 1);
|
|
}
|
|
|
|
#[test]
|
|
fn test_find_complete_json_object_end_with_trailing_text() {
|
|
let json = r#"{"tool": "test", "args": {}} some text after"#;
|
|
let end = StreamingToolParser::find_complete_json_object_end(json);
|
|
assert!(end.is_some(), "Should find end of JSON even with trailing text");
|
|
// The end should be at the closing brace, not at the end of the string
|
|
let end_pos = end.unwrap();
|
|
assert_eq!(&json[end_pos..end_pos+1], "}", "End should be at closing brace");
|
|
}
|
|
|
|
#[test]
|
|
fn test_find_complete_json_object_end_incomplete() {
|
|
let json = r#"{"tool": "test", "args": {"#;
|
|
let end = StreamingToolParser::find_complete_json_object_end(json);
|
|
assert!(end.is_none(), "Should return None for incomplete JSON");
|
|
}
|
|
|
|
// =============================================================================
|
|
// Test: Tool calls separated by text should NOT be duplicates
|
|
// =============================================================================
|
|
|
|
#[test]
|
|
fn test_same_tool_with_text_between_not_duplicate() {
|
|
// This tests the scenario where the LLM calls the same tool twice
|
|
// but with explanatory text between them - this should NOT be a duplicate
|
|
let mut parser = StreamingToolParser::new();
|
|
|
|
// First tool call
|
|
let content1 = r#"{"tool": "todo_read", "args": {}}"#;
|
|
let tools1 = parser.process_chunk(&chunk(content1, true));
|
|
assert_eq!(tools1.len(), 1, "First tool call should be detected");
|
|
assert_eq!(tools1[0].tool, "todo_read");
|
|
|
|
// Reset parser (simulating what happens after tool execution)
|
|
parser.reset();
|
|
|
|
// Some text, then the same tool call again
|
|
let content2 = r#"Now let me check the TODO again to verify my changes.
|
|
{"tool": "todo_read", "args": {}}"#;
|
|
let tools2 = parser.process_chunk(&chunk(content2, true));
|
|
|
|
// The second tool call should be detected - it's NOT a duplicate
|
|
// because there's text before it
|
|
assert_eq!(tools2.len(), 1, "Second tool call should be detected (not a duplicate)");
|
|
assert_eq!(tools2[0].tool, "todo_read");
|
|
}
|
|
|
|
#[test]
|
|
fn test_different_tools_back_to_back_not_duplicate() {
|
|
let mut parser = StreamingToolParser::new();
|
|
|
|
// Two different tool calls back to back
|
|
let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
|
|
{"tool": "shell", "args": {"command": "ls"}}"#;
|
|
|
|
let tools = parser.process_chunk(&chunk(content, true));
|
|
|
|
// Both should be detected - they're different tools
|
|
assert!(tools.len() >= 1, "Should detect tool calls");
|
|
// At minimum, the first one should be detected
|
|
assert_eq!(tools[0].tool, "read_file");
|
|
}
|
|
|
|
#[test]
|
|
fn test_same_tool_different_args_not_duplicate() {
|
|
let mut parser = StreamingToolParser::new();
|
|
|
|
// Same tool but different arguments - NOT a duplicate
|
|
let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
|
|
{"tool": "read_file", "args": {"file_path": "b.txt"}}"#;
|
|
|
|
let tools = parser.process_chunk(&chunk(content, true));
|
|
|
|
// Both should be detected - different args means not a duplicate
|
|
assert!(tools.len() >= 1, "Should detect tool calls");
|
|
}
|
|
|
|
// =============================================================================
|
|
// Test: Immediately sequential identical tool calls ARE duplicates
|
|
// =============================================================================
|
|
|
|
#[test]
|
|
fn test_identical_tool_calls_back_to_back_are_duplicates() {
|
|
// This tests the scenario where the LLM stutters and outputs
|
|
// the exact same tool call twice in a row - this IS a duplicate
|
|
let mut parser = StreamingToolParser::new();
|
|
|
|
// Two identical tool calls with no text between them
|
|
let content = r#"{"tool": "todo_read", "args": {}}
|
|
{"tool": "todo_read", "args": {}}"#;
|
|
|
|
let tools = parser.process_chunk(&chunk(content, true));
|
|
|
|
// The parser should detect both, but the deduplication logic
|
|
// (which happens at a higher level in the agent) should mark
|
|
// the second one as a duplicate
|
|
// Here we just verify both are parsed
|
|
assert!(tools.len() >= 1, "Should detect at least one tool call");
|
|
}
|
|
|
|
// =============================================================================
|
|
// Test: Text content detection for duplicate logic
|
|
// =============================================================================
|
|
|
|
#[test]
|
|
fn test_has_text_after_tool_call() {
|
|
// Helper test to verify we can detect text after a tool call
|
|
let content_with_text = r#"{"tool": "test", "args": {}} Some text after"#;
|
|
let content_without_text = r#"{"tool": "test", "args": {}}"#;
|
|
let content_with_whitespace_only = r#"{"tool": "test", "args": {}}
|
|
"#;
|
|
|
|
// Find the end of the JSON in each case
|
|
let end1 = StreamingToolParser::find_complete_json_object_end(content_with_text).unwrap();
|
|
let end2 = StreamingToolParser::find_complete_json_object_end(content_without_text).unwrap();
|
|
let end3 = StreamingToolParser::find_complete_json_object_end(content_with_whitespace_only).unwrap();
|
|
|
|
// Check what's after the JSON
|
|
let after1 = content_with_text[end1 + 1..].trim();
|
|
let after2 = content_without_text.get(end2 + 1..).unwrap_or("").trim();
|
|
let after3 = content_with_whitespace_only[end3 + 1..].trim();
|
|
|
|
assert!(!after1.is_empty(), "Should have text after tool call");
|
|
assert!(after2.is_empty(), "Should have no text after tool call");
|
|
assert!(after3.is_empty(), "Whitespace-only should count as no text");
|
|
}
|
|
|
|
// =============================================================================
|
|
// Test: Edge cases
|
|
// =============================================================================
|
|
|
|
#[test]
|
|
fn test_tool_call_with_newlines_between() {
|
|
let mut parser = StreamingToolParser::new();
|
|
|
|
// Tool calls separated by multiple newlines (but no actual text)
|
|
// This SHOULD be considered a duplicate since there's no meaningful text
|
|
let content = r#"{"tool": "todo_read", "args": {}}
|
|
|
|
|
|
{"tool": "todo_read", "args": {}}"#;
|
|
|
|
let tools = parser.process_chunk(&chunk(content, true));
|
|
assert!(tools.len() >= 1, "Should detect at least one tool call");
|
|
}
|
|
|
|
#[test]
|
|
fn test_tool_call_with_whitespace_text_between() {
|
|
let mut parser = StreamingToolParser::new();
|
|
|
|
// Tool calls separated by text that's just whitespace and punctuation
|
|
// The key is whether there's "meaningful" text
|
|
let content = r#"{"tool": "todo_read", "args": {}}
|
|
OK, now again:
|
|
{"tool": "todo_read", "args": {}}"#;
|
|
|
|
let tools = parser.process_chunk(&chunk(content, true));
|
|
|
|
// Both should be detected since there's text between them
|
|
assert!(tools.len() >= 1, "Should detect tool calls");
|
|
}
|
|
|
|
#[test]
|
|
fn test_tool_call_in_middle_of_text() {
|
|
let mut parser = StreamingToolParser::new();
|
|
|
|
// Tool call surrounded by text
|
|
let content = r#"Let me read the file first.
|
|
{"tool": "read_file", "args": {"file_path": "test.txt"}}
|
|
Now I'll analyze the contents."#;
|
|
|
|
let tools = parser.process_chunk(&chunk(content, true));
|
|
assert_eq!(tools.len(), 1, "Should detect the tool call");
|
|
assert_eq!(tools[0].tool, "read_file");
|
|
}
|
|
|
|
#[test]
|
|
fn test_multiple_different_tool_calls_with_text() {
|
|
let mut parser = StreamingToolParser::new();
|
|
|
|
// Multiple different tool calls with text between each
|
|
let content = r#"First, let me read the file:
|
|
{"tool": "read_file", "args": {"file_path": "test.txt"}}
|
|
Now let me check the TODO:
|
|
{"tool": "todo_read", "args": {}}
|
|
Finally, let me run a command:
|
|
{"tool": "shell", "args": {"command": "ls"}}"#;
|
|
|
|
let tools = parser.process_chunk(&chunk(content, true));
|
|
|
|
// All three should be detected
|
|
assert!(tools.len() >= 1, "Should detect tool calls");
|
|
}
|