The buffer truncation code sliced at a raw byte offset, which could land in the middle of a multi-byte character (such as an emoji) and cause a panic. This is fixed by using char_indices() to find valid character boundaries. The stop_reason field was also added to the CompletionChunk initializers in tests to complete the stop_reason feature addition. Changes: (1) fix the byte-boundary panic in filter_json.rs, line 327; (2) add a test for multi-byte character handling; (3) update test files with the missing stop_reason field.
547 lines
17 KiB
Rust
547 lines
17 KiB
Rust
//! Comprehensive tests for StreamingToolParser
|
|
//!
|
|
//! Tests cover:
|
|
//! - Multiple tool calls in one response
|
|
//! - Tool call followed by text
|
|
//! - Incomplete tool calls at various truncation points
|
|
//! - Parser reset behavior
|
|
//! - Buffer management
|
|
|
|
use g3_core::StreamingToolParser;
|
|
use g3_providers::CompletionChunk;
|
|
|
|
// Helper to create a chunk
|
|
fn chunk(content: &str, finished: bool) -> CompletionChunk {
|
|
CompletionChunk {
|
|
content: content.to_string(),
|
|
finished,
|
|
tool_calls: None,
|
|
usage: None,
|
|
stop_reason: None,
|
|
}
|
|
}
|
|
|
|
// =============================================================================
|
|
// Test: Multiple tool calls in one response
|
|
// =============================================================================
|
|
|
|
/// One chunk containing two complete tool-call objects, separated by prose,
/// must yield at least one detected tool call.
#[test]
fn test_multiple_tool_calls_in_single_chunk() {
    // Both JSON objects are complete and arrive in the same chunk.
    let payload = r#"Let me do two things:
{"tool": "read_file", "args": {"file_path": "a.txt"}}
Now the second:
{"tool": "shell", "args": {"command": "ls"}}"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));

    // Only a lower bound is checked: the current implementation may return
    // just the first call it finds.
    assert!(
        !detected.is_empty(),
        "Should detect at least one tool call"
    );
}
|
|
|
|
/// Two tool calls delivered in separate chunks, with a `reset()` in between,
/// are each detected individually.
#[test]
fn test_multiple_tool_calls_across_chunks() {
    let mut parser = StreamingToolParser::new();

    // Chunk one: a complete `read_file` call.
    let read_chunk = chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    );
    let first_batch = parser.process_chunk(&read_chunk);
    assert_eq!(first_batch.len(), 1, "First tool call should be detected");
    assert_eq!(first_batch[0].tool, "read_file");

    // The reset stands in for what happens after a tool has been executed.
    parser.reset();

    // Chunk two: a complete `shell` call.
    let shell_chunk = chunk(r#"{"tool": "shell", "args": {"command": "ls"}}"#, false);
    let second_batch = parser.process_chunk(&shell_chunk);
    assert_eq!(second_batch.len(), 1, "Second tool call should be detected");
    assert_eq!(second_batch[0].tool, "shell");
}
|
|
|
|
/// A chunk holding one complete tool call followed by a truncated second one
/// must leave the parser reporting an incomplete tool call.
///
/// The list returned by `process_chunk` is intentionally not bound: this test
/// only asserts on `has_incomplete_tool_call()`, and binding the result to an
/// unused local produced an `unused_variables` warning.
#[test]
fn test_first_complete_second_incomplete() {
    let mut parser = StreamingToolParser::new();

    // First object is complete; the second is cut off inside the "command" value.
    let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "shell", "args": {"command": "ls"#;

    parser.process_chunk(&chunk(content, false));

    // The truncated second call is what has_incomplete_tool_call should report.
    assert!(
        parser.has_incomplete_tool_call(),
        "Should detect incomplete tool call"
    );
}
|
|
|
|
// =============================================================================
|
|
// Test: Tool call followed by text
|
|
// =============================================================================
|
|
|
|
/// A tool call followed by prose: the call is detected and the prose remains
/// available through `get_text_content()`.
#[test]
fn test_tool_call_with_trailing_text() {
    let payload = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}

Here is the content of the file..."#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));

    assert_eq!(detected.len(), 1);
    assert_eq!(detected[0].tool, "read_file");

    // Text after the JSON object must still be present in the buffer.
    let buffered = parser.get_text_content();
    assert!(
        buffered.contains("Here is the content"),
        "Trailing text should be preserved"
    );
}
|
|
|
|
/// Prose followed by a tool call: the call is detected and the prose remains
/// available through `get_text_content()`.
#[test]
fn test_text_before_tool_call() {
    let payload = r#"Let me read that file for you.

{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));

    assert_eq!(detected.len(), 1);
    assert_eq!(detected[0].tool, "read_file");

    // Text ahead of the JSON object must still be present in the buffer.
    let buffered = parser.get_text_content();
    assert!(
        buffered.contains("Let me read"),
        "Leading text should be preserved"
    );
}
|
|
|
|
/// A tool call sandwiched between two pieces of prose: the call is detected
/// and both pieces of prose remain in the text buffer.
#[test]
fn test_text_before_and_after_tool_call() {
    let payload = r#"I'll check the file.

{"tool": "read_file", "args": {"file_path": "test.txt"}}

Done checking."#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));
    assert_eq!(detected.len(), 1);

    // Fetch the buffer once and check both surrounding fragments against it.
    let buffered = parser.get_text_content();
    assert!(
        buffered.contains("I'll check"),
        "Leading text should be preserved"
    );
    assert!(
        buffered.contains("Done checking"),
        "Trailing text should be preserved"
    );
}
|
|
|
|
// =============================================================================
|
|
// Test: Incomplete tool calls at various truncation points
|
|
// =============================================================================
|
|
|
|
/// Input truncated right after the `"tool":` key is reported as incomplete.
#[test]
fn test_incomplete_after_tool_key() {
    let truncated = r#"{"tool":"#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(truncated, false));

    assert!(parser.has_incomplete_tool_call());
}
|
|
|
|
/// Input truncated right after the tool name value is reported as incomplete.
#[test]
fn test_incomplete_after_tool_name() {
    let truncated = r#"{"tool": "read_file""#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(truncated, false));

    assert!(parser.has_incomplete_tool_call());
}
|
|
|
|
/// Input truncated right after the `"args":` key is reported as incomplete.
#[test]
fn test_incomplete_after_args_key() {
    let truncated = r#"{"tool": "read_file", "args":"#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(truncated, false));

    assert!(parser.has_incomplete_tool_call());
}
|
|
|
|
/// Input truncated inside the args object, after a key but before its value,
/// is reported as incomplete.
#[test]
fn test_incomplete_mid_args_object() {
    let truncated = r#"{"tool": "read_file", "args": {"file_path":"#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(truncated, false));

    assert!(parser.has_incomplete_tool_call());
}
|
|
|
|
/// Input truncated in the middle of a string value is reported as incomplete.
#[test]
fn test_incomplete_mid_string_value() {
    let truncated = r#"{"tool": "shell", "args": {"command": "ls -la /very/long/path"#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(truncated, false));

    assert!(parser.has_incomplete_tool_call());
}
|
|
|
|
/// Input whose args object is closed but whose outer object lacks its final
/// `}` is reported as incomplete.
#[test]
fn test_incomplete_missing_final_brace() {
    let truncated = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}"#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(truncated, false));

    assert!(parser.has_incomplete_tool_call());
}
|
|
|
|
/// A fully closed tool-call object must not be flagged as incomplete.
#[test]
fn test_complete_tool_call_not_incomplete() {
    let complete = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(complete, false));

    let flagged_incomplete = parser.has_incomplete_tool_call();
    assert!(
        !flagged_incomplete,
        "Complete tool call should not be marked incomplete"
    );
}
|
|
|
|
// =============================================================================
|
|
// Test: Parser reset behavior
|
|
// =============================================================================
|
|
|
|
/// `reset()` empties the text buffer that `process_chunk` filled.
#[test]
fn test_reset_clears_buffer() {
    let mut parser = StreamingToolParser::new();

    // Precondition: feeding plain text leaves something in the buffer.
    parser.process_chunk(&chunk("Some content here", false));
    let empty_before_reset = parser.get_text_content().is_empty();
    assert!(!empty_before_reset);

    parser.reset();

    let empty_after_reset = parser.get_text_content().is_empty();
    assert!(empty_after_reset, "Buffer should be empty after reset");
}
|
|
|
|
/// `reset()` clears the "incomplete tool call" condition produced by a
/// truncated tool-call object.
#[test]
fn test_reset_clears_incomplete_state() {
    let truncated = r#"{"tool": "read_file", "args": {"#;

    let mut parser = StreamingToolParser::new();

    // Precondition: the truncated input is reported as incomplete.
    parser.process_chunk(&chunk(truncated, false));
    assert!(parser.has_incomplete_tool_call());

    parser.reset();

    let still_incomplete = parser.has_incomplete_tool_call();
    assert!(
        !still_incomplete,
        "Incomplete state should be cleared after reset"
    );
}
|
|
|
|
/// `reset()` clears the "unexecuted tool call" condition produced by a
/// complete tool-call object that was never marked as consumed.
#[test]
fn test_reset_clears_unexecuted_state() {
    let complete = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;

    let mut parser = StreamingToolParser::new();

    // Precondition: a complete call that nobody consumed counts as unexecuted.
    parser.process_chunk(&chunk(complete, false));
    assert!(parser.has_unexecuted_tool_call());

    parser.reset();

    let still_unexecuted = parser.has_unexecuted_tool_call();
    assert!(
        !still_unexecuted,
        "Unexecuted state should be cleared after reset"
    );
}
|
|
|
|
/// After `reset()`, the parser detects a fresh tool call just as it did the
/// first one.
#[test]
fn test_reset_allows_new_tool_calls() {
    let mut parser = StreamingToolParser::new();

    // Before the reset: one `read_file` call.
    let before_reset = chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    );
    let first_batch = parser.process_chunk(&before_reset);
    assert_eq!(first_batch.len(), 1);

    parser.reset();

    // After the reset: one `shell` call, which must be the one reported.
    let after_reset = chunk(r#"{"tool": "shell", "args": {"command": "ls"}}"#, false);
    let second_batch = parser.process_chunk(&after_reset);
    assert_eq!(second_batch.len(), 1);
    assert_eq!(second_batch[0].tool, "shell");
}
|
|
|
|
// =============================================================================
|
|
// Test: Buffer management and edge cases
|
|
// =============================================================================
|
|
|
|
/// A tool call streamed in four fragments accumulates in the text buffer.
#[test]
fn test_streaming_chunks_accumulate() {
    // The fragments concatenate to one complete tool-call object.
    let fragments = [
        r#"{"tool": "#,
        r#""read_file", "#,
        r#""args": {"file_path": "#,
        r#""test.txt"}}"#,
    ];

    let mut parser = StreamingToolParser::new();
    for fragment in fragments.iter() {
        parser.process_chunk(&chunk(fragment, false));
    }

    // Pieces from different fragments must both be visible in the buffer.
    let buffered = parser.get_text_content();
    assert!(buffered.contains(r#""tool""#));
    assert!(buffered.contains(r#""read_file""#));
}
|
|
|
|
/// A tool call split across two non-finished chunks must either be returned
/// by the second `process_chunk` call or be reported by
/// `has_unexecuted_tool_call()`.
///
/// The previous assertion negated `has_unexecuted_tool_call()`, contradicting
/// its own failure message: it passed when the parser detected nothing at all.
///
/// Note: despite the test name, no chunk with `finished = true` is sent here.
#[test]
fn test_finished_chunk_triggers_final_parse() {
    let mut parser = StreamingToolParser::new();

    // The object is split mid-way; only the second chunk completes it.
    parser.process_chunk(&chunk(r#"{"tool": "read_file", "#, false));
    let tools1 = parser.process_chunk(&chunk(r#""args": {"file_path": "test.txt"}}"#, false));

    // Either detection path is acceptable, but at least one must have fired.
    assert!(
        !tools1.is_empty() || parser.has_unexecuted_tool_call(),
        "Tool should be detected during streaming or marked as unexecuted"
    );
}
|
|
|
|
/// Two empty chunks leave the parser with no text and no pending tool-call
/// state.
#[test]
fn test_empty_chunks_ignored() {
    let mut parser = StreamingToolParser::new();

    for _ in 0..2 {
        parser.process_chunk(&chunk("", false));
    }

    assert!(parser.get_text_content().is_empty());
    assert!(!parser.has_incomplete_tool_call());
    assert!(!parser.has_unexecuted_tool_call());
}
|
|
|
|
/// A chunk consisting only of whitespace creates no pending tool-call state.
#[test]
fn test_whitespace_only_chunks() {
    let blank = " \n\t ";

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(blank, false));

    assert!(!parser.has_incomplete_tool_call());
    assert!(!parser.has_unexecuted_tool_call());
}
|
|
|
|
/// Escaped double quotes inside a string value do not break detection.
#[test]
fn test_json_with_escaped_quotes() {
    let payload = r#"{"tool": "shell", "args": {"command": "echo \"hello\""}}"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));

    assert_eq!(detected.len(), 1);
    assert_eq!(detected[0].tool, "shell");
}
|
|
|
|
/// Escaped backslashes inside a string value (a Windows-style path) do not
/// break detection.
#[test]
fn test_json_with_escaped_backslashes() {
    let payload = r#"{"tool": "write_file", "args": {"file_path": "C:\\Users\\test.txt", "content": "data"}}"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));

    assert_eq!(detected.len(), 1);
    assert_eq!(detected[0].tool, "write_file");
}
|
|
|
|
/// Braces that appear inside a string value (embedded, escaped JSON) are not
/// mistaken for structural braces.
#[test]
fn test_json_with_nested_braces_in_string() {
    let payload = r#"{"tool": "write_file", "args": {"content": "{\"nested\": {\"json\": true}}"}}"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));

    assert_eq!(detected.len(), 1);
    assert_eq!(detected[0].tool, "write_file");
}
|
|
|
|
/// `text_buffer_len()` grows as chunks are appended and returns to zero after
/// `reset()`.
#[test]
fn test_text_buffer_length_tracking() {
    let mut parser = StreamingToolParser::new();

    // Each step appends a piece and states the cumulative length expected.
    let steps = [("Hello", 5), (" World", 11)];
    for &(piece, expected_len) in steps.iter() {
        parser.process_chunk(&chunk(piece, false));
        assert_eq!(parser.text_buffer_len(), expected_len);
    }

    parser.reset();
    assert_eq!(parser.text_buffer_len(), 0);
}
|
|
|
|
/// `is_message_stopped()` is false after a non-finished chunk, true after a
/// finished chunk, and false again after `reset()`.
#[test]
fn test_message_stopped_flag() {
    let mut parser = StreamingToolParser::new();

    // A non-finished chunk must not set the flag.
    let open_chunk = chunk("Hello", false);
    parser.process_chunk(&open_chunk);
    assert!(!parser.is_message_stopped());

    // A finished chunk sets it.
    let final_chunk = chunk(" World", true);
    parser.process_chunk(&final_chunk);
    assert!(parser.is_message_stopped());

    // Reset clears it again.
    parser.reset();
    assert!(!parser.is_message_stopped());
}
|
|
|
|
// =============================================================================
|
|
// Test: Tool call pattern variations
|
|
// =============================================================================
|
|
|
|
/// A compact tool-call object with no whitespace between tokens is detected.
#[test]
fn test_tool_pattern_no_spaces() {
    let compact = r#"{"tool":"read_file","args":{"file_path":"test.txt"}}"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(compact, false));

    assert_eq!(detected.len(), 1);
}
|
|
|
|
// =============================================================================
|
|
// Test: mark_tool_calls_consumed functionality
|
|
// =============================================================================
|
|
|
|
/// `mark_tool_calls_consumed()` makes a previously unexecuted tool call stop
/// being reported by `has_unexecuted_tool_call()`.
#[test]
fn test_mark_consumed_clears_unexecuted_state() {
    let complete = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(complete, false));

    // Precondition: the complete call is pending.
    assert!(parser.has_unexecuted_tool_call());

    parser.mark_tool_calls_consumed();

    // Once consumed, the same call must no longer count as pending.
    let still_pending = parser.has_unexecuted_tool_call();
    assert!(
        !still_pending,
        "After marking consumed, has_unexecuted_tool_call should return false"
    );
}
|
|
|
|
/// A tool call that arrives after `mark_tool_calls_consumed()`, with no
/// `reset()` in between, is reported as unexecuted.
#[test]
fn test_mark_consumed_allows_new_tool_detection() {
    let mut parser = StreamingToolParser::new();

    // The first call is processed and immediately marked as consumed.
    let consumed_call = chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    );
    parser.process_chunk(&consumed_call);
    parser.mark_tool_calls_consumed();

    // The second call is appended to the same buffer; the parser is not reset.
    let fresh_call = chunk(r#"{"tool": "shell", "args": {"command": "ls"}}"#, false);
    parser.process_chunk(&fresh_call);

    let pending = parser.has_unexecuted_tool_call();
    assert!(
        pending,
        "New tool call after consumed position should be detected"
    );
}
|
|
|
|
/// An opening brace followed by a lone quote is not a tool-call pattern, so it
/// must not be reported as an incomplete tool call.
#[test]
fn test_bare_brace_not_incomplete() {
    // No "tool" key follows the brace and quote.
    let stray = r#"{""#;

    let mut parser = StreamingToolParser::new();
    parser.process_chunk(&chunk(stray, false));

    let flagged_incomplete = parser.has_incomplete_tool_call();
    assert!(
        !flagged_incomplete,
        "Bare {{ should not be detected as incomplete tool call"
    );
}
|
|
|
|
/// A tool call, a stray `{"` fragment, then the same tool call again: at least
/// one call is returned and an unexecuted call is still reported afterwards.
#[test]
fn test_duplicate_tool_call_pattern() {
    // The same call appears on both sides of the garbage fragment.
    let call = r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"#;
    let content = [call, "\n\n{\"\n\n", call].concat();

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(&content, false));

    assert!(
        !detected.is_empty(),
        "Should detect at least one tool call"
    );

    // During streaming only the first call is returned, so the duplicate is
    // expected to remain pending.
    let pending = parser.has_unexecuted_tool_call();
    assert!(pending, "Should detect the duplicate as unexecuted");
}
|
|
|
|
/// When the stream finishes, every tool call in the buffer is returned, in the
/// order it appeared.
#[test]
fn test_multiple_tool_calls_returned_on_finish() {
    // Two complete calls with a line of prose between them.
    let read_call = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#;
    let shell_call = r#"{"tool": "shell", "args": {"command": "ls"}}"#;
    let content = [read_call, "\nSome text\n", shell_call].concat();

    let mut parser = StreamingToolParser::new();

    // Step one: deliver everything while the stream is still open.
    parser.process_chunk(&chunk(&content, false));

    // Step two: an empty finishing chunk should flush all calls.
    let flushed = parser.process_chunk(&chunk("", true));

    assert_eq!(
        flushed.len(),
        2,
        "Should return both tool calls when stream finishes"
    );
    assert_eq!(flushed[0].tool, "read_file");
    assert_eq!(flushed[1].tool, "shell");
}
|
|
|
|
/// A tool-call object with extra spaces around every token is detected.
#[test]
fn test_tool_pattern_extra_spaces() {
    let spaced = r#"{ "tool" : "read_file" , "args" : { "file_path" : "test.txt" } }"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(spaced, false));

    assert_eq!(detected.len(), 1);
}
|
|
|
|
/// Feeds a multi-line tool-call object to the parser and asserts nothing.
///
/// The parser looks for specific patterns such as `{"tool":` or `{ "tool":`.
/// With a newline between `{` and `"tool"` the pattern does not match, so
/// neither `process_chunk` nor `has_unexecuted_tool_call` finds the call.
/// This is a known limitation of the deliberately strict, pattern-based
/// detection; the test only exercises the code path.
#[test]
fn test_tool_pattern_with_newlines() {
    let multiline = r#"{
"tool": "read_file",
"args": {
"file_path": "test.txt"
}
}"#;

    let mut parser = StreamingToolParser::new();
    let _detected = parser.process_chunk(&chunk(multiline, false));
}
|
|
|
|
// =============================================================================
|
|
// Test: Edge cases for has_message_like_keys validation
|
|
// =============================================================================
|
|
|
|
/// A tool call whose args use ordinary keys (`file_path`, `start`, `end`) is
/// accepted.
#[test]
fn test_normal_args_accepted() {
    let payload =
        r#"{"tool": "read_file", "args": {"file_path": "test.txt", "start": 0, "end": 100}}"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));

    assert_eq!(detected.len(), 1);
}
|
|
|
|
/// Conversational phrases such as "I'll" are acceptable inside argument
/// VALUES; only the keys are checked.
#[test]
fn test_content_with_phrases_in_value_accepted() {
    let payload = r#"{"tool": "write_file", "args": {"file_path": "test.txt", "content": "I'll help you with that. Let me explain."}}"#;

    let mut parser = StreamingToolParser::new();
    let detected = parser.process_chunk(&chunk(payload, false));

    assert_eq!(detected.len(), 1);
}
|