diff --git a/crates/g3-core/tests/streaming_parser_stuttering_test.rs b/crates/g3-core/tests/streaming_parser_stuttering_test.rs new file mode 100644 index 0000000..4e3fc8a --- /dev/null +++ b/crates/g3-core/tests/streaming_parser_stuttering_test.rs @@ -0,0 +1,240 @@ +//! Integration tests for streaming parser stuttering bug fix (fa3c920) +//! +//! BEHAVIOR PROTECTED: +//! When an LLM "stutters" and emits incomplete tool call fragments followed by +//! complete tool calls, the parser should: +//! 1. Not get stuck waiting for the incomplete fragment to complete +//! 2. Successfully parse complete tool calls that appear after the fragment +//! +//! SURFACE TARGETED: +//! StreamingToolParser - the public API for processing streaming chunks +//! +//! INTENTIONALLY NOT ASSERTED: +//! - Internal parser state transitions +//! - Specific invalidation mechanism details +//! - Order of internal operations +//! - Behavior of patterns that don't match the actual bug scenario + +use g3_core::StreamingToolParser; +use g3_providers::CompletionChunk; + +/// Helper to create a completion chunk +fn chunk(content: &str, finished: bool) -> CompletionChunk { + CompletionChunk { + content: content.to_string(), + finished, + tool_calls: None, + usage: None, + stop_reason: None, + tool_call_streaming: None, + } +} + +// ============================================================================= +// CHARACTERIZATION: The exact stuttering pattern from the bug report +// ============================================================================= + +/// Test the exact pattern observed in butler session butler_c6ab59af2e4f991c +/// where the LLM emitted: complete -> incomplete fragment -> complete +/// +/// This is the critical bug fix test - before the fix, the parser would get +/// stuck on the incomplete fragment and return zero tool calls. +#[test] +fn test_stuttering_complete_incomplete_complete() { + let mut parser = StreamingToolParser::new(); + + // This is the exact pattern that caused the bug: + // 1. Complete tool call + // 2. Incomplete fragment (just {"tool":) + // 3. Complete tool call again + let content = r#"{"tool": "shell", "args": {"command": "ls"}} + +{"tool": + +{"tool": "shell", "args": {"command": "pwd"}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + // CRITICAL: We must get at least one valid tool call + // Before the fix, the parser would get stuck on the incomplete fragment + // and return zero tool calls + assert!( + !tools.is_empty(), + "Parser must not get stuck on incomplete fragment. Expected tool calls, got none." + ); + + // Verify we got valid tool calls (at least one should be "shell") + assert!( + tools.iter().any(|t| t.tool == "shell"), + "Expected at least one 'shell' tool call" + ); +} + +/// Verify the parser finds at least one complete tool call even with stuttering +#[test] +fn test_stuttering_finds_at_least_one_complete_call() { + let mut parser = StreamingToolParser::new(); + + // Complete -> incomplete -> complete with different commands + let content = r#"{"tool": "shell", "args": {"command": "first"}} + +{"tool": + +{"tool": "shell", "args": {"command": "second"}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + // CHARACTERIZATION: The parser finds at least one complete tool call. + // The exact number depends on implementation details (streaming vs batch parsing). + // The critical behavior is that it doesn't return zero (the original bug). + assert!( + !tools.is_empty(), + "Expected at least 1 tool call, got none" + ); +} + +// ============================================================================= +// CHARACTERIZATION: Edge cases that should NOT trigger invalidation +// ============================================================================= + +/// Tool call patterns inside JSON strings should not cause invalidation +#[test] +fn test_tool_pattern_in_string_value_not_invalidated() { + let mut parser = StreamingToolParser::new(); + + // Writing example code that contains a tool call pattern + let content = r#"{"tool": "write_file", "args": {"file_path": "example.md", "content": "Example:\n{\"tool\": \"shell\"}"}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + // Should parse the outer tool call correctly + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].tool, "write_file"); + // The inner pattern should be part of the content, not a separate tool call + assert!(tools[0].args["content"] + .as_str() + .unwrap() + .contains("{\"tool\"")); +} + +/// Nested JSON objects should not trigger false invalidation +#[test] +fn test_nested_json_not_invalidated() { + let mut parser = StreamingToolParser::new(); + + // Tool call with nested JSON in args + let content = r#"{"tool": "shell", "args": {"command": "echo '{\"nested\": true}'"}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].tool, "shell"); +} + +// ============================================================================= +// CHARACTERIZATION: Recovery behavior +// ============================================================================= + +/// Parser should work correctly after reset +#[test] +fn test_parser_reset_clears_state() { + let mut parser = StreamingToolParser::new(); + + // First: process content with stuttering + let content1 = r#"{"tool": "shell", "args": {"command": "ls"}} + +{"tool": + +{"tool": "shell", "args": {"command": "pwd"}}"#; + let _tools1 = parser.process_chunk(&chunk(content1, true)); + + // Reset for new message + parser.reset(); + + // Second message should work normally + let content2 = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#; + let tools2 = parser.process_chunk(&chunk(content2, true)); + + assert_eq!(tools2.len(), 1); + assert_eq!(tools2[0].tool, "read_file"); +} + +/// Incomplete tool call detection works +#[test] +fn test_incomplete_detection() { + let mut parser = StreamingToolParser::new(); + + // Incomplete fragment + parser.process_chunk(&chunk("{\"tool\":", false)); + assert!( + parser.has_incomplete_tool_call(), + "Should detect incomplete tool call" + ); +} + +// ============================================================================= +// CHARACTERIZATION: Multiple complete tool calls (no stuttering) +// ============================================================================= + +/// Multiple complete tool calls should all be found +#[test] +fn test_multiple_complete_tool_calls() { + let mut parser = StreamingToolParser::new(); + + let content = r#"{"tool": "shell", "args": {"command": "ls"}} + +{"tool": "read_file", "args": {"file_path": "test.txt"}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + assert_eq!(tools.len(), 2, "Should find both tool calls"); + assert_eq!(tools[0].tool, "shell"); + assert_eq!(tools[1].tool, "read_file"); +} + +// ============================================================================= +// CHARACTERIZATION: Boundary conditions +// ============================================================================= + +/// Minimal stutter pattern with complete call first +#[test] +fn test_minimal_stutter_with_complete_first() { + let mut parser = StreamingToolParser::new(); + + // Complete call, then incomplete, then complete + let content = r#"{"tool": "shell", "args": {}} +{"tool": +{"tool": "shell", "args": {}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + assert!(!tools.is_empty(), "Should find at least one complete tool call"); +} + +/// Stutter at chunk boundary - incomplete in one chunk, complete in next +#[test] +fn test_stutter_split_across_chunk_boundary() { + let mut parser = StreamingToolParser::new(); + + // First chunk: complete tool call + let tools1 = parser.process_chunk(&chunk( + r#"{"tool": "shell", "args": {"command": "ls"}}"#, + false, + )); + assert_eq!(tools1.len(), 1, "First complete tool call should be detected"); + + // Mark as consumed + parser.mark_tool_calls_consumed(); + + // Second chunk: incomplete fragment + parser.process_chunk(&chunk("\n{\"tool\":", false)); + + // Third chunk: new complete tool call (finished) + let tools3 = parser.process_chunk(&chunk( + "\n{\"tool\": \"read_file\", \"args\": {\"file_path\": \"test.txt\"}}", + true, + )); + + // Should find the complete tool call at stream end + assert!(!tools3.is_empty(), "Should find complete tool call at stream end"); +}