From 58bbfde6f490503f6f230ed6f5223d223cc8f1a1 Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna" <d@wideplay.com>
Date: Fri, 30 Jan 2026 14:30:27 +1100
Subject: [PATCH] test: add integration tests for streaming parser stuttering
 bug fix

Add characterization tests for the streaming parser stuttering bug fix (fa3c920).
These tests verify that when an LLM "stutters" and emits incomplete tool call
fragments followed by complete tool calls, the parser:

1. Does not get stuck waiting for the incomplete fragment to complete
2. Successfully parses complete tool calls that appear after the fragment

Tests cover:
- The exact pattern from butler session butler_c6ab59af2e4f991c
- Edge cases that should NOT trigger invalidation (nested JSON, patterns in strings)
- Recovery behavior after reset
- Multiple complete tool calls
- Boundary conditions (chunk boundaries, minimal patterns)

Agent: hopper
---
 .../tests/streaming_parser_stuttering_test.rs | 240 ++++++++++++++++++
 1 file changed, 240 insertions(+)
 create mode 100644 crates/g3-core/tests/streaming_parser_stuttering_test.rs

diff --git a/crates/g3-core/tests/streaming_parser_stuttering_test.rs b/crates/g3-core/tests/streaming_parser_stuttering_test.rs
new file mode 100644
index 0000000..4e3fc8a
--- /dev/null
+++ b/crates/g3-core/tests/streaming_parser_stuttering_test.rs
@@ -0,0 +1,240 @@
+//! Integration tests for the streaming parser stuttering bug fix (commit fa3c920)
+//!
+//! BEHAVIOR PROTECTED:
+//! When an LLM "stutters" and emits incomplete tool call fragments followed by
+//! complete tool calls, the parser should:
+//! 1. Not get stuck waiting for the incomplete fragment to complete
+//! 2. Successfully parse complete tool calls that appear after the fragment
+//!
+//! SURFACE TARGETED:
+//! StreamingToolParser - the public API for processing streaming chunks
+//!
+//! INTENTIONALLY NOT ASSERTED:
+//! - Internal parser state transitions
+//! - Specific invalidation mechanism details
+//! - Order of internal operations
+//! - Behavior of patterns that don't match the actual bug scenario
+
+use g3_core::StreamingToolParser;
+use g3_providers::CompletionChunk;
+
+/// Builds a `CompletionChunk` carrying only text; all other fields are `None`.
+fn chunk(content: &str, finished: bool) -> CompletionChunk {
+    CompletionChunk {
+        finished,
+        content: String::from(content),
+        usage: None,
+        tool_calls: None,
+        tool_call_streaming: None,
+        stop_reason: None,
+    }
+}
+
+// =============================================================================
+// CHARACTERIZATION: The exact stuttering pattern from the bug report
+// =============================================================================
+
+/// Replays the stutter seen in butler session butler_c6ab59af2e4f991c:
+/// a complete call, a dangling `{"tool":` fragment, then another complete call.
+///
+/// Key regression test for the fix: the pre-fix parser stalled on the dangling
+/// fragment and reported no tool calls at all.
+#[test]
+fn test_stuttering_complete_incomplete_complete() {
+    // Stream layout that triggered the bug:
+    //   1. a complete tool call
+    //   2. an incomplete fragment (only `{"tool":`)
+    //   3. a second complete tool call
+    let stuttered = r#"{"tool": "shell", "args": {"command": "ls"}}
+
+{"tool":
+
+{"tool": "shell", "args": {"command": "pwd"}}"#;
+
+    let mut parser = StreamingToolParser::new();
+    let final_chunk = chunk(stuttered, true);
+    let calls = parser.process_chunk(&final_chunk);
+
+    // CRITICAL: an empty result is exactly the pre-fix failure mode, where the
+    // parser kept waiting for the incomplete fragment to complete.
+    assert!(
+        !calls.is_empty(),
+        "Parser must not get stuck on incomplete fragment. Expected tool calls, got none."
+    );
+
+    // At least one of the returned calls has to be the `shell` tool.
+    let shell_calls = calls.iter().filter(|call| call.tool == "shell").count();
+    assert!(
+        shell_calls > 0,
+        "Expected at least one 'shell' tool call"
+    );
+}
+
+/// A stuttered stream whose two complete calls carry different commands must
+/// still produce at least one tool call
+#[test]
+fn test_stuttering_finds_at_least_one_complete_call() {
+    // Layout: complete ("first") -> incomplete fragment -> complete ("second")
+    let stuttered = r#"{"tool": "shell", "args": {"command": "first"}}
+
+{"tool":
+
+{"tool": "shell", "args": {"command": "second"}}"#;
+
+    let mut parser = StreamingToolParser::new();
+    let calls = parser.process_chunk(&chunk(stuttered, true));
+
+    // CHARACTERIZATION: only a lower bound is pinned here. How many calls come
+    // back depends on implementation details (streaming vs batch parsing); what
+    // matters is that the result is not empty, which was the original bug.
+    assert!(
+        !calls.is_empty(),
+        "Expected at least 1 tool call, got none"
+    );
+}
+
+// =============================================================================
+// CHARACTERIZATION: Edge cases that should NOT trigger invalidation
+// =============================================================================
+
+/// A tool-call-shaped snippet embedded in a JSON string value stays plain data
+#[test]
+fn test_tool_pattern_in_string_value_not_invalidated() {
+    // A write_file call whose file content is example text showing a tool call
+    let payload = r#"{"tool": "write_file", "args": {"file_path": "example.md", "content": "Example:\n{\"tool\": \"shell\"}"}}"#;
+
+    let mut parser = StreamingToolParser::new();
+    let calls = parser.process_chunk(&chunk(payload, true));
+
+    // Only the outer call is reported
+    assert_eq!(calls.len(), 1);
+    let outer = &calls[0];
+    assert_eq!(outer.tool, "write_file");
+
+    // The embedded pattern survives inside the `content` argument instead of
+    // being split out as a tool call of its own
+    let written = outer.args["content"].as_str().unwrap();
+    assert!(written.contains("{\"tool\""));
+}
+
+/// JSON nested inside a tool call's arguments must not cause false invalidation
+#[test]
+fn test_nested_json_not_invalidated() {
+    // The shell command string itself contains a JSON object literal
+    let payload = r#"{"tool": "shell", "args": {"command": "echo '{\"nested\": true}'"}}"#;
+
+    let mut parser = StreamingToolParser::new();
+    let calls = parser.process_chunk(&chunk(payload, true));
+
+    // Exactly one call comes back, and it is the shell call
+    assert_eq!(calls.len(), 1);
+    assert_eq!(calls[0].tool, "shell");
+}
+
+// =============================================================================
+// CHARACTERIZATION: Recovery behavior
+// =============================================================================
+
+/// After `reset()`, a parser that saw a stuttered message handles the next one
+#[test]
+fn test_parser_reset_clears_state() {
+    let mut parser = StreamingToolParser::new();
+
+    // Message one: the stuttered stream; its result is irrelevant to this test
+    let stuttered = r#"{"tool": "shell", "args": {"command": "ls"}}
+
+{"tool":
+
+{"tool": "shell", "args": {"command": "pwd"}}"#;
+    let _first_pass = parser.process_chunk(&chunk(stuttered, true));
+
+    // Start over for a new message
+    parser.reset();
+
+    // Message two: a single well-formed call, expected to parse as usual
+    let follow_up = chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, true);
+    let calls = parser.process_chunk(&follow_up);
+
+    assert_eq!(calls.len(), 1);
+    assert_eq!(calls[0].tool, "read_file");
+}
+
+/// A lone `{"tool":` fragment is reported as an incomplete tool call
+#[test]
+fn test_incomplete_detection() {
+    let mut parser = StreamingToolParser::new();
+    let fragment = chunk("{\"tool\":", false);
+
+    // Feed only the fragment, in a chunk that is not marked finished
+    parser.process_chunk(&fragment);
+
+    let incomplete = parser.has_incomplete_tool_call();
+    assert!(incomplete, "Should detect incomplete tool call");
+}
+
+// =============================================================================
+// CHARACTERIZATION: Multiple complete tool calls (no stuttering)
+// =============================================================================
+
+/// Two complete calls in one message are both returned, in stream order
+#[test]
+fn test_multiple_complete_tool_calls() {
+    let two_calls = r#"{"tool": "shell", "args": {"command": "ls"}}
+
+{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
+
+    let mut parser = StreamingToolParser::new();
+    let calls = parser.process_chunk(&chunk(two_calls, true));
+
+    assert_eq!(calls.len(), 2, "Should find both tool calls");
+    // Order is checked too: shell first, read_file second
+    assert_eq!(calls[0].tool, "shell");
+    assert_eq!(calls[1].tool, "read_file");
+}
+
+// =============================================================================
+// CHARACTERIZATION: Boundary conditions
+// =============================================================================
+
+/// Smallest stutter: empty-args calls around a bare fragment, one per line
+#[test]
+fn test_minimal_stutter_with_complete_first() {
+    // complete -> incomplete -> complete, with no blank lines in between
+    let stuttered = r#"{"tool": "shell", "args": {}}
+{"tool":
+{"tool": "shell", "args": {}}"#;
+
+    let mut parser = StreamingToolParser::new();
+    let calls = parser.process_chunk(&chunk(stuttered, true));
+
+    let found_any = !calls.is_empty();
+    assert!(found_any, "Should find at least one complete tool call");
+}
+
+/// Stutter spread over chunks: complete call, dangling fragment, complete call
+#[test]
+fn test_stutter_split_across_chunk_boundary() {
+    let mut parser = StreamingToolParser::new();
+
+    // Chunk 1 (not finished): a complete tool call
+    let first = chunk(r#"{"tool": "shell", "args": {"command": "ls"}}"#, false);
+    let first_calls = parser.process_chunk(&first);
+    assert_eq!(first_calls.len(), 1, "First complete tool call should be detected");
+
+    // Tell the parser that call has been consumed
+    parser.mark_tool_calls_consumed();
+
+    // Chunk 2 (not finished): the incomplete fragment; its result is ignored
+    let fragment = chunk("\n{\"tool\":", false);
+    parser.process_chunk(&fragment);
+
+    // Chunk 3 (finished): a new complete tool call
+    let last = chunk(
+        "\n{\"tool\": \"read_file\", \"args\": {\"file_path\": \"test.txt\"}}",
+        true,
+    );
+    let last_calls = parser.process_chunk(&last);
+
+    // The end of the stream must still surface a complete tool call
+    assert!(!last_calls.is_empty(), "Should find complete tool call at stream end");
+}