Merge sessions/hopper/297c7be9

2026-01-30 14:30:53 +11:00
parent 3003bdebaa 58bbfde6f4
commit 51f12769d5
1 changed files with 240 additions and 0 deletions
--- a/crates/g3-core/tests/streaming_parser_stuttering_test.rs
+++ b/crates/g3-core/tests/streaming_parser_stuttering_test.rs
@@ -0,0 +1,240 @@
 //! Integration tests for streaming parser stuttering bug fix (fa3c920)
 //!
 //! BEHAVIOR PROTECTED:
 //! When an LLM "stutters" and emits incomplete tool call fragments followed by
 //! complete tool calls, the parser should:
 //! 1. Not get stuck waiting for the incomplete fragment to complete
 //! 2. Successfully parse complete tool calls that appear after the fragment
 //!
 //! SURFACE TARGETED:
 //! StreamingToolParser - the public API for processing streaming chunks
 //!
 //! INTENTIONALLY NOT ASSERTED:
 //! - Internal parser state transitions
 //! - Specific invalidation mechanism details
 //! - Order of internal operations
 //! - Behavior of patterns that don't match the actual bug scenario
 use g3_core::StreamingToolParser;
 use g3_providers::CompletionChunk;
 /// Helper to create a completion chunk
 fn chunk(content: &str, finished: bool) -> CompletionChunk {
    CompletionChunk {
        content: content.to_string(),
        finished,
        tool_calls: None,
        usage: None,
        stop_reason: None,
        tool_call_streaming: None,
    }
 }
 // =============================================================================
 // CHARACTERIZATION: The exact stuttering pattern from the bug report
 // =============================================================================
 /// Test the exact pattern observed in butler session butler_c6ab59af2e4f991c
 /// where the LLM emitted: complete -> incomplete fragment -> complete
 ///
 /// This is the critical bug fix test - before the fix, the parser would get
 /// stuck on the incomplete fragment and return zero tool calls.
 #[test]
 fn test_stuttering_complete_incomplete_complete() {
    let mut parser = StreamingToolParser::new();
    // This is the exact pattern that caused the bug:
    // 1. Complete tool call
    // 2. Incomplete fragment (just {"tool":)
    // 3. Complete tool call again
    let content = r#"{"tool": "shell", "args": {"command": "ls"}}
 {"tool":
 {"tool": "shell", "args": {"command": "pwd"}}"#;
    let tools = parser.process_chunk(&chunk(content, true));
    // CRITICAL: We must get at least one valid tool call
    // Before the fix, the parser would get stuck on the incomplete fragment
    // and return zero tool calls
    assert!(
        !tools.is_empty(),
        "Parser must not get stuck on incomplete fragment. Expected tool calls, got none."
    );
    // Verify we got valid tool calls (at least one should be "shell")
    assert!(
        tools.iter().any(|t| t.tool == "shell"),
        "Expected at least one 'shell' tool call"
    );
 }
 /// Verify the parser finds at least one complete tool call even with stuttering
 #[test]
 fn test_stuttering_finds_at_least_one_complete_call() {
    let mut parser = StreamingToolParser::new();
    // Complete -> incomplete -> complete with different commands
    let content = r#"{"tool": "shell", "args": {"command": "first"}}
 {"tool":
 {"tool": "shell", "args": {"command": "second"}}"#;
    let tools = parser.process_chunk(&chunk(content, true));
    // CHARACTERIZATION: The parser finds at least one complete tool call.
    // The exact number depends on implementation details (streaming vs batch parsing).
    // The critical behavior is that it doesn't return zero (the original bug).
    assert!(
        !tools.is_empty(),
        "Expected at least 1 tool call, got none"
    );
 }
 // =============================================================================
 // CHARACTERIZATION: Edge cases that should NOT trigger invalidation
 // =============================================================================
 /// Tool call patterns inside JSON strings should not cause invalidation
 #[test]
 fn test_tool_pattern_in_string_value_not_invalidated() {
    let mut parser = StreamingToolParser::new();
    // Writing example code that contains a tool call pattern
    let content = r#"{"tool": "write_file", "args": {"file_path": "example.md", "content": "Example:\n{\"tool\": \"shell\"}"}}"#;
    let tools = parser.process_chunk(&chunk(content, true));
    // Should parse the outer tool call correctly
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].tool, "write_file");
    // The inner pattern should be part of the content, not a separate tool call
    assert!(tools[0].args["content"]
        .as_str()
        .unwrap()
        .contains("{\"tool\""));
 }
 /// Nested JSON objects should not trigger false invalidation
 #[test]
 fn test_nested_json_not_invalidated() {
    let mut parser = StreamingToolParser::new();
    // Tool call with nested JSON in args
    let content = r#"{"tool": "shell", "args": {"command": "echo '{\"nested\": true}'"}}"#;
    let tools = parser.process_chunk(&chunk(content, true));
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].tool, "shell");
 }
 // =============================================================================
 // CHARACTERIZATION: Recovery behavior
 // =============================================================================
 /// Parser should work correctly after reset
 #[test]
 fn test_parser_reset_clears_state() {
    let mut parser = StreamingToolParser::new();
    // First: process content with stuttering
    let content1 = r#"{"tool": "shell", "args": {"command": "ls"}}
 {"tool":
 {"tool": "shell", "args": {"command": "pwd"}}"#;
    let _tools1 = parser.process_chunk(&chunk(content1, true));
    // Reset for new message
    parser.reset();
    // Second message should work normally
    let content2 = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
    let tools2 = parser.process_chunk(&chunk(content2, true));
    assert_eq!(tools2.len(), 1);
    assert_eq!(tools2[0].tool, "read_file");
 }
 /// Incomplete tool call detection works
 #[test]
 fn test_incomplete_detection() {
    let mut parser = StreamingToolParser::new();
    // Incomplete fragment
    parser.process_chunk(&chunk("{\"tool\":", false));
    assert!(
        parser.has_incomplete_tool_call(),
        "Should detect incomplete tool call"
    );
 }
 // =============================================================================
 // CHARACTERIZATION: Multiple complete tool calls (no stuttering)
 // =============================================================================
 /// Multiple complete tool calls should all be found
 #[test]
 fn test_multiple_complete_tool_calls() {
    let mut parser = StreamingToolParser::new();
    let content = r#"{"tool": "shell", "args": {"command": "ls"}}
 {"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
    let tools = parser.process_chunk(&chunk(content, true));
    assert_eq!(tools.len(), 2, "Should find both tool calls");
    assert_eq!(tools[0].tool, "shell");
    assert_eq!(tools[1].tool, "read_file");
 }
 // =============================================================================
 // CHARACTERIZATION: Boundary conditions
 // =============================================================================
 /// Minimal stutter pattern with complete call first
 #[test]
 fn test_minimal_stutter_with_complete_first() {
    let mut parser = StreamingToolParser::new();
    // Complete call, then incomplete, then complete
    let content = r#"{"tool": "shell", "args": {}}
 {"tool":
 {"tool": "shell", "args": {}}"#;
    let tools = parser.process_chunk(&chunk(content, true));
    assert!(!tools.is_empty(), "Should find at least one complete tool call");
 }
 /// Stutter at chunk boundary - incomplete in one chunk, complete in next
 #[test]
 fn test_stutter_split_across_chunk_boundary() {
    let mut parser = StreamingToolParser::new();
    // First chunk: complete tool call
    let tools1 = parser.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "ls"}}"#,
        false,
    ));
    assert_eq!(tools1.len(), 1, "First complete tool call should be detected");
    // Mark as consumed
    parser.mark_tool_calls_consumed();
    // Second chunk: incomplete fragment
    parser.process_chunk(&chunk("\n{\"tool\":", false));
    // Third chunk: new complete tool call (finished)
    let tools3 = parser.process_chunk(&chunk(
        "\n{\"tool\": \"read_file\", \"args\": {\"file_path\": \"test.txt\"}}",
        true,
    ));
    // Should find the complete tool call at stream end
    assert!(!tools3.is_empty(), "Should find complete tool call at stream end");
 }