Merge sessions/hopper/297c7be9
This commit is contained in:
240
crates/g3-core/tests/streaming_parser_stuttering_test.rs
Normal file
240
crates/g3-core/tests/streaming_parser_stuttering_test.rs
Normal file
@@ -0,0 +1,240 @@
|
|||||||
|
//! Integration tests for streaming parser stuttering bug fix (fa3c920)
|
||||||
|
//!
|
||||||
|
//! BEHAVIOR PROTECTED:
|
||||||
|
//! When an LLM "stutters" and emits incomplete tool call fragments followed by
|
||||||
|
//! complete tool calls, the parser should:
|
||||||
|
//! 1. Not get stuck waiting for the incomplete fragment to complete
|
||||||
|
//! 2. Successfully parse complete tool calls that appear after the fragment
|
||||||
|
//!
|
||||||
|
//! SURFACE TARGETED:
|
||||||
|
//! StreamingToolParser - the public API for processing streaming chunks
|
||||||
|
//!
|
||||||
|
//! INTENTIONALLY NOT ASSERTED:
|
||||||
|
//! - Internal parser state transitions
|
||||||
|
//! - Specific invalidation mechanism details
|
||||||
|
//! - Order of internal operations
|
||||||
|
//! - Behavior of patterns that don't match the actual bug scenario
|
||||||
|
|
||||||
|
use g3_core::StreamingToolParser;
|
||||||
|
use g3_providers::CompletionChunk;
|
||||||
|
|
||||||
|
/// Helper to create a completion chunk
|
||||||
|
fn chunk(content: &str, finished: bool) -> CompletionChunk {
|
||||||
|
CompletionChunk {
|
||||||
|
content: content.to_string(),
|
||||||
|
finished,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
stop_reason: None,
|
||||||
|
tool_call_streaming: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// CHARACTERIZATION: The exact stuttering pattern from the bug report
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/// Test the exact pattern observed in butler session butler_c6ab59af2e4f991c
|
||||||
|
/// where the LLM emitted: complete -> incomplete fragment -> complete
|
||||||
|
///
|
||||||
|
/// This is the critical bug fix test - before the fix, the parser would get
|
||||||
|
/// stuck on the incomplete fragment and return zero tool calls.
|
||||||
|
#[test]
|
||||||
|
fn test_stuttering_complete_incomplete_complete() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// This is the exact pattern that caused the bug:
|
||||||
|
// 1. Complete tool call
|
||||||
|
// 2. Incomplete fragment (just {"tool":)
|
||||||
|
// 3. Complete tool call again
|
||||||
|
let content = r#"{"tool": "shell", "args": {"command": "ls"}}
|
||||||
|
|
||||||
|
{"tool":
|
||||||
|
|
||||||
|
{"tool": "shell", "args": {"command": "pwd"}}"#;
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(content, true));
|
||||||
|
|
||||||
|
// CRITICAL: We must get at least one valid tool call
|
||||||
|
// Before the fix, the parser would get stuck on the incomplete fragment
|
||||||
|
// and return zero tool calls
|
||||||
|
assert!(
|
||||||
|
!tools.is_empty(),
|
||||||
|
"Parser must not get stuck on incomplete fragment. Expected tool calls, got none."
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify we got valid tool calls (at least one should be "shell")
|
||||||
|
assert!(
|
||||||
|
tools.iter().any(|t| t.tool == "shell"),
|
||||||
|
"Expected at least one 'shell' tool call"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Verify the parser finds at least one complete tool call even with stuttering
|
||||||
|
#[test]
|
||||||
|
fn test_stuttering_finds_at_least_one_complete_call() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// Complete -> incomplete -> complete with different commands
|
||||||
|
let content = r#"{"tool": "shell", "args": {"command": "first"}}
|
||||||
|
|
||||||
|
{"tool":
|
||||||
|
|
||||||
|
{"tool": "shell", "args": {"command": "second"}}"#;
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(content, true));
|
||||||
|
|
||||||
|
// CHARACTERIZATION: The parser finds at least one complete tool call.
|
||||||
|
// The exact number depends on implementation details (streaming vs batch parsing).
|
||||||
|
// The critical behavior is that it doesn't return zero (the original bug).
|
||||||
|
assert!(
|
||||||
|
!tools.is_empty(),
|
||||||
|
"Expected at least 1 tool call, got none"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// CHARACTERIZATION: Edge cases that should NOT trigger invalidation
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/// Tool call patterns inside JSON strings should not cause invalidation
|
||||||
|
#[test]
|
||||||
|
fn test_tool_pattern_in_string_value_not_invalidated() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// Writing example code that contains a tool call pattern
|
||||||
|
let content = r#"{"tool": "write_file", "args": {"file_path": "example.md", "content": "Example:\n{\"tool\": \"shell\"}"}}"#;
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(content, true));
|
||||||
|
|
||||||
|
// Should parse the outer tool call correctly
|
||||||
|
assert_eq!(tools.len(), 1);
|
||||||
|
assert_eq!(tools[0].tool, "write_file");
|
||||||
|
// The inner pattern should be part of the content, not a separate tool call
|
||||||
|
assert!(tools[0].args["content"]
|
||||||
|
.as_str()
|
||||||
|
.unwrap()
|
||||||
|
.contains("{\"tool\""));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Nested JSON objects should not trigger false invalidation
|
||||||
|
#[test]
|
||||||
|
fn test_nested_json_not_invalidated() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// Tool call with nested JSON in args
|
||||||
|
let content = r#"{"tool": "shell", "args": {"command": "echo '{\"nested\": true}'"}}"#;
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(content, true));
|
||||||
|
|
||||||
|
assert_eq!(tools.len(), 1);
|
||||||
|
assert_eq!(tools[0].tool, "shell");
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// CHARACTERIZATION: Recovery behavior
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/// Parser should work correctly after reset
|
||||||
|
#[test]
|
||||||
|
fn test_parser_reset_clears_state() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// First: process content with stuttering
|
||||||
|
let content1 = r#"{"tool": "shell", "args": {"command": "ls"}}
|
||||||
|
|
||||||
|
{"tool":
|
||||||
|
|
||||||
|
{"tool": "shell", "args": {"command": "pwd"}}"#;
|
||||||
|
let _tools1 = parser.process_chunk(&chunk(content1, true));
|
||||||
|
|
||||||
|
// Reset for new message
|
||||||
|
parser.reset();
|
||||||
|
|
||||||
|
// Second message should work normally
|
||||||
|
let content2 = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
|
||||||
|
let tools2 = parser.process_chunk(&chunk(content2, true));
|
||||||
|
|
||||||
|
assert_eq!(tools2.len(), 1);
|
||||||
|
assert_eq!(tools2[0].tool, "read_file");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Incomplete tool call detection works
|
||||||
|
#[test]
|
||||||
|
fn test_incomplete_detection() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// Incomplete fragment
|
||||||
|
parser.process_chunk(&chunk("{\"tool\":", false));
|
||||||
|
assert!(
|
||||||
|
parser.has_incomplete_tool_call(),
|
||||||
|
"Should detect incomplete tool call"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// CHARACTERIZATION: Multiple complete tool calls (no stuttering)
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/// Multiple complete tool calls should all be found
|
||||||
|
#[test]
|
||||||
|
fn test_multiple_complete_tool_calls() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
let content = r#"{"tool": "shell", "args": {"command": "ls"}}
|
||||||
|
|
||||||
|
{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(content, true));
|
||||||
|
|
||||||
|
assert_eq!(tools.len(), 2, "Should find both tool calls");
|
||||||
|
assert_eq!(tools[0].tool, "shell");
|
||||||
|
assert_eq!(tools[1].tool, "read_file");
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// CHARACTERIZATION: Boundary conditions
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/// Minimal stutter pattern with complete call first
|
||||||
|
#[test]
|
||||||
|
fn test_minimal_stutter_with_complete_first() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// Complete call, then incomplete, then complete
|
||||||
|
let content = r#"{"tool": "shell", "args": {}}
|
||||||
|
{"tool":
|
||||||
|
{"tool": "shell", "args": {}}"#;
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(content, true));
|
||||||
|
|
||||||
|
assert!(!tools.is_empty(), "Should find at least one complete tool call");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stutter at chunk boundary - incomplete in one chunk, complete in next
|
||||||
|
#[test]
|
||||||
|
fn test_stutter_split_across_chunk_boundary() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// First chunk: complete tool call
|
||||||
|
let tools1 = parser.process_chunk(&chunk(
|
||||||
|
r#"{"tool": "shell", "args": {"command": "ls"}}"#,
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
assert_eq!(tools1.len(), 1, "First complete tool call should be detected");
|
||||||
|
|
||||||
|
// Mark as consumed
|
||||||
|
parser.mark_tool_calls_consumed();
|
||||||
|
|
||||||
|
// Second chunk: incomplete fragment
|
||||||
|
parser.process_chunk(&chunk("\n{\"tool\":", false));
|
||||||
|
|
||||||
|
// Third chunk: new complete tool call (finished)
|
||||||
|
let tools3 = parser.process_chunk(&chunk(
|
||||||
|
"\n{\"tool\": \"read_file\", \"args\": {\"file_path\": \"test.txt\"}}",
|
||||||
|
true,
|
||||||
|
));
|
||||||
|
|
||||||
|
// Should find the complete tool call at stream end
|
||||||
|
assert!(!tools3.is_empty(), "Should find complete tool call at stream end");
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user