fix: prevent parser poisoning from inline tool-call JSON patterns

The streaming parser was incorrectly detecting tool call patterns that
appeared inline in prose (e.g., when explaining the format), causing
g3 to return control mid-task.

Fix: Modified find_first_tool_call_start() and find_last_tool_call_start()
to only recognize patterns that appear on their own line (at start of
buffer or after newline with only whitespace before the pattern).

Changes:
- Added is_on_own_line() helper to check line-boundary conditions
- Updated detection methods to skip inline patterns
- Removed sanitize_inline_tool_patterns() and LBRACE_HOMOGLYPH (no longer needed)
- Rewrote tests for new behavior
- Added streaming_repro tests that use process_chunk() to verify the exact bug scenario

28 tests covering: streaming repro, line boundaries, Unicode, code contexts, edge cases
This commit is contained in:
Dhanji R. Prasanna
2026-01-15 08:54:47 +05:30
parent 616e0898c7
commit 999ac6fe66
4 changed files with 390 additions and 240 deletions

View File

@@ -1,21 +1,24 @@
//! Parser Sanitization Edge Case Tests
//! Parser Line-Boundary Detection Tests
//!
//! CHARACTERIZATION: These tests verify edge cases for the inline tool pattern
//! sanitization that prevents parser poisoning.
//! CHARACTERIZATION: These tests verify that tool call patterns are only detected
//! when they appear on their own line (at start of text or after a newline with
//! only whitespace before the pattern).
//!
//! What these tests protect:
//! - Tool call patterns in various contexts (code blocks, quotes, etc.)
//! - Tool call patterns in various contexts (code blocks, quotes, etc.) are IGNORED
//! - Tool calls on their own line are DETECTED
//! - Edge cases at line boundaries
//! - Unicode handling in sanitization
//! - Unicode handling
//!
//! What these tests intentionally do NOT assert:
//! - Internal parser state
//! - Exact sanitization implementation
//! - Exact detection implementation
//!
//! Related commits:
//! - 4c36cc0: fix: prevent parser poisoning from inline tool-call JSON patterns
//! - Original: 4c36cc0: fix: prevent parser poisoning from inline tool-call JSON patterns
//! - Updated: Line-boundary detection instead of sanitization
use g3_core::streaming_parser::sanitize_inline_tool_patterns;
use g3_core::StreamingToolParser;
// =============================================================================
// Test: Code block contexts
@@ -24,35 +27,35 @@ use g3_core::streaming_parser::sanitize_inline_tool_patterns;
mod code_block_contexts {
use super::*;
/// Test tool pattern in markdown inline code
/// Test tool pattern in markdown inline code - should be IGNORED
#[test]
fn test_inline_code_backticks() {
let input = "Use `{\"tool\": \"shell\"}` to run commands";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// Should be sanitized since it's inline
assert!(!result.contains("{\"tool\":"), "Inline code should be sanitized");
// Should be ignored since it's inline
assert!(result.is_none(), "Inline code should be ignored");
}
/// Test tool pattern after code fence (should NOT be sanitized)
/// Test tool pattern after code fence (should be DETECTED - it's on its own line)
#[test]
fn test_after_code_fence_standalone() {
// Tool call on its own line after a code fence marker
let input = "```\n{\"tool\": \"shell\", \"args\": {}}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// The tool call is on its own line, should NOT be sanitized
let lines: Vec<&str> = result.lines().collect();
assert!(lines[1].starts_with("{\"tool\":"), "Standalone after fence should not be sanitized");
// The tool call is on its own line, should be detected
assert!(result.is_some(), "Standalone after fence should be detected");
assert_eq!(result.unwrap(), 4, "Should be at position 4 (after ```\\n)");
}
/// Test tool pattern in prose explanation
/// Test tool pattern in prose explanation - should be IGNORED
#[test]
fn test_prose_explanation() {
let input = "The format is {\"tool\": \"name\", \"args\": {...}} where name is the tool";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
assert!(!result.contains("{\"tool\":"), "Prose should be sanitized");
assert!(result.is_none(), "Prose should be ignored");
}
}
@@ -67,40 +70,43 @@ mod line_boundary_cases {
#[test]
fn test_empty_lines_before_tool_call() {
let input = "\n\n{\"tool\": \"shell\", \"args\": {}}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// Tool call is on its own line (after empty lines), should NOT be sanitized
assert!(result.contains("{\"tool\":"), "Standalone after empty lines should not be sanitized");
// Tool call is on its own line (after empty lines), should be detected
assert!(result.is_some(), "Standalone after empty lines should be detected");
assert_eq!(result.unwrap(), 2, "Should be at position 2 (after two newlines)");
}
/// Test whitespace-only lines
#[test]
fn test_whitespace_only_lines() {
let input = " \n \n{\"tool\": \"shell\", \"args\": {}}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// Tool call is on its own line, should NOT be sanitized
assert!(result.contains("{\"tool\":"), "Standalone after whitespace lines should not be sanitized");
// Tool call is on its own line, should be detected
assert!(result.is_some(), "Standalone after whitespace lines should be detected");
}
/// Test tool call with leading whitespace (indented)
#[test]
fn test_indented_tool_call() {
let input = " {\"tool\": \"shell\", \"args\": {}}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// Indented but on its own line, should NOT be sanitized
assert!(result.contains("{\"tool\":"), "Indented standalone should not be sanitized");
// Indented but on its own line, should be detected
assert!(result.is_some(), "Indented standalone should be detected");
assert_eq!(result.unwrap(), 4, "Should be at position 4 (after 4 spaces)");
}
/// Test tool call with tabs
#[test]
fn test_tab_indented_tool_call() {
let input = "\t{\"tool\": \"shell\", \"args\": {}}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// Tab-indented but on its own line, should NOT be sanitized
assert!(result.contains("{\"tool\":"), "Tab-indented standalone should not be sanitized");
// Tab-indented but on its own line, should be detected
assert!(result.is_some(), "Tab-indented standalone should be detected");
assert_eq!(result.unwrap(), 1, "Should be at position 1 (after tab)");
}
}
@@ -111,44 +117,55 @@ mod line_boundary_cases {
mod unicode_handling {
use super::*;
/// Test tool pattern after emoji
/// Test tool pattern after emoji - should be IGNORED (inline)
#[test]
fn test_after_emoji() {
let input = "🔧 {\"tool\": \"shell\"}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// Emoji before means it's inline, should be sanitized
assert!(!result.contains("{\"tool\":"), "After emoji should be sanitized");
// Emoji before means it's inline, should be ignored
assert!(result.is_none(), "After emoji should be ignored");
}
/// Test tool pattern after bullet point
/// Test tool pattern after bullet point - should be IGNORED (inline)
#[test]
fn test_after_bullet() {
let input = "• {\"tool\": \"shell\"}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// Bullet before means it's inline, should be sanitized
assert!(!result.contains("{\"tool\":"), "After bullet should be sanitized");
// Bullet before means it's inline, should be ignored
assert!(result.is_none(), "After bullet should be ignored");
}
/// Test tool pattern after CJK text
/// Test tool pattern after CJK text - should be IGNORED (inline)
#[test]
fn test_after_cjk() {
let input = "使用 {\"tool\": \"shell\"} 命令";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// CJK text before means it's inline, should be sanitized
assert!(!result.contains("{\"tool\":"), "After CJK should be sanitized");
// CJK text before means it's inline, should be ignored
assert!(result.is_none(), "After CJK should be ignored");
}
/// Test tool pattern with Unicode in args (should still detect pattern)
/// Test tool pattern with Unicode in args on its own line - should be DETECTED
#[test]
fn test_unicode_in_args() {
let input = "Example: {\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}";
let result = sanitize_inline_tool_patterns(input);
fn test_unicode_in_args_standalone() {
let input = "{\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}";
let result = StreamingToolParser::find_first_tool_call_start(input);
// Should be sanitized (inline)
assert!(!result.contains("{\"tool\":"), "Unicode in args should still be detected");
// Standalone, should be detected
assert!(result.is_some(), "Unicode in args standalone should be detected");
assert_eq!(result.unwrap(), 0, "Should be at position 0");
}
/// A tool pattern carrying non-ASCII args must be IGNORED when it is inline:
/// here the `Example: ` prefix shares the line with the pattern.
#[test]
fn test_unicode_in_args_inline() {
    let prose_with_pattern =
        "Example: {\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}";
    let detected = StreamingToolParser::find_first_tool_call_start(prose_with_pattern);

    // Text precedes the pattern on the same line, so no start position is expected.
    assert!(detected.is_none(), "Unicode in args inline should be ignored");
}
}
@@ -159,37 +176,35 @@ mod unicode_handling {
mod multiple_patterns {
use super::*;
/// Test three tool patterns on one line
/// Test three tool patterns on one line - all should be IGNORED
#[test]
fn test_three_patterns() {
let input = "Compare {\"tool\": \"a\"} vs {\"tool\": \"b\"} vs {\"tool\": \"c\"}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
// All should be sanitized
assert!(!result.contains("{\"tool\":"), "All three should be sanitized");
// All are inline, should be ignored
assert!(result.is_none(), "All three inline should be ignored");
}
/// Test mixed: one standalone, one inline
/// Test mixed: one inline (ignored), one standalone (detected)
#[test]
fn test_mixed_standalone_and_inline() {
let input = "Text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
let lines: Vec<&str> = result.lines().collect();
// First line should have sanitized pattern
assert!(!lines[0].contains("{\"tool\":"), "Inline should be sanitized");
// Second line should NOT be sanitized (standalone)
assert!(lines[1].starts_with("{\"tool\":"), "Standalone should not be sanitized");
// Should find the standalone one, not the inline one
assert!(result.is_some(), "Should find standalone");
// The standalone one starts after the newline
let newline_pos = input.find('\n').unwrap();
assert_eq!(result.unwrap(), newline_pos + 1, "Should find standalone after newline");
}
}
// =============================================================================
// Test: Edge cases that should NOT trigger sanitization
// Test: Edge cases that should NOT be detected (not tool patterns)
// =============================================================================
mod no_sanitization_cases {
mod no_detection_cases {
use super::*;
/// Test similar but not matching patterns
@@ -203,8 +218,8 @@ mod no_sanitization_cases {
];
for input in inputs {
let result = sanitize_inline_tool_patterns(input);
assert_eq!(result, input, "'{}' should not be modified", input);
let result = StreamingToolParser::find_first_tool_call_start(input);
assert!(result.is_none(), "'{}' should not be detected", input);
}
}
@@ -218,8 +233,8 @@ mod no_sanitization_cases {
];
for input in inputs {
let result = sanitize_inline_tool_patterns(input);
assert_eq!(result, input, "'{}' should not be modified", input);
let result = StreamingToolParser::find_first_tool_call_start(input);
assert!(result.is_none(), "'{}' should not be detected", input);
}
}
@@ -227,8 +242,8 @@ mod no_sanitization_cases {
#[test]
fn test_tool_as_value() {
let input = "{\"name\": \"tool\"}";
let result = sanitize_inline_tool_patterns(input);
assert_eq!(result, input, "'tool' as value should not trigger sanitization");
let result = StreamingToolParser::find_first_tool_call_start(input);
assert!(result.is_none(), "'tool' as value should not trigger detection");
}
}
@@ -249,22 +264,170 @@ For example:
This will execute the command."#;
let result = sanitize_inline_tool_patterns(input);
let lines: Vec<&str> = result.lines().collect();
let result = StreamingToolParser::find_first_tool_call_start(input);
// First line has inline pattern - should be sanitized
assert!(!lines[0].contains("{\"tool\":"), "Inline in docs should be sanitized");
// Should find the standalone example, not the inline one
assert!(result.is_some(), "Should find standalone example");
// The standalone example should NOT be sanitized
assert!(lines[3].starts_with("{\"tool\":"), "Standalone example should not be sanitized");
// The standalone example is on line 4 (0-indexed line 3).
// Rather than pin its exact byte offset, only check that the match lies
// well past the inline pattern on the first line (noted as position 33).
assert!(result.unwrap() > 50, "Should skip inline and find standalone");
}
/// Test code example in prose
/// Test code example in prose - should be IGNORED
#[test]
fn test_code_in_prose() {
let input = "The agent responds with {\"tool\": \"read_file\"} when it needs to read files.";
let result = sanitize_inline_tool_patterns(input);
let result = StreamingToolParser::find_first_tool_call_start(input);
assert!(!result.contains("{\"tool\":"), "Code in prose should be sanitized");
assert!(result.is_none(), "Code in prose should be ignored");
}
/// Bug scenario: the LLM describes the tool format mid-sentence, then emits a
/// real tool call on a later line. Only the later, standalone call may match.
#[test]
fn test_llm_explanation_scenario() {
    let transcript = r#"I'll use the shell tool. The format is {"tool": "shell", "args": {...}}.
{"tool": "shell", "args": {"command": "ls -la"}}"#;
    let detected = StreamingToolParser::find_first_tool_call_start(transcript);

    // The explanation on the first line is inline and must be skipped,
    // yet a match is still expected because of the standalone call.
    assert!(detected.is_some(), "Should find actual tool call");

    // The real call begins immediately after the final newline of the input.
    let expected_start = transcript.rfind('\n').unwrap() + 1;
    assert_eq!(detected.unwrap(), expected_start, "Should find tool call on last line");
}
}
// =============================================================================
// Test: is_on_own_line helper function
// =============================================================================
/// Tests for `StreamingToolParser::is_on_own_line(text, pos)`, where `pos` is
/// an index into `text` (all fixtures here are ASCII, so byte and char
/// indices coincide).
mod is_on_own_line_tests {
    use super::*;

    /// Index 0 has nothing before it, so it counts as the start of a line.
    #[test]
    fn test_position_zero() {
        assert!(StreamingToolParser::is_on_own_line("anything", 0));
    }

    /// A position directly after a newline is on its own line.
    #[test]
    fn test_after_newline_no_whitespace() {
        let text = "line1\nline2";
        assert!(StreamingToolParser::is_on_own_line(text, 6)); // position of 'l' in line2
    }

    /// Only whitespace between the newline and the position still counts as
    /// being on its own line.
    #[test]
    fn test_after_newline_with_whitespace() {
        // "line1\n" is 6 bytes and the indent is 2 spaces, so 'i' sits at
        // index 8. With a single-space indent index 8 would be 'n', which has
        // a non-whitespace character before it on the same line.
        let text = "line1\n  indented";
        assert!(StreamingToolParser::is_on_own_line(text, 8)); // position of 'i' in indented
    }

    /// A position preceded by other words on the same line is inline.
    #[test]
    fn test_middle_of_line() {
        let text = "some text here";
        assert!(!StreamingToolParser::is_on_own_line(text, 5)); // position of 't' in text
    }

    /// A position glued directly onto preceding non-whitespace is inline.
    #[test]
    fn test_after_non_whitespace() {
        let text = "prefix{";
        assert!(!StreamingToolParser::is_on_own_line(text, 6)); // position of '{'
    }
}
// =============================================================================
// Test: End-to-end streaming repro of the parser poisoning bug
// =============================================================================
/// Streaming reproductions of the parser-poisoning bug, driven through
/// `StreamingToolParser::process_chunk` one chunk at a time.
mod streaming_repro {
    use super::*;
    use g3_providers::CompletionChunk;

    /// Builds a text-only `CompletionChunk`: the given content and `finished`
    /// flag, with `tool_calls`, `usage` and `stop_reason` all left as `None`.
    fn chunk(content: &str, finished: bool) -> CompletionChunk {
        CompletionChunk {
            finished,
            content: String::from(content),
            usage: None,
            stop_reason: None,
            tool_calls: None,
        }
    }

    /// EXACT REPRO: the LLM explains the tool format inline and only then
    /// emits the real tool call. Before the fix the inline pattern was picked
    /// up as a tool call, which returned control prematurely.
    #[test]
    fn test_inline_explanation_does_not_trigger_tool_detection() {
        let mut parser = StreamingToolParser::new();

        // Prose streamed ahead of the real call, each piece paired with the
        // message reported if a tool call is (wrongly) detected after it.
        let prose_chunks = [
            (
                "I'll help you with that. The tool call format is ",
                "No tool call yet",
            ),
            // THIS IS THE BUG: an inline JSON pattern inside the explanation.
            // Before fix: detected as a tool call. After fix: ignored, since
            // it does not sit on its own line.
            (
                r#"{"tool": "shell", "args": {...}}"#,
                "Inline pattern should NOT trigger tool detection",
            ),
            (" where you specify the command.\n\n", "Still no tool call"),
        ];
        for &(text, failure_message) in prose_chunks.iter() {
            let detected = parser.process_chunk(&chunk(text, false));
            assert!(detected.is_empty(), "{}", failure_message);
        }

        // Now the real tool call arrives on its own line and ends the stream.
        let real_call = r#"{"tool": "shell", "args": {"command": "ls -la"}}"#;
        let detected = parser.process_chunk(&chunk(real_call, true));

        // Exactly ONE tool call is expected: the real one.
        assert_eq!(detected.len(), 1, "Should detect exactly one tool call");
        let call = &detected[0];
        assert_eq!(call.tool, "shell");
        assert_eq!(call.args["command"], "ls -la");
    }

    /// Several inline patterns delivered in one finished chunk are all ignored.
    #[test]
    fn test_multiple_inline_patterns_in_chunk_ignored() {
        let mut parser = StreamingToolParser::new();
        let comparison = r#"Compare {"tool": "a"} with {"tool": "b"} and {"tool": "c"}"#;

        let detected = parser.process_chunk(&chunk(comparison, true));
        assert!(detected.is_empty(), "All inline patterns should be ignored");
    }

    /// A tool call that starts on its own line but is split over two chunks is
    /// reported only once the second chunk completes it.
    #[test]
    fn test_tool_call_split_across_chunks() {
        let mut parser = StreamingToolParser::new();

        // Chunk 1: prose, a newline, then just the opening of the tool call.
        let opening = chunk("Here's the command:\n{\"tool\": ", false);
        let after_opening = parser.process_chunk(&opening);
        assert!(after_opening.is_empty(), "Incomplete tool call");

        // Chunk 2: the remainder of the JSON, marking the stream finished.
        let remainder = chunk(r#""shell", "args": {"command": "pwd"}}"#, true);
        let after_remainder = parser.process_chunk(&remainder);
        assert_eq!(after_remainder.len(), 1, "Should detect the complete tool call");
        assert_eq!(after_remainder[0].tool, "shell");
    }
}

View File

@@ -668,46 +668,46 @@ mod streaming_utilities_characterization {
}
// =============================================================================
// Characterization Tests: Parser Sanitization
// Characterization Tests: Parser Line-Boundary Detection
// =============================================================================
mod parser_sanitization_characterization {
use g3_core::{sanitize_inline_tool_patterns, LBRACE_HOMOGLYPH};
mod parser_line_boundary_characterization {
use g3_core::StreamingToolParser;
/// CHARACTERIZATION: Standalone tool calls are not sanitized
/// CHARACTERIZATION: Standalone tool calls at start of text are detected
#[test]
fn standalone_tool_calls_preserved() {
fn standalone_tool_calls_detected() {
let input = r#"{"tool": "shell", "args": {}}"#;
let output = sanitize_inline_tool_patterns(input);
assert_eq!(output, input, "Standalone tool call should be preserved");
let pos = StreamingToolParser::find_first_tool_call_start(input);
assert!(pos.is_some(), "Standalone tool call should be detected");
assert_eq!(pos.unwrap(), 0, "Should be at position 0");
}
/// CHARACTERIZATION: Inline tool patterns are sanitized
/// CHARACTERIZATION: Inline tool patterns are ignored (not detected)
#[test]
fn inline_patterns_sanitized() {
fn inline_patterns_ignored() {
let input = r#"Example: {"tool": "shell"} in text"#;
let output = sanitize_inline_tool_patterns(input);
let pos = StreamingToolParser::find_first_tool_call_start(input);
assert!(
output.contains(LBRACE_HOMOGLYPH),
"Inline pattern should be sanitized: {}",
output
);
assert!(
!output.starts_with('{'),
"Should not start with regular brace"
pos.is_none(),
"Inline pattern should be ignored, but found at {:?}",
pos
);
}
/// CHARACTERIZATION: Tool call on its own line is preserved
/// CHARACTERIZATION: Tool call on its own line (after newline) is detected
#[test]
fn tool_call_on_own_line_preserved() {
fn tool_call_on_own_line_detected() {
let input = "Some text\n{\"tool\": \"shell\"}\nMore text";
let output = sanitize_inline_tool_patterns(input);
// The tool call line should be preserved
let pos = StreamingToolParser::find_first_tool_call_start(input);
assert!(
output.contains("{\"tool\""),
"Tool call on own line should be preserved: {}",
output
pos.is_some(),
"Tool call on own line should be detected"
);
// Should find it after the newline (position 10 = len("Some text\n"))
assert_eq!(
pos.unwrap(), 10,
"Should find tool call at position after newline"
);
}
}