fix: prevent parser poisoning from inline tool-call JSON patterns

The streaming parser was incorrectly detecting tool call patterns that appeared inline in prose (e.g., when explaining the format), causing g3 to return control mid-task. Fix: Modified find_first_tool_call_start() and find_last_tool_call_start() to only recognize patterns that appear on their own line (at start of buffer or after newline with only whitespace before the pattern). Changes: - Added is_on_own_line() helper to check line-boundary conditions - Updated detection methods to skip inline patterns - Removed sanitize_inline_tool_patterns() and LBRACE_HOMOGLYPH (no longer needed) - Rewrote tests for new behavior - Added streaming_repro tests that use process_chunk() to verify the exact bug scenario 28 tests covering: streaming repro, line boundaries, Unicode, code contexts, edge cases
2026-01-15 08:54:47 +05:30
parent 616e0898c7
commit 999ac6fe66
4 changed files with 390 additions and 240 deletions
--- a/crates/g3-core/tests/parser_sanitization_test.rs
+++ b/crates/g3-core/tests/parser_sanitization_test.rs
@@ -1,21 +1,24 @@
-//! Parser Sanitization Edge Case Tests
+//! Parser Line-Boundary Detection Tests
 //!
-//! CHARACTERIZATION: These tests verify edge cases for the inline tool pattern
-//! sanitization that prevents parser poisoning.
+//! CHARACTERIZATION: These tests verify that tool call patterns are only detected
+//! when they appear on their own line (at start of text or after a newline with
+//! only whitespace before the pattern).
 //!
 //! What these tests protect:
-//! - Tool call patterns in various contexts (code blocks, quotes, etc.)
+//! - Tool call patterns in various contexts (code blocks, quotes, etc.) are IGNORED
+//! - Tool calls on their own line are DETECTED
 //! - Edge cases at line boundaries
-//! - Unicode handling in sanitization
+//! - Unicode handling
 //!
 //! What these tests intentionally do NOT assert:
 //! - Internal parser state
-//! - Exact sanitization implementation
+//! - Exact detection implementation
 //!
 //! Related commits:
-//! - 4c36cc0: fix: prevent parser poisoning from inline tool-call JSON patterns
+//! - Original: 4c36cc0: fix: prevent parser poisoning from inline tool-call JSON patterns
+//! - Updated: Line-boundary detection instead of sanitization

-use g3_core::streaming_parser::sanitize_inline_tool_patterns;
+use g3_core::StreamingToolParser;

 // =============================================================================
 // Test: Code block contexts
@@ -24,35 +27,35 @@ use g3_core::streaming_parser::sanitize_inline_tool_patterns;
 mod code_block_contexts {
    use super::*;

-    /// Test tool pattern in markdown inline code
+    /// Test tool pattern in markdown inline code - should be IGNORED
    #[test]
    fn test_inline_code_backticks() {
        let input = "Use `{\"tool\": \"shell\"}` to run commands";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // Should be sanitized since it's inline
-        assert!(!result.contains("{\"tool\":"), "Inline code should be sanitized");
+        // Should be ignored since it's inline
+        assert!(result.is_none(), "Inline code should be ignored");
    }

-    /// Test tool pattern after code fence (should NOT be sanitized)
+    /// Test tool pattern after code fence (should be DETECTED - it's on its own line)
    #[test]
    fn test_after_code_fence_standalone() {
        // Tool call on its own line after a code fence marker
        let input = "```\n{\"tool\": \"shell\", \"args\": {}}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // The tool call is on its own line, should NOT be sanitized
-        let lines: Vec<&str> = result.lines().collect();
-        assert!(lines[1].starts_with("{\"tool\":"), "Standalone after fence should not be sanitized");
+        // The tool call is on its own line, should be detected
+        assert!(result.is_some(), "Standalone after fence should be detected");
+        assert_eq!(result.unwrap(), 4, "Should be at position 4 (after ```\\n)");
    }

-    /// Test tool pattern in prose explanation
+    /// Test tool pattern in prose explanation - should be IGNORED
    #[test]
    fn test_prose_explanation() {
        let input = "The format is {\"tool\": \"name\", \"args\": {...}} where name is the tool";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        assert!(!result.contains("{\"tool\":"), "Prose should be sanitized");
+        assert!(result.is_none(), "Prose should be ignored");
    }
 }

@@ -67,40 +70,43 @@ mod line_boundary_cases {
    #[test]
    fn test_empty_lines_before_tool_call() {
        let input = "\n\n{\"tool\": \"shell\", \"args\": {}}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // Tool call is on its own line (after empty lines), should NOT be sanitized
-        assert!(result.contains("{\"tool\":"), "Standalone after empty lines should not be sanitized");
+        // Tool call is on its own line (after empty lines), should be detected
+        assert!(result.is_some(), "Standalone after empty lines should be detected");
+        assert_eq!(result.unwrap(), 2, "Should be at position 2 (after two newlines)");
    }

    /// Test whitespace-only lines
    #[test]
    fn test_whitespace_only_lines() {
        let input = "   \n  \n{\"tool\": \"shell\", \"args\": {}}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // Tool call is on its own line, should NOT be sanitized
-        assert!(result.contains("{\"tool\":"), "Standalone after whitespace lines should not be sanitized");
+        // Tool call is on its own line, should be detected
+        assert!(result.is_some(), "Standalone after whitespace lines should be detected");
    }

    /// Test tool call with leading whitespace (indented)
    #[test]
    fn test_indented_tool_call() {
        let input = "    {\"tool\": \"shell\", \"args\": {}}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // Indented but on its own line, should NOT be sanitized
-        assert!(result.contains("{\"tool\":"), "Indented standalone should not be sanitized");
+        // Indented but on its own line, should be detected
+        assert!(result.is_some(), "Indented standalone should be detected");
+        assert_eq!(result.unwrap(), 4, "Should be at position 4 (after 4 spaces)");
    }

    /// Test tool call with tabs
    #[test]
    fn test_tab_indented_tool_call() {
        let input = "\t{\"tool\": \"shell\", \"args\": {}}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // Tab-indented but on its own line, should NOT be sanitized
-        assert!(result.contains("{\"tool\":"), "Tab-indented standalone should not be sanitized");
+        // Tab-indented but on its own line, should be detected
+        assert!(result.is_some(), "Tab-indented standalone should be detected");
+        assert_eq!(result.unwrap(), 1, "Should be at position 1 (after tab)");
    }
 }

@@ -111,44 +117,55 @@ mod line_boundary_cases {
 mod unicode_handling {
    use super::*;

-    /// Test tool pattern after emoji
+    /// Test tool pattern after emoji - should be IGNORED (inline)
    #[test]
    fn test_after_emoji() {
        let input = "🔧 {\"tool\": \"shell\"}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // Emoji before means it's inline, should be sanitized
-        assert!(!result.contains("{\"tool\":"), "After emoji should be sanitized");
+        // Emoji before means it's inline, should be ignored
+        assert!(result.is_none(), "After emoji should be ignored");
    }

-    /// Test tool pattern after bullet point
+    /// Test tool pattern after bullet point - should be IGNORED (inline)
    #[test]
    fn test_after_bullet() {
        let input = "• {\"tool\": \"shell\"}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // Bullet before means it's inline, should be sanitized
-        assert!(!result.contains("{\"tool\":"), "After bullet should be sanitized");
+        // Bullet before means it's inline, should be ignored
+        assert!(result.is_none(), "After bullet should be ignored");
    }

-    /// Test tool pattern after CJK text
+    /// Test tool pattern after CJK text - should be IGNORED (inline)
    #[test]
    fn test_after_cjk() {
        let input = "使用 {\"tool\": \"shell\"} 命令";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // CJK text before means it's inline, should be sanitized
-        assert!(!result.contains("{\"tool\":"), "After CJK should be sanitized");
+        // CJK text before means it's inline, should be ignored
+        assert!(result.is_none(), "After CJK should be ignored");
    }

-    /// Test tool pattern with Unicode in args (should still detect pattern)
+    /// Test tool pattern with Unicode in args on its own line - should be DETECTED
    #[test]
-    fn test_unicode_in_args() {
-        let input = "Example: {\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}";
-        let result = sanitize_inline_tool_patterns(input);
+    fn test_unicode_in_args_standalone() {
+        let input = "{\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}";
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // Should be sanitized (inline)
-        assert!(!result.contains("{\"tool\":"), "Unicode in args should still be detected");
+        // Standalone, should be detected
+        assert!(result.is_some(), "Unicode in args standalone should be detected");
+        assert_eq!(result.unwrap(), 0, "Should be at position 0");
+    }
+
+    /// Test tool pattern with Unicode in args inline - should be IGNORED
+    #[test]
+    fn test_unicode_in_args_inline() {
+        let input = "Example: {\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}";
+        let result = StreamingToolParser::find_first_tool_call_start(input);
+        
+        // Inline, should be ignored
+        assert!(result.is_none(), "Unicode in args inline should be ignored");
    }
 }

@@ -159,37 +176,35 @@ mod unicode_handling {
 mod multiple_patterns {
    use super::*;

-    /// Test three tool patterns on one line
+    /// Test three tool patterns on one line - all should be IGNORED
    #[test]
    fn test_three_patterns() {
        let input = "Compare {\"tool\": \"a\"} vs {\"tool\": \"b\"} vs {\"tool\": \"c\"}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // All should be sanitized
-        assert!(!result.contains("{\"tool\":"), "All three should be sanitized");
+        // All are inline, should be ignored
+        assert!(result.is_none(), "All three inline should be ignored");
    }

-    /// Test mixed: one standalone, one inline
+    /// Test mixed: one inline (ignored), one standalone (detected)
    #[test]
    fn test_mixed_standalone_and_inline() {
        let input = "Text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        let lines: Vec<&str> = result.lines().collect();
-        
-        // First line should have sanitized pattern
-        assert!(!lines[0].contains("{\"tool\":"), "Inline should be sanitized");
-        
-        // Second line should NOT be sanitized (standalone)
-        assert!(lines[1].starts_with("{\"tool\":"), "Standalone should not be sanitized");
+        // Should find the standalone one, not the inline one
+        assert!(result.is_some(), "Should find standalone");
+        // The standalone one starts after the newline
+        let newline_pos = input.find('\n').unwrap();
+        assert_eq!(result.unwrap(), newline_pos + 1, "Should find standalone after newline");
    }
 }

 // =============================================================================
-// Test: Edge cases that should NOT trigger sanitization
+// Test: Edge cases that should NOT be detected (not tool patterns)
 // =============================================================================

-mod no_sanitization_cases {
+mod no_detection_cases {
    use super::*;

    /// Test similar but not matching patterns
@@ -203,8 +218,8 @@ mod no_sanitization_cases {
        ];
        
        for input in inputs {
-            let result = sanitize_inline_tool_patterns(input);
-            assert_eq!(result, input, "'{}' should not be modified", input);
+            let result = StreamingToolParser::find_first_tool_call_start(input);
+            assert!(result.is_none(), "'{}' should not be detected", input);
        }
    }

@@ -218,8 +233,8 @@ mod no_sanitization_cases {
        ];
        
        for input in inputs {
-            let result = sanitize_inline_tool_patterns(input);
-            assert_eq!(result, input, "'{}' should not be modified", input);
+            let result = StreamingToolParser::find_first_tool_call_start(input);
+            assert!(result.is_none(), "'{}' should not be detected", input);
        }
    }

@@ -227,8 +242,8 @@ mod no_sanitization_cases {
    #[test]
    fn test_tool_as_value() {
        let input = "{\"name\": \"tool\"}";
-        let result = sanitize_inline_tool_patterns(input);
-        assert_eq!(result, input, "'tool' as value should not trigger sanitization");
+        let result = StreamingToolParser::find_first_tool_call_start(input);
+        assert!(result.is_none(), "'tool' as value should not trigger detection");
    }
 }

@@ -249,22 +264,170 @@ For example:

 This will execute the command."#;
        
-        let result = sanitize_inline_tool_patterns(input);
-        let lines: Vec<&str> = result.lines().collect();
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        // First line has inline pattern - should be sanitized
-        assert!(!lines[0].contains("{\"tool\":"), "Inline in docs should be sanitized");
+        // Should find the standalone example, not the inline one
+        assert!(result.is_some(), "Should find standalone example");
        
-        // The standalone example should NOT be sanitized
-        assert!(lines[3].starts_with("{\"tool\":"), "Standalone example should not be sanitized");
+        // The standalone example is on line 4 (0-indexed line 3)
+        // "To call a tool...\n\nFor example:\n" = 64 + 1 + 13 + 1 = 79 chars before it
+        // Actually let's just verify it's NOT at position 33 (the inline one)
+        assert!(result.unwrap() > 50, "Should skip inline and find standalone");
    }

-    /// Test code example in prose
+    /// Test code example in prose - should be IGNORED
    #[test]
    fn test_code_in_prose() {
        let input = "The agent responds with {\"tool\": \"read_file\"} when it needs to read files.";
-        let result = sanitize_inline_tool_patterns(input);
+        let result = StreamingToolParser::find_first_tool_call_start(input);
        
-        assert!(!result.contains("{\"tool\":"), "Code in prose should be sanitized");
+        assert!(result.is_none(), "Code in prose should be ignored");
+    }
+
+    /// Test the exact scenario from the bug: LLM explaining tool format
+    #[test]
+    fn test_llm_explanation_scenario() {
+        let input = r#"I'll use the shell tool. The format is {"tool": "shell", "args": {...}}.
+
+{"tool": "shell", "args": {"command": "ls -la"}}"#;
+        
+        let result = StreamingToolParser::find_first_tool_call_start(input);
+        
+        // Should find the actual tool call, not the explanation
+        assert!(result.is_some(), "Should find actual tool call");
+        
+        // The actual tool call is on the last line, after two newlines
+        let last_newline = input.rfind('\n').unwrap();
+        assert_eq!(result.unwrap(), last_newline + 1, "Should find tool call on last line");
+    }
+}
+
+// =============================================================================
+// Test: is_on_own_line helper function
+// =============================================================================
+
+mod is_on_own_line_tests {
+    use super::*;
+
+    #[test]
+    fn test_position_zero() {
+        assert!(StreamingToolParser::is_on_own_line("anything", 0));
+    }
+
+    #[test]
+    fn test_after_newline_no_whitespace() {
+        let text = "line1\nline2";
+        assert!(StreamingToolParser::is_on_own_line(text, 6)); // position of 'l' in line2
+    }
+
+    #[test]
+    fn test_after_newline_with_whitespace() {
+        let text = "line1\n  indented";
+        assert!(StreamingToolParser::is_on_own_line(text, 8)); // position of 'i' in indented
+    }
+
+    #[test]
+    fn test_middle_of_line() {
+        let text = "some text here";
+        assert!(!StreamingToolParser::is_on_own_line(text, 5)); // position of 't' in text
+    }
+
+    #[test]
+    fn test_after_non_whitespace() {
+        let text = "prefix{";
+        assert!(!StreamingToolParser::is_on_own_line(text, 6)); // position of '{'
+    }
+}
+
+// =============================================================================
+// Test: End-to-end streaming repro of the parser poisoning bug
+// =============================================================================
+
+mod streaming_repro {
+    use super::*;
+    use g3_providers::CompletionChunk;
+
+    fn chunk(content: &str, finished: bool) -> CompletionChunk {
+        CompletionChunk {
+            content: content.to_string(),
+            finished,
+            tool_calls: None,
+            usage: None,
+            stop_reason: None,
+        }
+    }
+
+    /// EXACT REPRO: LLM explains tool format inline, then emits real tool call.
+    /// Before the fix, the parser would detect the inline pattern and try to
+    /// parse it as a tool call, causing premature return of control.
+    #[test]
+    fn test_inline_explanation_does_not_trigger_tool_detection() {
+        let mut parser = StreamingToolParser::new();
+
+        // Simulate streaming chunks as the LLM explains tool format
+        let tools = parser.process_chunk(&chunk(
+            "I'll help you with that. The tool call format is ",
+            false,
+        ));
+        assert!(tools.is_empty(), "No tool call yet");
+
+        // THIS IS THE BUG: inline JSON pattern in explanation
+        let tools = parser.process_chunk(&chunk(
+            r#"{"tool": "shell", "args": {...}}"#,
+            false,
+        ));
+        // Before fix: this would incorrectly detect a tool call
+        // After fix: this should be ignored (it's inline, not on its own line)
+        assert!(tools.is_empty(), "Inline pattern should NOT trigger tool detection");
+
+        // More explanation
+        let tools = parser.process_chunk(&chunk(
+            " where you specify the command.\n\n",
+            false,
+        ));
+        assert!(tools.is_empty(), "Still no tool call");
+
+        // NOW the real tool call on its own line
+        let tools = parser.process_chunk(&chunk(
+            r#"{"tool": "shell", "args": {"command": "ls -la"}}"#,
+            true,
+        ));
+        
+        // Should detect exactly ONE tool call - the real one
+        assert_eq!(tools.len(), 1, "Should detect exactly one tool call");
+        assert_eq!(tools[0].tool, "shell");
+        assert_eq!(tools[0].args["command"], "ls -la");
+    }
+
+    /// Test that multiple inline patterns in a single chunk are all ignored
+    #[test]
+    fn test_multiple_inline_patterns_in_chunk_ignored() {
+        let mut parser = StreamingToolParser::new();
+
+        let tools = parser.process_chunk(&chunk(
+            r#"Compare {"tool": "a"} with {"tool": "b"} and {"tool": "c"}"#,
+            true,
+        ));
+
+        assert!(tools.is_empty(), "All inline patterns should be ignored");
+    }
+
+    /// Test streaming where tool call arrives across multiple chunks
+    #[test]
+    fn test_tool_call_split_across_chunks() {
+        let mut parser = StreamingToolParser::new();
+
+        // First chunk: prose then start of tool call on new line
+        let tools = parser.process_chunk(&chunk("Here's the command:\n{\"tool\": ", false));
+        assert!(tools.is_empty(), "Incomplete tool call");
+
+        // Second chunk: rest of tool call
+        let tools = parser.process_chunk(&chunk(
+            r#""shell", "args": {"command": "pwd"}}"#,
+            true,
+        ));
+
+        assert_eq!(tools.len(), 1, "Should detect the complete tool call");
+        assert_eq!(tools[0].tool, "shell");
    }
 }
--- a/crates/g3-core/tests/stream_completion_characterization_test.rs
+++ b/crates/g3-core/tests/stream_completion_characterization_test.rs
@@ -668,46 +668,46 @@ mod streaming_utilities_characterization {
 }

 // =============================================================================
-// Characterization Tests: Parser Sanitization
+// Characterization Tests: Parser Line-Boundary Detection
 // =============================================================================

-mod parser_sanitization_characterization {
-    use g3_core::{sanitize_inline_tool_patterns, LBRACE_HOMOGLYPH};
+mod parser_line_boundary_characterization {
+    use g3_core::StreamingToolParser;

-    /// CHARACTERIZATION: Standalone tool calls are not sanitized
+    /// CHARACTERIZATION: Standalone tool calls at start of text are detected
    #[test]
-    fn standalone_tool_calls_preserved() {
+    fn standalone_tool_calls_detected() {
        let input = r#"{"tool": "shell", "args": {}}"#;
-        let output = sanitize_inline_tool_patterns(input);
-        assert_eq!(output, input, "Standalone tool call should be preserved");
+        let pos = StreamingToolParser::find_first_tool_call_start(input);
+        assert!(pos.is_some(), "Standalone tool call should be detected");
+        assert_eq!(pos.unwrap(), 0, "Should be at position 0");
    }

-    /// CHARACTERIZATION: Inline tool patterns are sanitized
+    /// CHARACTERIZATION: Inline tool patterns are ignored (not detected)
    #[test]
-    fn inline_patterns_sanitized() {
+    fn inline_patterns_ignored() {
        let input = r#"Example: {"tool": "shell"} in text"#;
-        let output = sanitize_inline_tool_patterns(input);
+        let pos = StreamingToolParser::find_first_tool_call_start(input);
        assert!(
-            output.contains(LBRACE_HOMOGLYPH),
-            "Inline pattern should be sanitized: {}",
-            output
-        );
-        assert!(
-            !output.starts_with('{'),
-            "Should not start with regular brace"
+            pos.is_none(),
+            "Inline pattern should be ignored, but found at {:?}",
+            pos
        );
    }

-    /// CHARACTERIZATION: Tool call on its own line is preserved
+    /// CHARACTERIZATION: Tool call on its own line (after newline) is detected
    #[test]
-    fn tool_call_on_own_line_preserved() {
+    fn tool_call_on_own_line_detected() {
        let input = "Some text\n{\"tool\": \"shell\"}\nMore text";
-        let output = sanitize_inline_tool_patterns(input);
-        // The tool call line should be preserved
+        let pos = StreamingToolParser::find_first_tool_call_start(input);
        assert!(
-            output.contains("{\"tool\""),
-            "Tool call on own line should be preserved: {}",
-            output
+            pos.is_some(),
+            "Tool call on own line should be detected"
+        );
+        // Should find it after the newline (position 10 = len("Some text\n"))
+        assert_eq!(
+            pos.unwrap(), 10,
+            "Should find tool call at position after newline"
        );
    }
 }