From 999ac6fe662a7f79e09eb210d27217c6d6be324b Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Thu, 15 Jan 2026 08:54:47 +0530 Subject: [PATCH] fix: prevent parser poisoning from inline tool-call JSON patterns The streaming parser was incorrectly detecting tool call patterns that appeared inline in prose (e.g., when explaining the format), causing g3 to return control mid-task. Fix: Modified find_first_tool_call_start() and find_last_tool_call_start() to only recognize patterns that appear on their own line (at start of buffer or after newline with only whitespace before the pattern). Changes: - Added is_on_own_line() helper to check line-boundary conditions - Updated detection methods to skip inline patterns - Removed sanitize_inline_tool_patterns() and LBRACE_HOMOGLYPH (no longer needed) - Rewrote tests for new behavior - Added streaming_repro tests that use process_chunk() to verify the exact bug scenario 28 tests covering: streaming repro, line boundaries, Unicode, code contexts, edge cases --- crates/g3-core/src/lib.rs | 2 +- crates/g3-core/src/streaming_parser.rs | 257 +++++++------- .../g3-core/tests/parser_sanitization_test.rs | 323 +++++++++++++----- ...stream_completion_characterization_test.rs | 48 +-- 4 files changed, 390 insertions(+), 240 deletions(-) diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 899f580..ffa7bfd 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -93,7 +93,7 @@ pub enum StreamState { } // Re-export StreamingToolParser from its own module -pub use streaming_parser::{sanitize_inline_tool_patterns, StreamingToolParser, LBRACE_HOMOGLYPH}; +pub use streaming_parser::StreamingToolParser; pub struct Agent { providers: ProviderRegistry, diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs index f3ffe68..5bf2819 100644 --- a/crates/g3-core/src/streaming_parser.rs +++ b/crates/g3-core/src/streaming_parser.rs @@ -2,6 +2,10 @@ //! //! This module handles parsing of tool calls from streaming LLM responses, //! supporting both native tool calls and JSON-based fallback parsing. +//! +//! **Important**: JSON tool calls are only recognized when they appear on their +//! own line (preceded by a newline or at the start of the buffer). This prevents +//! inline JSON examples in prose from being incorrectly parsed as tool calls. use tracing::debug; @@ -16,62 +20,6 @@ const TOOL_CALL_PATTERNS: [&str; 4] = [ r#"{ "tool" :"#, ]; -/// Unicode homoglyph for left curly brace, used to sanitize inline tool-call-like patterns. -/// This is the "FULLWIDTH LEFT CURLY BRACKET" (U+FF5B). -pub const LBRACE_HOMOGLYPH: char = '{'; - -/// Sanitize text content by replacing tool-call-like patterns that appear inline -/// (not on their own line) with homoglyphs to prevent parser poisoning. -/// -/// Real tool calls from LLMs always appear on their own line. When we see patterns -/// like `{"tool":` embedded within other text (e.g., in code examples, inline code, -/// or prose), we replace the opening brace with a homoglyph to prevent the streaming -/// parser from incorrectly entering JSON parsing mode. -/// -/// This function is called on each chunk of streamed content before it's added to -/// the text buffer. -pub fn sanitize_inline_tool_patterns(text: &str) -> String { - let mut result = String::with_capacity(text.len()); - let lines: Vec<&str> = text.split('\n').collect(); - - for (i, line) in lines.iter().enumerate() { - if i > 0 { - result.push('\n'); - } - - // Check if this line starts with a tool call pattern (after trimming whitespace) - let trimmed = line.trim_start(); - let is_standalone_tool_call = TOOL_CALL_PATTERNS.iter().any(|p| trimmed.starts_with(p)); - - if is_standalone_tool_call { - // This looks like a real tool call on its own line - leave it alone - result.push_str(line); - } else { - // Check if there are any tool call patterns embedded in this line - let mut line_result = line.to_string(); - for pattern in &TOOL_CALL_PATTERNS { - // Find all occurrences of the pattern in this line - let mut search_start = 0; - while let Some(pos) = line_result[search_start..].find(pattern) { - let abs_pos = search_start + pos; - // Check if this pattern is at the start of the trimmed line - // (which we already handled above) - let before = &line_result[..abs_pos]; - if !before.trim().is_empty() { - // There's non-whitespace before this pattern - it's inline, sanitize it - let replacement = format!("{}{}", LBRACE_HOMOGLYPH, &pattern[1..]); - line_result = format!("{}{}{}", &line_result[..abs_pos], replacement, &line_result[abs_pos + pattern.len()..]); - } - search_start = abs_pos + pattern.len(); - } - } - result.push_str(&line_result); - } - } - - result -} - /// Modern streaming tool parser that properly handles native tool calls and SSE chunks. #[derive(Debug)] pub struct StreamingToolParser { @@ -105,34 +53,74 @@ impl StreamingToolParser { } } - /// Find the starting position of the last tool call pattern in the given text. - /// Returns None if no tool call pattern is found. - fn find_last_tool_call_start(text: &str) -> Option { + /// Find the starting position of the last tool call pattern in the given text, + /// but ONLY if it appears on its own line (preceded by newline or at start of text, + /// with only whitespace before the pattern on that line). + /// Returns None if no valid tool call pattern is found. + pub fn find_last_tool_call_start(text: &str) -> Option { let mut best_start: Option = None; for pattern in &TOOL_CALL_PATTERNS { - if let Some(pos) = text.rfind(pattern) { - if best_start.map_or(true, |best| pos > best) { - best_start = Some(pos); + let mut search_end = text.len(); + while search_end > 0 { + if let Some(pos) = text[..search_end].rfind(pattern) { + // Check if this pattern is on its own line + if Self::is_on_own_line(text, pos) { + if best_start.map_or(true, |best| pos > best) { + best_start = Some(pos); + } + break; // Found a valid one for this pattern + } + // Not on its own line, keep searching backwards + search_end = pos; + } else { + break; } } } best_start } - /// Find the starting position of the FIRST tool call pattern in the given text. - /// Returns None if no tool call pattern is found. - fn find_first_tool_call_start(text: &str) -> Option { + /// Find the starting position of the FIRST tool call pattern in the given text, + /// but ONLY if it appears on its own line (preceded by newline or at start of text, + /// with only whitespace before the pattern on that line). + /// Returns None if no valid tool call pattern is found. + pub fn find_first_tool_call_start(text: &str) -> Option { let mut best_start: Option = None; for pattern in &TOOL_CALL_PATTERNS { - if let Some(pos) = text.find(pattern) { - if best_start.map_or(true, |best| pos < best) { - best_start = Some(pos); + let mut search_start = 0; + while search_start < text.len() { + if let Some(relative_pos) = text[search_start..].find(pattern) { + let pos = search_start + relative_pos; + // Check if this pattern is on its own line + if Self::is_on_own_line(text, pos) { + if best_start.map_or(true, |best| pos < best) { + best_start = Some(pos); + } + break; // Found a valid one for this pattern + } + // Not on its own line, keep searching forward + search_start = pos + 1; + } else { + break; } } } best_start } + /// Check if a position in text is "on its own line" - meaning it's either + /// at the start of the text, or preceded by a newline with only whitespace + /// between the newline and the position. + pub fn is_on_own_line(text: &str, pos: usize) -> bool { + if pos == 0 { + return true; + } + // Find the start of the current line (position after the last newline before pos) + let line_start = text[..pos].rfind('\n').map(|p| p + 1).unwrap_or(0); + // Check if everything between line_start and pos is whitespace + text[line_start..pos].chars().all(|c| c.is_whitespace()) + } + /// Detect malformed tool calls where LLM prose leaked into JSON keys. /// /// When the LLM "stutters" or mixes formats, it sometimes emits JSON where @@ -157,11 +145,9 @@ impl StreamingToolParser { pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec { let mut completed_tools = Vec::new(); - // Add text content to buffer after sanitizing inline tool-call patterns - // to prevent parser poisoning from examples/code blocks + // Add text content to buffer if !chunk.content.is_empty() { - let sanitized = sanitize_inline_tool_patterns(&chunk.content); - self.text_buffer.push_str(&sanitized); + self.text_buffer.push_str(&chunk.content); } // Handle native tool calls - return them immediately when received. @@ -520,97 +506,98 @@ Some text after"#; #[test] fn test_find_first_vs_last_tool_call() { - let text = r#"{"tool": "first"} and {"tool": "second"}"#; + // Both tool calls are on their own lines + let text = "{\"tool\": \"first\"}\n{\"tool\": \"second\"}"; let first_pos = StreamingToolParser::find_first_tool_call_start(text); let last_pos = StreamingToolParser::find_last_tool_call_start(text); - assert!(first_pos.is_some()); - assert!(last_pos.is_some()); + assert!(first_pos.is_some(), "Should find first tool call"); + assert!(last_pos.is_some(), "Should find last tool call"); assert!(first_pos.unwrap() < last_pos.unwrap(), "First position ({:?}) should be less than last position ({:?})", first_pos, last_pos); } #[test] - fn test_sanitize_inline_tool_patterns_preserves_standalone() { - // Tool call on its own line should NOT be sanitized - let input = r#"{"tool": "shell", "args": {"command": "ls"}}"#; - let result = sanitize_inline_tool_patterns(input); - assert_eq!(result, input, "Standalone tool call should not be modified"); + fn test_inline_tool_call_ignored() { + // Tool call pattern inline with other text should NOT be detected + let text = "Here is an example: {\"tool\": \"shell\"} in text"; + assert!(StreamingToolParser::find_first_tool_call_start(text).is_none(), + "Inline tool call pattern should be ignored"); + assert!(StreamingToolParser::find_last_tool_call_start(text).is_none(), + "Inline tool call pattern should be ignored"); } #[test] - fn test_sanitize_inline_tool_patterns_preserves_indented_standalone() { - // Tool call with leading whitespace should NOT be sanitized - let input = r#" {"tool": "shell", "args": {"command": "ls"}}"#; - let result = sanitize_inline_tool_patterns(input); - assert_eq!(result, input, "Indented standalone tool call should not be modified"); + fn test_standalone_tool_call_detected() { + // Tool call on its own line (at start of text) should be detected + let text = r#"{"tool": "shell", "args": {"command": "ls"}}"#; + assert!(StreamingToolParser::find_first_tool_call_start(text).is_some(), + "Standalone tool call should be detected"); } #[test] - fn test_sanitize_inline_tool_patterns_sanitizes_inline() { - // Tool call pattern embedded in text should be sanitized - let input = "Here is an example: {\"tool\": \"shell\"} in text"; - let result = sanitize_inline_tool_patterns(input); - assert!(!result.contains("{\"tool\":"), "Inline pattern should be sanitized"); - assert!(result.contains(LBRACE_HOMOGLYPH), "Should contain homoglyph"); + fn test_indented_tool_call_detected() { + // Tool call with leading whitespace should be detected + let text = r#" {"tool": "shell", "args": {"command": "ls"}}"#; + assert!(StreamingToolParser::find_first_tool_call_start(text).is_some(), + "Indented tool call should be detected"); } #[test] - fn test_sanitize_inline_tool_patterns_sanitizes_in_code_block() { - // Tool call pattern in inline code should be sanitized - let input = "Use `{\"tool\": \"shell\"}` to run commands"; - let result = sanitize_inline_tool_patterns(input); - assert!(!result.contains("{\"tool\":"), "Pattern in code should be sanitized"); - assert!(result.contains(LBRACE_HOMOGLYPH), "Should contain homoglyph"); + fn test_tool_call_after_newline_detected() { + // Tool call after a newline should be detected + let text = "Some prose here\n{\"tool\": \"shell\", \"args\": {}}"; + let pos = StreamingToolParser::find_first_tool_call_start(text); + assert!(pos.is_some(), "Tool call after newline should be detected"); + assert_eq!(pos.unwrap(), 16, "Should find tool call at position after newline"); } #[test] - fn test_sanitize_inline_tool_patterns_multiline() { - // Mixed: standalone on one line, inline on another - let input = "Some text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}\nMore text"; - let result = sanitize_inline_tool_patterns(input); + fn test_inline_ignored_but_standalone_detected() { + // Mixed: inline on first line (ignored), standalone on second line (detected) + let text = "Some text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}"; + let pos = StreamingToolParser::find_first_tool_call_start(text); + assert!(pos.is_some(), "Should find the standalone tool call"); + // The standalone one starts after the newline + assert!(pos.unwrap() > 30, "Should skip the inline pattern and find the standalone one"); + } + + #[test] + fn test_multiple_inline_patterns_all_ignored() { + // Multiple inline patterns on same line - all should be ignored + let text = "Compare {\"tool\": \"a\"} with {\"tool\": \"b\"}"; + assert!(StreamingToolParser::find_first_tool_call_start(text).is_none(), + "All inline patterns should be ignored"); + } + + #[test] + fn test_is_on_own_line() { + // Test the is_on_own_line helper directly + let text = "prefix {\"tool\":\n {\"tool\":"; - // The inline one should be sanitized - let lines: Vec<&str> = result.lines().collect(); - assert!(lines[0].contains(LBRACE_HOMOGLYPH), "First line should have homoglyph"); + // Position 0 is always on its own line + assert!(StreamingToolParser::is_on_own_line(text, 0)); - // The standalone one should NOT be sanitized - assert!(lines[1].starts_with("{\"tool\":"), "Second line should be unchanged"); - } - - #[test] - fn test_sanitize_inline_tool_patterns_multiple_inline() { - // Multiple inline patterns on same line - let input = "Compare {\"tool\": \"a\"} with {\"tool\": \"b\"}"; - let result = sanitize_inline_tool_patterns(input); + // Position 7 (after "prefix ") is NOT on its own line + assert!(!StreamingToolParser::is_on_own_line(text, 7)); - // Both should be sanitized - assert!(!result.contains("{\"tool\":"), "All inline patterns should be sanitized"); - // Count homoglyphs - should be 2 - let homoglyph_count = result.matches(LBRACE_HOMOGLYPH).count(); - assert_eq!(homoglyph_count, 2, "Should have 2 homoglyphs, got {}", homoglyph_count); + // Position after newline with only whitespace before pattern IS on its own line + let newline_pos = text.find('\n').unwrap(); + assert!(StreamingToolParser::is_on_own_line(text, newline_pos + 11)); // 10 spaces before { } #[test] - fn test_sanitize_inline_tool_patterns_no_false_positives() { - // Regular JSON that doesn't match tool patterns should be unchanged - let input = "Some {\"key\": \"value\"} json"; - let result = sanitize_inline_tool_patterns(input); - assert_eq!(result, input, "Non-tool JSON should not be modified"); - } - - #[test] - fn test_sanitize_inline_tool_patterns_all_pattern_variants() { - // Test all whitespace variants are caught - let inputs = [ - "text { \"tool\":\"x\"} more", - "text {\"tool\" :\"x\"} more", - "text { \"tool\" :\"x\"} more", + fn test_all_pattern_variants_require_own_line() { + // All whitespace variants should require their own line + let patterns = [ + "text { \"tool\":\"x\"}", + "text {\"tool\" :\"x\"}", + "text { \"tool\" :\"x\"}", ]; - for input in inputs { - let result = sanitize_inline_tool_patterns(input); - assert!(result.contains(LBRACE_HOMOGLYPH), "Pattern in '{}' should be sanitized", input); + for pattern in patterns { + assert!(StreamingToolParser::find_first_tool_call_start(pattern).is_none(), + "Inline pattern '{}' should be ignored", pattern); } } } diff --git a/crates/g3-core/tests/parser_sanitization_test.rs b/crates/g3-core/tests/parser_sanitization_test.rs index 5584422..a94cb11 100644 --- a/crates/g3-core/tests/parser_sanitization_test.rs +++ b/crates/g3-core/tests/parser_sanitization_test.rs @@ -1,21 +1,24 @@ -//! Parser Sanitization Edge Case Tests +//! Parser Line-Boundary Detection Tests //! -//! CHARACTERIZATION: These tests verify edge cases for the inline tool pattern -//! sanitization that prevents parser poisoning. +//! CHARACTERIZATION: These tests verify that tool call patterns are only detected +//! when they appear on their own line (at start of text or after a newline with +//! only whitespace before the pattern). //! //! What these tests protect: -//! - Tool call patterns in various contexts (code blocks, quotes, etc.) +//! - Tool call patterns in various contexts (code blocks, quotes, etc.) are IGNORED +//! - Tool calls on their own line are DETECTED //! - Edge cases at line boundaries -//! - Unicode handling in sanitization +//! - Unicode handling //! //! What these tests intentionally do NOT assert: //! - Internal parser state -//! - Exact sanitization implementation +//! - Exact detection implementation //! //! Related commits: -//! - 4c36cc0: fix: prevent parser poisoning from inline tool-call JSON patterns +//! - Original: 4c36cc0: fix: prevent parser poisoning from inline tool-call JSON patterns +//! - Updated: Line-boundary detection instead of sanitization -use g3_core::streaming_parser::sanitize_inline_tool_patterns; +use g3_core::StreamingToolParser; // ============================================================================= // Test: Code block contexts @@ -24,35 +27,35 @@ use g3_core::streaming_parser::sanitize_inline_tool_patterns; mod code_block_contexts { use super::*; - /// Test tool pattern in markdown inline code + /// Test tool pattern in markdown inline code - should be IGNORED #[test] fn test_inline_code_backticks() { let input = "Use `{\"tool\": \"shell\"}` to run commands"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // Should be sanitized since it's inline - assert!(!result.contains("{\"tool\":"), "Inline code should be sanitized"); + // Should be ignored since it's inline + assert!(result.is_none(), "Inline code should be ignored"); } - /// Test tool pattern after code fence (should NOT be sanitized) + /// Test tool pattern after code fence (should be DETECTED - it's on its own line) #[test] fn test_after_code_fence_standalone() { // Tool call on its own line after a code fence marker let input = "```\n{\"tool\": \"shell\", \"args\": {}}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // The tool call is on its own line, should NOT be sanitized - let lines: Vec<&str> = result.lines().collect(); - assert!(lines[1].starts_with("{\"tool\":"), "Standalone after fence should not be sanitized"); + // The tool call is on its own line, should be detected + assert!(result.is_some(), "Standalone after fence should be detected"); + assert_eq!(result.unwrap(), 4, "Should be at position 4 (after ```\\n)"); } - /// Test tool pattern in prose explanation + /// Test tool pattern in prose explanation - should be IGNORED #[test] fn test_prose_explanation() { let input = "The format is {\"tool\": \"name\", \"args\": {...}} where name is the tool"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - assert!(!result.contains("{\"tool\":"), "Prose should be sanitized"); + assert!(result.is_none(), "Prose should be ignored"); } } @@ -67,40 +70,43 @@ mod line_boundary_cases { #[test] fn test_empty_lines_before_tool_call() { let input = "\n\n{\"tool\": \"shell\", \"args\": {}}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // Tool call is on its own line (after empty lines), should NOT be sanitized - assert!(result.contains("{\"tool\":"), "Standalone after empty lines should not be sanitized"); + // Tool call is on its own line (after empty lines), should be detected + assert!(result.is_some(), "Standalone after empty lines should be detected"); + assert_eq!(result.unwrap(), 2, "Should be at position 2 (after two newlines)"); } /// Test whitespace-only lines #[test] fn test_whitespace_only_lines() { let input = " \n \n{\"tool\": \"shell\", \"args\": {}}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // Tool call is on its own line, should NOT be sanitized - assert!(result.contains("{\"tool\":"), "Standalone after whitespace lines should not be sanitized"); + // Tool call is on its own line, should be detected + assert!(result.is_some(), "Standalone after whitespace lines should be detected"); } /// Test tool call with leading whitespace (indented) #[test] fn test_indented_tool_call() { let input = " {\"tool\": \"shell\", \"args\": {}}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // Indented but on its own line, should NOT be sanitized - assert!(result.contains("{\"tool\":"), "Indented standalone should not be sanitized"); + // Indented but on its own line, should be detected + assert!(result.is_some(), "Indented standalone should be detected"); + assert_eq!(result.unwrap(), 4, "Should be at position 4 (after 4 spaces)"); } /// Test tool call with tabs #[test] fn test_tab_indented_tool_call() { let input = "\t{\"tool\": \"shell\", \"args\": {}}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // Tab-indented but on its own line, should NOT be sanitized - assert!(result.contains("{\"tool\":"), "Tab-indented standalone should not be sanitized"); + // Tab-indented but on its own line, should be detected + assert!(result.is_some(), "Tab-indented standalone should be detected"); + assert_eq!(result.unwrap(), 1, "Should be at position 1 (after tab)"); } } @@ -111,44 +117,55 @@ mod line_boundary_cases { mod unicode_handling { use super::*; - /// Test tool pattern after emoji + /// Test tool pattern after emoji - should be IGNORED (inline) #[test] fn test_after_emoji() { let input = "🔧 {\"tool\": \"shell\"}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // Emoji before means it's inline, should be sanitized - assert!(!result.contains("{\"tool\":"), "After emoji should be sanitized"); + // Emoji before means it's inline, should be ignored + assert!(result.is_none(), "After emoji should be ignored"); } - /// Test tool pattern after bullet point + /// Test tool pattern after bullet point - should be IGNORED (inline) #[test] fn test_after_bullet() { let input = "• {\"tool\": \"shell\"}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // Bullet before means it's inline, should be sanitized - assert!(!result.contains("{\"tool\":"), "After bullet should be sanitized"); + // Bullet before means it's inline, should be ignored + assert!(result.is_none(), "After bullet should be ignored"); } - /// Test tool pattern after CJK text + /// Test tool pattern after CJK text - should be IGNORED (inline) #[test] fn test_after_cjk() { let input = "使用 {\"tool\": \"shell\"} 命令"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // CJK text before means it's inline, should be sanitized - assert!(!result.contains("{\"tool\":"), "After CJK should be sanitized"); + // CJK text before means it's inline, should be ignored + assert!(result.is_none(), "After CJK should be ignored"); } - /// Test tool pattern with Unicode in args (should still detect pattern) + /// Test tool pattern with Unicode in args on its own line - should be DETECTED #[test] - fn test_unicode_in_args() { - let input = "Example: {\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}"; - let result = sanitize_inline_tool_patterns(input); + fn test_unicode_in_args_standalone() { + let input = "{\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}"; + let result = StreamingToolParser::find_first_tool_call_start(input); - // Should be sanitized (inline) - assert!(!result.contains("{\"tool\":"), "Unicode in args should still be detected"); + // Standalone, should be detected + assert!(result.is_some(), "Unicode in args standalone should be detected"); + assert_eq!(result.unwrap(), 0, "Should be at position 0"); + } + + /// Test tool pattern with Unicode in args inline - should be IGNORED + #[test] + fn test_unicode_in_args_inline() { + let input = "Example: {\"tool\": \"shell\", \"args\": {\"command\": \"echo 你好\"}}"; + let result = StreamingToolParser::find_first_tool_call_start(input); + + // Inline, should be ignored + assert!(result.is_none(), "Unicode in args inline should be ignored"); } } @@ -159,37 +176,35 @@ mod unicode_handling { mod multiple_patterns { use super::*; - /// Test three tool patterns on one line + /// Test three tool patterns on one line - all should be IGNORED #[test] fn test_three_patterns() { let input = "Compare {\"tool\": \"a\"} vs {\"tool\": \"b\"} vs {\"tool\": \"c\"}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - // All should be sanitized - assert!(!result.contains("{\"tool\":"), "All three should be sanitized"); + // All are inline, should be ignored + assert!(result.is_none(), "All three inline should be ignored"); } - /// Test mixed: one standalone, one inline + /// Test mixed: one inline (ignored), one standalone (detected) #[test] fn test_mixed_standalone_and_inline() { let input = "Text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}"; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - let lines: Vec<&str> = result.lines().collect(); - - // First line should have sanitized pattern - assert!(!lines[0].contains("{\"tool\":"), "Inline should be sanitized"); - - // Second line should NOT be sanitized (standalone) - assert!(lines[1].starts_with("{\"tool\":"), "Standalone should not be sanitized"); + // Should find the standalone one, not the inline one + assert!(result.is_some(), "Should find standalone"); + // The standalone one starts after the newline + let newline_pos = input.find('\n').unwrap(); + assert_eq!(result.unwrap(), newline_pos + 1, "Should find standalone after newline"); } } // ============================================================================= -// Test: Edge cases that should NOT trigger sanitization +// Test: Edge cases that should NOT be detected (not tool patterns) // ============================================================================= -mod no_sanitization_cases { +mod no_detection_cases { use super::*; /// Test similar but not matching patterns @@ -203,8 +218,8 @@ mod no_sanitization_cases { ]; for input in inputs { - let result = sanitize_inline_tool_patterns(input); - assert_eq!(result, input, "'{}' should not be modified", input); + let result = StreamingToolParser::find_first_tool_call_start(input); + assert!(result.is_none(), "'{}' should not be detected", input); } } @@ -218,8 +233,8 @@ mod no_sanitization_cases { ]; for input in inputs { - let result = sanitize_inline_tool_patterns(input); - assert_eq!(result, input, "'{}' should not be modified", input); + let result = StreamingToolParser::find_first_tool_call_start(input); + assert!(result.is_none(), "'{}' should not be detected", input); } } @@ -227,8 +242,8 @@ mod no_sanitization_cases { #[test] fn test_tool_as_value() { let input = "{\"name\": \"tool\"}"; - let result = sanitize_inline_tool_patterns(input); - assert_eq!(result, input, "'tool' as value should not trigger sanitization"); + let result = StreamingToolParser::find_first_tool_call_start(input); + assert!(result.is_none(), "'tool' as value should not trigger detection"); } } @@ -249,22 +264,170 @@ For example: This will execute the command."#; - let result = sanitize_inline_tool_patterns(input); - let lines: Vec<&str> = result.lines().collect(); + let result = StreamingToolParser::find_first_tool_call_start(input); - // First line has inline pattern - should be sanitized - assert!(!lines[0].contains("{\"tool\":"), "Inline in docs should be sanitized"); + // Should find the standalone example, not the inline one + assert!(result.is_some(), "Should find standalone example"); - // The standalone example should NOT be sanitized - assert!(lines[3].starts_with("{\"tool\":"), "Standalone example should not be sanitized"); + // The standalone example is on line 4 (0-indexed line 3) + // "To call a tool...\n\nFor example:\n" = 64 + 1 + 13 + 1 = 79 chars before it + // Actually let's just verify it's NOT at position 33 (the inline one) + assert!(result.unwrap() > 50, "Should skip inline and find standalone"); } - /// Test code example in prose + /// Test code example in prose - should be IGNORED #[test] fn test_code_in_prose() { let input = "The agent responds with {\"tool\": \"read_file\"} when it needs to read files."; - let result = sanitize_inline_tool_patterns(input); + let result = StreamingToolParser::find_first_tool_call_start(input); - assert!(!result.contains("{\"tool\":"), "Code in prose should be sanitized"); + assert!(result.is_none(), "Code in prose should be ignored"); + } + + /// Test the exact scenario from the bug: LLM explaining tool format + #[test] + fn test_llm_explanation_scenario() { + let input = r#"I'll use the shell tool. The format is {"tool": "shell", "args": {...}}. + +{"tool": "shell", "args": {"command": "ls -la"}}"#; + + let result = StreamingToolParser::find_first_tool_call_start(input); + + // Should find the actual tool call, not the explanation + assert!(result.is_some(), "Should find actual tool call"); + + // The actual tool call is on the last line, after two newlines + let last_newline = input.rfind('\n').unwrap(); + assert_eq!(result.unwrap(), last_newline + 1, "Should find tool call on last line"); + } +} + +// ============================================================================= +// Test: is_on_own_line helper function +// ============================================================================= + +mod is_on_own_line_tests { + use super::*; + + #[test] + fn test_position_zero() { + assert!(StreamingToolParser::is_on_own_line("anything", 0)); + } + + #[test] + fn test_after_newline_no_whitespace() { + let text = "line1\nline2"; + assert!(StreamingToolParser::is_on_own_line(text, 6)); // position of 'l' in line2 + } + + #[test] + fn test_after_newline_with_whitespace() { + let text = "line1\n indented"; + assert!(StreamingToolParser::is_on_own_line(text, 8)); // position of 'i' in indented + } + + #[test] + fn test_middle_of_line() { + let text = "some text here"; + assert!(!StreamingToolParser::is_on_own_line(text, 5)); // position of 't' in text + } + + #[test] + fn test_after_non_whitespace() { + let text = "prefix{"; + assert!(!StreamingToolParser::is_on_own_line(text, 6)); // position of '{' + } +} + +// ============================================================================= +// Test: End-to-end streaming repro of the parser poisoning bug +// ============================================================================= + +mod streaming_repro { + use super::*; + use g3_providers::CompletionChunk; + + fn chunk(content: &str, finished: bool) -> CompletionChunk { + CompletionChunk { + content: content.to_string(), + finished, + tool_calls: None, + usage: None, + stop_reason: None, + } + } + + /// EXACT REPRO: LLM explains tool format inline, then emits real tool call. + /// Before the fix, the parser would detect the inline pattern and try to + /// parse it as a tool call, causing premature return of control. + #[test] + fn test_inline_explanation_does_not_trigger_tool_detection() { + let mut parser = StreamingToolParser::new(); + + // Simulate streaming chunks as the LLM explains tool format + let tools = parser.process_chunk(&chunk( + "I'll help you with that. The tool call format is ", + false, + )); + assert!(tools.is_empty(), "No tool call yet"); + + // THIS IS THE BUG: inline JSON pattern in explanation + let tools = parser.process_chunk(&chunk( + r#"{"tool": "shell", "args": {...}}"#, + false, + )); + // Before fix: this would incorrectly detect a tool call + // After fix: this should be ignored (it's inline, not on its own line) + assert!(tools.is_empty(), "Inline pattern should NOT trigger tool detection"); + + // More explanation + let tools = parser.process_chunk(&chunk( + " where you specify the command.\n\n", + false, + )); + assert!(tools.is_empty(), "Still no tool call"); + + // NOW the real tool call on its own line + let tools = parser.process_chunk(&chunk( + r#"{"tool": "shell", "args": {"command": "ls -la"}}"#, + true, + )); + + // Should detect exactly ONE tool call - the real one + assert_eq!(tools.len(), 1, "Should detect exactly one tool call"); + assert_eq!(tools[0].tool, "shell"); + assert_eq!(tools[0].args["command"], "ls -la"); + } + + /// Test that multiple inline patterns in a single chunk are all ignored + #[test] + fn test_multiple_inline_patterns_in_chunk_ignored() { + let mut parser = StreamingToolParser::new(); + + let tools = parser.process_chunk(&chunk( + r#"Compare {"tool": "a"} with {"tool": "b"} and {"tool": "c"}"#, + true, + )); + + assert!(tools.is_empty(), "All inline patterns should be ignored"); + } + + /// Test streaming where tool call arrives across multiple chunks + #[test] + fn test_tool_call_split_across_chunks() { + let mut parser = StreamingToolParser::new(); + + // First chunk: prose then start of tool call on new line + let tools = parser.process_chunk(&chunk("Here's the command:\n{\"tool\": ", false)); + assert!(tools.is_empty(), "Incomplete tool call"); + + // Second chunk: rest of tool call + let tools = parser.process_chunk(&chunk( + r#""shell", "args": {"command": "pwd"}}"#, + true, + )); + + assert_eq!(tools.len(), 1, "Should detect the complete tool call"); + assert_eq!(tools[0].tool, "shell"); } } diff --git a/crates/g3-core/tests/stream_completion_characterization_test.rs b/crates/g3-core/tests/stream_completion_characterization_test.rs index 052b817..13fc7c1 100644 --- a/crates/g3-core/tests/stream_completion_characterization_test.rs +++ b/crates/g3-core/tests/stream_completion_characterization_test.rs @@ -668,46 +668,46 @@ mod streaming_utilities_characterization { } // ============================================================================= -// Characterization Tests: Parser Sanitization +// Characterization Tests: Parser Line-Boundary Detection // ============================================================================= -mod parser_sanitization_characterization { - use g3_core::{sanitize_inline_tool_patterns, LBRACE_HOMOGLYPH}; +mod parser_line_boundary_characterization { + use g3_core::StreamingToolParser; - /// CHARACTERIZATION: Standalone tool calls are not sanitized + /// CHARACTERIZATION: Standalone tool calls at start of text are detected #[test] - fn standalone_tool_calls_preserved() { + fn standalone_tool_calls_detected() { let input = r#"{"tool": "shell", "args": {}}"#; - let output = sanitize_inline_tool_patterns(input); - assert_eq!(output, input, "Standalone tool call should be preserved"); + let pos = StreamingToolParser::find_first_tool_call_start(input); + assert!(pos.is_some(), "Standalone tool call should be detected"); + assert_eq!(pos.unwrap(), 0, "Should be at position 0"); } - /// CHARACTERIZATION: Inline tool patterns are sanitized + /// CHARACTERIZATION: Inline tool patterns are ignored (not detected) #[test] - fn inline_patterns_sanitized() { + fn inline_patterns_ignored() { let input = r#"Example: {"tool": "shell"} in text"#; - let output = sanitize_inline_tool_patterns(input); + let pos = StreamingToolParser::find_first_tool_call_start(input); assert!( - output.contains(LBRACE_HOMOGLYPH), - "Inline pattern should be sanitized: {}", - output - ); - assert!( - !output.starts_with('{'), - "Should not start with regular brace" + pos.is_none(), + "Inline pattern should be ignored, but found at {:?}", + pos ); } - /// CHARACTERIZATION: Tool call on its own line is preserved + /// CHARACTERIZATION: Tool call on its own line (after newline) is detected #[test] - fn tool_call_on_own_line_preserved() { + fn tool_call_on_own_line_detected() { let input = "Some text\n{\"tool\": \"shell\"}\nMore text"; - let output = sanitize_inline_tool_patterns(input); - // The tool call line should be preserved + let pos = StreamingToolParser::find_first_tool_call_start(input); assert!( - output.contains("{\"tool\""), - "Tool call on own line should be preserved: {}", - output + pos.is_some(), + "Tool call on own line should be detected" + ); + // Should find it after the newline (position 10 = len("Some text\n")) + assert_eq!( + pos.unwrap(), 10, + "Should find tool call at position after newline" ); } }