fix: detect invalidated JSON tool calls to prevent parser poisoning
When partial JSON tool call patterns appear in LLM output (e.g., from quoting file content), the parser would incorrectly report them as "incomplete tool calls", triggering auto-continue loops. Fix: Added is_json_invalidated() to detect when partial JSON has been invalidated by subsequent content that cannot be valid JSON: - Unescaped newline inside a string (invalid JSON) - Newline followed by prose text outside a string The check is only applied to incomplete JSON - complete tool calls with trailing text are still correctly detected. Added 6 new tests covering: - Tool results with partial JSON patterns - LLM quoting file content inline vs on own line - Comment prefixes (// # -- etc) with partial patterns - Real incomplete tool calls (should still be detected)
This commit is contained in:
@@ -195,6 +195,16 @@ impl StreamingToolParser {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Fallback method to parse JSON tool calls from text content.
|
/// Fallback method to parse JSON tool calls from text content.
|
||||||
|
///
|
||||||
|
/// This method maintains state (`in_json_tool_call`, `json_tool_start`) to track
|
||||||
|
/// partial JSON tool calls across streaming chunks. When a pattern like `{"tool":`
|
||||||
|
/// is found on its own line, we enter "in JSON tool call" mode and wait for the
|
||||||
|
/// JSON to complete.
|
||||||
|
///
|
||||||
|
/// IMPORTANT: We must also detect when the JSON has been **invalidated** - i.e.,
|
||||||
|
/// when subsequent content makes it clear this isn't a real tool call. For example:
|
||||||
|
/// - `{"tool": "read_file` followed by `\nsome regular text` is NOT a tool call
|
||||||
|
/// - The newline followed by non-JSON text invalidates the partial JSON
|
||||||
fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
|
fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
|
||||||
// If we're not currently in a JSON tool call, look for the start
|
// If we're not currently in a JSON tool call, look for the start
|
||||||
if !self.in_json_tool_call {
|
if !self.in_json_tool_call {
|
||||||
@@ -247,6 +257,17 @@ impl StreamingToolParser {
|
|||||||
self.in_json_tool_call = false;
|
self.in_json_tool_call = false;
|
||||||
self.json_tool_start = None;
|
self.json_tool_start = None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If we didn't find a complete JSON object, check if the partial JSON
|
||||||
|
// has been invalidated by subsequent content (e.g., a newline followed
|
||||||
|
// by regular text when not inside a string).
|
||||||
|
if self.in_json_tool_call && Self::is_json_invalidated(json_text) {
|
||||||
|
debug!("JSON tool call invalidated by subsequent content, clearing state");
|
||||||
|
self.in_json_tool_call = false;
|
||||||
|
self.json_tool_start = None;
|
||||||
|
self.last_consumed_position = start_pos + json_text.len();
|
||||||
|
return None;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -324,8 +345,16 @@ impl StreamingToolParser {
|
|||||||
let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
|
let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
|
||||||
if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) {
|
if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) {
|
||||||
let json_text = &unchecked_buffer[start_pos..];
|
let json_text = &unchecked_buffer[start_pos..];
|
||||||
// If NOT complete, it's an incomplete tool call
|
// If JSON is complete, it's not incomplete
|
||||||
Self::find_complete_json_object_end(json_text).is_none()
|
if Self::find_complete_json_object_end(json_text).is_some() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// If JSON has been invalidated by subsequent content, it's not a real tool call
|
||||||
|
if Self::is_json_invalidated(json_text) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Otherwise, it's a genuinely incomplete tool call
|
||||||
|
true
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
@@ -354,6 +383,80 @@ impl StreamingToolParser {
|
|||||||
self.last_consumed_position = self.text_buffer.len();
|
self.last_consumed_position = self.text_buffer.len();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if a partial JSON tool call has been invalidated by subsequent content.
|
||||||
|
///
|
||||||
|
/// This detects cases where we started parsing what looked like a tool call
|
||||||
|
/// (e.g., `{"tool": "read_file`) but subsequent content makes it clear this
|
||||||
|
/// isn't valid JSON (e.g., a newline followed by regular prose).
|
||||||
|
///
|
||||||
|
/// The key insight: in valid JSON, after an open quote for a string value,
|
||||||
|
/// we must see the string content and closing quote before any unescaped newline.
|
||||||
|
/// If we see a newline followed by text that looks like prose (starts with a letter),
|
||||||
|
/// this can't be a valid JSON tool call.
|
||||||
|
///
|
||||||
|
/// Additionally, an unescaped newline INSIDE a string is invalid JSON. So if we're
|
||||||
|
/// in a string and see a newline (not escaped), the JSON is invalid.
|
||||||
|
fn is_json_invalidated(json_text: &str) -> bool {
|
||||||
|
let mut in_string = false;
|
||||||
|
let mut escape_next = false;
|
||||||
|
let mut chars = json_text.char_indices().peekable();
|
||||||
|
|
||||||
|
while let Some((_, ch)) = chars.next() {
|
||||||
|
if escape_next {
|
||||||
|
escape_next = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
match ch {
|
||||||
|
'\\' => escape_next = true,
|
||||||
|
'"' => in_string = !in_string,
|
||||||
|
'\n' if in_string => {
|
||||||
|
// Unescaped newline inside a string is invalid JSON!
|
||||||
|
// Valid JSON strings cannot contain literal newlines (must be \n).
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
'\n' if !in_string => {
|
||||||
|
// We hit a newline outside of a string.
|
||||||
|
// Check what comes after - if it's regular prose, this isn't valid JSON.
|
||||||
|
|
||||||
|
// Skip any whitespace after the newline
|
||||||
|
while let Some(&(_, next_ch)) = chars.peek() {
|
||||||
|
if next_ch == ' ' || next_ch == '\t' {
|
||||||
|
chars.next();
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the first non-whitespace character after the newline
|
||||||
|
if let Some(&(_, next_ch)) = chars.peek() {
|
||||||
|
// Valid JSON continuation characters after newline:
|
||||||
|
// - '"' (string)
|
||||||
|
// - '{' or '}' (object)
|
||||||
|
// - '[' or ']' (array)
|
||||||
|
// - digits (number)
|
||||||
|
// - 't', 'f', 'n' could be true/false/null but also prose
|
||||||
|
// - ':' or ',' (separators)
|
||||||
|
//
|
||||||
|
// If we see a letter that's clearly prose (not t/f/n at start of token),
|
||||||
|
// or other non-JSON characters, this is invalidated.
|
||||||
|
let is_valid_json_continuation = matches!(next_ch,
|
||||||
|
'"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' |
|
||||||
|
'0'..='9' | 't' | 'f' | 'n' | '\n'
|
||||||
|
);
|
||||||
|
|
||||||
|
if !is_valid_json_continuation {
|
||||||
|
return true; // Invalidated!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
false // Not invalidated (yet)
|
||||||
|
}
|
||||||
|
|
||||||
/// Find the end position (byte index) of a complete JSON object in the text.
|
/// Find the end position (byte index) of a complete JSON object in the text.
|
||||||
/// Returns None if no complete JSON object is found.
|
/// Returns None if no complete JSON object is found.
|
||||||
pub fn find_complete_json_object_end(text: &str) -> Option<usize> {
|
pub fn find_complete_json_object_end(text: &str) -> Option<usize> {
|
||||||
|
|||||||
263
crates/g3-core/tests/tool_result_poisoning_test.rs
Normal file
263
crates/g3-core/tests/tool_result_poisoning_test.rs
Normal file
@@ -0,0 +1,263 @@
|
|||||||
|
//! Tool Result Poisoning Test
|
||||||
|
//!
|
||||||
|
//! This test reproduces the bug where partial JSON tool call patterns in tool results
|
||||||
|
//! (e.g., from reading a source file with comments containing tool call examples)
|
||||||
|
//! incorrectly trigger the parser's incomplete tool call detection.
|
||||||
|
//!
|
||||||
|
//! The key insight: tool results should NEVER be searched for JSON tool calls.
|
||||||
|
//! Only the LLM's actual response text should be parsed for tool calls.
|
||||||
|
|
||||||
|
use g3_core::StreamingToolParser;
|
||||||
|
use g3_providers::CompletionChunk;
|
||||||
|
|
||||||
|
/// Helper to create a completion chunk
|
||||||
|
fn chunk(content: &str, finished: bool) -> CompletionChunk {
|
||||||
|
CompletionChunk {
|
||||||
|
content: content.to_string(),
|
||||||
|
finished,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
stop_reason: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Simulates a file that contains a partial tool call pattern in a comment.
|
||||||
|
/// This is the kind of content that would be returned by `shell` with `cat file.rs`.
|
||||||
|
const FILE_WITH_TOOL_CALL_COMMENT: &str = r#"//! Example module
|
||||||
|
//!
|
||||||
|
//! To call a tool, use this format:
|
||||||
|
//! {"tool": "shell", "args": {"command": "ls"}}
|
||||||
|
//!
|
||||||
|
//! Or for reading files:
|
||||||
|
//! {"tool": "read_file
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
println!("Hello, world!");
|
||||||
|
}
|
||||||
|
"#;
|
||||||
|
|
||||||
|
/// REPRO: Tool result containing partial JSON tool call pattern should NOT
|
||||||
|
/// cause has_incomplete_tool_call() to return true.
|
||||||
|
///
|
||||||
|
/// Scenario:
|
||||||
|
/// 1. LLM calls shell tool: `cat file.rs`
|
||||||
|
/// 2. Tool returns file content containing `{"tool": "read_file` (incomplete JSON in comment)
|
||||||
|
/// 3. Tool result is added to context (as a User message, not through parser)
|
||||||
|
/// 4. LLM streams its next response: "I can see the file contains..."
|
||||||
|
/// 5. At end of stream, has_incomplete_tool_call() should be FALSE
|
||||||
|
///
|
||||||
|
/// The bug: has_incomplete_tool_call() was returning TRUE because it found
|
||||||
|
/// the partial pattern in... somewhere. Let's find out where.
|
||||||
|
#[test]
|
||||||
|
fn test_tool_result_with_partial_json_does_not_poison_parser() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// Step 1: LLM streams a tool call
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
r#"{"tool": "shell", "args": {"command": "cat file.rs"}}"#,
|
||||||
|
true,
|
||||||
|
));
|
||||||
|
assert_eq!(tools.len(), 1, "Should detect the shell tool call");
|
||||||
|
assert_eq!(tools[0].tool, "shell");
|
||||||
|
|
||||||
|
// Step 2: Tool executes and returns file content (this goes to context, not parser)
|
||||||
|
// The tool result contains a partial JSON tool call pattern in a comment:
|
||||||
|
// {"tool": "read_file
|
||||||
|
// This is NOT fed to the parser - it goes to context_window.add_message()
|
||||||
|
let _tool_result = FILE_WITH_TOOL_CALL_COMMENT;
|
||||||
|
|
||||||
|
// Step 3: Parser is reset for next LLM response
|
||||||
|
parser.reset();
|
||||||
|
|
||||||
|
// Step 4: LLM streams its response after seeing the tool result
|
||||||
|
// The LLM does NOT emit any tool calls, just commentary
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
"I can see the file contains a main function that prints 'Hello, world!'.",
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty(), "No tool calls in this response");
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
" The file also has some documentation comments with examples.",
|
||||||
|
true,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty(), "No tool calls in this response");
|
||||||
|
|
||||||
|
// Step 5: CRITICAL - has_incomplete_tool_call should be FALSE
|
||||||
|
// The parser should NOT think there's an incomplete tool call
|
||||||
|
assert!(
|
||||||
|
!parser.has_incomplete_tool_call(),
|
||||||
|
"Parser should NOT detect incomplete tool call - tool results are not in parser buffer"
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
!parser.has_unexecuted_tool_call(),
|
||||||
|
"Parser should NOT detect unexecuted tool call"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// REPRO: What if the LLM quotes the file content in its response?
|
||||||
|
/// This is the trickier case - the LLM might say:
|
||||||
|
/// "I see the file has a comment with {"tool": "read_file..."
|
||||||
|
///
|
||||||
|
/// Even in this case, the partial JSON is INLINE (not on its own line),
|
||||||
|
/// so it should be ignored by our line-boundary detection.
|
||||||
|
#[test]
|
||||||
|
fn test_llm_quoting_file_content_inline_does_not_poison() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// LLM quotes the file content inline in its response
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
r#"I see the file has a comment showing the format: {"tool": "read_file"#,
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty(), "Inline quote should not trigger tool detection");
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
" which is incomplete in the example.",
|
||||||
|
true,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty(), "No tool calls");
|
||||||
|
|
||||||
|
// Should NOT detect incomplete tool call - the pattern was inline
|
||||||
|
assert!(
|
||||||
|
!parser.has_incomplete_tool_call(),
|
||||||
|
"Inline quoted pattern should NOT be detected as incomplete tool call"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// REPRO: What if the LLM quotes the file content on its own line?
|
||||||
|
/// This is the problematic case:
|
||||||
|
///
|
||||||
|
/// "The file contains:\n{"tool": "read_file\n"
|
||||||
|
///
|
||||||
|
/// The pattern IS on its own line, but it's clearly not a real tool call -
|
||||||
|
/// it's the LLM quoting file content.
|
||||||
|
#[test]
|
||||||
|
fn test_llm_quoting_file_content_on_own_line_should_not_poison() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// LLM quotes the file content, putting the example on its own line
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
"The file contains this example:\n",
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty());
|
||||||
|
|
||||||
|
// This is the problematic chunk - partial JSON on its own line
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
r#"{"tool": "read_file"#,
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
// Currently this might incorrectly set in_json_tool_call = true
|
||||||
|
// But it should NOT return a tool call since JSON is incomplete
|
||||||
|
assert!(tools.is_empty(), "Incomplete JSON should not return tool call");
|
||||||
|
|
||||||
|
// More content follows that makes it clear this isn't a real tool call
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
"\nwhich shows how to read files.",
|
||||||
|
true,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty());
|
||||||
|
|
||||||
|
// THE BUG: This currently returns true because the parser found
|
||||||
|
// {"tool": "read_file on its own line and the JSON never completed.
|
||||||
|
//
|
||||||
|
// The fix: When we see content AFTER the partial JSON that isn't valid
|
||||||
|
// JSON continuation (like a newline followed by regular text), we should
|
||||||
|
// clear the in_json_tool_call state.
|
||||||
|
assert!(
|
||||||
|
!parser.has_incomplete_tool_call(),
|
||||||
|
"Quoted example followed by regular text should NOT be detected as incomplete tool call"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Test that a REAL incomplete tool call IS detected.
|
||||||
|
/// This is the legitimate case we want to catch.
|
||||||
|
#[test]
|
||||||
|
fn test_real_incomplete_tool_call_is_detected() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// LLM starts emitting a real tool call but stream gets cut off
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
"Let me check that file.\n",
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty());
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
r#"{"tool": "read_file", "args": {"file_path": "src/main"#,
|
||||||
|
true, // Stream ends abruptly!
|
||||||
|
));
|
||||||
|
// JSON is incomplete, so no tool call returned
|
||||||
|
assert!(tools.is_empty());
|
||||||
|
|
||||||
|
// This IS a real incomplete tool call - it's valid JSON so far,
|
||||||
|
// just cut off mid-stream
|
||||||
|
assert!(
|
||||||
|
parser.has_incomplete_tool_call(),
|
||||||
|
"Real incomplete tool call SHOULD be detected"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// REPRO: LLM quotes file content that has a comment with partial tool call.
|
||||||
|
/// The pattern is NOT on its own line because it's prefixed with `//`.
|
||||||
|
///
|
||||||
|
/// Example: "//! {"tool": "read_file"
|
||||||
|
///
|
||||||
|
/// This should be ignored because `//` precedes the pattern on the same line.
|
||||||
|
#[test]
|
||||||
|
fn test_llm_quoting_comment_with_partial_tool_call() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
// LLM shows the file content including a comment line
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
"The file has this documentation:\n",
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty());
|
||||||
|
|
||||||
|
// Comment line with partial tool call - NOT on its own line due to `//!` prefix
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
r#"//! {"tool": "read_file"#,
|
||||||
|
false,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty(), "Comment prefix means pattern is not on its own line");
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(
|
||||||
|
"\nAnd more documentation follows.",
|
||||||
|
true,
|
||||||
|
));
|
||||||
|
assert!(tools.is_empty());
|
||||||
|
|
||||||
|
// Should NOT detect incomplete tool call - pattern was not on its own line
|
||||||
|
assert!(
|
||||||
|
!parser.has_incomplete_tool_call(),
|
||||||
|
"Pattern with // prefix should NOT be detected as incomplete tool call"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// REPRO: Multiple comment styles that should all be ignored.
|
||||||
|
#[test]
|
||||||
|
fn test_various_comment_prefixes_with_partial_tool_calls() {
|
||||||
|
let test_cases = [
|
||||||
|
("// {\"tool\": \"shell\"", "C-style line comment"),
|
||||||
|
("//! {\"tool\": \"shell\"", "Rust doc comment"),
|
||||||
|
("/// {\"tool\": \"shell\"", "Rust doc comment alt"),
|
||||||
|
("# {\"tool\": \"shell\"", "Python/shell comment"),
|
||||||
|
("-- {\"tool\": \"shell\"", "SQL comment"),
|
||||||
|
("* {\"tool\": \"shell\"", "Block comment continuation"),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (input, description) in test_cases {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
|
||||||
|
let tools = parser.process_chunk(&chunk(input, true));
|
||||||
|
assert!(tools.is_empty(), "{}: should not detect tool call", description);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
!parser.has_incomplete_tool_call(),
|
||||||
|
"{}: should NOT report incomplete tool call",
|
||||||
|
description
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user