fix: strip duplicate tool call JSON from assistant messages when LLM stutters

When the LLM emits identical JSON tool calls as text content (JSON
fallback mode), the raw duplicate JSON was being stored in the assistant
message in conversation history. This confused the model on subsequent
turns, causing it to stall or repeat itself.

Root cause: raw_content_for_log used get_text_content() which returns
the full parser buffer including all duplicate tool call JSONs.

Fix: Added get_text_before_tool_calls() to StreamingToolParser that
returns only the text before the first JSON tool call. Changed
raw_content_for_log to use this method so the assistant message only
contains the preamble text + the single executed tool call.

Added 5 integration tests covering stuttered duplicates, triple
stutter, cross-turn dedup, and the different-args boundary case.

Added MockResponse helpers for simulating LLM stutter patterns.
This commit is contained in:
Dhanji R. Prasanna
2026-02-10 19:53:11 +11:00
parent f9625f1a2d
commit 2a4cd1f4d6
4 changed files with 302 additions and 2 deletions

View File

@@ -231,7 +231,59 @@ impl MockResponse {
}
}
/// Create a response that gets cut off by max_tokens
/// Create a non-native (JSON-fallback) response whose text content is the
/// same JSON tool call emitted twice, separated by a blank line — the
/// classic LLM "stutter" pattern this crate's dedup logic must handle.
///
/// `tool` is the tool name and `args` its argument object; both are
/// serialized into the `{"tool": ..., "args": ...}` wire shape.
pub fn text_with_duplicate_json_tools(tool: &str, args: serde_json::Value) -> Self {
    // Single source of truth for the tool-call JSON; both stuttered
    // copies reference this one string.
    let call_json = format!(
        r#"{{"tool": "{}", "args": {}}}"#,
        tool,
        serde_json::to_string(&args).unwrap()
    );
    // Full text as the parser would see it: call + blank line + call.
    let body = [call_json.as_str(), call_json.as_str()].join("\n\n");
    // Crude ~4-chars-per-token estimate, matching the other mock helpers.
    let est_tokens = body.len() as u32 / 4;
    Self {
        chunks: vec![
            MockChunk::content(&call_json),
            MockChunk::content("\n\n"),
            MockChunk::content(&call_json),
            MockChunk::finished("end_turn"),
        ],
        usage: Usage {
            prompt_tokens: 100,
            completion_tokens: est_tokens,
            total_tokens: 100 + est_tokens,
            cache_creation_tokens: 0,
            cache_read_tokens: 0,
        },
    }
}
/// Create a non-native (JSON-fallback) response shaped like
/// `"<text>\n\n<tool call JSON>\n\n<same tool call JSON>"` — a stuttered
/// duplicate tool call preceded by a natural-language preamble such as
/// "Let me run that.".
///
/// `text` is the preamble, `tool` the tool name, and `args` its argument
/// object, serialized into the `{"tool": ..., "args": ...}` wire shape.
pub fn text_with_duplicate_json_tools_prefixed(
    text: &str,
    tool: &str,
    args: serde_json::Value,
) -> Self {
    // One canonical tool-call string; the stutter is two references to it.
    let call_json = format!(
        r#"{{"tool": "{}", "args": {}}}"#,
        tool,
        serde_json::to_string(&args).unwrap()
    );
    // Full text as the parser would see it: preamble, then the call twice,
    // each segment separated by a blank line.
    let body = [text, call_json.as_str(), call_json.as_str()].join("\n\n");
    // Crude ~4-chars-per-token estimate, matching the other mock helpers.
    let est_tokens = body.len() as u32 / 4;
    Self {
        chunks: vec![
            MockChunk::content(text),
            MockChunk::content("\n\n"),
            MockChunk::content(&call_json),
            MockChunk::content("\n\n"),
            MockChunk::content(&call_json),
            MockChunk::finished("end_turn"),
        ],
        usage: Usage {
            prompt_tokens: 100,
            completion_tokens: est_tokens,
            total_tokens: 100 + est_tokens,
            cache_creation_tokens: 0,
            cache_read_tokens: 0,
        },
    }
}
pub fn truncated(content: &str) -> Self {
Self {
chunks: vec![