Fix JSON filter to preserve code fence and indented content

Two cosmetic bugs fixed:
1. JSON inside code fences was being filtered - now tracks fence state
   and passes through all content inside ``` ... ``` blocks
2. Indented JSON was being filtered - now recognizes that real tool
   calls are never indented, so indented JSON is always documentation

Changes:
- Added in_code_fence and fence_buffer fields to FilterState
- Added track_code_fence() to detect ``` markers (with/without language)
- Added pass_through_char() for content inside code fences
- Modified '{' handling to only filter when no leading whitespace
- Added 4 new unit tests for code fence and indentation cases
- Updated 3 stress tests to expect new (correct) behavior

All 16 filter_json unit tests and 59 stress tests pass.
This commit is contained in:
Dhanji R. Prasanna
2026-01-19 17:00:43 +05:30
parent 1604ed613a
commit 6ff21a7d47
2 changed files with 109 additions and 4 deletions

View File

@@ -54,6 +54,10 @@ struct FilterState {
state: State, state: State,
/// Buffer for potential tool call detection (Buffering state) /// Buffer for potential tool call detection (Buffering state)
buffer: String, buffer: String,
/// Are we inside a code fence? (``` ... ```)
in_code_fence: bool,
/// Buffer for detecting code fence markers
fence_buffer: String,
/// Brace depth for JSON tracking (Suppressing state) - string-aware /// Brace depth for JSON tracking (Suppressing state) - string-aware
brace_depth: i32, brace_depth: i32,
/// Are we inside a JSON string? (for proper brace counting) /// Are we inside a JSON string? (for proper brace counting)
@@ -73,6 +77,8 @@ impl FilterState {
Self { Self {
state: State::Streaming, state: State::Streaming,
buffer: String::new(), buffer: String::new(),
in_code_fence: false,
fence_buffer: String::new(),
brace_depth: 0, brace_depth: 0,
in_string: false, in_string: false,
escape_next: false, escape_next: false,
@@ -85,6 +91,8 @@ impl FilterState {
fn reset(&mut self) { fn reset(&mut self) {
self.state = State::Streaming; self.state = State::Streaming;
self.buffer.clear(); self.buffer.clear();
self.in_code_fence = false;
self.fence_buffer.clear();
self.brace_depth = 0; self.brace_depth = 0;
self.in_string = false; self.in_string = false;
self.escape_next = false; self.escape_next = false;
@@ -185,6 +193,15 @@ pub fn filter_json_tool_calls(content: &str) -> String {
/// Handle a character in Streaming state /// Handle a character in Streaming state
fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String) { fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String) {
// Track code fence state
track_code_fence(state, ch);
// If inside a code fence, pass through everything
if state.in_code_fence {
pass_through_char(state, ch, output);
return;
}
match ch { match ch {
'\n' => { '\n' => {
// Buffer extra newlines at line start - they may precede a tool call // Buffer extra newlines at line start - they may precede a tool call
@@ -202,14 +219,24 @@ fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String)
// Accumulate whitespace at line start // Accumulate whitespace at line start
state.pending_whitespace.push(ch); state.pending_whitespace.push(ch);
} }
'{' if state.at_line_start => { '{' if state.at_line_start && state.pending_whitespace.is_empty() => {
// Potential tool call! Enter buffering mode // Potential tool call! Enter buffering mode
// BUT only if there's no leading whitespace (indented JSON is not a tool call)
debug!("Potential tool call detected - entering Buffering state"); debug!("Potential tool call detected - entering Buffering state");
state.state = State::Buffering; state.state = State::Buffering;
state.buffer.clear(); state.buffer.clear();
state.buffer.push(ch); state.buffer.push(ch);
// Don't output pending_newlines or pending_whitespace yet - we might need to suppress them // Don't output pending_newlines or pending_whitespace yet - we might need to suppress them
} }
'{' if state.at_line_start && !state.pending_whitespace.is_empty() => {
// Indented JSON - not a tool call, pass through
output.push_str(&state.pending_newlines);
output.push_str(&state.pending_whitespace);
state.pending_newlines.clear();
state.pending_whitespace.clear();
output.push(ch);
state.at_line_start = false;
}
_ => { _ => {
// Regular character - output any pending newlines and whitespace first // Regular character - output any pending newlines and whitespace first
output.push_str(&state.pending_newlines); output.push_str(&state.pending_newlines);
@@ -222,6 +249,45 @@ fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String)
} }
} }
/// Pass through a character without filtering (used inside code fences)
fn pass_through_char(state: &mut FilterState, ch: char, output: &mut String) {
// Output any pending content first
output.push_str(&state.pending_newlines);
output.push_str(&state.pending_whitespace);
state.pending_newlines.clear();
state.pending_whitespace.clear();
output.push(ch);
state.at_line_start = ch == '\n';
}
/// Track code fence state (``` markers)
fn track_code_fence(state: &mut FilterState, ch: char) {
match ch {
'`' => {
state.fence_buffer.push(ch);
}
'\n' => {
// Check if we have a fence marker
if state.fence_buffer.starts_with("```") {
// Toggle fence state
state.in_code_fence = !state.in_code_fence;
debug!("Code fence toggled: in_code_fence={}", state.in_code_fence);
}
state.fence_buffer.clear();
}
_ => {
// If we were accumulating backticks but got something else,
// check if we have a fence marker (for opening fences with language)
if state.fence_buffer.starts_with("```") && !state.in_code_fence {
// Opening fence with language specifier (e.g., ```json)
state.in_code_fence = true;
debug!("Code fence opened with language: in_code_fence=true");
}
state.fence_buffer.clear();
}
}
}
/// Handle a character in Buffering state /// Handle a character in Buffering state
fn handle_buffering_char(state: &mut FilterState, ch: char, output: &mut String) { fn handle_buffering_char(state: &mut FilterState, ch: char, output: &mut String) {
state.buffer.push(ch); state.buffer.push(ch);
@@ -508,4 +574,40 @@ mod tests {
let result = filter_json_tool_calls(input); let result = filter_json_tool_calls(input);
assert_eq!(result, input, "Tool calls not at line start should pass through"); assert_eq!(result, input, "Tool calls not at line start should pass through");
} }
#[test]
fn test_tool_json_in_code_fence_passes_through() {
// JSON inside code fences should NOT be filtered, even if it looks like a tool call
reset_json_tool_state();
let input = "Before\n```json\n{\"tool\": \"shell\", \"args\": {}}\n```\nAfter";
let result = filter_json_tool_calls(input);
assert_eq!(result, input, "Tool JSON inside code fence should pass through");
}
#[test]
fn test_tool_json_in_plain_code_fence_passes_through() {
// JSON inside plain code fences (no language) should also pass through
reset_json_tool_state();
let input = "Before\n```\n{\"tool\": \"shell\", \"args\": {}}\n```\nAfter";
let result = filter_json_tool_calls(input);
assert_eq!(result, input, "Tool JSON inside plain code fence should pass through");
}
#[test]
fn test_indented_tool_json_passes_through() {
// Indented JSON should NOT be filtered (real tool calls are never indented)
reset_json_tool_state();
let input = "Before\n {\"tool\": \"shell\", \"args\": {}}\nAfter";
let result = filter_json_tool_calls(input);
assert_eq!(result, input, "Indented tool JSON should pass through");
}
#[test]
fn test_tab_indented_tool_json_passes_through() {
// Tab-indented JSON should also pass through
reset_json_tool_state();
let input = "Before\n\t{\"tool\": \"shell\", \"args\": {}}\nAfter";
let result = filter_json_tool_calls(input);
assert_eq!(result, input, "Tab-indented tool JSON should pass through");
}
} }

View File

@@ -420,7 +420,8 @@ fn test_tabs_before_brace() {
reset_json_tool_state(); reset_json_tool_state();
let input = "Text\n\t\t{\"tool\": \"x\", \"args\": {}}\nMore"; let input = "Text\n\t\t{\"tool\": \"x\", \"args\": {}}\nMore";
let result = filter_json_tool_calls(input); let result = filter_json_tool_calls(input);
assert_eq!(result, "Text\n\nMore"); // Indented JSON should NOT be filtered - real tool calls are never indented
assert_eq!(result, input);
} }
#[test] #[test]
@@ -428,7 +429,8 @@ fn test_spaces_before_brace() {
reset_json_tool_state(); reset_json_tool_state();
let input = "Text\n {\"tool\": \"x\", \"args\": {}}\nMore"; let input = "Text\n {\"tool\": \"x\", \"args\": {}}\nMore";
let result = filter_json_tool_calls(input); let result = filter_json_tool_calls(input);
assert_eq!(result, "Text\n\nMore"); // Indented JSON should NOT be filtered - real tool calls are never indented
assert_eq!(result, input);
} }
#[test] #[test]
@@ -436,7 +438,8 @@ fn test_mixed_whitespace_before_brace() {
reset_json_tool_state(); reset_json_tool_state();
let input = "Text\n \t \t {\"tool\": \"x\", \"args\": {}}\nMore"; let input = "Text\n \t \t {\"tool\": \"x\", \"args\": {}}\nMore";
let result = filter_json_tool_calls(input); let result = filter_json_tool_calls(input);
assert_eq!(result, "Text\n\nMore"); // Indented JSON should NOT be filtered - real tool calls are never indented
assert_eq!(result, input);
} }
#[test] #[test]