Fix JSON filter to preserve code fence and indented content

Two cosmetic bugs fixed:

1. JSON inside code fences was being filtered. The filter now tracks fence state and passes through all content inside ``` ... ``` blocks.
2. Indented JSON was being filtered. The filter now recognizes that real tool calls are never indented, so indented JSON is always treated as documentation.

Changes:

- Added in_code_fence and fence_buffer fields to FilterState
- Added track_code_fence() to detect ``` markers (with or without a language specifier)
- Added pass_through_char() for content inside code fences
- Modified '{' handling to only filter when there is no leading whitespace
- Added 4 new unit tests for code fence and indentation cases
- Updated 3 stress tests to expect the new (correct) behavior

All 16 filter_json unit tests and 59 stress tests pass.
2026-01-19 17:00:43 +05:30
parent 1604ed613a
commit 6ff21a7d47
2 changed files with 109 additions and 4 deletions
--- a/crates/g3-cli/src/filter_json.rs
+++ b/crates/g3-cli/src/filter_json.rs
@@ -54,6 +54,10 @@ struct FilterState {
    state: State,
    /// Buffer for potential tool call detection (Buffering state)
    buffer: String,
+    /// Are we inside a code fence? (``` ... ```)
+    in_code_fence: bool,
+    /// Buffer for detecting code fence markers
+    fence_buffer: String,
    /// Brace depth for JSON tracking (Suppressing state) - string-aware
    brace_depth: i32,
    /// Are we inside a JSON string? (for proper brace counting)
@@ -73,6 +77,8 @@ impl FilterState {
        Self {
            state: State::Streaming,
            buffer: String::new(),
+            in_code_fence: false,
+            fence_buffer: String::new(),
            brace_depth: 0,
            in_string: false,
            escape_next: false,
@@ -85,6 +91,8 @@ impl FilterState {
    fn reset(&mut self) {
        self.state = State::Streaming;
        self.buffer.clear();
+        self.in_code_fence = false;
+        self.fence_buffer.clear();
        self.brace_depth = 0;
        self.in_string = false;
        self.escape_next = false;
@@ -185,6 +193,15 @@ pub fn filter_json_tool_calls(content: &str) -> String {

 /// Handle a character in Streaming state
 fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String) {
+    // Track code fence state
+    track_code_fence(state, ch);
+    
+    // If inside a code fence, pass through everything
+    if state.in_code_fence {
+        pass_through_char(state, ch, output);
+        return;
+    }
+    
    match ch {
        '\n' => {
            // Buffer extra newlines at line start - they may precede a tool call
@@ -202,14 +219,24 @@ fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String)
            // Accumulate whitespace at line start
            state.pending_whitespace.push(ch);
        }
-        '{' if state.at_line_start => {
+        '{' if state.at_line_start && state.pending_whitespace.is_empty() => {
            // Potential tool call! Enter buffering mode
+            // BUT only if there's no leading whitespace (indented JSON is not a tool call)
            debug!("Potential tool call detected - entering Buffering state");
            state.state = State::Buffering;
            state.buffer.clear();
            state.buffer.push(ch);
            // Don't output pending_newlines or pending_whitespace yet - we might need to suppress them
        }
+        '{' if state.at_line_start && !state.pending_whitespace.is_empty() => {
+            // Indented JSON - not a tool call, pass through
+            output.push_str(&state.pending_newlines);
+            output.push_str(&state.pending_whitespace);
+            state.pending_newlines.clear();
+            state.pending_whitespace.clear();
+            output.push(ch);
+            state.at_line_start = false;
+        }
        _ => {
            // Regular character - output any pending newlines and whitespace first
            output.push_str(&state.pending_newlines);
@@ -222,6 +249,45 @@ fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String)
    }
 }

+/// Pass through a character without filtering (used inside code fences)
+fn pass_through_char(state: &mut FilterState, ch: char, output: &mut String) {
+    // Output any pending content first
+    output.push_str(&state.pending_newlines);
+    output.push_str(&state.pending_whitespace);
+    state.pending_newlines.clear();
+    state.pending_whitespace.clear();
+    output.push(ch);
+    state.at_line_start = ch == '\n';
+}
+
+/// Track code fence state (``` markers)
+fn track_code_fence(state: &mut FilterState, ch: char) {
+    match ch {
+        '`' => {
+            state.fence_buffer.push(ch);
+        }
+        '\n' => {
+            // Check if we have a fence marker
+            if state.fence_buffer.starts_with("```") {
+                // Toggle fence state
+                state.in_code_fence = !state.in_code_fence;
+                debug!("Code fence toggled: in_code_fence={}", state.in_code_fence);
+            }
+            state.fence_buffer.clear();
+        }
+        _ => {
+            // If we were accumulating backticks but got something else,
+            // check if we have a fence marker (for opening fences with language)
+            if state.fence_buffer.starts_with("```") && !state.in_code_fence {
+                // Opening fence with language specifier (e.g., ```json)
+                state.in_code_fence = true;
+                debug!("Code fence opened with language: in_code_fence=true");
+            }
+            state.fence_buffer.clear();
+        }
+    }
+}
+
 /// Handle a character in Buffering state
 fn handle_buffering_char(state: &mut FilterState, ch: char, output: &mut String) {
    state.buffer.push(ch);
@@ -508,4 +574,40 @@ mod tests {
        let result = filter_json_tool_calls(input);
        assert_eq!(result, input, "Tool calls not at line start should pass through");
    }
+
+    #[test]
+    fn test_tool_json_in_code_fence_passes_through() {
+        // JSON inside code fences should NOT be filtered, even if it looks like a tool call
+        reset_json_tool_state();
+        let input = "Before\n```json\n{\"tool\": \"shell\", \"args\": {}}\n```\nAfter";
+        let result = filter_json_tool_calls(input);
+        assert_eq!(result, input, "Tool JSON inside code fence should pass through");
+    }
+
+    #[test]
+    fn test_tool_json_in_plain_code_fence_passes_through() {
+        // JSON inside plain code fences (no language) should also pass through
+        reset_json_tool_state();
+        let input = "Before\n```\n{\"tool\": \"shell\", \"args\": {}}\n```\nAfter";
+        let result = filter_json_tool_calls(input);
+        assert_eq!(result, input, "Tool JSON inside plain code fence should pass through");
+    }
+
+    #[test]
+    fn test_indented_tool_json_passes_through() {
+        // Indented JSON should NOT be filtered (real tool calls are never indented)
+        reset_json_tool_state();
+        let input = "Before\n    {\"tool\": \"shell\", \"args\": {}}\nAfter";
+        let result = filter_json_tool_calls(input);
+        assert_eq!(result, input, "Indented tool JSON should pass through");
+    }
+
+    #[test]
+    fn test_tab_indented_tool_json_passes_through() {
+        // Tab-indented JSON should also pass through
+        reset_json_tool_state();
+        let input = "Before\n\t{\"tool\": \"shell\", \"args\": {}}\nAfter";
+        let result = filter_json_tool_calls(input);
+        assert_eq!(result, input, "Tab-indented tool JSON should pass through");
+    }
 }