Fix extra newlines before tool calls in JSON filter

The JSON tool call filter was outputting newlines immediately as they
were encountered. When the LLM output contained multiple newlines before
a tool call, each newline was output before the tool call JSON was
detected and suppressed, leaving orphaned blank lines in the output.

Changes:
- Add pending_newlines field to FilterState to buffer newlines at line start
- First newline after content is output immediately, subsequent ones buffered
- When tool call confirmed, pending_newlines cleared (suppressing extra blanks)
- When not a tool call, pending_newlines output with the buffer
- Add flush_json_tool_filter() to flush pending content at end of streaming
- Update tests to reflect new behavior
- Add tests for newline suppression behavior
This commit is contained in:
Dhanji R. Prasanna
2026-01-11 17:04:27 +05:30
parent 9509e51708
commit 2fbdac7aa9
4 changed files with 158 additions and 17 deletions

View File

@@ -53,6 +53,8 @@ struct FilterState {
at_line_start: bool,
/// Whitespace seen after newline (before potential {)
pending_whitespace: String,
/// Newlines accumulated at line start (before potential tool call)
pending_newlines: String,
}
impl FilterState {
@@ -65,6 +67,7 @@ impl FilterState {
escape_next: false,
at_line_start: true, // Start of input counts as line start
pending_whitespace: String::new(),
pending_newlines: String::new(),
}
}
@@ -76,6 +79,7 @@ impl FilterState {
self.escape_next = false;
self.at_line_start = true;
self.pending_whitespace.clear();
self.pending_newlines.clear();
}
}
@@ -192,11 +196,16 @@ pub fn filter_json_tool_calls(content: &str) -> String {
fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String) {
match ch {
'\n' => {
// Output the newline and any pending whitespace
output.push_str(&state.pending_whitespace);
output.push(ch);
state.pending_whitespace.clear();
state.at_line_start = true;
// Buffer extra newlines at line start - they may precede a tool call
// Always output the first newline, but buffer subsequent ones
if state.at_line_start {
state.pending_newlines.push(ch);
} else {
// First newline after content - output it and enter line start mode
output.push(ch);
state.at_line_start = true;
state.pending_newlines.clear(); // Reset - this newline was output
}
}
' ' | '\t' if state.at_line_start => {
// Accumulate whitespace at line start
@@ -208,10 +217,12 @@ fn handle_streaming_char(state: &mut FilterState, ch: char, output: &mut String)
state.state = State::Buffering;
state.buffer.clear();
state.buffer.push(ch);
// Don't output pending_whitespace yet - we might need to suppress it
// Don't output pending_newlines or pending_whitespace yet - we might need to suppress them
}
_ => {
// Regular character - output any pending whitespace first
// Regular character - output any pending newlines and whitespace first
output.push_str(&state.pending_newlines);
state.pending_newlines.clear();
output.push_str(&state.pending_whitespace);
state.pending_whitespace.clear();
output.push(ch);
@@ -233,15 +244,18 @@ fn handle_buffering_char(state: &mut FilterState, ch: char, output: &mut String)
state.brace_depth = 1; // We already have the opening {
state.in_string = true; // We're inside the "tool" value string
state.escape_next = false;
// Discard pending_whitespace (it's part of the tool call line)
// Discard pending_newlines and pending_whitespace (they're part of the tool call)
state.pending_newlines.clear();
state.pending_whitespace.clear();
state.buffer.clear();
}
Some(false) => {
// Not a tool call - release buffered content
debug!("Not a tool call - releasing buffer");
output.push_str(&state.pending_newlines);
output.push_str(&state.pending_whitespace);
output.push_str(&state.buffer);
state.pending_newlines.clear();
state.pending_whitespace.clear();
state.buffer.clear();
state.state = State::Streaming;
@@ -252,8 +266,10 @@ fn handle_buffering_char(state: &mut FilterState, ch: char, output: &mut String)
if state.buffer.len() > MAX_BUFFER_FOR_DETECTION {
// Too long without confirmation - not a tool call
debug!("Buffer exceeded max length - not a tool call");
output.push_str(&state.pending_newlines);
output.push_str(&state.pending_whitespace);
output.push_str(&state.buffer);
state.pending_newlines.clear();
state.pending_whitespace.clear();
state.buffer.clear();
state.state = State::Streaming;
@@ -347,6 +363,24 @@ pub fn reset_json_tool_state() {
});
}
/// Flushes any pending content from the JSON filter.
///
/// Call this at the end of streaming to ensure any buffered newlines
/// or whitespace that wasn't followed by a tool call gets output.
pub fn flush_json_tool_filter() -> String {
JSON_TOOL_STATE.with(|state| {
let mut state = state.borrow_mut();
let mut output = String::new();
// Output any pending newlines and whitespace
output.push_str(&state.pending_newlines);
output.push_str(&state.pending_whitespace);
output.push_str(&state.buffer);
state.pending_newlines.clear();
state.pending_whitespace.clear();
state.buffer.clear();
output
})
}
#[cfg(test)]
mod tests {
@@ -438,4 +472,28 @@ mod tests {
// The tool call should be filtered out
assert_eq!(result, "Text\n\nMore");
}
#[test]
fn test_multiple_newlines_before_tool_call_suppressed() {
// This test verifies that extra blank lines before a tool call are suppressed.
// This fixes the visual issue where many blank lines appeared before tool calls.
reset_json_tool_state();
// Input has 4 newlines before the tool call (3 blank lines)
let input = "Before\n\n\n\n{\"tool\": \"shell\", \"args\": {}}\nAfter";
let result = filter_json_tool_calls(input);
// Only one newline should remain before where the tool call was
// (the first newline after "Before" is preserved, extra ones are suppressed)
assert_eq!(result, "Before\n\nAfter");
}
#[test]
fn test_single_newline_before_tool_call_preserved() {
// A single newline before a tool call should be preserved
reset_json_tool_state();
let input = "Before\n{\"tool\": \"shell\", \"args\": {}}\nAfter";
let result = filter_json_tool_calls(input);
assert_eq!(result, "Before\n\nAfter");
}
}

View File

@@ -3,7 +3,7 @@
//! These tests hammer the filter with malformed JSON, partial tool calls,
//! edge cases, and adversarial inputs to ensure robustness.
use g3_cli::filter_json::{filter_json_tool_calls, reset_json_tool_state};
use g3_cli::filter_json::{filter_json_tool_calls, flush_json_tool_filter, reset_json_tool_state};
// ============================================================================
// Malformed JSON Tests
@@ -478,7 +478,9 @@ fn test_empty_input() {
#[test]
fn test_just_newline() {
reset_json_tool_state();
assert_eq!(filter_json_tool_calls("\n"), "\n");
let result = filter_json_tool_calls("\n");
let flushed = flush_json_tool_filter();
assert_eq!(format!("{}{}", result, flushed), "\n");
}
#[test]

View File

@@ -211,7 +211,8 @@ After"#;
{"tool": "final_output", "args": {"summary": "Task completed successfully"}}"#;
let result = filter_json_tool_calls(input);
let expected = "\n";
// Leading newline before tool call at start of input is suppressed
let expected = "";
assert_eq!(result, expected);
}
@@ -509,13 +510,13 @@ End"#;
let result = filter_json_tool_calls(input);
// The tool call starts on its own line after the read_file output,
// so it should be filtered out. Only the read_file output should remain.
// The tool call starts on its own line after the read_file output.
// The tool call is filtered out, and extra newlines before it are suppressed.
// Only one newline remains (the line ending after "1ms").
let expected = r#"┌─ read_file | ./crates/g3-cli/src/ui_writer_impl.rs [13000..13300]
│ }
│ (11 lines)
└─ ⚡️ 1ms
"#;
assert_eq!(
result, expected,
@@ -538,9 +539,10 @@ End"#;
let result = filter_json_tool_calls(input);
// The shell tool call starts at line beginning, so it should be filtered out
// Only the surrounding text should remain
// Note: The tool call is on its own line, so filtering leaves an empty line
let expected = "Let me create a test case:\n\n\n\nDone.";
// Only the surrounding text should remain.
// Extra newlines before the tool call are suppressed (one blank line before
// becomes just the line ending), but newlines after are preserved.
let expected = "Let me create a test case:\n\n\nDone.";
assert_eq!(
result, expected,