clean up filter_json a bit (more to come)

This commit is contained in:
Dhanji R. Prasanna
2025-12-22 12:03:09 +11:00
parent 87d9b39ae4
commit 3bc254962c
6 changed files with 85 additions and 88 deletions

View File

@@ -1,13 +1,25 @@
// FINAL CORRECTED implementation of filter_json_tool_calls function according to specification
// 1. Detect tool call start with regex '\w*{\w*"tool"\w*:\w*"' on the very next newline
// 2. Enter suppression mode and use brace counting to find complete JSON
// 3. Only elide JSON content between first '{' and last '}' (inclusive)
// 4. Return everything else as the final filtered string
//! JSON tool call filtering for streaming LLM responses.
//!
//! This module filters out JSON tool calls from LLM output streams while preserving
//! regular text content. It uses a state machine to handle streaming chunks.
//!
//! # Design
//!
//! The filter detects tool calls by looking for JSON objects that start with `{"tool":`
//! at the beginning of a line (after optional whitespace). It uses brace counting to find
//! the complete JSON object and removes it from the output stream.
//!
//! # Known Edge Cases
//!
//! 1. **Brace counting without string awareness in main loop**: The main filtering loop
//! counts braces without considering whether they're inside JSON strings. This can
//! cause premature exit from suppression mode if a string contains `}`.
//!
//! 2. **Tool calls not at line start**: Tool calls that don't start at the beginning
//! of a line (after optional whitespace) won't be detected.
//!
//! 3. **Streaming chunk boundaries**: If a tool call pattern is split across chunks
//! (e.g., `{"to` in one chunk and `ol":` in the next), detection may fail.
use regex::Regex;
use std::cell::RefCell;
@@ -15,12 +27,12 @@ use tracing::debug;
// Thread-local state for tracking JSON tool call suppression
thread_local! {
static FIXED_JSON_TOOL_STATE: RefCell<FixedJsonToolState> = RefCell::new(FixedJsonToolState::new());
static JSON_TOOL_STATE: RefCell<JsonToolState> = RefCell::new(JsonToolState::new());
}
/// Internal state for tracking JSON tool call filtering across streaming chunks.
#[derive(Debug, Clone)]
struct FixedJsonToolState {
struct JsonToolState {
/// True when actively suppressing a confirmed tool call
suppression_mode: bool,
/// True when buffering potential JSON (saw { but not yet confirmed as tool call)
@@ -33,7 +45,7 @@ struct FixedJsonToolState {
potential_json_start: Option<usize>, // Where the potential JSON started
}
impl FixedJsonToolState {
impl JsonToolState {
fn new() -> Self {
Self {
suppression_mode: false,
@@ -57,18 +69,22 @@ impl FixedJsonToolState {
}
}
// FINAL CORRECTED implementation according to specification
/// Filters JSON tool calls from streaming LLM content.
///
/// Processes content chunks and removes JSON tool calls while preserving regular text.
/// Maintains state across calls to handle tool calls spanning multiple chunks.
pub fn fixed_filter_json_tool_calls(content: &str) -> String {
///
/// # Arguments
/// * `content` - A chunk of streaming content from the LLM
///
/// # Returns
/// The filtered content with JSON tool calls removed
pub fn filter_json_tool_calls(content: &str) -> String {
if content.is_empty() {
return String::new();
}
FIXED_JSON_TOOL_STATE.with(|state| {
JSON_TOOL_STATE.with(|state| {
let mut state = state.borrow_mut();
// Add new content to buffer
@@ -87,7 +103,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
debug!("JSON tool call completed - exiting suppression mode");
// Extract the complete result with JSON filtered out
let result = extract_fixed_content(
let result = extract_content_without_json(
&state.buffer,
state.json_start_in_buffer.unwrap_or(0),
);
@@ -107,7 +123,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
}
}
// CRITICAL FIX: After counting braces, if still in suppression mode,
// After counting braces, if still in suppression mode,
// check if a new tool call pattern appears. This handles truncated JSON
// followed by complete JSON.
if state.suppression_mode {
@@ -171,7 +187,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
state.brace_depth -= 1;
if state.brace_depth <= 0 {
debug!("JSON tool call completed immediately");
let result = extract_fixed_content(&state.buffer, json_start);
let result = extract_content_without_json(&state.buffer, json_start);
let new_content = if result.len() > state.content_returned_up_to {
result[state.content_returned_up_to..].to_string()
} else {
@@ -267,7 +283,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
state.brace_depth -= 1;
if state.brace_depth <= 0 {
debug!("JSON tool call completed in same chunk");
let result = extract_fixed_content(&state.buffer, json_start);
let result = extract_content_without_json(&state.buffer, json_start);
let content_after = if result.len() > json_start {
&result[json_start..]
} else {
@@ -362,7 +378,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
if state.brace_depth <= 0 {
// JSON is complete in this chunk
debug!("JSON tool call completed in same chunk");
let result = extract_fixed_content(&buffer_clone, json_start);
let result = extract_content_without_json(&buffer_clone, json_start);
// Return content before JSON plus content after JSON
let content_after_json = if result.len() > json_start {
@@ -387,8 +403,6 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
}
// No JSON tool call detected; return only the new content we haven't returned yet.
if state.buffer.len() > state.content_returned_up_to {
let result = state.buffer[state.content_returned_up_to..].to_string();
state.content_returned_up_to = state.buffer.len();
@@ -410,7 +424,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
/// # Arguments
/// * `full_content` - The full content buffer
/// * `json_start` - Position where the JSON tool call begins
fn extract_fixed_content(full_content: &str, json_start: usize) -> String {
fn extract_content_without_json(full_content: &str, json_start: usize) -> String {
// Find the end of the JSON using proper brace counting with string handling
let mut brace_depth = 0;
let mut json_end = json_start;
@@ -455,8 +469,8 @@ fn extract_fixed_content(full_content: &str, json_start: usize) -> String {
///
/// Call this between independent filtering sessions to ensure clean state.
/// This is particularly important in tests and when starting new conversations.
pub fn reset_fixed_json_tool_state() {
FIXED_JSON_TOOL_STATE.with(|state| {
pub fn reset_json_tool_state() {
JSON_TOOL_STATE.with(|state| {
let mut state = state.borrow_mut();
state.reset();
});

View File

@@ -1,5 +1,5 @@
// JSON tool call filtering for display (moved from g3-core)
pub mod fixed_filter_json;
pub mod filter_json;
use anyhow::Result;
use crossterm::style::{Color, ResetColor, SetForegroundColor};
@@ -2478,7 +2478,7 @@ async fn run_autonomous(
let coach_config = base_config.for_coach()?;
// Reset filter suppression state before creating coach agent
crate::fixed_filter_json::reset_fixed_json_tool_state();
crate::filter_json::reset_json_tool_state();
let ui_writer = ConsoleUiWriter::new();
let mut coach_agent =

View File

@@ -1,4 +1,4 @@
use crate::fixed_filter_json::{fixed_filter_json_tool_calls, reset_fixed_json_tool_state};
use crate::filter_json::{filter_json_tool_calls, reset_json_tool_state};
use g3_core::ui_writer::UiWriter;
use std::io::{self, Write};
use termimad::MadSkin;
@@ -354,11 +354,11 @@ impl UiWriter for ConsoleUiWriter {
fn filter_json_tool_calls(&self, content: &str) -> String {
// Apply JSON tool call filtering for display
fixed_filter_json_tool_calls(content)
filter_json_tool_calls(content)
}
fn reset_json_filter(&self) {
// Reset the filter state for a new response
reset_fixed_json_tool_state();
reset_json_tool_state();
}
}