clean up filter_json a bit (more to come)

This commit is contained in:
Dhanji R. Prasanna
2025-12-22 12:03:09 +11:00
parent 87d9b39ae4
commit 3bc254962c
6 changed files with 85 additions and 88 deletions

View File

@@ -1 +0,0 @@
context_window_ok_awesome_can_you_commit_ca89d518008247ee.txt

View File

@@ -1,16 +0,0 @@
103104-TgR, sys, 4K 🟡, You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You an
103104-Ugn, sys, 5K 🟠, 📚 Project README (from README.md): # G3 - AI Coding Agent G3 is a coding AI agent designed to help you complete tasks
, usr, 0K 🟢, [Session Resumed] Previous session summary: ## Move fixed_filter_json to g3-cli - Complete ### Summary Successfully m
103203-esQ, usr, 0K 🟢, Task: OK awesome, can you commit the current changes and then read agents/fowler.md for prompting on refactoring hygiene
103206-mKT, ass, 0K 🟢, I'll commit the current changes first, then read the fowler.md file and apply its guidelines. {"tool": "shell", "args":
103206-Ols, usr, 0K 🟢, Tool result: ❌ Command failed: bash: line 0: cd: /Users/jimbob/g3: No such file or directory
103206-aug, ass, 0K 🟢, {"tool": "read_file", "args": {"file_path":"/Users/jimbob/g3/agents/fowler.md"}}
103206-OLG, usr, 0K 🟢, Tool result: ❌ Failed to read file '/Users/jimbob/g3/agents/fowler.md': No such file or directory (os error 2)
103210-eij, ass, 0K 🟢, Let me find the correct working directory first. {"tool": "shell", "args": {"command":"pwd && ls -la"}}
103210-Rnv, usr, 0K 🟢, Tool result: /Users/dhanji/src/g3 total 376 drwxr-xr-x@ 22 dhanji staff 704 22 Dec 10:30 . drwxr-xr-x@ 8 dhanji s
103212-cNH, ass, 0K 🟢, {"tool": "shell", "args": {"command":"cd /Users/dhanji/src/g3 && git status"}}
103212-Nhy, usr, 0K 🟢, Tool result: On branch main Your branch is ahead of 'origin/main' by 3 commits. (use "git push" to publish your local
103213-RBp, ass, 0K 🟢, {"tool": "read_file", "args": {"file_path":"/Users/dhanji/src/g3/agents/fowler.md"}}
103213-Ghp, usr, 1K 🟡, Tool result: 📄 File content (151 lines): You are fowler, a specialized software refactoring agent, named after Martin Fo
--- TOTAL: 12K / 200K (6.3%) ---

View File

@@ -1,13 +1,25 @@
// FINAL CORRECTED implementation of filter_json_tool_calls function according to specification
// 1. Detect tool call start with regex '\w*{\w*"tool"\w*:\w*"' on the very next newline
// 2. Enter suppression mode and use brace counting to find complete JSON
// 3. Only elide JSON content between first '{' and last '}' (inclusive)
// 4. Return everything else as the final filtered string
//! JSON tool call filtering for streaming LLM responses.
//!
//! This module filters out JSON tool calls from LLM output streams while preserving
//! regular text content. It uses a state machine to handle streaming chunks.
//!
//! # Design
//!
//! The filter detects tool calls by looking for JSON objects that start with `{"tool":`
//! at the beginning of a line. It uses brace counting to find the complete JSON object
//! and removes it from the output stream.
//!
//! # Known Edge Cases
//!
//! 1. **Brace counting without string awareness in main loop**: The main filtering loop
//! counts braces without considering whether they're inside JSON strings. This can
//! cause premature exit from suppression mode if a string contains `}`.
//!
//! 2. **Tool calls not at line start**: Tool calls that don't start at the beginning
//! of a line (after optional whitespace) won't be detected.
//!
//! 3. **Streaming chunk boundaries**: If a tool call pattern is split across chunks
//! (e.g., `{"to` in one chunk and `ol":` in the next), detection may fail.
use regex::Regex;
use std::cell::RefCell;
@@ -15,12 +27,12 @@ use tracing::debug;
// Thread-local state for tracking JSON tool call suppression
thread_local! {
static FIXED_JSON_TOOL_STATE: RefCell<FixedJsonToolState> = RefCell::new(FixedJsonToolState::new());
static JSON_TOOL_STATE: RefCell<JsonToolState> = RefCell::new(JsonToolState::new());
}
/// Internal state for tracking JSON tool call filtering across streaming chunks.
#[derive(Debug, Clone)]
struct FixedJsonToolState {
struct JsonToolState {
/// True when actively suppressing a confirmed tool call
suppression_mode: bool,
/// True when buffering potential JSON (saw { but not yet confirmed as tool call)
@@ -33,7 +45,7 @@ struct FixedJsonToolState {
potential_json_start: Option<usize>, // Where the potential JSON started
}
impl FixedJsonToolState {
impl JsonToolState {
fn new() -> Self {
Self {
suppression_mode: false,
@@ -57,18 +69,22 @@ impl FixedJsonToolState {
}
}
// FINAL CORRECTED implementation according to specification
/// Filters JSON tool calls from streaming LLM content.
///
/// Processes content chunks and removes JSON tool calls while preserving regular text.
/// Maintains state across calls to handle tool calls spanning multiple chunks.
pub fn fixed_filter_json_tool_calls(content: &str) -> String {
///
/// # Arguments
/// * `content` - A chunk of streaming content from the LLM
///
/// # Returns
/// The filtered content with JSON tool calls removed
pub fn filter_json_tool_calls(content: &str) -> String {
if content.is_empty() {
return String::new();
}
FIXED_JSON_TOOL_STATE.with(|state| {
JSON_TOOL_STATE.with(|state| {
let mut state = state.borrow_mut();
// Add new content to buffer
@@ -87,7 +103,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
debug!("JSON tool call completed - exiting suppression mode");
// Extract the complete result with JSON filtered out
let result = extract_fixed_content(
let result = extract_content_without_json(
&state.buffer,
state.json_start_in_buffer.unwrap_or(0),
);
@@ -107,7 +123,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
}
}
// CRITICAL FIX: After counting braces, if still in suppression mode,
// After counting braces, if still in suppression mode,
// check if a new tool call pattern appears. This handles truncated JSON
// followed by complete JSON.
if state.suppression_mode {
@@ -171,7 +187,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
state.brace_depth -= 1;
if state.brace_depth <= 0 {
debug!("JSON tool call completed immediately");
let result = extract_fixed_content(&state.buffer, json_start);
let result = extract_content_without_json(&state.buffer, json_start);
let new_content = if result.len() > state.content_returned_up_to {
result[state.content_returned_up_to..].to_string()
} else {
@@ -267,7 +283,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
state.brace_depth -= 1;
if state.brace_depth <= 0 {
debug!("JSON tool call completed in same chunk");
let result = extract_fixed_content(&state.buffer, json_start);
let result = extract_content_without_json(&state.buffer, json_start);
let content_after = if result.len() > json_start {
&result[json_start..]
} else {
@@ -362,7 +378,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
if state.brace_depth <= 0 {
// JSON is complete in this chunk
debug!("JSON tool call completed in same chunk");
let result = extract_fixed_content(&buffer_clone, json_start);
let result = extract_content_without_json(&buffer_clone, json_start);
// Return content before JSON plus content after JSON
let content_after_json = if result.len() > json_start {
@@ -387,8 +403,6 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
}
// No JSON tool call detected, return only the new content we haven't returned yet
if state.buffer.len() > state.content_returned_up_to {
let result = state.buffer[state.content_returned_up_to..].to_string();
state.content_returned_up_to = state.buffer.len();
@@ -410,7 +424,7 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
/// # Arguments
/// * `full_content` - The full content buffer
/// * `json_start` - Position where the JSON tool call begins
fn extract_fixed_content(full_content: &str, json_start: usize) -> String {
fn extract_content_without_json(full_content: &str, json_start: usize) -> String {
// Find the end of the JSON using proper brace counting with string handling
let mut brace_depth = 0;
let mut json_end = json_start;
@@ -455,8 +469,8 @@ fn extract_fixed_content(full_content: &str, json_start: usize) -> String {
///
/// Call this between independent filtering sessions to ensure clean state.
/// This is particularly important in tests and when starting new conversations.
pub fn reset_fixed_json_tool_state() {
FIXED_JSON_TOOL_STATE.with(|state| {
pub fn reset_json_tool_state() {
JSON_TOOL_STATE.with(|state| {
let mut state = state.borrow_mut();
state.reset();
});

View File

@@ -1,5 +1,5 @@
// JSON tool call filtering for display (moved from g3-core)
pub mod fixed_filter_json;
pub mod filter_json;
use anyhow::Result;
use crossterm::style::{Color, ResetColor, SetForegroundColor};
@@ -2478,7 +2478,7 @@ async fn run_autonomous(
let coach_config = base_config.for_coach()?;
// Reset filter suppression state before creating coach agent
crate::fixed_filter_json::reset_fixed_json_tool_state();
crate::filter_json::reset_json_tool_state();
let ui_writer = ConsoleUiWriter::new();
let mut coach_agent =

View File

@@ -1,4 +1,4 @@
use crate::fixed_filter_json::{fixed_filter_json_tool_calls, reset_fixed_json_tool_state};
use crate::filter_json::{filter_json_tool_calls, reset_json_tool_state};
use g3_core::ui_writer::UiWriter;
use std::io::{self, Write};
use termimad::MadSkin;
@@ -354,11 +354,11 @@ impl UiWriter for ConsoleUiWriter {
fn filter_json_tool_calls(&self, content: &str) -> String {
// Apply JSON tool call filtering for display
fixed_filter_json_tool_calls(content)
filter_json_tool_calls(content)
}
fn reset_json_filter(&self) {
// Reset the filter state for a new response
reset_fixed_json_tool_state();
reset_json_tool_state();
}
}

View File

@@ -4,28 +4,28 @@
//! from LLM output streams while preserving all other content.
#[cfg(test)]
mod fixed_filter_tests {
use g3_cli::fixed_filter_json::{fixed_filter_json_tool_calls, reset_fixed_json_tool_state};
mod filter_json_tests {
use g3_cli::filter_json::{filter_json_tool_calls, reset_json_tool_state};
use regex::Regex;
/// Test that regular text without tool calls passes through unchanged.
#[test]
fn test_no_tool_call_passthrough() {
reset_fixed_json_tool_state();
reset_json_tool_state();
let input = "This is regular text without any tool calls.";
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
assert_eq!(result, input);
}
/// Test detection and removal of a complete tool call in a single chunk.
#[test]
fn test_simple_tool_call_detection() {
reset_fixed_json_tool_state();
reset_json_tool_state();
let input = r#"Some text before
{"tool": "shell", "args": {"command": "ls"}}
Some text after"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
let expected = "Some text before\n\nSome text after";
assert_eq!(result, expected);
}
@@ -33,7 +33,7 @@ Some text after"#;
/// Test handling of tool calls that arrive across multiple streaming chunks.
#[test]
fn test_streaming_chunks() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Simulate streaming where the tool call comes in multiple chunks
let chunks = vec![
@@ -46,7 +46,7 @@ Some text after"#;
let mut results = Vec::new();
for chunk in chunks {
let result = fixed_filter_json_tool_calls(chunk);
let result = filter_json_tool_calls(chunk);
results.push(result);
}
@@ -59,13 +59,13 @@ Some text after"#;
/// Test correct handling of nested braces within JSON strings.
#[test]
fn test_nested_braces_in_tool_call() {
reset_fixed_json_tool_state();
reset_json_tool_state();
let input = r#"Text before
{"tool": "write_file", "args": {"file_path": "test.json", "content": "{\"nested\": \"value\"}"}}
Text after"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
let expected = "Text before\n\nText after";
assert_eq!(result, expected);
}
@@ -117,16 +117,16 @@ Text after"#;
/// Test that tool calls must appear at the start of a line (after newline).
#[test]
fn test_newline_requirement() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// According to spec, tool call should be detected "on the very next newline"
// Our current regex matches any line that contains the pattern, not just after newlines
let input_with_newline = "Text\n{\"tool\": \"shell\", \"args\": {\"command\": \"ls\"}}";
let input_without_newline = "Text {\"tool\": \"shell\", \"args\": {\"command\": \"ls\"}}";
let result1 = fixed_filter_json_tool_calls(input_with_newline);
reset_fixed_json_tool_state();
let result2 = fixed_filter_json_tool_calls(input_without_newline);
let result1 = filter_json_tool_calls(input_with_newline);
reset_json_tool_state();
let result2 = filter_json_tool_calls(input_without_newline);
// With the new aggressive filtering, only the newline case should trigger suppression
// The pattern requires { to be at the start of a line (after ^)
@@ -138,13 +138,13 @@ Text after"#;
/// Test handling of escaped quotes within JSON strings.
#[test]
fn test_json_with_escaped_quotes() {
reset_fixed_json_tool_state();
reset_json_tool_state();
let input = r#"Text
{"tool": "write_file", "args": {"content": "He said \"hello\" to me"}}
More text"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
let expected = "Text\n\nMore text";
assert_eq!(result, expected);
}
@@ -152,14 +152,14 @@ More text"#;
/// Test graceful handling of incomplete/malformed JSON.
#[test]
fn test_edge_case_malformed_json() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Test what happens with malformed JSON that starts like a tool call
let input = r#"Text
{"tool": "shell", "args": {"command": "ls"
More text"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
// Should handle gracefully - since JSON is incomplete, it should return content before JSON
let expected = "Text\n";
assert_eq!(result, expected);
@@ -168,22 +168,22 @@ More text"#;
/// Test processing multiple independent tool calls sequentially.
#[test]
fn test_multiple_tool_calls_sequential() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Test processing multiple tool calls one at a time
let input1 = r#"First text
{"tool": "shell", "args": {"command": "ls"}}
Middle text"#;
let result1 = fixed_filter_json_tool_calls(input1);
let result1 = filter_json_tool_calls(input1);
let expected1 = "First text\n\nMiddle text";
assert_eq!(result1, expected1);
// Reset and process second tool call
reset_fixed_json_tool_state();
reset_json_tool_state();
let input2 = r#"More text
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Final text"#;
let result2 = fixed_filter_json_tool_calls(input2);
let result2 = filter_json_tool_calls(input2);
let expected2 = "More text\n\nFinal text";
assert_eq!(result2, expected2);
}
@@ -191,13 +191,13 @@ Final text"#;
/// Test tool calls with complex multi-line arguments.
#[test]
fn test_tool_call_with_complex_args() {
reset_fixed_json_tool_state();
reset_json_tool_state();
let input = r#"Before
{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "--- old\n-old line\n+++ new\n+new line", "start": 0, "end": 100}}
After"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
let expected = "Before\n\nAfter";
assert_eq!(result, expected);
}
@@ -205,12 +205,12 @@ After"#;
/// Test input containing only a tool call with no surrounding text.
#[test]
fn test_tool_call_only() {
reset_fixed_json_tool_state();
reset_json_tool_state();
let input = r#"
{"tool": "final_output", "args": {"summary": "Task completed successfully"}}"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
let expected = "\n";
assert_eq!(result, expected);
}
@@ -218,14 +218,14 @@ After"#;
/// Test accurate brace counting with deeply nested structures.
#[test]
fn test_brace_counting_accuracy() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Test complex nested structure
let input = r#"Start
{"tool": "write_file", "args": {"content": "function() { return {a: 1, b: {c: 2}}; }", "file_path": "test.js"}}
End"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
let expected = "Start\n\nEnd";
assert_eq!(result, expected);
}
@@ -233,14 +233,14 @@ End"#;
/// Test that braces within strings don't affect brace counting.
#[test]
fn test_string_escaping_in_json() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Test JSON with escaped quotes and braces in strings
let input = r#"Text
{"tool": "shell", "args": {"command": "echo \"Hello {world}\" > file.txt"}}
More"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
let expected = "Text\n\nMore";
assert_eq!(result, expected);
}
@@ -248,7 +248,7 @@ More"#;
/// Verify compliance with the exact specification requirements.
#[test]
fn test_specification_compliance() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Test the exact specification requirements:
// 1. Detect start with regex '\w*{\w*"tool"\w*:\w*"' on newline
@@ -257,7 +257,7 @@ More"#;
// 4. Return everything else
let input = "Before text\nSome more text\n{\"tool\": \"test\", \"args\": {}}\nAfter text\nMore after";
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
let expected = "Before text\nSome more text\n\nAfter text\nMore after";
assert_eq!(result, expected);
}
@@ -265,13 +265,13 @@ More"#;
/// Test that non-tool JSON objects are not filtered.
#[test]
fn test_no_false_positives() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Test that we don't incorrectly identify non-tool JSON as tool calls
let input = r#"Some text
{"not_tool": "value", "other": "data"}
More text"#;
let result = fixed_filter_json_tool_calls(input);
let result = filter_json_tool_calls(input);
// Should pass through unchanged since it doesn't match the tool pattern
assert_eq!(result, input);
}
@@ -279,7 +279,7 @@ More text"#;
/// Test patterns that look similar to tool calls but aren't exact matches.
#[test]
fn test_partial_tool_patterns() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Test patterns that look like tool calls but aren't complete
let test_cases = vec![
@@ -289,8 +289,8 @@ More text"#;
];
for input in test_cases {
reset_fixed_json_tool_state();
let result = fixed_filter_json_tool_calls(input);
reset_json_tool_state();
let result = filter_json_tool_calls(input);
// These should all pass through unchanged
assert_eq!(result, input, "Input should pass through: {}", input);
}
@@ -299,7 +299,7 @@ More text"#;
/// Test streaming with very small chunks (character-by-character).
#[test]
fn test_streaming_edge_cases() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Test streaming with very small chunks
let chunks = vec![
@@ -308,7 +308,7 @@ More text"#;
let mut results = Vec::new();
for chunk in chunks {
let result = fixed_filter_json_tool_calls(chunk);
let result = filter_json_tool_calls(chunk);
results.push(result);
}
@@ -322,7 +322,7 @@ More text"#;
/// Debug test with detailed logging for streaming behavior.
#[test]
fn test_streaming_debug() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Debug the exact failing case
let chunks = vec![
@@ -335,7 +335,7 @@ More text"#;
let mut results = Vec::new();
for (i, chunk) in chunks.iter().enumerate() {
let result = fixed_filter_json_tool_calls(chunk);
let result = filter_json_tool_calls(chunk);
println!("Chunk {}: {:?} -> {:?}", i, chunk, result);
results.push(result);
}
@@ -351,7 +351,7 @@ More text"#;
/// Test handling of truncated JSON followed by complete JSON (the json_err pattern)
#[test]
fn test_truncated_then_complete_json() {
reset_fixed_json_tool_state();
reset_json_tool_state();
// Simulate the pattern from json_err trace:
// 1. Incomplete/truncated JSON appears
@@ -365,7 +365,7 @@ More text"#;
let mut results = Vec::new();
for (i, chunk) in chunks.iter().enumerate() {
let result = fixed_filter_json_tool_calls(chunk);
let result = filter_json_tool_calls(chunk);
println!("Chunk {}: {:?} -> {:?}", i, chunk, result);
results.push(result);
}