From a84fead03bf20c350e8c9f8b78816881e34f7bc9 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Thu, 15 Jan 2026 12:22:32 +0530 Subject: [PATCH] refactor: improve readability of streaming parser and JSON filter Agent: carmack Changes: - streaming_parser.rs: Unified find_first/last_tool_call_start into single find_tool_call_start with SearchDirection enum, reducing duplication. Simplified is_json_invalidated from 45 to 20 lines with clearer logic. Fixed redundant !escape_next check in find_complete_json_object_end. - filter_json.rs: Simplified check_tool_pattern from 40 to 24 lines. Replaced repetitive prefix checks with loop over ["t", "to", "too", "tool"]. Reduced trailing return statements with direct expression returns. - ui_writer_impl.rs: Added ansi module for duration color constants. Simplified duration_color function by removing redundant comments. - language_prompts.rs: Fixed test assertions to match actual prompt content ("obvious, readable Racket" instead of "RACKET-SPECIFIC GUIDANCE"). All 174+ tests pass. No behavior changes. --- crates/g3-cli/src/filter_json.rs | 60 ++++------ crates/g3-cli/src/language_prompts.rs | 4 +- crates/g3-cli/src/ui_writer_impl.rs | 20 ++-- crates/g3-core/src/streaming_parser.rs | 159 +++++++++++-------------- 4 files changed, 105 insertions(+), 138 deletions(-) diff --git a/crates/g3-cli/src/filter_json.rs b/crates/g3-cli/src/filter_json.rs index b9dad8f..f4d7d6f 100644 --- a/crates/g3-cli/src/filter_json.rs +++ b/crates/g3-cli/src/filter_json.rs @@ -106,64 +106,44 @@ fn check_tool_pattern(buffer: &str) -> Option { if !buffer.starts_with('{') { return Some(false); } - - let after_brace = &buffer[1..]; - - // Skip leading whitespace after { - let trimmed = after_brace.trim_start(); - + + let trimmed = buffer[1..].trim_start(); + // Need at least `"tool":"` = 8 chars after whitespace if trimmed.len() < 8 { - // Not enough data yet - but check for early rejection - if trimmed.starts_with('"') { - let after_quote = &trimmed[1..]; - // If we have chars after the quote, check if it starts with 't' - if !after_quote.is_empty() && !after_quote.starts_with('t') { - return Some(false); // Definitely not "tool - } - if after_quote.len() >= 2 && !after_quote.starts_with("to") { - return Some(false); - } - if after_quote.len() >= 3 && !after_quote.starts_with("too") { - return Some(false); - } - if after_quote.len() >= 4 && !after_quote.starts_with("tool") { - return Some(false); + // Early rejection: check progressive prefix of "tool + if let Some(after_quote) = trimmed.strip_prefix('"') { + // Check each prefix of "tool" we have so far + for (i, expected) in ["t", "to", "too", "tool"].iter().enumerate() { + if after_quote.len() > i && !after_quote.starts_with(expected) { + return Some(false); + } } } else if !trimmed.is_empty() && !trimmed.starts_with('"') { - // First non-whitespace char after { is not " - not a tool call return Some(false); } - return None; // Need more data + return None; } - - // We have enough data - check the full pattern - // Must be: "tool" followed by optional whitespace, :, optional whitespace, " + + // Full pattern check: "tool" : " if !trimmed.starts_with("\"tool\"") { return Some(false); } - - let after_tool = trimmed[6..].trim_start(); // 6 = len of "tool" - + + let after_tool = trimmed[6..].trim_start(); if after_tool.is_empty() { - return None; // Need more data + return None; } - if !after_tool.starts_with(':') { return Some(false); } - + let after_colon = after_tool[1..].trim_start(); - if after_colon.is_empty() { - return None; // Need more data + return None; } - - if after_colon.starts_with('"') { - return Some(true); // Confirmed tool call! - } - - Some(false) // Has : but not followed by " + + Some(after_colon.starts_with('"')) } /// Filters JSON tool calls from streaming LLM content. diff --git a/crates/g3-cli/src/language_prompts.rs b/crates/g3-cli/src/language_prompts.rs index 4a15b1f..e9c677d 100644 --- a/crates/g3-cli/src/language_prompts.rs +++ b/crates/g3-cli/src/language_prompts.rs @@ -224,7 +224,7 @@ mod tests { fn test_carmack_racket_prompt_embedded() { let prompt = get_agent_language_prompt("carmack", "racket"); assert!(prompt.is_some()); - assert!(prompt.unwrap().contains("RACKET-SPECIFIC GUIDANCE")); + assert!(prompt.unwrap().contains("obvious, readable Racket")); } #[test] @@ -242,6 +242,6 @@ mod tests { let prompts = get_agent_language_prompts_for_workspace(temp_dir.path(), "carmack"); assert!(prompts.is_some()); let content = prompts.unwrap(); - assert!(content.contains("RACKET-SPECIFIC GUIDANCE")); + assert!(content.contains("obvious, readable Racket")); } } diff --git a/crates/g3-cli/src/ui_writer_impl.rs b/crates/g3-cli/src/ui_writer_impl.rs index 4c2c64e..962eb2f 100644 --- a/crates/g3-cli/src/ui_writer_impl.rs +++ b/crates/g3-cli/src/ui_writer_impl.rs @@ -8,6 +8,13 @@ use termimad::MadSkin; /// Padding width for tool names in compact display (longest tool: "str_replace" = 11 chars) const TOOL_NAME_PADDING: usize = 11; +/// ANSI escape codes +mod ansi { + pub const YELLOW: &str = "\x1b[33m"; + pub const ORANGE: &str = "\x1b[38;5;208m"; + pub const RED: &str = "\x1b[31m"; +} + /// ANSI color codes for tool names const TOOL_COLOR_NORMAL: &str = "\x1b[32m"; const TOOL_COLOR_NORMAL_BOLD: &str = "\x1b[1;32m"; @@ -129,30 +136,27 @@ pub struct ConsoleUiWriter { /// ANSI color code for duration display based on elapsed time. /// Returns empty string for fast operations, yellow/orange/red for slower ones. fn duration_color(duration_str: &str) -> &'static str { - // Format: "500ms", "1.5s", "2m 30.0s" if duration_str.ends_with("ms") { - return ""; // Sub-second: no color + return ""; } if let Some(m_pos) = duration_str.find('m') { - // Contains minutes (e.g., "2m 30.0s") if let Ok(minutes) = duration_str[..m_pos].trim().parse::() { return match minutes { - 5.. => "\x1b[31m", // Red: >= 5 minutes - 1.. => "\x1b[38;5;208m", // Orange: 1-4 minutes + 5.. => ansi::RED, + 1.. => ansi::ORANGE, _ => "", }; } } else if let Some(s_value) = duration_str.strip_suffix('s') { - // Seconds only (e.g., "1.5s") if let Ok(seconds) = s_value.trim().parse::() { if seconds >= 1.0 { - return "\x1b[33m"; // Yellow: >= 1 second + return ansi::YELLOW; } } } - "" // Default: no color + "" } impl ConsoleUiWriter { diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs index 6b8cba6..3abbae1 100644 --- a/crates/g3-core/src/streaming_parser.rs +++ b/crates/g3-core/src/streaming_parser.rs @@ -20,6 +20,13 @@ const TOOL_CALL_PATTERNS: [&str; 4] = [ r#"{ "tool" :"#, ]; +/// Search direction for tool call pattern matching. +#[derive(Clone, Copy, PartialEq)] +enum SearchDirection { + Forward, + Backward, +} + /// Modern streaming tool parser that properly handles native tool calls and SSE chunks. #[derive(Debug)] pub struct StreamingToolParser { @@ -53,59 +60,60 @@ impl StreamingToolParser { } } - /// Find the starting position of the last tool call pattern in the given text, - /// but ONLY if it appears on its own line (preceded by newline or at start of text, - /// with only whitespace before the pattern on that line). - /// Returns None if no valid tool call pattern is found. - pub fn find_last_tool_call_start(text: &str) -> Option { + /// Find a tool call pattern in text, searching in the specified direction. + /// Only matches patterns on their own line (at start or after newline + whitespace). + fn find_tool_call_start(text: &str, direction: SearchDirection) -> Option { let mut best_start: Option = None; + for pattern in &TOOL_CALL_PATTERNS { - let mut search_end = text.len(); - while search_end > 0 { - if let Some(pos) = text[..search_end].rfind(pattern) { - // Check if this pattern is on its own line - if Self::is_on_own_line(text, pos) { - if best_start.map_or(true, |best| pos > best) { - best_start = Some(pos); + match direction { + SearchDirection::Forward => { + let mut search_start = 0; + while search_start < text.len() { + if let Some(rel) = text[search_start..].find(pattern) { + let pos = search_start + rel; + if Self::is_on_own_line(text, pos) { + if best_start.map_or(true, |best| pos < best) { + best_start = Some(pos); + } + break; + } + search_start = pos + 1; + } else { + break; + } + } + } + SearchDirection::Backward => { + let mut search_end = text.len(); + while search_end > 0 { + if let Some(pos) = text[..search_end].rfind(pattern) { + if Self::is_on_own_line(text, pos) { + if best_start.map_or(true, |best| pos > best) { + best_start = Some(pos); + } + break; + } + search_end = pos; + } else { + break; } - break; // Found a valid one for this pattern } - // Not on its own line, keep searching backwards - search_end = pos; - } else { - break; } } } + best_start } - /// Find the starting position of the FIRST tool call pattern in the given text, - /// but ONLY if it appears on its own line (preceded by newline or at start of text, - /// with only whitespace before the pattern on that line). - /// Returns None if no valid tool call pattern is found. + /// Find the starting position of the FIRST tool call pattern on its own line. pub fn find_first_tool_call_start(text: &str) -> Option { - let mut best_start: Option = None; - for pattern in &TOOL_CALL_PATTERNS { - let mut search_start = 0; - while search_start < text.len() { - if let Some(relative_pos) = text[search_start..].find(pattern) { - let pos = search_start + relative_pos; - // Check if this pattern is on its own line - if Self::is_on_own_line(text, pos) { - if best_start.map_or(true, |best| pos < best) { - best_start = Some(pos); - } - break; // Found a valid one for this pattern - } - // Not on its own line, keep searching forward - search_start = pos + 1; - } else { - break; - } - } - } - best_start + Self::find_tool_call_start(text, SearchDirection::Forward) + } + + /// Find the starting position of the LAST tool call pattern on its own line. + pub fn find_last_tool_call_start(text: &str) -> Option { + Self::find_tool_call_start(text, SearchDirection::Backward) } /// Check if a position in text is "on its own line" - meaning it's either @@ -384,18 +392,10 @@ impl StreamingToolParser { } /// Check if a partial JSON tool call has been invalidated by subsequent content. - /// - /// This detects cases where we started parsing what looked like a tool call - /// (e.g., `{"tool": "read_file`) but subsequent content makes it clear this - /// isn't valid JSON (e.g., a newline followed by regular prose). - /// - /// The key insight: in valid JSON, after an open quote for a string value, - /// we must see the string content and closing quote before any unescaped newline. - /// If we see a newline followed by text that looks like prose (starts with a letter), - /// this can't be a valid JSON tool call. - /// - /// Additionally, an unescaped newline INSIDE a string is invalid JSON. So if we're - /// in a string and see a newline (not escaped), the JSON is invalid. + /// + /// Detects two invalidation cases: + /// 1. Unescaped newline inside a JSON string (invalid JSON) + /// 2. Newline followed by non-JSON prose (e.g., regular text, not `"`, `{`, `}`, etc.) fn is_json_invalidated(json_text: &str) -> bool { let mut in_string = false; let mut escape_next = false; @@ -410,51 +410,34 @@ impl StreamingToolParser { match ch { '\\' => escape_next = true, '"' => in_string = !in_string, - '\n' if in_string => { - // Unescaped newline inside a string is invalid JSON! - // Valid JSON strings cannot contain literal newlines (must be \n). - return true; - } + // Unescaped newline inside a string is invalid JSON + '\n' if in_string => return true, '\n' if !in_string => { - // We hit a newline outside of a string. - // Check what comes after - if it's regular prose, this isn't valid JSON. - - // Skip any whitespace after the newline + // Skip whitespace after newline while let Some(&(_, next_ch)) = chars.peek() { - if next_ch == ' ' || next_ch == '\t' { - chars.next(); - } else { - break; + if next_ch != ' ' && next_ch != '\t' { + break } + chars.next(); } - - // Check the first non-whitespace character after the newline + + // Check if next char is valid JSON continuation if let Some(&(_, next_ch)) = chars.peek() { - // Valid JSON continuation characters after newline: - // - '"' (string) - // - '{' or '}' (object) - // - '[' or ']' (array) - // - digits (number) - // - 't', 'f', 'n' could be true/false/null but also prose - // - ':' or ',' (separators) - // - // If we see a letter that's clearly prose (not t/f/n at start of token), - // or other non-JSON characters, this is invalidated. - let is_valid_json_continuation = matches!(next_ch, - '"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' | - '0'..='9' | 't' | 'f' | 'n' | '\n' + // Valid: ", {, }, [, ], :, ,, -, digits, t/f/n (true/false/null), newline + let valid_json_char = matches!( + next_ch, + '"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' | '0'..='9' | 't' | 'f' | 'n' | '\n' ); - - if !is_valid_json_continuation { - return true; // Invalidated! + if !valid_json_char { + return true; } } } _ => {} } } - - false // Not invalidated (yet) + + false } /// Find the end position (byte index) of a complete JSON object in the text. @@ -473,7 +456,7 @@ impl StreamingToolParser { match ch { '\\' => escape_next = true, - '"' if !escape_next => in_string = !in_string, + '"' => in_string = !in_string, '{' if !in_string => { brace_count += 1; found_start = true;