refactor: improve readability of streaming parser and JSON filter
Agent: carmack
Changes:
- streaming_parser.rs: Unified find_first/last_tool_call_start into single
find_tool_call_start with SearchDirection enum, reducing duplication.
Simplified is_json_invalidated from 45 to 20 lines with clearer logic.
Fixed redundant !escape_next check in find_complete_json_object_end.
- filter_json.rs: Simplified check_tool_pattern from 40 to 24 lines.
Replaced repetitive prefix checks with loop over ["t", "to", "too", "tool"].
Reduced trailing return statements with direct expression returns.
- ui_writer_impl.rs: Added ansi module for duration color constants.
Simplified duration_color function by removing redundant comments.
- language_prompts.rs: Fixed test assertions to match actual prompt content
("obvious, readable Racket" instead of "RACKET-SPECIFIC GUIDANCE").
All 174+ tests pass. No behavior changes.
This commit is contained in:
@@ -107,63 +107,43 @@ fn check_tool_pattern(buffer: &str) -> Option<bool> {
|
|||||||
return Some(false);
|
return Some(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
let after_brace = &buffer[1..];
|
let trimmed = buffer[1..].trim_start();
|
||||||
|
|
||||||
// Skip leading whitespace after {
|
|
||||||
let trimmed = after_brace.trim_start();
|
|
||||||
|
|
||||||
// Need at least `"tool":"` = 8 chars after whitespace
|
// Need at least `"tool":"` = 8 chars after whitespace
|
||||||
if trimmed.len() < 8 {
|
if trimmed.len() < 8 {
|
||||||
// Not enough data yet - but check for early rejection
|
// Early rejection: check progressive prefix of "tool
|
||||||
if trimmed.starts_with('"') {
|
if let Some(after_quote) = trimmed.strip_prefix('"') {
|
||||||
let after_quote = &trimmed[1..];
|
// Check each prefix of "tool" we have so far
|
||||||
// If we have chars after the quote, check if it starts with 't'
|
for (i, expected) in ["t", "to", "too", "tool"].iter().enumerate() {
|
||||||
if !after_quote.is_empty() && !after_quote.starts_with('t') {
|
if after_quote.len() > i && !after_quote.starts_with(expected) {
|
||||||
return Some(false); // Definitely not "tool
|
return Some(false);
|
||||||
}
|
}
|
||||||
if after_quote.len() >= 2 && !after_quote.starts_with("to") {
|
|
||||||
return Some(false);
|
|
||||||
}
|
|
||||||
if after_quote.len() >= 3 && !after_quote.starts_with("too") {
|
|
||||||
return Some(false);
|
|
||||||
}
|
|
||||||
if after_quote.len() >= 4 && !after_quote.starts_with("tool") {
|
|
||||||
return Some(false);
|
|
||||||
}
|
}
|
||||||
} else if !trimmed.is_empty() && !trimmed.starts_with('"') {
|
} else if !trimmed.is_empty() && !trimmed.starts_with('"') {
|
||||||
// First non-whitespace char after { is not " - not a tool call
|
|
||||||
return Some(false);
|
return Some(false);
|
||||||
}
|
}
|
||||||
return None; // Need more data
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We have enough data - check the full pattern
|
// Full pattern check: "tool" : "
|
||||||
// Must be: "tool" followed by optional whitespace, :, optional whitespace, "
|
|
||||||
if !trimmed.starts_with("\"tool\"") {
|
if !trimmed.starts_with("\"tool\"") {
|
||||||
return Some(false);
|
return Some(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
let after_tool = trimmed[6..].trim_start(); // 6 = len of "tool"
|
let after_tool = trimmed[6..].trim_start();
|
||||||
|
|
||||||
if after_tool.is_empty() {
|
if after_tool.is_empty() {
|
||||||
return None; // Need more data
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
if !after_tool.starts_with(':') {
|
if !after_tool.starts_with(':') {
|
||||||
return Some(false);
|
return Some(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
let after_colon = after_tool[1..].trim_start();
|
let after_colon = after_tool[1..].trim_start();
|
||||||
|
|
||||||
if after_colon.is_empty() {
|
if after_colon.is_empty() {
|
||||||
return None; // Need more data
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
if after_colon.starts_with('"') {
|
Some(after_colon.starts_with('"'))
|
||||||
return Some(true); // Confirmed tool call!
|
|
||||||
}
|
|
||||||
|
|
||||||
Some(false) // Has : but not followed by "
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Filters JSON tool calls from streaming LLM content.
|
/// Filters JSON tool calls from streaming LLM content.
|
||||||
|
|||||||
@@ -224,7 +224,7 @@ mod tests {
|
|||||||
fn test_carmack_racket_prompt_embedded() {
|
fn test_carmack_racket_prompt_embedded() {
|
||||||
let prompt = get_agent_language_prompt("carmack", "racket");
|
let prompt = get_agent_language_prompt("carmack", "racket");
|
||||||
assert!(prompt.is_some());
|
assert!(prompt.is_some());
|
||||||
assert!(prompt.unwrap().contains("RACKET-SPECIFIC GUIDANCE"));
|
assert!(prompt.unwrap().contains("obvious, readable Racket"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -242,6 +242,6 @@ mod tests {
|
|||||||
let prompts = get_agent_language_prompts_for_workspace(temp_dir.path(), "carmack");
|
let prompts = get_agent_language_prompts_for_workspace(temp_dir.path(), "carmack");
|
||||||
assert!(prompts.is_some());
|
assert!(prompts.is_some());
|
||||||
let content = prompts.unwrap();
|
let content = prompts.unwrap();
|
||||||
assert!(content.contains("RACKET-SPECIFIC GUIDANCE"));
|
assert!(content.contains("obvious, readable Racket"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -8,6 +8,13 @@ use termimad::MadSkin;
|
|||||||
/// Padding width for tool names in compact display (longest tool: "str_replace" = 11 chars)
|
/// Padding width for tool names in compact display (longest tool: "str_replace" = 11 chars)
|
||||||
const TOOL_NAME_PADDING: usize = 11;
|
const TOOL_NAME_PADDING: usize = 11;
|
||||||
|
|
||||||
|
/// ANSI escape codes
|
||||||
|
mod ansi {
|
||||||
|
pub const YELLOW: &str = "\x1b[33m";
|
||||||
|
pub const ORANGE: &str = "\x1b[38;5;208m";
|
||||||
|
pub const RED: &str = "\x1b[31m";
|
||||||
|
}
|
||||||
|
|
||||||
/// ANSI color codes for tool names
|
/// ANSI color codes for tool names
|
||||||
const TOOL_COLOR_NORMAL: &str = "\x1b[32m";
|
const TOOL_COLOR_NORMAL: &str = "\x1b[32m";
|
||||||
const TOOL_COLOR_NORMAL_BOLD: &str = "\x1b[1;32m";
|
const TOOL_COLOR_NORMAL_BOLD: &str = "\x1b[1;32m";
|
||||||
@@ -129,30 +136,27 @@ pub struct ConsoleUiWriter {
|
|||||||
/// ANSI color code for duration display based on elapsed time.
|
/// ANSI color code for duration display based on elapsed time.
|
||||||
/// Returns empty string for fast operations, yellow/orange/red for slower ones.
|
/// Returns empty string for fast operations, yellow/orange/red for slower ones.
|
||||||
fn duration_color(duration_str: &str) -> &'static str {
|
fn duration_color(duration_str: &str) -> &'static str {
|
||||||
// Format: "500ms", "1.5s", "2m 30.0s"
|
|
||||||
if duration_str.ends_with("ms") {
|
if duration_str.ends_with("ms") {
|
||||||
return ""; // Sub-second: no color
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(m_pos) = duration_str.find('m') {
|
if let Some(m_pos) = duration_str.find('m') {
|
||||||
// Contains minutes (e.g., "2m 30.0s")
|
|
||||||
if let Ok(minutes) = duration_str[..m_pos].trim().parse::<u32>() {
|
if let Ok(minutes) = duration_str[..m_pos].trim().parse::<u32>() {
|
||||||
return match minutes {
|
return match minutes {
|
||||||
5.. => "\x1b[31m", // Red: >= 5 minutes
|
5.. => ansi::RED,
|
||||||
1.. => "\x1b[38;5;208m", // Orange: 1-4 minutes
|
1.. => ansi::ORANGE,
|
||||||
_ => "",
|
_ => "",
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
} else if let Some(s_value) = duration_str.strip_suffix('s') {
|
} else if let Some(s_value) = duration_str.strip_suffix('s') {
|
||||||
// Seconds only (e.g., "1.5s")
|
|
||||||
if let Ok(seconds) = s_value.trim().parse::<f64>() {
|
if let Ok(seconds) = s_value.trim().parse::<f64>() {
|
||||||
if seconds >= 1.0 {
|
if seconds >= 1.0 {
|
||||||
return "\x1b[33m"; // Yellow: >= 1 second
|
return ansi::YELLOW;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
"" // Default: no color
|
""
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ConsoleUiWriter {
|
impl ConsoleUiWriter {
|
||||||
|
|||||||
@@ -20,6 +20,13 @@ const TOOL_CALL_PATTERNS: [&str; 4] = [
|
|||||||
r#"{ "tool" :"#,
|
r#"{ "tool" :"#,
|
||||||
];
|
];
|
||||||
|
|
||||||
|
/// Search direction for tool call pattern matching.
|
||||||
|
#[derive(Clone, Copy, PartialEq)]
|
||||||
|
enum SearchDirection {
|
||||||
|
Forward,
|
||||||
|
Backward,
|
||||||
|
}
|
||||||
|
|
||||||
/// Modern streaming tool parser that properly handles native tool calls and SSE chunks.
|
/// Modern streaming tool parser that properly handles native tool calls and SSE chunks.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct StreamingToolParser {
|
pub struct StreamingToolParser {
|
||||||
@@ -53,59 +60,60 @@ impl StreamingToolParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Find the starting position of the last tool call pattern in the given text,
|
/// Find a tool call pattern in text, searching in the specified direction.
|
||||||
/// but ONLY if it appears on its own line (preceded by newline or at start of text,
|
/// Only matches patterns on their own line (at start or after newline + whitespace).
|
||||||
/// with only whitespace before the pattern on that line).
|
fn find_tool_call_start(text: &str, direction: SearchDirection) -> Option<usize> {
|
||||||
/// Returns None if no valid tool call pattern is found.
|
|
||||||
pub fn find_last_tool_call_start(text: &str) -> Option<usize> {
|
|
||||||
let mut best_start: Option<usize> = None;
|
let mut best_start: Option<usize> = None;
|
||||||
|
|
||||||
for pattern in &TOOL_CALL_PATTERNS {
|
for pattern in &TOOL_CALL_PATTERNS {
|
||||||
let mut search_end = text.len();
|
match direction {
|
||||||
while search_end > 0 {
|
SearchDirection::Forward => {
|
||||||
if let Some(pos) = text[..search_end].rfind(pattern) {
|
let mut search_start = 0;
|
||||||
// Check if this pattern is on its own line
|
while search_start < text.len() {
|
||||||
if Self::is_on_own_line(text, pos) {
|
if let Some(rel) = text[search_start..].find(pattern) {
|
||||||
if best_start.map_or(true, |best| pos > best) {
|
let pos = search_start + rel;
|
||||||
best_start = Some(pos);
|
if Self::is_on_own_line(text, pos) {
|
||||||
|
if best_start.map_or(true, |best| pos < best) {
|
||||||
|
best_start = Some(pos);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
search_start = pos + 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
SearchDirection::Backward => {
|
||||||
|
let mut search_end = text.len();
|
||||||
|
while search_end > 0 {
|
||||||
|
if let Some(pos) = text[..search_end].rfind(pattern) {
|
||||||
|
if Self::is_on_own_line(text, pos) {
|
||||||
|
if best_start.map_or(true, |best| pos > best) {
|
||||||
|
best_start = Some(pos);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
search_end = pos;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break; // Found a valid one for this pattern
|
|
||||||
}
|
}
|
||||||
// Not on its own line, keep searching backwards
|
|
||||||
search_end = pos;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
best_start
|
best_start
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Find the starting position of the FIRST tool call pattern in the given text,
|
/// Find the starting position of the FIRST tool call pattern on its own line.
|
||||||
/// but ONLY if it appears on its own line (preceded by newline or at start of text,
|
|
||||||
/// with only whitespace before the pattern on that line).
|
|
||||||
/// Returns None if no valid tool call pattern is found.
|
|
||||||
pub fn find_first_tool_call_start(text: &str) -> Option<usize> {
|
pub fn find_first_tool_call_start(text: &str) -> Option<usize> {
|
||||||
let mut best_start: Option<usize> = None;
|
Self::find_tool_call_start(text, SearchDirection::Forward)
|
||||||
for pattern in &TOOL_CALL_PATTERNS {
|
}
|
||||||
let mut search_start = 0;
|
|
||||||
while search_start < text.len() {
|
/// Find the starting position of the LAST tool call pattern on its own line.
|
||||||
if let Some(relative_pos) = text[search_start..].find(pattern) {
|
pub fn find_last_tool_call_start(text: &str) -> Option<usize> {
|
||||||
let pos = search_start + relative_pos;
|
Self::find_tool_call_start(text, SearchDirection::Backward)
|
||||||
// Check if this pattern is on its own line
|
|
||||||
if Self::is_on_own_line(text, pos) {
|
|
||||||
if best_start.map_or(true, |best| pos < best) {
|
|
||||||
best_start = Some(pos);
|
|
||||||
}
|
|
||||||
break; // Found a valid one for this pattern
|
|
||||||
}
|
|
||||||
// Not on its own line, keep searching forward
|
|
||||||
search_start = pos + 1;
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
best_start
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if a position in text is "on its own line" - meaning it's either
|
/// Check if a position in text is "on its own line" - meaning it's either
|
||||||
@@ -385,17 +393,9 @@ impl StreamingToolParser {
|
|||||||
|
|
||||||
/// Check if a partial JSON tool call has been invalidated by subsequent content.
|
/// Check if a partial JSON tool call has been invalidated by subsequent content.
|
||||||
///
|
///
|
||||||
/// This detects cases where we started parsing what looked like a tool call
|
/// Detects two invalidation cases:
|
||||||
/// (e.g., `{"tool": "read_file`) but subsequent content makes it clear this
|
/// 1. Unescaped newline inside a JSON string (invalid JSON)
|
||||||
/// isn't valid JSON (e.g., a newline followed by regular prose).
|
/// 2. Newline followed by non-JSON prose (e.g., regular text, not `"`, `{`, `}`, etc.)
|
||||||
///
|
|
||||||
/// The key insight: in valid JSON, after an open quote for a string value,
|
|
||||||
/// we must see the string content and closing quote before any unescaped newline.
|
|
||||||
/// If we see a newline followed by text that looks like prose (starts with a letter),
|
|
||||||
/// this can't be a valid JSON tool call.
|
|
||||||
///
|
|
||||||
/// Additionally, an unescaped newline INSIDE a string is invalid JSON. So if we're
|
|
||||||
/// in a string and see a newline (not escaped), the JSON is invalid.
|
|
||||||
fn is_json_invalidated(json_text: &str) -> bool {
|
fn is_json_invalidated(json_text: &str) -> bool {
|
||||||
let mut in_string = false;
|
let mut in_string = false;
|
||||||
let mut escape_next = false;
|
let mut escape_next = false;
|
||||||
@@ -410,43 +410,26 @@ impl StreamingToolParser {
|
|||||||
match ch {
|
match ch {
|
||||||
'\\' => escape_next = true,
|
'\\' => escape_next = true,
|
||||||
'"' => in_string = !in_string,
|
'"' => in_string = !in_string,
|
||||||
'\n' if in_string => {
|
// Unescaped newline inside a string is invalid JSON
|
||||||
// Unescaped newline inside a string is invalid JSON!
|
'\n' if in_string => return true,
|
||||||
// Valid JSON strings cannot contain literal newlines (must be \n).
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
'\n' if !in_string => {
|
'\n' if !in_string => {
|
||||||
// We hit a newline outside of a string.
|
// Skip whitespace after newline
|
||||||
// Check what comes after - if it's regular prose, this isn't valid JSON.
|
|
||||||
|
|
||||||
// Skip any whitespace after the newline
|
|
||||||
while let Some(&(_, next_ch)) = chars.peek() {
|
while let Some(&(_, next_ch)) = chars.peek() {
|
||||||
if next_ch == ' ' || next_ch == '\t' {
|
if next_ch != ' ' && next_ch != '\t' {
|
||||||
chars.next();
|
break
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
chars.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the first non-whitespace character after the newline
|
// Check if next char is valid JSON continuation
|
||||||
if let Some(&(_, next_ch)) = chars.peek() {
|
if let Some(&(_, next_ch)) = chars.peek() {
|
||||||
// Valid JSON continuation characters after newline:
|
// Valid: ", {, }, [, ], :, ,, -, digits, t/f/n (true/false/null), newline
|
||||||
// - '"' (string)
|
let valid_json_char = matches!(
|
||||||
// - '{' or '}' (object)
|
next_ch,
|
||||||
// - '[' or ']' (array)
|
'"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' | '0'..='9' | 't' | 'f' | 'n' | '\n'
|
||||||
// - digits (number)
|
|
||||||
// - 't', 'f', 'n' could be true/false/null but also prose
|
|
||||||
// - ':' or ',' (separators)
|
|
||||||
//
|
|
||||||
// If we see a letter that's clearly prose (not t/f/n at start of token),
|
|
||||||
// or other non-JSON characters, this is invalidated.
|
|
||||||
let is_valid_json_continuation = matches!(next_ch,
|
|
||||||
'"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' |
|
|
||||||
'0'..='9' | 't' | 'f' | 'n' | '\n'
|
|
||||||
);
|
);
|
||||||
|
if !valid_json_char {
|
||||||
if !is_valid_json_continuation {
|
return true;
|
||||||
return true; // Invalidated!
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -454,7 +437,7 @@ impl StreamingToolParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
false // Not invalidated (yet)
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Find the end position (byte index) of a complete JSON object in the text.
|
/// Find the end position (byte index) of a complete JSON object in the text.
|
||||||
@@ -473,7 +456,7 @@ impl StreamingToolParser {
|
|||||||
|
|
||||||
match ch {
|
match ch {
|
||||||
'\\' => escape_next = true,
|
'\\' => escape_next = true,
|
||||||
'"' if !escape_next => in_string = !in_string,
|
'"' => in_string = !in_string,
|
||||||
'{' if !in_string => {
|
'{' if !in_string => {
|
||||||
brace_count += 1;
|
brace_count += 1;
|
||||||
found_start = true;
|
found_start = true;
|
||||||
|
|||||||
Reference in New Issue
Block a user