diff --git a/crates/g3-cli/src/commands.rs b/crates/g3-cli/src/commands.rs index e74aedb..4d39eea 100644 --- a/crates/g3-cli/src/commands.rs +++ b/crates/g3-cli/src/commands.rs @@ -1,6 +1,6 @@ //! Interactive command handlers for G3 CLI. //! -//! Handles `/` commands in interactive mode. +//! Handles `/` commands in interactive mode (help, compact, research, etc.). use anyhow::Result; use rustyline::Editor; @@ -16,6 +16,33 @@ use crate::project::load_and_validate_project; use crate::template::process_template; use crate::task_execution::execute_task_with_retry; +// --- Research command helpers --- + +fn format_research_task_summary(task: &g3_core::pending_research::ResearchTask) -> String { + let status_emoji = match task.status { + g3_core::pending_research::ResearchStatus::Pending => "🔄", + g3_core::pending_research::ResearchStatus::Complete => "✅", + g3_core::pending_research::ResearchStatus::Failed => "❌", + }; + let injected = if task.injected { " (injected)" } else { "" }; + let query_preview = if task.query.len() > 60 { + format!("{}...", task.query.chars().take(57).collect::()) + } else { + task.query.clone() + }; + format!( + " {} `{}` - {} ({}){}\n Query: {}", + status_emoji, task.id, task.status, task.elapsed_display(), injected, query_preview + ) +} + +fn format_research_report_header(task: &g3_core::pending_research::ResearchTask) -> String { + format!( + "📋 Research Report: `{}`\n\nQuery: {}\n\nStatus: {} | Elapsed: {}\n\n{}", + task.id, task.query, task.status, task.elapsed_display(), "─".repeat(60) + ) +} + /// Handle a control command. Returns true if the command was handled and the loop should continue. pub async fn handle_command( input: &str, @@ -135,89 +162,49 @@ pub async fn handle_command( } cmd if cmd == "/research" || cmd.starts_with("/research ") => { let manager = agent.get_pending_research_manager(); - - // Parse argument: /research, /research latest, /research let arg = cmd.strip_prefix("/research").unwrap_or("").trim(); if arg.is_empty() { - // List all research tasks let all_tasks = manager.list_all(); - if all_tasks.is_empty() { - output.print("📋 No research tasks (pending or completed)."); + output.print("📋 No research tasks (pending or completed)."); } else { - output.print(&format!("📋 Research Tasks ({} total):\n", all_tasks.len())); - - for task in all_tasks { - let status_emoji = match task.status { - g3_core::pending_research::ResearchStatus::Pending => "🔄", - g3_core::pending_research::ResearchStatus::Complete => "✅", - g3_core::pending_research::ResearchStatus::Failed => "❌", - }; - - let injected_marker = if task.injected { " (injected)" } else { "" }; - - output.print(&format!( - " {} `{}` - {} ({}){}\n Query: {}", - status_emoji, - task.id, - task.status, - task.elapsed_display(), - injected_marker, - if task.query.len() > 60 { - format!("{}...", &task.query.chars().take(57).collect::()) - } else { - task.query.clone() - } - )); - output.print(""); + output.print(&format!("📋 Research Tasks ({} total):\n", all_tasks.len())); + for task in all_tasks { + output.print(&format_research_task_summary(&task)); + output.print(""); } } } else if arg == "latest" { - // Show the most recent research report let all_tasks = manager.list_all(); - - // Find the most recent completed task (smallest elapsed time = most recent) let latest = all_tasks.iter() .filter(|t| t.status != g3_core::pending_research::ResearchStatus::Pending) .min_by_key(|t| t.started_at.elapsed()); match latest { Some(task) => { - output.print(&format!("📋 Research Report: `{}`\n", task.id)); - output.print(&format!("Query: {}\n", task.query)); - output.print(&format!("Status: {} | Elapsed: {}\n", task.status, task.elapsed_display())); - output.print(&"─".repeat(60)); - if let Some(ref result) = task.result { - output.print(result); - } else { - output.print("(No report content available)"); - } + output.print(&format_research_report_header(task)); + output.print(task.result.as_deref().unwrap_or("(No report content available)")); } None => { output.print("📋 No completed research tasks yet."); } } } else { - // View a specific research report by ID - let task_id = arg.to_string(); - - match manager.get(&task_id) { + match manager.get(&arg.to_string()) { Some(task) => { - output.print(&format!("📋 Research Report: `{}`\n", task.id)); - output.print(&format!("Query: {}\n", task.query)); - output.print(&format!("Status: {} | Elapsed: {}\n", task.status, task.elapsed_display())); - output.print(&"─".repeat(60)); - if let Some(ref result) = task.result { - output.print(result); + output.print(&format_research_report_header(&task)); + let content = if let Some(ref result) = task.result { + result.as_str() } else if task.status == g3_core::pending_research::ResearchStatus::Pending { - output.print("(Research still in progress...)"); + "(Research still in progress...)" } else { - output.print("(No report content available)"); - } + "(No report content available)" + }; + output.print(content); } None => { - output.print(&format!("❓ No research task found with id: `{}`", task_id)); + output.print(&format!("❓ No research task found with id: `{}`", arg)); } } } diff --git a/crates/g3-cli/src/input_formatter.rs b/crates/g3-cli/src/input_formatter.rs index 7c1e434..95bc34f 100644 --- a/crates/g3-cli/src/input_formatter.rs +++ b/crates/g3-cli/src/input_formatter.rs @@ -1,129 +1,102 @@ //! Input formatting for interactive mode. //! -//! Formats user input with markdown-style highlighting: -//! - ALL CAPS words become bold -//! - Quoted text ("..." or '...') becomes cyan -//! - Standard markdown formatting (bold, italic, code) is applied +//! Applies visual highlighting to user input: +//! - ALL CAPS words (2+ chars) → bold green +//! - Quoted text ("..." or '...') → cyan +//! - Standard markdown (bold, italic, code) via termimad use crossterm::terminal; use regex::Regex; use std::io::Write; use std::io::IsTerminal; +use once_cell::sync::Lazy; use termimad::MadSkin; use crate::streaming_markdown::StreamingMarkdownFormatter; -/// Pre-process input text to add markdown markers for special formatting. -/// -/// This pass runs BEFORE markdown formatting: -/// 1. ALL CAPS words (2+ chars) → wrapped in ** for bold -/// 2. Quoted text "..." or '...' → wrapped in special markers for cyan -/// -/// Returns the preprocessed text ready for markdown formatting. +// Compiled regexes for preprocessing (compiled once, reused) +static CAPS_RE: Lazy = Lazy::new(|| { + // ALL CAPS words: 2+ uppercase letters, may include numbers, word boundaries + Regex::new(r"\b([A-Z][A-Z0-9]{1,}[A-Z0-9]*)\b").unwrap() +}); +static DOUBLE_QUOTE_RE: Lazy = Lazy::new(|| Regex::new(r#""([^"]+)""#).unwrap()); +static SINGLE_QUOTE_RE: Lazy = Lazy::new(|| Regex::new(r"'([^']+)'").unwrap()); + +/// Pre-process input to add markdown markers before formatting. +/// ALL CAPS → **bold**, quoted text → special markers for cyan. pub fn preprocess_input(input: &str) -> String { let mut result = input.to_string(); - // First, handle ALL CAPS words (2+ uppercase letters, may include numbers) - // Must be a standalone word (word boundaries) - let caps_re = Regex::new(r"\b([A-Z][A-Z0-9]{1,}[A-Z0-9]*)\b").unwrap(); - result = caps_re.replace_all(&result, "**$1**").to_string(); + // ALL CAPS → **bold** + result = CAPS_RE.replace_all(&result, "**$1**").to_string(); - // Then, handle quoted text - wrap in a special marker that we'll process after markdown - // Use lowercase placeholders that won't be matched by the ALL CAPS regex - let double_quote_re = Regex::new(r#""([^"]+)""#).unwrap(); - result = double_quote_re.replace_all(&result, "\x00qdbl\x00$1\x00qend\x00").to_string(); - - let single_quote_re = Regex::new(r"'([^']+)'").unwrap(); - result = single_quote_re.replace_all(&result, "\x00qsgl\x00$1\x00qend\x00").to_string(); + // Quoted text → markers (processed after markdown to apply cyan) + result = DOUBLE_QUOTE_RE.replace_all(&result, "\x00qdbl\x00$1\x00qend\x00").to_string(); + result = SINGLE_QUOTE_RE.replace_all(&result, "\x00qsgl\x00$1\x00qend\x00").to_string(); result } -/// Apply cyan highlighting to quoted text markers. -/// This runs AFTER markdown formatting to apply the cyan color. +// Regexes for post-processing quote markers into ANSI cyan +static CYAN_DOUBLE_RE: Lazy = Lazy::new(|| { + Regex::new(r#"(\x1b\[36m")([^\x1b]*)\x1b\[0m"#).unwrap() +}); +static CYAN_SINGLE_RE: Lazy = Lazy::new(|| { + Regex::new(r"(\x1b\[36m')([^\x1b]*)\x1b\[0m").unwrap() +}); + +/// Apply cyan highlighting to quoted text markers (runs after markdown formatting). fn apply_quote_highlighting(text: &str) -> String { let mut result = text.to_string(); - // Replace double-quote markers with cyan formatting // \x1b[36m = cyan, \x1b[0m = reset result = result.replace("\x00qdbl\x00", "\x1b[36m\""); result = result.replace("\x00qsgl\x00", "\x1b[36m'"); result = result.replace("\x00qend\x00", "\x1b[0m"); - // Add back the closing quotes - // We need to insert them before the reset code - let re = Regex::new(r#"(\x1b\[36m")([^\x1b]*)\x1b\[0m"#).unwrap(); - result = re.replace_all(&result, |caps: ®ex::Captures| { + // Insert closing quotes before reset code + result = CYAN_DOUBLE_RE.replace_all(&result, |caps: ®ex::Captures| { format!("{}{}\"\x1b[0m", &caps[1], &caps[2]) }).to_string(); - - let re = Regex::new(r"(\x1b\[36m')([^\x1b]*)\x1b\[0m").unwrap(); - result = re.replace_all(&result, |caps: ®ex::Captures| { + result = CYAN_SINGLE_RE.replace_all(&result, |caps: ®ex::Captures| { format!("{}{}'\x1b[0m", &caps[1], &caps[2]) }).to_string(); result } -/// Format user input with markdown and special highlighting. -/// -/// Applies: -/// 1. ALL CAPS → bold (green) -/// 2. Quoted text → cyan -/// 3. Standard markdown (bold, italic, inline code) +/// Format user input with markdown and special highlighting (ALL CAPS, quotes). pub fn format_input(input: &str) -> String { - // Pre-process to add markdown markers let preprocessed = preprocess_input(input); - // Apply markdown formatting using the streaming formatter let skin = MadSkin::default(); let mut formatter = StreamingMarkdownFormatter::new(skin); let formatted = formatter.process(&preprocessed); let formatted = formatted + &formatter.finish(); - // Apply quote highlighting (after markdown so colors don't interfere) apply_quote_highlighting(&formatted) } -/// Reprint user input in place with formatting. -/// -/// This moves the cursor up to overwrite the original input line, -/// then prints the formatted version. -/// -/// Note: This function only performs formatting when stdout is a TTY. -/// In non-TTY contexts (piped output, etc.), it does nothing to avoid -/// corrupting terminal state for subsequent stdin operations. +/// Reprint user input in place with formatting (TTY only). +/// Moves cursor up to overwrite original input, then prints formatted version. pub fn reprint_formatted_input(input: &str, prompt: &str) { - // Only reformat if stdout is a TTY - avoid corrupting terminal state otherwise if !std::io::stdout().is_terminal() { return; } - // Format the input let formatted = format_input(input); - // Get terminal width to calculate visual lines - // The prompt + input may wrap across multiple terminal rows - let term_width = terminal::size() - .map(|(w, _)| w as usize) - .unwrap_or(80); + // Calculate visual lines (prompt + input may wrap across terminal rows) + let term_width = terminal::size().map(|(w, _)| w as usize).unwrap_or(80); + let visual_lines = (prompt.len() + input.len()).div_ceil(term_width).max(1); - // Calculate visual lines: prompt + input length divided by terminal width - // This accounts for line wrapping in the terminal - let total_chars = prompt.len() + input.len(); - let visual_lines = ((total_chars + term_width - 1) / term_width).max(1); // ceiling division - - // Move cursor up by the number of lines and clear + // Move up and clear each line for _ in 0..visual_lines { - // Move up one line and clear it print!("\x1b[1A\x1b[2K"); } - // Reprint with prompt and formatted input - // Use dim color for the prompt to distinguish from the formatted input + // Dim prompt + formatted input println!("\x1b[2m{}\x1b[0m{}", prompt, formatted); - - // Ensure output is flushed let _ = std::io::stdout().flush(); } diff --git a/crates/g3-core/src/pending_research.rs b/crates/g3-core/src/pending_research.rs index 4a9a44b..362ae52 100644 --- a/crates/g3-core/src/pending_research.rs +++ b/crates/g3-core/src/pending_research.rs @@ -366,7 +366,7 @@ mod tests { let id1 = manager.register("Query 1"); let id2 = manager.register("Query 2"); - let id3 = manager.register("Query 3"); + let _id3 = manager.register("Query 3"); // Complete two, leave one pending manager.complete(&id1, "Report 1".to_string()); @@ -415,7 +415,7 @@ mod tests { assert!(manager.format_status_summary().is_none()); // One pending - let id1 = manager.register("Query 1"); + let _id1 = manager.register("Query 1"); let summary = manager.format_status_summary().unwrap(); assert!(summary.contains("1 researching")); diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs index 723bd95..a9712bd 100644 --- a/crates/g3-core/src/streaming_parser.rs +++ b/crates/g3-core/src/streaming_parser.rs @@ -1,19 +1,21 @@ //! Streaming tool parser for processing LLM response chunks. //! -//! This module handles parsing of tool calls from streaming LLM responses, -//! supporting both native tool calls and JSON-based fallback parsing. +//! Parses tool calls from streaming LLM responses, supporting: +//! - Native tool calls (returned directly by the provider) +//! - JSON-based fallback parsing (for embedded models) //! -//! **Important**: JSON tool calls are only recognized when they appear on their -//! own line (preceded by a newline or at the start of the buffer). This prevents -//! inline JSON examples in prose from being incorrectly parsed as tool calls. +//! # JSON Tool Call Recognition +//! +//! To prevent false positives from JSON examples in prose, tool calls are only +//! recognized when they appear "on their own line" - either at the start of the +//! buffer or preceded by a newline (with optional whitespace). use tracing::debug; use crate::ToolCall; -/// Patterns used to detect JSON tool calls in text. -/// These cover common whitespace variations in JSON formatting. -const TOOL_CALL_PATTERNS: [&str; 4] = [ +/// JSON patterns that indicate a tool call. Covers common whitespace variations. +const TOOL_CALL_PATTERNS: &[&str] = &[ r#"{"tool":"#, r#"{ "tool":"#, r#"{"tool" :"#, @@ -24,10 +26,7 @@ const TOOL_CALL_PATTERNS: [&str; 4] = [ // Code Fence Tracking // ============================================================================ -/// Tracks whether we're inside a markdown code fence (``` block). -/// -/// Used during streaming to avoid parsing JSON examples inside code blocks -/// as tool calls. +/// Tracks code fence state to avoid parsing JSON examples inside ``` blocks. #[derive(Debug, Default)] struct CodeFenceTracker { /// Whether we're currently inside a code fence @@ -53,10 +52,8 @@ impl CodeFenceTracker { } } - /// Check if current_line is a code fence marker and toggle state if so. fn check_and_toggle_fence(&mut self) { - let trimmed = self.current_line.trim_start(); - if trimmed.starts_with("```") && trimmed.chars().take_while(|&c| c == '`').count() >= 3 { + if self.current_line.trim_start().starts_with("```") { self.in_fence = !self.in_fence; debug!( "Code fence toggled: in_fence={} (line: {:?})", @@ -75,9 +72,7 @@ impl CodeFenceTracker { } } -/// Find all code fence ranges in text (for batch processing). -/// -/// Returns a vector of (start, end) byte positions where code fences are. +/// Find all code fence ranges in text. Returns (start, end) byte positions. /// Each range represents content INSIDE a fence (between ``` markers). fn find_code_fence_ranges(text: &str) -> Vec<(usize, usize)> { let mut ranges = Vec::new(); @@ -121,8 +116,7 @@ fn is_position_in_fence_ranges(pos: usize, ranges: &[(usize, usize)]) -> bool { // JSON Parsing Utilities // ============================================================================ -/// Find the end position (byte index) of a complete JSON object in the text. -/// Returns None if no complete JSON object is found. +/// Find the end byte index of a complete JSON object, or None if incomplete. fn find_json_object_end(text: &str) -> Option { let mut brace_count = 0; let mut in_string = false; @@ -155,12 +149,12 @@ fn find_json_object_end(text: &str) -> Option { None } -/// Check if a partial JSON tool call has been invalidated by subsequent content. +/// Check if a partial JSON tool call has been invalidated. /// -/// Detects two invalidation cases: +/// Invalidation cases: /// 1. Unescaped newline inside a JSON string (invalid JSON) -/// 2. Newline followed by non-JSON prose (e.g., regular text, not `"`, `{`, `}`, etc.) -/// 3. Newline followed by a new tool call pattern (`{"tool"`) - indicates abandoned fragment +/// 2. Newline followed by non-JSON prose (regular text) +/// 3. Newline followed by a new tool call pattern - indicates abandoned fragment fn is_json_invalidated(json_text: &str) -> bool { let mut in_string = false; let mut escape_next = false; @@ -187,8 +181,7 @@ fn is_json_invalidated(json_text: &str) -> bool { // Check what comes after the newline if let Some(&(next_pos, next_ch)) = chars.peek() { - // Check if this is the start of a NEW tool call pattern - // This indicates the previous JSON fragment was abandoned + // New tool call pattern = previous fragment was abandoned let remaining = &json_text[next_pos..]; if remaining.starts_with("{\"tool\"") || remaining.starts_with("{ \"tool\"") @@ -198,7 +191,6 @@ fn is_json_invalidated(json_text: &str) -> bool { return true; // New tool call started, previous fragment is abandoned } - // Check if next char is valid JSON continuation let valid_json_char = matches!( next_ch, '"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' | '0'..='9' | 't' | 'f' | 'n' | '\n' @@ -216,11 +208,8 @@ fn is_json_invalidated(json_text: &str) -> bool { } /// Detect malformed tool calls where LLM prose leaked into JSON keys. -/// -/// When the LLM "stutters" or mixes formats, it sometimes emits JSON where -/// the keys are actually fragments of conversational text rather than valid -/// parameter names. fn args_contain_prose_fragments(args: &serde_json::Map) -> bool { + // When the LLM "stutters", keys may contain conversational text fragments const PROSE_MARKERS: &[&str] = &[ "I'll", "Let me", "Here's", "I can", "I need", "First", "Now", "The ", ]; @@ -236,9 +225,7 @@ fn args_contain_prose_fragments(args: &serde_json::Map bool { if pos == 0 { return true; @@ -247,22 +234,19 @@ fn is_on_own_line(text: &str, pos: usize) -> bool { text[line_start..pos].chars().all(|c| c.is_whitespace()) } -/// Find the first tool call pattern that appears on its own line. fn find_first_tool_call_start(text: &str) -> Option { find_tool_call_start(text, false) } -/// Find the last tool call pattern that appears on its own line. fn find_last_tool_call_start(text: &str) -> Option { find_tool_call_start(text, true) } -/// Find a tool call pattern in text, optionally searching backwards. -/// Only matches patterns on their own line (at start or after newline + whitespace). +/// Find a tool call pattern on its own line. If `find_last`, search backwards. fn find_tool_call_start(text: &str, find_last: bool) -> Option { let mut best_pos: Option = None; - for pattern in &TOOL_CALL_PATTERNS { + for pattern in TOOL_CALL_PATTERNS { if find_last { // Search backwards let mut search_end = text.len(); @@ -306,20 +290,16 @@ fn find_tool_call_start(text: &str, find_last: bool) -> Option { // StreamingToolParser // ============================================================================ -/// Modern streaming tool parser that properly handles native tool calls and SSE chunks. +/// Streaming parser for tool calls from LLM responses (native or JSON fallback). #[derive(Debug)] pub struct StreamingToolParser { - /// Buffer for accumulating text content text_buffer: String, - /// Position in text_buffer up to which tool calls have been consumed/executed. last_consumed_position: usize, - /// Whether we've received a message_stop event message_stopped: bool, - /// Whether we're currently in a JSON tool call (for fallback parsing) + // JSON fallback parsing state in_json_tool_call: bool, - /// Start position of JSON tool call (for fallback parsing) json_tool_start: Option, - /// Tracks code fence state during streaming + // Code fence tracking (to skip JSON examples in ``` blocks) fence_tracker: CodeFenceTracker, } @@ -345,13 +325,11 @@ impl StreamingToolParser { pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec { let mut completed_tools = Vec::new(); - // Add text content to buffer and track code fence state if !chunk.content.is_empty() { self.fence_tracker.process(&chunk.content); self.text_buffer.push_str(&chunk.content); } - // Handle native tool calls - return them immediately when received if let Some(ref tool_calls) = chunk.tool_calls { debug!("Received native tool calls: {:?}", tool_calls); for tool_call in tool_calls { @@ -362,10 +340,8 @@ impl StreamingToolParser { } } - // Check if message is finished/stopped if chunk.finished { self.message_stopped = true; - debug!("Message finished, processing accumulated tool calls"); // When stream finishes, find ALL JSON tool calls in the accumulated buffer if completed_tools.is_empty() && !self.text_buffer.is_empty() { @@ -380,8 +356,7 @@ impl StreamingToolParser { } } - // Fallback: Try to parse JSON tool calls from current chunk content if no native tool calls. - // Skip when inside a code fence to prevent false positives from JSON examples. + // JSON fallback: try to parse if no native calls and not inside a code fence if completed_tools.is_empty() && !chunk.content.is_empty() && !chunk.finished @@ -395,14 +370,11 @@ impl StreamingToolParser { completed_tools } - /// Try to parse a JSON tool call from the streaming buffer. - /// - /// Maintains state (`in_json_tool_call`, `json_tool_start`) to track - /// partial JSON tool calls across streaming chunks. + /// Try to parse a JSON tool call, tracking partial state across chunks. fn try_parse_streaming_json_tool_call(&mut self) -> Option { let fence_ranges = find_code_fence_ranges(&self.text_buffer); - // If not currently in a JSON tool call, look for the start + // Look for the start of a new tool call if !self.in_json_tool_call { let unchecked_buffer = &self.text_buffer[self.last_consumed_position..]; if let Some(relative_pos) = find_first_tool_call_start(unchecked_buffer) { @@ -428,7 +400,6 @@ impl StreamingToolParser { if let Some(start_pos) = self.json_tool_start { let json_text = &self.text_buffer[start_pos..]; - // Try to find a complete JSON object if let Some(end_pos) = find_json_object_end(json_text) { let json_str = &json_text[..=end_pos]; debug!("Attempting to parse JSON tool call: {}", json_str); @@ -439,12 +410,10 @@ impl StreamingToolParser { return Some(tool_call); } - // Parse failed, reset and continue looking self.in_json_tool_call = false; self.json_tool_start = None; } - // Check if the partial JSON has been invalidated if self.in_json_tool_call && is_json_invalidated(json_text) { debug!("JSON tool call invalidated by subsequent content, clearing state"); self.in_json_tool_call = false; @@ -458,7 +427,7 @@ impl StreamingToolParser { None } - /// Parse ALL JSON tool calls from the accumulated text buffer. + /// Parse all JSON tool calls from the accumulated buffer (used at stream end). fn parse_all_json_tool_calls(&self) -> Vec { let mut tool_calls = Vec::new(); let mut search_start = 0; @@ -472,7 +441,6 @@ impl StreamingToolParser { }; let abs_start = search_start + relative_pos; - let json_text = &self.text_buffer[abs_start..]; // Skip if inside a code fence if is_position_in_fence_ranges(abs_start, &fence_ranges) { @@ -480,7 +448,7 @@ impl StreamingToolParser { continue; } - // Try to find a complete JSON object + let json_text = &self.text_buffer[abs_start..]; let Some(end_pos) = find_json_object_end(json_text) else { break; // Incomplete JSON, stop searching }; @@ -497,31 +465,22 @@ impl StreamingToolParser { tool_calls } - /// Try to parse a JSON string as a ToolCall, validating the args. fn try_parse_tool_call_json(&self, json_str: &str) -> Option { let tool_call: ToolCall = serde_json::from_str(json_str).ok()?; - - // Validate that args is an object with reasonable keys let args_obj = tool_call.args.as_object()?; + if args_contain_prose_fragments(args_obj) { - debug!("Detected malformed tool call with message-like keys, skipping"); return None; } - debug!("Successfully parsed valid JSON tool call: {:?}", tool_call); Some(tool_call) } - // ======================================================================== - // Public Accessors - // ======================================================================== - - /// Get the accumulated text content. + // --- Public Accessors --- pub fn get_text_content(&self) -> &str { &self.text_buffer } - /// Get content before a specific position (for display purposes). pub fn get_content_before_position(&self, pos: usize) -> String { if pos <= self.text_buffer.len() { self.text_buffer[..pos].to_string() @@ -530,12 +489,10 @@ impl StreamingToolParser { } } - /// Check if the message has been stopped/finished. pub fn is_message_stopped(&self) -> bool { self.message_stopped } - /// Check if the text buffer contains an incomplete JSON tool call. pub fn has_incomplete_tool_call(&self) -> bool { let unchecked_buffer = &self.text_buffer[self.last_consumed_position..]; let Some(start_pos) = find_last_tool_call_start(unchecked_buffer) else { @@ -544,7 +501,6 @@ impl StreamingToolParser { let json_text = &unchecked_buffer[start_pos..]; - // Complete or invalidated = not incomplete if find_json_object_end(json_text).is_some() || is_json_invalidated(json_text) { return false; } @@ -552,7 +508,6 @@ impl StreamingToolParser { true } - /// Check if the text buffer contains an unexecuted tool call. pub fn has_unexecuted_tool_call(&self) -> bool { let unchecked_buffer = &self.text_buffer[self.last_consumed_position..]; let Some(start_pos) = find_last_tool_call_start(unchecked_buffer) else { @@ -568,27 +523,22 @@ impl StreamingToolParser { serde_json::from_str::(json_only).is_ok() } - /// Mark all tool calls up to the current buffer position as consumed/executed. pub fn mark_tool_calls_consumed(&mut self) { self.last_consumed_position = self.text_buffer.len(); } - /// Get the current text buffer length (for position tracking). pub fn text_buffer_len(&self) -> usize { self.text_buffer.len() } - /// Check if currently parsing a JSON tool call (for debugging). pub fn is_in_json_tool_call(&self) -> bool { self.in_json_tool_call } - /// Get the JSON tool start position (for debugging). pub fn json_tool_start_position(&self) -> Option { self.json_tool_start } - /// Reset the parser state for a new message. pub fn reset(&mut self) { self.text_buffer.clear(); self.last_consumed_position = 0; @@ -598,34 +548,25 @@ impl StreamingToolParser { self.fence_tracker.reset(); } - // ======================================================================== - // Static Methods (for external use) - // ======================================================================== - - /// Find the starting position of the FIRST tool call pattern on its own line. + // --- Static Methods (for external use) --- pub fn find_first_tool_call_start(text: &str) -> Option { find_first_tool_call_start(text) } - /// Find the starting position of the LAST tool call pattern on its own line. pub fn find_last_tool_call_start(text: &str) -> Option { find_last_tool_call_start(text) } - /// Check if a position in text is "on its own line". pub fn is_on_own_line(text: &str, pos: usize) -> bool { is_on_own_line(text, pos) } - /// Find the end position of a complete JSON object. pub fn find_complete_json_object_end(text: &str) -> Option { find_json_object_end(text) } } -// ============================================================================ // Tests -// ============================================================================ #[cfg(test)] mod tests {