diff --git a/crates/g3-core/src/context_window.rs b/crates/g3-core/src/context_window.rs index e0c7cb3..0b2d677 100644 --- a/crates/g3-core/src/context_window.rs +++ b/crates/g3-core/src/context_window.rs @@ -13,23 +13,22 @@ use tracing::{debug, warn}; use crate::paths::get_thinned_dir; use crate::ToolCall; +// ============================================================================ +// Types +// ============================================================================ + /// Result of a context thinning operation. /// Contains semantic data for the UI layer to format. #[derive(Debug, Clone)] pub struct ThinResult { - /// Scope of the thinning operation pub scope: ThinScope, - /// Context percentage before thinning pub before_percentage: u32, - /// Context percentage after thinning pub after_percentage: u32, /// Number of tool result messages that were thinned pub leaned_count: usize, /// Number of tool calls in assistant messages that were thinned pub tool_call_leaned_count: usize, - /// Total characters saved pub chars_saved: usize, - /// Whether any changes were made pub had_changes: bool, } @@ -58,20 +57,29 @@ impl ThinScope { } } -/// Represents a modification to be applied to a message +/// Represents a modification to be applied to a message during thinning #[derive(Debug)] enum ThinModification { - /// Replace the entire message content - ReplaceContent { index: usize, new_content: String, chars_saved: usize }, + ReplaceContent { + index: usize, + new_content: String, + chars_saved: usize, + }, } +// ============================================================================ +// ContextWindow +// ============================================================================ + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ContextWindow { pub used_tokens: u32, pub total_tokens: u32, - pub cumulative_tokens: u32, // Track cumulative tokens across all interactions + /// Track cumulative tokens across all interactions + pub cumulative_tokens: u32, pub conversation_history: Vec, - pub last_thinning_percentage: u32, // Track the last percentage at which we thinned + /// Track the last percentage at which we thinned + pub last_thinning_percentage: u32, } impl ContextWindow { @@ -85,19 +93,21 @@ impl ContextWindow { } } + // ======================================================================== + // Message Management + // ======================================================================== + pub fn add_message(&mut self, message: Message) { self.add_message_with_tokens(message, None); } /// Add a message with optional token count from the provider pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option) { - // Skip messages with empty content to avoid API errors if message.content.trim().is_empty() { warn!("Skipping empty message to avoid API error"); return; } - // Use provided token count if available, otherwise estimate let token_count = tokens.unwrap_or_else(|| Self::estimate_tokens(&message.content)); self.used_tokens += token_count; self.cumulative_tokens += token_count; @@ -109,66 +119,9 @@ impl ContextWindow { ); } - /// Update token usage from provider response - /// NOTE: This only updates cumulative_tokens (total API usage tracking). - /// It does NOT update used_tokens because: - /// 1. prompt_tokens represents the ENTIRE context sent to API (already tracked via add_message) - /// 2. completion_tokens will be tracked when the assistant message is added via add_message - /// Adding total_tokens here would cause double/triple counting and break the 80% threshold check. - pub fn update_usage_from_response(&mut self, usage: &Usage) { - // Only update cumulative tokens for API usage tracking - // Do NOT update used_tokens - that's tracked via add_message to avoid double counting - self.cumulative_tokens += usage.total_tokens; - - debug!( - "Updated cumulative tokens: {} (used: {}/{}, cumulative: {})", - usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens - ); - } - - /// More accurate token estimation - pub fn estimate_tokens(text: &str) -> u32 { - // Better heuristic: - // - Average English text: ~4 characters per token - // - Code/JSON: ~3 characters per token (more symbols) - // - Add 10% buffer for safety - let base_estimate = if text.contains("{") || text.contains("```") || text.contains("fn ") { - (text.len() as f32 / 3.0).ceil() as u32 // Code/JSON - } else { - (text.len() as f32 / 4.0).ceil() as u32 // Regular text - }; - (base_estimate as f32 * 1.1).ceil() as u32 // Add 10% buffer - } - - pub fn update_usage(&mut self, usage: &Usage) { - // Deprecated: Use update_usage_from_response instead - self.update_usage_from_response(usage); - } - - /// Update cumulative token usage (for streaming) when no provider usage data is available - /// NOTE: This only updates cumulative_tokens, not used_tokens. - /// The assistant message will be added via add_message which tracks used_tokens. - pub fn add_streaming_tokens(&mut self, new_tokens: u32) { - // Only update cumulative tokens - used_tokens is tracked via add_message - self.cumulative_tokens += new_tokens; - debug!( - "Updated cumulative streaming tokens: {} (used: {}/{}, cumulative: {})", - new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens - ); - } - - pub fn percentage_used(&self) -> f32 { - if self.total_tokens == 0 { - 0.0 - } else { - (self.used_tokens as f32 / self.total_tokens as f32) * 100.0 - } - } - - /// Clear the conversation history while preserving system messages - /// Used by /clear command to start fresh + /// Clear the conversation history while preserving system messages. + /// Used by /clear command to start fresh. pub fn clear_conversation(&mut self) { - // Keep only system messages (system prompt, README, etc.) let system_messages: Vec = self .conversation_history .iter() @@ -185,24 +138,102 @@ impl ContextWindow { self.last_thinning_percentage = 0; } + // ======================================================================== + // Token Tracking + // ======================================================================== + + /// Update token usage from provider response. + /// + /// NOTE: This only updates cumulative_tokens (total API usage tracking). + /// It does NOT update used_tokens because: + /// 1. prompt_tokens represents the ENTIRE context sent to API (already tracked via add_message) + /// 2. completion_tokens will be tracked when the assistant message is added via add_message + /// Adding total_tokens here would cause double/triple counting and break the 80% threshold check. + pub fn update_usage_from_response(&mut self, usage: &Usage) { + self.cumulative_tokens += usage.total_tokens; + debug!( + "Updated cumulative tokens: {} (used: {}/{}, cumulative: {})", + usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens + ); + } + + /// Deprecated: Use update_usage_from_response instead + pub fn update_usage(&mut self, usage: &Usage) { + self.update_usage_from_response(usage); + } + + /// Update cumulative token usage (for streaming) when no provider usage data is available. + /// NOTE: This only updates cumulative_tokens, not used_tokens. + pub fn add_streaming_tokens(&mut self, new_tokens: u32) { + self.cumulative_tokens += new_tokens; + debug!( + "Updated cumulative streaming tokens: {} (used: {}/{}, cumulative: {})", + new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens + ); + } + + /// Recalculate token usage based on current conversation history. + pub fn recalculate_tokens(&mut self) { + self.used_tokens = self + .conversation_history + .iter() + .map(|m| Self::estimate_tokens(&m.content)) + .sum(); + debug!("Recalculated tokens after thinning: {} tokens", self.used_tokens); + } + + /// More accurate token estimation. + pub fn estimate_tokens(text: &str) -> u32 { + // Heuristic: + // - Average English text: ~4 characters per token + // - Code/JSON: ~3 characters per token (more symbols) + // - Add 10% buffer for safety + let base_estimate = if text.contains('{') || text.contains("```") || text.contains("fn ") { + (text.len() as f32 / 3.0).ceil() as u32 + } else { + (text.len() as f32 / 4.0).ceil() as u32 + }; + (base_estimate as f32 * 1.1).ceil() as u32 + } + + // ======================================================================== + // Capacity Queries + // ======================================================================== + + pub fn percentage_used(&self) -> f32 { + if self.total_tokens == 0 { + 0.0 + } else { + (self.used_tokens as f32 / self.total_tokens as f32) * 100.0 + } + } + pub fn remaining_tokens(&self) -> u32 { self.total_tokens.saturating_sub(self.used_tokens) } - /// Check if we should trigger compaction (at 80% capacity) + /// Check if we should trigger compaction (at 80% capacity or 150k tokens). pub fn should_compact(&self) -> bool { - // Trigger at 80% OR if we're getting close to absolute limits - // This prevents issues with models that have large contexts but still hit limits - let percentage_trigger = self.percentage_used() >= 80.0; - - // Also trigger if we're approaching common token limits - // Most models start having issues around 150k tokens - let absolute_trigger = self.used_tokens > 150_000; - - percentage_trigger || absolute_trigger + self.percentage_used() >= 80.0 || self.used_tokens > 150_000 } - /// Create a summary request prompt for the current conversation + /// Check if we should trigger context thinning. + /// Triggers at 50%, 60%, 70%, and 80% thresholds. + pub fn should_thin(&self) -> bool { + let current_percentage = self.percentage_used() as u32; + if current_percentage < 50 { + return false; + } + + let current_threshold = (current_percentage / 10) * 10; + current_threshold > self.last_thinning_percentage && current_threshold <= 80 + } + + // ======================================================================== + // Compaction / Summary + // ======================================================================== + + /// Create a summary request prompt for the current conversation. pub fn create_summary_prompt(&self) -> String { "Please provide a comprehensive summary of our conversation so far. Include: @@ -216,122 +247,56 @@ impl ContextWindow { Format this as a detailed but concise summary that can be used to resume the conversation from scratch while maintaining full context.".to_string() } - /// Reset the context window with a summary - /// Preserves the original system prompt as the first message + /// Reset the context window with a summary. + /// Preserves the original system prompt as the first message. pub fn reset_with_summary( &mut self, summary: String, latest_user_message: Option, ) -> usize { - // Calculate chars saved (old history minus new summary) - let old_chars: usize = self - .conversation_history - .iter() - .map(|m| m.content.len()) - .sum(); - - // Preserve the original system prompt (first message) and optionally the README (second message) - let original_system_prompt = self.conversation_history.first().cloned(); - let readme_message = self.conversation_history.get(1).and_then(|msg| { - if matches!(msg.role, MessageRole::System) - && (msg.content.contains("Project README") - || msg.content.contains("Agent Configuration")) - { - Some(msg.clone()) - } else { - None - } - }); - - // Clear the conversation history - self.conversation_history.clear(); - self.used_tokens = 0; - - // Re-add the original system prompt first (critical invariant) - if let Some(system_prompt) = original_system_prompt { - self.add_message(system_prompt); - } - - // Re-add the README message if it existed - if let Some(readme) = readme_message { - self.add_message(readme); - } - - // Add the summary as a system message - let summary_message = Message::new( - MessageRole::System, - format!("Previous conversation summary:\n\n{}", summary), - ); - self.add_message(summary_message); - - // Add the latest user message if provided - if let Some(user_msg) = latest_user_message { - self.add_message(Message::new(MessageRole::User, user_msg)); - } - - let new_chars: usize = self - .conversation_history - .iter() - .map(|m| m.content.len()) - .sum(); - old_chars.saturating_sub(new_chars) + self.reset_with_summary_and_stub(summary, latest_user_message, None) } - /// Reset context window with a summary and optional ACD stub - /// Preserves the original system prompt as the first message - /// If stub is provided, it's added as a system message before the summary + /// Reset context window with a summary and optional ACD stub. + /// Preserves the original system prompt as the first message. + /// If stub is provided, it's added as a system message before the summary. pub fn reset_with_summary_and_stub( &mut self, summary: String, latest_user_message: Option, stub: Option, ) -> usize { - // Calculate chars saved (old history minus new summary) let old_chars: usize = self .conversation_history .iter() .map(|m| m.content.len()) .sum(); - // Preserve the original system prompt (first message) and optionally the README (second message) - let original_system_prompt = self.conversation_history.first().cloned(); - let readme_message = self.conversation_history.get(1).and_then(|msg| { - if matches!(msg.role, MessageRole::System) - && (msg.content.contains("Project README") - || msg.content.contains("Agent Configuration")) - { - Some(msg.clone()) - } else { - None - } - }); + // Extract preserved messages before clearing + let preserved = self.extract_preserved_messages(); - // Clear the conversation history + // Clear and rebuild self.conversation_history.clear(); self.used_tokens = 0; - // Re-add the original system prompt first (critical invariant) - if let Some(system_prompt) = original_system_prompt { + // Re-add preserved messages + if let Some(system_prompt) = preserved.system_prompt { self.add_message(system_prompt); } - - // Re-add the README message if it existed - if let Some(readme) = readme_message { + if let Some(readme) = preserved.readme { self.add_message(readme); } - // Add the ACD stub if provided (before summary so LLM knows about dehydrated context) + // Add ACD stub if provided (before summary so LLM knows about dehydrated context) if let Some(stub_content) = stub { - let stub_message = Message::new(MessageRole::System, stub_content); - self.add_message(stub_message); + self.add_message(Message::new(MessageRole::System, stub_content)); } - // Add the summary as a system message - let summary_message = Message::new( + // Add the summary + self.add_message(Message::new( MessageRole::System, format!("Previous conversation summary:\n\n{}", summary), - ); - self.add_message(summary_message); + )); // Add the latest user message if provided if let Some(user_msg) = latest_user_message { @@ -346,20 +311,39 @@ Format this as a detailed but concise summary that can be used to resume the con old_chars.saturating_sub(new_chars) } - /// Check if we should trigger context thinning - /// Triggers at 50%, 60%, 70%, and 80% thresholds - pub fn should_thin(&self) -> bool { - let current_percentage = self.percentage_used() as u32; + /// Extract messages that should be preserved across compaction. + fn extract_preserved_messages(&self) -> PreservedMessages { + let system_prompt = self.conversation_history.first().cloned(); - // Check if we've crossed a new 10% threshold starting at 50% - if current_percentage >= 50 { - let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10% - if current_threshold > self.last_thinning_percentage && current_threshold <= 80 { - return true; + let readme = self.conversation_history.get(1).and_then(|msg| { + if matches!(msg.role, MessageRole::System) + && (msg.content.contains("Project README") + || msg.content.contains("Agent Configuration")) + { + Some(msg.clone()) + } else { + None } - } + }); - false + PreservedMessages { + system_prompt, + readme, + } + } + + // ======================================================================== + // Context Thinning + // ======================================================================== + + /// Thin context (first third only). + pub fn thin_context(&mut self, session_id: Option<&str>) -> ThinResult { + self.thin_context_with_scope(session_id, ThinScope::FirstThird) + } + + /// Thin entire context (all messages). + pub fn thin_context_all(&mut self, session_id: Option<&str>) -> ThinResult { + self.thin_context_with_scope(session_id, ThinScope::All) } /// Perform context thinning: scan messages and replace large tool results with file references. @@ -367,9 +351,6 @@ Format this as a detailed but concise summary that can be used to resume the con /// # Arguments /// * `session_id` - If provided, thinned content is saved to .g3/session//thinned/ /// * `scope` - Controls which messages to process (first third or all) - /// - /// # Returns - /// A `ThinResult` with semantic data about the operation pub fn thin_context_with_scope( &mut self, session_id: Option<&str>, @@ -383,73 +364,58 @@ Format this as a detailed but concise summary that can be used to resume the con self.last_thinning_percentage = current_threshold; } - // Calculate message range based on scope - let total_messages = self.conversation_history.len(); - let end_index = match scope { - ThinScope::FirstThird => (total_messages / 3).max(1), - ThinScope::All => total_messages, - }; - - // Determine output directory: use session dir if available, otherwise ~/tmp + // Resolve output directory let tmp_dir = match Self::resolve_thinned_dir(session_id, scope) { Ok(dir) => dir, - Err(_) => { - return ThinResult { - scope, - before_percentage: current_percentage, - after_percentage: current_percentage, - leaned_count: 0, - tool_call_leaned_count: 0, - chars_saved: 0, - had_changes: false, - }; - } + Err(_) => return ThinResult::no_changes(scope, current_percentage), }; - // Collect modifications to apply (avoids borrow checker issues) - let modifications = self.collect_thin_modifications(end_index, &tmp_dir, scope.file_prefix()); + // Calculate message range based on scope + let end_index = match scope { + ThinScope::FirstThird => (self.conversation_history.len() / 3).max(1), + ThinScope::All => self.conversation_history.len(), + }; - // Count results - let mut leaned_count = 0; - let mut tool_call_leaned_count = 0; - let mut chars_saved = 0; - - // Apply modifications - for modification in &modifications { - match modification { - ThinModification::ReplaceContent { index, new_content, chars_saved: saved } => { - if let Some(msg) = self.conversation_history.get_mut(*index) { - // Determine if this was a tool result or tool call based on content - if msg.content.starts_with("Tool result:") { - leaned_count += 1; - } else { - tool_call_leaned_count += 1; - } - msg.content = new_content.clone(); - chars_saved += saved; - } - } - } - } + // Collect and apply modifications + let modifications = + self.collect_thin_modifications(end_index, &tmp_dir, scope.file_prefix()); + let (leaned_count, tool_call_leaned_count, chars_saved) = + self.apply_thin_modifications(&modifications); // Recalculate token usage after thinning self.recalculate_tokens(); - // Get new percentage after thinning - let new_percentage = self.percentage_used() as u32; - - // Build result message - self.build_thin_result( + ThinResult { scope, - current_percentage, - new_percentage, + before_percentage: current_percentage, + after_percentage: self.percentage_used() as u32, leaned_count, tool_call_leaned_count, chars_saved, - ) + had_changes: leaned_count > 0 || tool_call_leaned_count > 0, + } } - /// Collect all modifications needed for thinning without mutating + /// Resolve the directory for storing thinned content. + fn resolve_thinned_dir(session_id: Option<&str>, scope: ThinScope) -> Result { + let dir = if let Some(sid) = session_id { + get_thinned_dir(sid).to_string_lossy().to_string() + } else { + shellexpand::tilde("~/tmp").to_string() + }; + + if let Err(e) = std::fs::create_dir_all(&dir) { + warn!("Failed to create thinned directory: {}", e); + return Err(format!( + "⚠️ Context {} failed: could not create directory", + scope.error_action() + )); + } + + Ok(dir) + } + + /// Collect all modifications needed for thinning without mutating. fn collect_thin_modifications( &self, end_index: usize, @@ -459,36 +425,29 @@ Format this as a detailed but concise summary that can be used to resume the con let mut modifications = Vec::new(); for i in 0..end_index { - if let Some(message) = self.conversation_history.get(i) { - // Check if the previous message was a TODO tool call - let is_todo_result = self.is_todo_tool_result(i); + let Some(message) = self.conversation_history.get(i) else { + continue; + }; - // Process User messages that look like tool results - if matches!(message.role, MessageRole::User) - && message.content.starts_with("Tool result:") - && !is_todo_result - && message.content.len() > 500 + // Process User messages that look like tool results + if matches!(message.role, MessageRole::User) + && message.content.starts_with("Tool result:") + && !self.is_todo_tool_result(i) + && message.content.len() > 500 + { + if let Some(m) = + Self::create_tool_result_modification(&message.content, i, tmp_dir, file_prefix) { - if let Some(modification) = Self::create_tool_result_modification( - &message.content, - i, - tmp_dir, - file_prefix, - ) { - modifications.push(modification); - } + modifications.push(m); } + } - // Process Assistant messages that contain tool calls with large arguments - if matches!(message.role, MessageRole::Assistant) { - if let Some(modification) = Self::create_tool_call_modification( - &message.content, - i, - tmp_dir, - file_prefix, - ) { - modifications.push(modification); - } + // Process Assistant messages that contain tool calls with large arguments + if matches!(message.role, MessageRole::Assistant) { + if let Some(m) = + Self::create_tool_call_modification(&message.content, i, tmp_dir, file_prefix) + { + modifications.push(m); } } } @@ -496,59 +455,55 @@ Format this as a detailed but concise summary that can be used to resume the con modifications } - /// Thin context (first third only) - pub fn thin_context(&mut self, session_id: Option<&str>) -> ThinResult { - self.thin_context_with_scope(session_id, ThinScope::FirstThird) - } + /// Apply collected modifications and return counts. + fn apply_thin_modifications( + &mut self, + modifications: &[ThinModification], + ) -> (usize, usize, usize) { + let mut leaned_count = 0; + let mut tool_call_leaned_count = 0; + let mut chars_saved = 0; - /// Thin entire context (all messages) - pub fn thin_context_all(&mut self, session_id: Option<&str>) -> ThinResult { - self.thin_context_with_scope(session_id, ThinScope::All) - } + for modification in modifications { + let ThinModification::ReplaceContent { + index, + new_content, + chars_saved: saved, + } = modification; - /// Resolve the directory for storing thinned content - fn resolve_thinned_dir(session_id: Option<&str>, scope: ThinScope) -> Result { - if let Some(sid) = session_id { - let thinned_dir = get_thinned_dir(sid); - if let Err(e) = std::fs::create_dir_all(&thinned_dir) { - warn!("Failed to create thinned directory: {}", e); - return Err(format!( - "⚠️ Context {} failed: could not create thinned directory", - scope.error_action() - )); + if let Some(msg) = self.conversation_history.get_mut(*index) { + if msg.content.starts_with("Tool result:") { + leaned_count += 1; + } else { + tool_call_leaned_count += 1; + } + msg.content = new_content.clone(); + chars_saved += saved; } - Ok(thinned_dir.to_string_lossy().to_string()) - } else { - let fallback_dir = shellexpand::tilde("~/tmp").to_string(); - if let Err(e) = std::fs::create_dir_all(&fallback_dir) { - warn!("Failed to create ~/tmp directory: {}", e); - return Err(format!( - "⚠️ Context {} failed: could not create ~/tmp directory", - scope.error_action() - )); - } - Ok(fallback_dir) } + + (leaned_count, tool_call_leaned_count, chars_saved) } - /// Check if message at index i is a result of a TODO tool call + /// Check if message at index i is a result of a TODO tool call. fn is_todo_tool_result(&self, i: usize) -> bool { if i == 0 { return false; } - if let Some(prev_message) = self.conversation_history.get(i - 1) { - if matches!(prev_message.role, MessageRole::Assistant) { - return prev_message.content.contains(r#""tool":"todo_read""#) - || prev_message.content.contains(r#""tool":"todo_write""#) - || prev_message.content.contains(r#""tool": "todo_read""#) - || prev_message.content.contains(r#""tool": "todo_write""#); - } - } - false + self.conversation_history + .get(i - 1) + .map(|prev| { + matches!(prev.role, MessageRole::Assistant) + && (prev.content.contains(r#""tool":"todo_read""#) + || prev.content.contains(r#""tool":"todo_write""#) + || prev.content.contains(r#""tool": "todo_read""#) + || prev.content.contains(r#""tool": "todo_write""#)) + }) + .unwrap_or(false) } - /// Create a modification for thinning a tool result message + /// Create a modification for thinning a tool result message. fn create_tool_result_modification( content: &str, index: usize, @@ -583,7 +538,7 @@ Format this as a detailed but concise summary that can be used to resume the con }) } - /// Create a modification for thinning tool calls in an assistant message + /// Create a modification for thinning tool calls in an assistant message. fn create_tool_call_modification( content: &str, index: usize, @@ -644,8 +599,8 @@ Format this as a detailed but concise summary that can be used to resume the con }) } - /// Thin write_file args by saving content to file - /// Returns (chars_saved, new_args) if thinned + /// Thin write_file args by saving content to file. + /// Returns (chars_saved, new_args) if thinned. fn thin_write_file_args( args: &serde_json::Value, index: usize, @@ -654,9 +609,8 @@ Format this as a detailed but concise summary that can be used to resume the con ) -> Option<(usize, serde_json::Value)> { let args_obj = args.as_object()?; let content_str = args_obj.get("content")?.as_str()?; - let content_len = content_str.len(); - if content_len <= 500 { + if content_str.len() <= 500 { return None; } @@ -664,13 +618,15 @@ Format this as a detailed but concise summary that can be used to resume the con .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_secs(); - let filename = format!("{}_write_file_content_{}_{}.txt", file_prefix, timestamp, index); + let filename = format!( + "{}_write_file_content_{}_{}.txt", + file_prefix, timestamp, index + ); let file_path = format!("{}/{}", tmp_dir, filename); - if std::fs::write(&file_path, content_str).is_err() { - return None; - } + std::fs::write(&file_path, content_str).ok()?; + let content_len = content_str.len(); let mut new_args = args_obj.clone(); new_args.insert( "content".to_string(), @@ -685,8 +641,8 @@ Format this as a detailed but concise summary that can be used to resume the con Some((content_len, serde_json::Value::Object(new_args))) } - /// Thin str_replace args by saving diff to file - /// Returns (chars_saved, new_args) if thinned + /// Thin str_replace args by saving diff to file. + /// Returns (chars_saved, new_args) if thinned. fn thin_str_replace_args( args: &serde_json::Value, index: usize, @@ -695,9 +651,8 @@ Format this as a detailed but concise summary that can be used to resume the con ) -> Option<(usize, serde_json::Value)> { let args_obj = args.as_object()?; let diff_str = args_obj.get("diff")?.as_str()?; - let diff_len = diff_str.len(); - if diff_len <= 500 { + if diff_str.len() <= 500 { return None; } @@ -705,13 +660,15 @@ Format this as a detailed but concise summary that can be used to resume the con .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default() .as_secs(); - let filename = format!("{}_str_replace_diff_{}_{}.txt", file_prefix, timestamp, index); + let filename = format!( + "{}_str_replace_diff_{}_{}.txt", + file_prefix, timestamp, index + ); let file_path = format!("{}/{}", tmp_dir, filename); - if std::fs::write(&file_path, diff_str).is_err() { - return None; - } + std::fs::write(&file_path, diff_str).ok()?; + let diff_len = diff_str.len(); let mut new_args = args_obj.clone(); new_args.insert( "diff".to_string(), @@ -726,41 +683,11 @@ Format this as a detailed but concise summary that can be used to resume the con Some((diff_len, serde_json::Value::Object(new_args))) } - /// Build the result message for thinning operations - fn build_thin_result( - &self, - scope: ThinScope, - current_percentage: u32, - new_percentage: u32, - leaned_count: usize, - tool_call_leaned_count: usize, - chars_saved: usize, - ) -> ThinResult { - let had_changes = leaned_count > 0 || tool_call_leaned_count > 0; - ThinResult { - scope, - before_percentage: current_percentage, - after_percentage: new_percentage, - leaned_count, - tool_call_leaned_count, - chars_saved: if had_changes { chars_saved } else { 0 }, - had_changes, - } - } + // ======================================================================== + // JSON Utilities + // ======================================================================== - /// Recalculate token usage based on current conversation history - /// Recalculate the token count based on current conversation history. - pub fn recalculate_tokens(&mut self) { - let mut total = 0; - for message in &self.conversation_history { - total += Self::estimate_tokens(&message.content); - } - self.used_tokens = total; - - debug!("Recalculated tokens after thinning: {} tokens", total); - } - - /// Helper function to find the end of a JSON object + /// Find the end position of a JSON object. pub fn find_json_end(json_str: &str) -> Option { let mut brace_count = 0; let mut in_string = false; @@ -790,6 +717,35 @@ Format this as a detailed but concise summary that can be used to resume the con } } +// ============================================================================ +// Helper Types +// ============================================================================ + +/// Messages preserved across compaction. +struct PreservedMessages { + system_prompt: Option, + readme: Option, +} + +impl ThinResult { + /// Create a ThinResult indicating no changes were made. + fn no_changes(scope: ThinScope, percentage: u32) -> Self { + Self { + scope, + before_percentage: percentage, + after_percentage: percentage, + leaned_count: 0, + tool_call_leaned_count: 0, + chars_saved: 0, + had_changes: false, + } + } +} + +// ============================================================================ +// Tests +// ============================================================================ + #[cfg(test)] mod tests { use super::*; @@ -836,20 +792,20 @@ mod tests { #[test] fn test_should_thin_thresholds() { let mut cw = ContextWindow::new(100); - + // Below 50% - should not thin cw.used_tokens = 49; assert!(!cw.should_thin()); - + // At 50% - should thin (first time) cw.used_tokens = 50; assert!(cw.should_thin()); - + // After thinning at 50%, shouldn't thin again until 60% cw.last_thinning_percentage = 50; cw.used_tokens = 55; assert!(!cw.should_thin()); - + // At 60% - should thin again cw.used_tokens = 60; assert!(cw.should_thin()); @@ -859,7 +815,6 @@ mod tests { fn test_estimate_tokens_regular_text() { let text = "Hello world, this is a test."; let tokens = ContextWindow::estimate_tokens(text); - // ~28 chars / 4 * 1.1 = ~8 tokens assert!(tokens > 0 && tokens < 20); } @@ -867,7 +822,6 @@ mod tests { fn test_estimate_tokens_code() { let code = "fn main() { println!(\"hello\"); }"; let tokens = ContextWindow::estimate_tokens(code); - // Code uses 3 chars per token estimate assert!(tokens > 0); } diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs index 9150d38..7d3ebe6 100644 --- a/crates/g3-core/src/streaming_parser.rs +++ b/crates/g3-core/src/streaming_parser.rs @@ -20,20 +20,285 @@ const TOOL_CALL_PATTERNS: [&str; 4] = [ r#"{ "tool" :"#, ]; -/// Search direction for tool call pattern matching. -#[derive(Clone, Copy, PartialEq)] -enum SearchDirection { - Forward, - Backward, +// ============================================================================ +// Code Fence Tracking +// ============================================================================ + +/// Tracks whether we're inside a markdown code fence (``` block). +/// +/// Used during streaming to avoid parsing JSON examples inside code blocks +/// as tool calls. +#[derive(Debug, Default)] +struct CodeFenceTracker { + /// Whether we're currently inside a code fence + in_fence: bool, + /// Buffer for the current incomplete line (text since last newline) + current_line: String, } +impl CodeFenceTracker { + fn new() -> Self { + Self::default() + } + + /// Update fence state based on new streaming content. + fn process(&mut self, content: &str) { + for ch in content.chars() { + if ch == '\n' { + self.check_and_toggle_fence(); + self.current_line.clear(); + } else { + self.current_line.push(ch); + } + } + } + + /// Check if current_line is a code fence marker and toggle state if so. + fn check_and_toggle_fence(&mut self) { + let trimmed = self.current_line.trim_start(); + if trimmed.starts_with("```") && trimmed.chars().take_while(|&c| c == '`').count() >= 3 { + self.in_fence = !self.in_fence; + debug!( + "Code fence toggled: in_fence={} (line: {:?})", + self.in_fence, self.current_line + ); + } + } + + fn is_in_fence(&self) -> bool { + self.in_fence + } + + fn reset(&mut self) { + self.in_fence = false; + self.current_line.clear(); + } +} + +/// Find all code fence ranges in text (for batch processing). +/// +/// Returns a vector of (start, end) byte positions where code fences are. +/// Each range represents content INSIDE a fence (between ``` markers). +fn find_code_fence_ranges(text: &str) -> Vec<(usize, usize)> { + let mut ranges = Vec::new(); + let mut in_fence = false; + let mut fence_start = 0; + let mut line_start = 0; + + for (i, ch) in text.char_indices() { + if ch == '\n' { + let line = &text[line_start..i]; + let trimmed = line.trim_start(); + + if trimmed.starts_with("```") + && trimmed.chars().take_while(|&c| c == '`').count() >= 3 + { + if in_fence { + ranges.push((fence_start, line_start)); + in_fence = false; + } else { + fence_start = i + 1; // +1 to skip the newline + in_fence = true; + } + } + line_start = i + 1; + } + } + + // If we ended while still in a fence, include everything to the end + if in_fence { + ranges.push((fence_start, text.len())); + } + + ranges +} + +fn is_position_in_fence_ranges(pos: usize, ranges: &[(usize, usize)]) -> bool { + ranges.iter().any(|(start, end)| pos >= *start && pos < *end) +} + +// ============================================================================ +// JSON Parsing Utilities +// ============================================================================ + +/// Find the end position (byte index) of a complete JSON object in the text. +/// Returns None if no complete JSON object is found. +fn find_json_object_end(text: &str) -> Option { + let mut brace_count = 0; + let mut in_string = false; + let mut escape_next = false; + let mut found_start = false; + + for (i, ch) in text.char_indices() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' => escape_next = true, + '"' => in_string = !in_string, + '{' if !in_string => { + brace_count += 1; + found_start = true; + } + '}' if !in_string => { + brace_count -= 1; + if brace_count == 0 && found_start { + return Some(i); + } + } + _ => {} + } + } + + None +} + +/// Check if a partial JSON tool call has been invalidated by subsequent content. +/// +/// Detects two invalidation cases: +/// 1. Unescaped newline inside a JSON string (invalid JSON) +/// 2. Newline followed by non-JSON prose (e.g., regular text, not `"`, `{`, `}`, etc.) +fn is_json_invalidated(json_text: &str) -> bool { + let mut in_string = false; + let mut escape_next = false; + let mut chars = json_text.char_indices().peekable(); + + while let Some((_, ch)) = chars.next() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' => escape_next = true, + '"' => in_string = !in_string, + '\n' if in_string => return true, // Unescaped newline in string = invalid + '\n' if !in_string => { + // Skip whitespace after newline + while let Some(&(_, next_ch)) = chars.peek() { + if next_ch != ' ' && next_ch != '\t' { + break; + } + chars.next(); + } + + // Check if next char is valid JSON continuation + if let Some(&(_, next_ch)) = chars.peek() { + let valid_json_char = matches!( + next_ch, + '"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' | '0'..='9' | 't' | 'f' | 'n' | '\n' + ); + if !valid_json_char { + return true; + } + } + } + _ => {} + } + } + + false +} + +/// Detect malformed tool calls where LLM prose leaked into JSON keys. +/// +/// When the LLM "stutters" or mixes formats, it sometimes emits JSON where +/// the keys are actually fragments of conversational text rather than valid +/// parameter names. +fn args_contain_prose_fragments(args: &serde_json::Map) -> bool { + const PROSE_MARKERS: &[&str] = &[ + "I'll", "Let me", "Here's", "I can", "I need", "First", "Now", "The ", + ]; + + args.keys().any(|key| { + key.len() > 100 + || key.contains('\n') + || PROSE_MARKERS.iter().any(|marker| key.contains(marker)) + }) +} + +// ============================================================================ +// Tool Call Pattern Matching +// ============================================================================ + +/// Check if a position in text is "on its own line" - meaning it's either +/// at the start of the text, or preceded by a newline with only whitespace +/// between the newline and the position. +fn is_on_own_line(text: &str, pos: usize) -> bool { + if pos == 0 { + return true; + } + let line_start = text[..pos].rfind('\n').map(|p| p + 1).unwrap_or(0); + text[line_start..pos].chars().all(|c| c.is_whitespace()) +} + +/// Find the first tool call pattern that appears on its own line. +fn find_first_tool_call_start(text: &str) -> Option { + find_tool_call_start(text, false) +} + +/// Find the last tool call pattern that appears on its own line. +fn find_last_tool_call_start(text: &str) -> Option { + find_tool_call_start(text, true) +} + +/// Find a tool call pattern in text, optionally searching backwards. +/// Only matches patterns on their own line (at start or after newline + whitespace). +fn find_tool_call_start(text: &str, find_last: bool) -> Option { + let mut best_pos: Option = None; + + for pattern in &TOOL_CALL_PATTERNS { + if find_last { + // Search backwards + let mut search_end = text.len(); + while search_end > 0 { + if let Some(pos) = text[..search_end].rfind(pattern) { + if is_on_own_line(text, pos) { + if best_pos.map_or(true, |best| pos > best) { + best_pos = Some(pos); + } + break; + } + search_end = pos; + } else { + break; + } + } + } else { + // Search forwards + let mut search_start = 0; + while search_start < text.len() { + if let Some(rel) = text[search_start..].find(pattern) { + let pos = search_start + rel; + if is_on_own_line(text, pos) { + if best_pos.map_or(true, |best| pos < best) { + best_pos = Some(pos); + } + break; + } + search_start = pos + 1; + } else { + break; + } + } + } + } + + best_pos +} + +// ============================================================================ +// StreamingToolParser +// ============================================================================ + /// Modern streaming tool parser that properly handles native tool calls and SSE chunks. #[derive(Debug)] pub struct StreamingToolParser { /// Buffer for accumulating text content text_buffer: String, /// Position in text_buffer up to which tool calls have been consumed/executed. - /// This prevents has_unexecuted_tool_call() from returning true for already-executed tools. last_consumed_position: usize, /// Whether we've received a message_stop event message_stopped: bool, @@ -41,13 +306,8 @@ pub struct StreamingToolParser { in_json_tool_call: bool, /// Start position of JSON tool call (for fallback parsing) json_tool_start: Option, - /// Whether we're currently inside a code fence (``` block) - /// When true, tool call detection is disabled to prevent false positives - /// from JSON examples in code blocks. - in_code_fence: bool, - /// Buffer for the current incomplete line (text since last newline) - /// Used to detect code fence markers which must be at line start. - current_line: String, + /// Tracks code fence state during streaming + fence_tracker: CodeFenceTracker, } impl Default for StreamingToolParser { @@ -64,125 +324,28 @@ impl StreamingToolParser { message_stopped: false, in_json_tool_call: false, json_tool_start: None, - in_code_fence: false, - current_line: String::new(), + fence_tracker: CodeFenceTracker::new(), } } - /// Find a tool call pattern in text, searching in the specified direction. - /// Only matches patterns on their own line (at start or after newline + whitespace). - fn find_tool_call_start(text: &str, direction: SearchDirection) -> Option { - let mut best_start: Option = None; - - for pattern in &TOOL_CALL_PATTERNS { - match direction { - SearchDirection::Forward => { - let mut search_start = 0; - while search_start < text.len() { - if let Some(rel) = text[search_start..].find(pattern) { - let pos = search_start + rel; - if Self::is_on_own_line(text, pos) { - if best_start.map_or(true, |best| pos < best) { - best_start = Some(pos); - } - break; - } - search_start = pos + 1; - } else { - break; - } - } - } - SearchDirection::Backward => { - let mut search_end = text.len(); - while search_end > 0 { - if let Some(pos) = text[..search_end].rfind(pattern) { - if Self::is_on_own_line(text, pos) { - if best_start.map_or(true, |best| pos > best) { - best_start = Some(pos); - } - break; - } - search_end = pos; - } else { - break; - } - } - } - } - } - - best_start - } - - /// Find the starting position of the FIRST tool call pattern on its own line. - pub fn find_first_tool_call_start(text: &str) -> Option { - Self::find_tool_call_start(text, SearchDirection::Forward) - } - - /// Find the starting position of the LAST tool call pattern on its own line. - pub fn find_last_tool_call_start(text: &str) -> Option { - Self::find_tool_call_start(text, SearchDirection::Backward) - } - - /// Check if a position in text is "on its own line" - meaning it's either - /// at the start of the text, or preceded by a newline with only whitespace - /// between the newline and the position. - pub fn is_on_own_line(text: &str, pos: usize) -> bool { - if pos == 0 { - return true; - } - // Find the start of the current line (position after the last newline before pos) - let line_start = text[..pos].rfind('\n').map(|p| p + 1).unwrap_or(0); - // Check if everything between line_start and pos is whitespace - text[line_start..pos].chars().all(|c| c.is_whitespace()) - } - - /// Detect malformed tool calls where LLM prose leaked into JSON keys. - /// - /// When the LLM "stutters" or mixes formats, it sometimes emits JSON where - /// the keys are actually fragments of conversational text rather than valid - /// parameter names. This heuristic catches such cases by looking for: - /// - Unusually long keys (>100 chars) - /// - Newlines in keys (never valid in JSON keys) - /// - Common LLM response phrases that indicate prose, not parameters - fn args_contain_prose_fragments(args: &serde_json::Map) -> bool { - const PROSE_MARKERS: &[&str] = &[ - "I'll", "Let me", "Here's", "I can", "I need", "First", "Now", "The ", - ]; - - args.keys().any(|key| { - key.len() > 100 - || key.contains('\n') - || PROSE_MARKERS.iter().any(|marker| key.contains(marker)) - }) - } - /// Process a streaming chunk and return completed tool calls if any. pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec { let mut completed_tools = Vec::new(); // Add text content to buffer and track code fence state if !chunk.content.is_empty() { - // Update code fence state based on new content - self.update_code_fence_state(&chunk.content); - + self.fence_tracker.process(&chunk.content); self.text_buffer.push_str(&chunk.content); } - // Handle native tool calls - return them immediately when received. - // This allows tools to be executed as soon as they're fully parsed, - // preventing duplicate tool calls from being accumulated. + // Handle native tool calls - return them immediately when received if let Some(ref tool_calls) = chunk.tool_calls { debug!("Received native tool calls: {:?}", tool_calls); - - // Convert and return tool calls immediately for tool_call in tool_calls { - let converted_tool = ToolCall { + completed_tools.push(ToolCall { tool: tool_call.tool.clone(), args: tool_call.args.clone(), - }; - completed_tools.push(converted_tool); + }); } } @@ -193,7 +356,7 @@ impl StreamingToolParser { // When stream finishes, find ALL JSON tool calls in the accumulated buffer if completed_tools.is_empty() && !self.text_buffer.is_empty() { - let all_tools = self.try_parse_all_json_tool_calls_from_buffer(); + let all_tools = self.parse_all_json_tool_calls(); if !all_tools.is_empty() { debug!( "Found {} JSON tool calls in buffer at stream end", @@ -204,98 +367,72 @@ impl StreamingToolParser { } } - // Fallback: Try to parse JSON tool calls from current chunk content if no native tool calls - // IMPORTANT: Skip JSON fallback parsing when inside a code fence to prevent - // false positives from JSON examples in code blocks. - if completed_tools.is_empty() && !chunk.content.is_empty() && !chunk.finished { - if !self.in_code_fence { - if let Some(json_tool) = self.try_parse_json_tool_call(&chunk.content) { - completed_tools.push(json_tool); - } + // Fallback: Try to parse JSON tool calls from current chunk content if no native tool calls. + // Skip when inside a code fence to prevent false positives from JSON examples. + if completed_tools.is_empty() + && !chunk.content.is_empty() + && !chunk.finished + && !self.fence_tracker.is_in_fence() + { + if let Some(json_tool) = self.try_parse_streaming_json_tool_call() { + completed_tools.push(json_tool); } } completed_tools } - /// Fallback method to parse JSON tool calls from text content. - /// - /// This method maintains state (`in_json_tool_call`, `json_tool_start`) to track - /// partial JSON tool calls across streaming chunks. When a pattern like `{"tool":` - /// is found on its own line, we enter "in JSON tool call" mode and wait for the - /// JSON to complete. - /// - /// IMPORTANT: We must also detect when the JSON has been **invalidated** - i.e., - /// when subsequent content makes it clear this isn't a real tool call. For example: - /// - `{"tool": "read_file` followed by `\nsome regular text` is NOT a tool call - /// - The newline followed by non-JSON text invalidates the partial JSON - fn try_parse_json_tool_call(&mut self, _content: &str) -> Option { - // Pre-compute fence ranges to skip tool calls inside code fences - let fence_ranges = Self::find_code_fence_ranges(&self.text_buffer); - - // If we're not currently in a JSON tool call, look for the start + /// Try to parse a JSON tool call from the streaming buffer. + /// + /// Maintains state (`in_json_tool_call`, `json_tool_start`) to track + /// partial JSON tool calls across streaming chunks. + fn try_parse_streaming_json_tool_call(&mut self) -> Option { + let fence_ranges = find_code_fence_ranges(&self.text_buffer); + + // If not currently in a JSON tool call, look for the start if !self.in_json_tool_call { - // Only search in the unconsumed portion of the buffer to avoid - // re-parsing already-executed tool calls let unchecked_buffer = &self.text_buffer[self.last_consumed_position..]; - // Use find_first_tool_call_start to find the FIRST tool call, not the last. - // This ensures we process tool calls in order when multiple arrive together. - if let Some(relative_pos) = Self::find_first_tool_call_start(unchecked_buffer) { + if let Some(relative_pos) = find_first_tool_call_start(unchecked_buffer) { let pos = self.last_consumed_position + relative_pos; - - // Skip if this position is inside a code fence - if Self::is_position_in_fence_ranges(pos, &fence_ranges) { + + // Skip if inside a code fence + if is_position_in_fence_ranges(pos, &fence_ranges) { debug!("Skipping tool call at position {} - inside code fence", pos); return None; } - - debug!("Found JSON tool call pattern at position {} (relative: {})", pos, relative_pos); + + debug!( + "Found JSON tool call pattern at position {} (relative: {})", + pos, relative_pos + ); self.in_json_tool_call = true; self.json_tool_start = Some(pos); } } - // If we're in a JSON tool call, try to find the end and parse it + // If in a JSON tool call, try to find the end and parse it if self.in_json_tool_call { if let Some(start_pos) = self.json_tool_start { let json_text = &self.text_buffer[start_pos..]; // Try to find a complete JSON object - if let Some(end_pos) = Self::find_complete_json_object_end(json_text) { + if let Some(end_pos) = find_json_object_end(json_text) { let json_str = &json_text[..=end_pos]; debug!("Attempting to parse JSON tool call: {}", json_str); - // Try to parse as a ToolCall - if let Ok(tool_call) = serde_json::from_str::(json_str) { - // Validate that args is an object with reasonable keys - if let Some(args_obj) = tool_call.args.as_object() { - if Self::args_contain_prose_fragments(args_obj) { - debug!( - "Detected malformed tool call with message-like keys, skipping" - ); - self.in_json_tool_call = false; - self.json_tool_start = None; - return None; - } - - debug!("Successfully parsed valid JSON tool call: {:?}", tool_call); - self.in_json_tool_call = false; - self.json_tool_start = None; - return Some(tool_call); - } - debug!("Tool call args is not an object, skipping"); - } else { - debug!("Failed to parse JSON tool call: {}", json_str); + if let Some(tool_call) = self.try_parse_tool_call_json(json_str) { + self.in_json_tool_call = false; + self.json_tool_start = None; + return Some(tool_call); } - // Reset and continue looking + + // Parse failed, reset and continue looking self.in_json_tool_call = false; self.json_tool_start = None; } - - // If we didn't find a complete JSON object, check if the partial JSON - // has been invalidated by subsequent content (e.g., a newline followed - // by regular text when not inside a string). - if self.in_json_tool_call && Self::is_json_invalidated(json_text) { + + // Check if the partial JSON has been invalidated + if self.in_json_tool_call && is_json_invalidated(json_text) { debug!("JSON tool call invalidated by subsequent content, clearing state"); self.in_json_tool_call = false; self.json_tool_start = None; @@ -309,61 +446,64 @@ impl StreamingToolParser { } /// Parse ALL JSON tool calls from the accumulated text buffer. - /// This finds all complete tool calls, not just the last one. - fn try_parse_all_json_tool_calls_from_buffer(&self) -> Vec { + fn parse_all_json_tool_calls(&self) -> Vec { let mut tool_calls = Vec::new(); let mut search_start = 0; - - // Pre-compute fence ranges to know which positions are inside code fences - // This is more efficient than re-scanning for each potential tool call - let fence_ranges = Self::find_code_fence_ranges(&self.text_buffer); + let fence_ranges = find_code_fence_ranges(&self.text_buffer); while search_start < self.text_buffer.len() { let search_text = &self.text_buffer[search_start..]; - // Find the next tool call pattern - if let Some(relative_pos) = Self::find_first_tool_call_start(search_text) { - let abs_start = search_start + relative_pos; - let json_text = &self.text_buffer[abs_start..]; - - // Skip if this position is inside a code fence - if Self::is_position_in_fence_ranges(abs_start, &fence_ranges) { - // Move past this position and continue searching - search_start = abs_start + 1; - continue; - } - - // Try to find a complete JSON object - if let Some(end_pos) = Self::find_complete_json_object_end(json_text) { - let json_str = &json_text[..=end_pos]; - - if let Ok(tool_call) = serde_json::from_str::(json_str) { - if let Some(args_obj) = tool_call.args.as_object() { - if !Self::args_contain_prose_fragments(args_obj) { - debug!( - "Found tool call at position {}: {:?}", - abs_start, tool_call.tool - ); - tool_calls.push(tool_call); - } - } - } - // Move past this tool call - search_start = abs_start + end_pos + 1; - } else { - // Incomplete JSON, stop searching - break; - } - } else { - // No more tool call patterns found + let Some(relative_pos) = find_first_tool_call_start(search_text) else { break; + }; + + let abs_start = search_start + relative_pos; + let json_text = &self.text_buffer[abs_start..]; + + // Skip if inside a code fence + if is_position_in_fence_ranges(abs_start, &fence_ranges) { + search_start = abs_start + 1; + continue; } + + // Try to find a complete JSON object + let Some(end_pos) = find_json_object_end(json_text) else { + break; // Incomplete JSON, stop searching + }; + + let json_str = &json_text[..=end_pos]; + if let Some(tool_call) = self.try_parse_tool_call_json(json_str) { + debug!("Found tool call at position {}: {:?}", abs_start, tool_call.tool); + tool_calls.push(tool_call); + } + + search_start = abs_start + end_pos + 1; } tool_calls } - /// Get the accumulated text content (excluding tool calls). + /// Try to parse a JSON string as a ToolCall, validating the args. + fn try_parse_tool_call_json(&self, json_str: &str) -> Option { + let tool_call: ToolCall = serde_json::from_str(json_str).ok()?; + + // Validate that args is an object with reasonable keys + let args_obj = tool_call.args.as_object()?; + if args_contain_prose_fragments(args_obj) { + debug!("Detected malformed tool call with message-like keys, skipping"); + return None; + } + + debug!("Successfully parsed valid JSON tool call: {:?}", tool_call); + Some(tool_call) + } + + // ======================================================================== + // Public Accessors + // ======================================================================== + + /// Get the accumulated text content. pub fn get_text_content(&self) -> &str { &self.text_buffer } @@ -383,246 +523,43 @@ impl StreamingToolParser { } /// Check if the text buffer contains an incomplete JSON tool call. - /// This detects cases where the LLM started emitting a tool call but the stream ended - /// before the JSON was complete (truncated output). pub fn has_incomplete_tool_call(&self) -> bool { - // Only check the unconsumed portion of the buffer let unchecked_buffer = &self.text_buffer[self.last_consumed_position..]; - if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) { - let json_text = &unchecked_buffer[start_pos..]; - // If JSON is complete, it's not incomplete - if Self::find_complete_json_object_end(json_text).is_some() { - return false; - } - // If JSON has been invalidated by subsequent content, it's not a real tool call - if Self::is_json_invalidated(json_text) { - return false; - } - // Otherwise, it's a genuinely incomplete tool call - true - } else { - false + let Some(start_pos) = find_last_tool_call_start(unchecked_buffer) else { + return false; + }; + + let json_text = &unchecked_buffer[start_pos..]; + + // Complete or invalidated = not incomplete + if find_json_object_end(json_text).is_some() || is_json_invalidated(json_text) { + return false; } + + true } /// Check if the text buffer contains an unexecuted tool call. - /// This detects cases where the LLM emitted a complete tool call JSON - /// but it wasn't parsed/executed (e.g., due to parsing issues). pub fn has_unexecuted_tool_call(&self) -> bool { - // Only check the unconsumed portion of the buffer let unchecked_buffer = &self.text_buffer[self.last_consumed_position..]; - if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) { - let json_text = &unchecked_buffer[start_pos..]; - // If the JSON IS complete, it means there's an unexecuted tool call - if let Some(json_end) = Self::find_complete_json_object_end(json_text) { - let json_only = &json_text[..=json_end]; - return serde_json::from_str::(json_only).is_ok(); - } - } - false + let Some(start_pos) = find_last_tool_call_start(unchecked_buffer) else { + return false; + }; + + let json_text = &unchecked_buffer[start_pos..]; + let Some(json_end) = find_json_object_end(json_text) else { + return false; + }; + + let json_only = &json_text[..=json_end]; + serde_json::from_str::(json_only).is_ok() } /// Mark all tool calls up to the current buffer position as consumed/executed. - /// This prevents has_unexecuted_tool_call() from returning true for already-executed tools. pub fn mark_tool_calls_consumed(&mut self) { self.last_consumed_position = self.text_buffer.len(); } - /// Check if a partial JSON tool call has been invalidated by subsequent content. - /// - /// Detects two invalidation cases: - /// 1. Unescaped newline inside a JSON string (invalid JSON) - /// 2. Newline followed by non-JSON prose (e.g., regular text, not `"`, `{`, `}`, etc.) - fn is_json_invalidated(json_text: &str) -> bool { - let mut in_string = false; - let mut escape_next = false; - let mut chars = json_text.char_indices().peekable(); - - while let Some((_, ch)) = chars.next() { - if escape_next { - escape_next = false; - continue; - } - - match ch { - '\\' => escape_next = true, - '"' => in_string = !in_string, - // Unescaped newline inside a string is invalid JSON - '\n' if in_string => return true, - '\n' if !in_string => { - // Skip whitespace after newline - while let Some(&(_, next_ch)) = chars.peek() { - if next_ch != ' ' && next_ch != '\t' { - break - } - chars.next(); - } - - // Check if next char is valid JSON continuation - if let Some(&(_, next_ch)) = chars.peek() { - // Valid: ", {, }, [, ], :, ,, -, digits, t/f/n (true/false/null), newline - let valid_json_char = matches!( - next_ch, - '"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' | '0'..='9' | 't' | 'f' | 'n' | '\n' - ); - if !valid_json_char { - return true; - } - } - } - _ => {} - } - } - - false - } - - /// Find the end position (byte index) of a complete JSON object in the text. - /// Returns None if no complete JSON object is found. - pub fn find_complete_json_object_end(text: &str) -> Option { - let mut brace_count = 0; - let mut in_string = false; - let mut escape_next = false; - let mut found_start = false; - - for (i, ch) in text.char_indices() { - if escape_next { - escape_next = false; - continue; - } - - match ch { - '\\' => escape_next = true, - '"' => in_string = !in_string, - '{' if !in_string => { - brace_count += 1; - found_start = true; - } - '}' if !in_string => { - brace_count -= 1; - if brace_count == 0 && found_start { - return Some(i); // Return the byte index of the closing brace - } - } - _ => {} - } - } - - None // No complete JSON object found - } - - /// Find all code fence ranges in the given text. - /// - /// Returns a vector of (start, end) byte positions where code fences are. - /// Each range represents content INSIDE a fence (between ``` markers). - fn find_code_fence_ranges(text: &str) -> Vec<(usize, usize)> { - let mut ranges = Vec::new(); - let mut in_fence = false; - let mut fence_start = 0; - let mut line_start = 0; - - for (i, ch) in text.char_indices() { - if ch == '\n' { - // Check if the line we just finished is a fence marker - let line = &text[line_start..i]; - let trimmed = line.trim_start(); - - if trimmed.starts_with("```") { - let backtick_count = trimmed.chars().take_while(|&c| c == '`').count(); - if backtick_count >= 3 { - if in_fence { - // Closing fence - record the range - // The range is from after the opening fence line to before this line - ranges.push((fence_start, line_start)); - in_fence = false; - } else { - // Opening fence - mark the start (after this line) - fence_start = i + 1; // +1 to skip the newline - in_fence = true; - } - } - } - - line_start = i + 1; - } - } - - // If we ended while still in a fence, include everything to the end - if in_fence { - ranges.push((fence_start, text.len())); - } - - ranges - } - - /// Check if a position is inside any of the fence ranges. - fn is_position_in_fence_ranges(pos: usize, ranges: &[(usize, usize)]) -> bool { - ranges.iter().any(|(start, end)| pos >= *start && pos < *end) - } - - /// Update code fence state based on new content. - /// - /// Tracks whether we're inside a markdown code fence (``` block). - /// Code fences toggle state when we see a line that starts with ``` - /// (with optional leading whitespace). - /// - /// This is intentionally simple - we don't try to handle: - /// - Fences inside comments (// ```, # ```, etc.) - /// - Nested fences (which aren't valid markdown anyway) - /// - Indented code blocks (4 spaces) - /// - /// The simple approach handles 90%+ of real-world cases where LLMs - /// show JSON examples in code blocks. - fn update_code_fence_state(&mut self, content: &str) { - for ch in content.chars() { - if ch == '\n' { - // End of line - check if this line is a fence marker - self.check_and_toggle_fence(); - self.current_line.clear(); - } else { - self.current_line.push(ch); - } - } - } - - /// Check if current_line is a code fence marker and toggle state if so. - /// - /// A fence marker is a line that starts with ``` (after optional whitespace). - /// The fence can have a language tag (```json, ```rust, etc.) which we ignore. - fn check_and_toggle_fence(&mut self) { - let trimmed = self.current_line.trim_start(); - - // Must start with at least 3 backticks - if !trimmed.starts_with("```") { - return; - } - - // Count consecutive backticks - let backtick_count = trimmed.chars().take_while(|&c| c == '`').count(); - if backtick_count < 3 { - return; - } - - // This is a fence marker - toggle state - self.in_code_fence = !self.in_code_fence; - debug!( - "Code fence toggled: in_code_fence={} (line: {:?})", - self.in_code_fence, - self.current_line - ); - } - - /// Reset the parser state for a new message. - pub fn reset(&mut self) { - self.text_buffer.clear(); - self.last_consumed_position = 0; - self.message_stopped = false; - self.in_json_tool_call = false; - self.json_tool_start = None; - self.in_code_fence = false; - self.current_line = String::new(); - } - /// Get the current text buffer length (for position tracking). pub fn text_buffer_len(&self) -> usize { self.text_buffer.len() @@ -637,8 +574,46 @@ impl StreamingToolParser { pub fn json_tool_start_position(&self) -> Option { self.json_tool_start } + + /// Reset the parser state for a new message. + pub fn reset(&mut self) { + self.text_buffer.clear(); + self.last_consumed_position = 0; + self.message_stopped = false; + self.in_json_tool_call = false; + self.json_tool_start = None; + self.fence_tracker.reset(); + } + + // ======================================================================== + // Static Methods (for external use) + // ======================================================================== + + /// Find the starting position of the FIRST tool call pattern on its own line. + pub fn find_first_tool_call_start(text: &str) -> Option { + find_first_tool_call_start(text) + } + + /// Find the starting position of the LAST tool call pattern on its own line. + pub fn find_last_tool_call_start(text: &str) -> Option { + find_last_tool_call_start(text) + } + + /// Check if a position in text is "on its own line". + pub fn is_on_own_line(text: &str, pos: usize) -> bool { + is_on_own_line(text, pos) + } + + /// Find the end position of a complete JSON object. + pub fn find_complete_json_object_end(text: &str) -> Option { + find_json_object_end(text) + } } +// ============================================================================ +// Tests +// ============================================================================ + #[cfg(test)] mod tests { use super::*; @@ -646,34 +621,27 @@ mod tests { #[test] fn test_find_complete_json_object_end_simple() { let text = r#"{"tool":"shell","args":{"command":"ls"}}"#; - assert_eq!( - StreamingToolParser::find_complete_json_object_end(text), - Some(text.len() - 1) - ); + assert_eq!(find_json_object_end(text), Some(text.len() - 1)); } #[test] fn test_find_complete_json_object_end_nested() { let text = r#"{"tool":"write","args":{"content":"{nested}"}}"#; - assert_eq!( - StreamingToolParser::find_complete_json_object_end(text), - Some(text.len() - 1) - ); + assert_eq!(find_json_object_end(text), Some(text.len() - 1)); } #[test] fn test_find_complete_json_object_end_incomplete() { let text = r#"{"tool":"shell","args":{"command":"ls""#; - assert_eq!(StreamingToolParser::find_complete_json_object_end(text), None); + assert_eq!(find_json_object_end(text), None); } #[test] fn test_tool_call_patterns() { - // Test that all patterns are detected - assert!(StreamingToolParser::find_first_tool_call_start(r#"{"tool":"test"}"#).is_some()); - assert!(StreamingToolParser::find_first_tool_call_start(r#"{ "tool":"test"}"#).is_some()); - assert!(StreamingToolParser::find_first_tool_call_start(r#"{"tool" :"test"}"#).is_some()); - assert!(StreamingToolParser::find_first_tool_call_start(r#"{ "tool" :"test"}"#).is_some()); + assert!(find_first_tool_call_start(r#"{"tool":"test"}"#).is_some()); + assert!(find_first_tool_call_start(r#"{ "tool":"test"}"#).is_some()); + assert!(find_first_tool_call_start(r#"{"tool" :"test"}"#).is_some()); + assert!(find_first_tool_call_start(r#"{ "tool" :"test"}"#).is_some()); } #[test] @@ -692,11 +660,8 @@ mod tests { #[test] fn test_multiple_tool_calls_processed_in_order() { - // Test that when multiple tool calls arrive together, they are processed - // in order (first one first, not last one first) let mut parser = StreamingToolParser::new(); - - // Simulate two tool calls arriving in the same chunk + let content = r#"Some text before {"tool": "shell", "args": {"command": "first"}} @@ -704,7 +669,7 @@ mod tests { {"tool": "shell", "args": {"command": "second"}} Some text after"#; - + let chunk = g3_providers::CompletionChunk { content: content.to_string(), finished: true, @@ -713,167 +678,129 @@ Some text after"#; stop_reason: None, tool_call_streaming: None, }; - - let tools = parser.process_chunk(&chunk); - - // Should find both tool calls - assert_eq!(tools.len(), 2, "Expected 2 tool calls, got {}", tools.len()); - - // First tool call should be "first", not "second" - assert_eq!(tools[0].tool, "shell"); - assert_eq!(tools[0].args["command"], "first", - "First tool call should have command 'first', got {:?}", tools[0].args); - - // Second tool call should be "second" - assert_eq!(tools[1].tool, "shell"); - assert_eq!(tools[1].args["command"], "second", - "Second tool call should have command 'second', got {:?}", tools[1].args); - } + let tools = parser.process_chunk(&chunk); + + assert_eq!(tools.len(), 2, "Expected 2 tool calls, got {}", tools.len()); + assert_eq!(tools[0].args["command"], "first"); + assert_eq!(tools[1].args["command"], "second"); + } #[test] fn test_find_first_vs_last_tool_call() { - // Both tool calls are on their own lines let text = "{\"tool\": \"first\"}\n{\"tool\": \"second\"}"; - - let first_pos = StreamingToolParser::find_first_tool_call_start(text); - let last_pos = StreamingToolParser::find_last_tool_call_start(text); - - assert!(first_pos.is_some(), "Should find first tool call"); - assert!(last_pos.is_some(), "Should find last tool call"); - assert!(first_pos.unwrap() < last_pos.unwrap(), - "First position ({:?}) should be less than last position ({:?})", first_pos, last_pos); + + let first_pos = find_first_tool_call_start(text); + let last_pos = find_last_tool_call_start(text); + + assert!(first_pos.is_some()); + assert!(last_pos.is_some()); + assert!(first_pos.unwrap() < last_pos.unwrap()); } #[test] fn test_inline_tool_call_ignored() { - // Tool call pattern inline with other text should NOT be detected let text = "Here is an example: {\"tool\": \"shell\"} in text"; - assert!(StreamingToolParser::find_first_tool_call_start(text).is_none(), - "Inline tool call pattern should be ignored"); - assert!(StreamingToolParser::find_last_tool_call_start(text).is_none(), - "Inline tool call pattern should be ignored"); + assert!(find_first_tool_call_start(text).is_none()); + assert!(find_last_tool_call_start(text).is_none()); } #[test] fn test_standalone_tool_call_detected() { - // Tool call on its own line (at start of text) should be detected let text = r#"{"tool": "shell", "args": {"command": "ls"}}"#; - assert!(StreamingToolParser::find_first_tool_call_start(text).is_some(), - "Standalone tool call should be detected"); + assert!(find_first_tool_call_start(text).is_some()); } #[test] fn test_indented_tool_call_detected() { - // Tool call with leading whitespace should be detected let text = r#" {"tool": "shell", "args": {"command": "ls"}}"#; - assert!(StreamingToolParser::find_first_tool_call_start(text).is_some(), - "Indented tool call should be detected"); + assert!(find_first_tool_call_start(text).is_some()); } #[test] fn test_tool_call_after_newline_detected() { - // Tool call after a newline should be detected let text = "Some prose here\n{\"tool\": \"shell\", \"args\": {}}"; - let pos = StreamingToolParser::find_first_tool_call_start(text); - assert!(pos.is_some(), "Tool call after newline should be detected"); - assert_eq!(pos.unwrap(), 16, "Should find tool call at position after newline"); + let pos = find_first_tool_call_start(text); + assert!(pos.is_some()); + assert_eq!(pos.unwrap(), 16); } #[test] fn test_inline_ignored_but_standalone_detected() { - // Mixed: inline on first line (ignored), standalone on second line (detected) let text = "Some text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}"; - let pos = StreamingToolParser::find_first_tool_call_start(text); - assert!(pos.is_some(), "Should find the standalone tool call"); - // The standalone one starts after the newline - assert!(pos.unwrap() > 30, "Should skip the inline pattern and find the standalone one"); + let pos = find_first_tool_call_start(text); + assert!(pos.is_some()); + assert!(pos.unwrap() > 30); } #[test] fn test_multiple_inline_patterns_all_ignored() { - // Multiple inline patterns on same line - all should be ignored let text = "Compare {\"tool\": \"a\"} with {\"tool\": \"b\"}"; - assert!(StreamingToolParser::find_first_tool_call_start(text).is_none(), - "All inline patterns should be ignored"); + assert!(find_first_tool_call_start(text).is_none()); } #[test] fn test_is_on_own_line() { - // Test the is_on_own_line helper directly let text = "prefix {\"tool\":\n {\"tool\":"; - - // Position 0 is always on its own line - assert!(StreamingToolParser::is_on_own_line(text, 0)); - - // Position 7 (after "prefix ") is NOT on its own line - assert!(!StreamingToolParser::is_on_own_line(text, 7)); - - // Position after newline with only whitespace before pattern IS on its own line + + assert!(is_on_own_line(text, 0)); + assert!(!is_on_own_line(text, 7)); + let newline_pos = text.find('\n').unwrap(); - assert!(StreamingToolParser::is_on_own_line(text, newline_pos + 11)); // 10 spaces before { + assert!(is_on_own_line(text, newline_pos + 11)); } #[test] fn test_all_pattern_variants_require_own_line() { - // All whitespace variants should require their own line let patterns = [ "text { \"tool\":\"x\"}", "text {\"tool\" :\"x\"}", "text { \"tool\" :\"x\"}", ]; for pattern in patterns { - assert!(StreamingToolParser::find_first_tool_call_start(pattern).is_none(), - "Inline pattern '{}' should be ignored", pattern); + assert!( + find_first_tool_call_start(pattern).is_none(), + "Inline pattern '{}' should be ignored", + pattern + ); } } #[test] fn test_find_code_fence_ranges_simple() { let text = "Before\n```\ncode\n```\nAfter"; - let ranges = StreamingToolParser::find_code_fence_ranges(text); - - assert_eq!(ranges.len(), 1, "Should find one fence range"); - - // The range should cover "code\n" + let ranges = find_code_fence_ranges(text); + + assert_eq!(ranges.len(), 1); let (start, end) = ranges[0]; let inside = &text[start..end]; - assert!(inside.contains("code"), "Range should contain 'code', got: {:?}", inside); + assert!(inside.contains("code")); } #[test] fn test_find_code_fence_ranges_multiple() { let text = "First:\n```json\ncode1\n```\n\nSecond:\n```\ncode2\n```\nEnd"; - let ranges = StreamingToolParser::find_code_fence_ranges(text); - - assert_eq!(ranges.len(), 2, "Should find two fence ranges, got {:?}", ranges); + let ranges = find_code_fence_ranges(text); + assert_eq!(ranges.len(), 2); } #[test] fn test_find_code_fence_ranges_with_tool_json() { - // Use a simple string without backticks to avoid raw string issues let text = "Example:\n```json\n{\"tool\": \"shell\", \"args\": {}}\n```\nDone."; - let ranges = StreamingToolParser::find_code_fence_ranges(text); - - assert_eq!(ranges.len(), 1, "Should find one fence range"); - - // Find where the tool pattern would be + let ranges = find_code_fence_ranges(text); + + assert_eq!(ranges.len(), 1); + let tool_pos = text.find("{\"tool\"").unwrap(); - - // The tool position should be inside the fence range - assert!( - StreamingToolParser::is_position_in_fence_ranges(tool_pos, &ranges), - "Tool pattern at {} should be inside fence range {:?}", - tool_pos, ranges - ); + assert!(is_position_in_fence_ranges(tool_pos, &ranges)); } #[test] fn test_is_position_in_fence_ranges() { let ranges = vec![(10, 20), (30, 40)]; - assert!(!StreamingToolParser::is_position_in_fence_ranges(5, &ranges)); - assert!(StreamingToolParser::is_position_in_fence_ranges(15, &ranges)); - assert!(!StreamingToolParser::is_position_in_fence_ranges(25, &ranges)); - assert!(StreamingToolParser::is_position_in_fence_ranges(35, &ranges)); + assert!(!is_position_in_fence_ranges(5, &ranges)); + assert!(is_position_in_fence_ranges(15, &ranges)); + assert!(!is_position_in_fence_ranges(25, &ranges)); + assert!(is_position_in_fence_ranges(35, &ranges)); } }