diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index 1a89c62..76362ce 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -267,7 +267,7 @@ use std::path::Path; use std::path::PathBuf; use std::process::exit; use tokio_util::sync::CancellationToken; -use tracing::{error, info}; +use tracing::{debug, error}; use g3_core::error_handling::{classify_error, ErrorType, RecoverableError}; mod simple_output; @@ -2693,7 +2693,7 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?; // Log the size of the feedback for debugging - info!( + debug!( "Coach feedback extracted: {} characters (from {} total)", coach_feedback_text.len(), coach_result.response.len() diff --git a/crates/g3-computer-control/build.rs b/crates/g3-computer-control/build.rs index b7760a2..60d5598 100644 --- a/crates/g3-computer-control/build.rs +++ b/crates/g3-computer-control/build.rs @@ -68,6 +68,18 @@ fn main() { dylib_dst.display() ); + // Re-sign the dylib with ad-hoc signature to fix code signing issues on Apple Silicon + // This is necessary because incremental compilation can invalidate signatures + let codesign_status = Command::new("codesign") + .args(&["-f", "-s", "-", dylib_dst.to_str().unwrap()]) + .status(); + + if let Ok(status) = codesign_status { + if !status.success() { + println!("cargo:warning=Failed to codesign libVisionBridge.dylib (non-fatal)"); + } + } + // Add rpath so the dylib can be found at runtime println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path"); println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path"); diff --git a/crates/g3-computer-control/src/platform/macos.rs b/crates/g3-computer-control/src/platform/macos.rs index dda2c0f..db64480 100644 --- a/crates/g3-computer-control/src/platform/macos.rs +++ b/crates/g3-computer-control/src/platform/macos.rs @@ -24,7 +24,7 @@ impl MacOSController { pub fn new() -> Result { let ocr = 
Box::new(DefaultOCR::new()?); let ocr_name = ocr.name().to_string(); - tracing::info!("Initialized macOS controller with OCR engine: {}", ocr_name); + tracing::debug!("Initialized macOS controller with OCR engine: {}", ocr_name); Ok(Self { ocr_engine: ocr, ocr_name, @@ -155,7 +155,7 @@ impl ComputerController for MacOSController { // 1. At layer 0 (normal windows, not menu bar) // 2. Have real bounds (width and height >= 100) if layer == 0 && has_real_bounds { - tracing::info!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer); + tracing::debug!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer); found_window_id = Some((id as u32, owner.clone())); break; } else { @@ -178,7 +178,7 @@ impl ComputerController for MacOSController { let (cg_window_id, matched_owner) = cg_window_id.ok_or_else(|| { anyhow::anyhow!("Could not find window for application '{}'. Use list_windows to see available windows.", app_name) })?; - tracing::info!( + tracing::debug!( "Taking screenshot of window ID {} for app '{}'", cg_window_id, matched_owner @@ -468,7 +468,7 @@ impl MacOSController { // Only accept windows with real bounds (>= 100x100 pixels) if w >= 100 && h >= 100 { - tracing::info!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer); + tracing::debug!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer); return Ok((x, y, w, h)); } else { tracing::debug!( diff --git a/crates/g3-console/src/api/control.rs b/crates/g3-console/src/api/control.rs index f5eb489..455849d 100644 --- a/crates/g3-console/src/api/control.rs +++ b/crates/g3-console/src/api/control.rs @@ -3,7 +3,7 @@ use crate::process::ProcessController; use axum::{extract::State, http::StatusCode, Json}; use std::sync::Arc; use tokio::sync::Mutex; -use tracing::{error, info}; +use tracing::{debug, error}; pub type ControllerState = Arc>; @@ -22,7 +22,7 @@ pub async 
fn kill_instance( match controller.kill_process(pid) { Ok(_) => { - info!("Successfully killed process {}", pid); + debug!("Successfully killed process {}", pid); Ok(Json(serde_json::json!({ "status": "terminating" }))) @@ -38,7 +38,7 @@ pub async fn restart_instance( State(controller): State, axum::extract::Path(id): axum::extract::Path, ) -> Result, StatusCode> { - info!("Restarting instance: {}", id); + debug!("Restarting instance: {}", id); // Extract PID from instance ID (format: pid_timestamp) let pid: u32 = id @@ -81,7 +81,7 @@ pub async fn launch_instance( State(controller): State, Json(request): Json, ) -> Result, (StatusCode, Json)> { - info!("Launching new g3 instance: {:?}", request); + debug!("Launching new g3 instance: {:?}", request); // Validate binary path if provided if let Some(ref binary_path) = request.g3_binary_path { @@ -149,7 +149,7 @@ pub async fn launch_instance( ) { Ok(pid) => { let id = format!("{}_{}", pid, chrono::Utc::now().timestamp()); - info!("Successfully launched g3 instance with PID {}", pid); + debug!("Successfully launched g3 instance with PID {}", pid); Ok(Json(LaunchResponse { id, status: "starting".to_string(), diff --git a/crates/g3-console/src/api/state.rs b/crates/g3-console/src/api/state.rs index fc31a19..2031a8a 100644 --- a/crates/g3-console/src/api/state.rs +++ b/crates/g3-console/src/api/state.rs @@ -3,7 +3,7 @@ use axum::{http::StatusCode, Json}; use serde::{Deserialize, Serialize}; use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; -use tracing::{error, info}; +use tracing::{debug, error}; pub async fn get_state() -> Result, StatusCode> { let state = ConsoleState::load(); @@ -15,7 +15,7 @@ pub async fn save_state( ) -> Result, StatusCode> { match state.save() { Ok(_) => { - info!("Console state saved successfully"); + debug!("Console state saved successfully"); Ok(Json(serde_json::json!({ "status": "saved" }))) diff --git a/crates/g3-console/src/launch.rs b/crates/g3-console/src/launch.rs index 
cd46f4f..505d509 100644 --- a/crates/g3-console/src/launch.rs +++ b/crates/g3-console/src/launch.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use std::fs; use std::path::PathBuf; -use tracing::info; +use tracing::debug; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ConsoleState { @@ -42,7 +42,7 @@ impl ConsoleState { pub fn save(&self) -> anyhow::Result<()> { let config_path = Self::config_path(); - info!("Saving console state to: {:?}", config_path); + debug!("Saving console state to: {:?}", config_path); // Create parent directory if it doesn't exist if let Some(parent) = config_path.parent() { @@ -51,7 +51,7 @@ impl ConsoleState { let content = serde_json::to_string_pretty(self)?; fs::write(&config_path, content)?; - info!("Console state saved successfully to: {:?}", config_path); + debug!("Console state saved successfully to: {:?}", config_path); Ok(()) } diff --git a/crates/g3-console/src/main.rs b/crates/g3-console/src/main.rs index 182a316..616b74f 100644 --- a/crates/g3-console/src/main.rs +++ b/crates/g3-console/src/main.rs @@ -16,7 +16,7 @@ use std::sync::Arc; use tokio::sync::Mutex; use tower_http::cors::CorsLayer; use tower_http::services::ServeDir; -use tracing::{info, Level}; +use tracing::{debug, Level}; use tracing_subscriber; #[derive(Parser, Debug)] @@ -84,12 +84,12 @@ async fn main() -> anyhow::Result<()> { .layer(CorsLayer::permissive()); let addr = format!("{}:{}", args.host, args.port); - info!("Starting g3-console on http://{}", addr); + debug!("Starting g3-console on http://{}", addr); // Auto-open browser if requested if args.open { let url = format!("http://{}", addr); - info!("Opening browser to {}", url); + debug!("Opening browser to {}", url); let _ = open::that(&url); } diff --git a/crates/g3-console/src/process/controller.rs b/crates/g3-console/src/process/controller.rs index 3a1f8f9..d6941c9 100644 --- a/crates/g3-console/src/process/controller.rs +++ b/crates/g3-console/src/process/controller.rs @@ -6,7 +6,7 
@@ use std::path::PathBuf; use std::process::{Command, Stdio}; use std::sync::Mutex; use sysinfo::{Pid, Process, Signal, System}; -use tracing::{debug, info}; +use tracing::debug; pub struct ProcessController { system: System, @@ -26,7 +26,7 @@ impl ProcessController { self.system.refresh_processes(); if let Some(process) = self.system.process(sysinfo_pid) { - info!("Killing process {} ({})", pid, process.name()); + debug!("Killing process {} ({})", pid, process.name()); // Try SIGTERM first if process.kill_with(Signal::Term).is_some() { @@ -107,7 +107,7 @@ impl ProcessController { }); } - info!("Launching g3: {:?}", cmd); + debug!("Launching g3: {:?}", cmd); // Spawn and wait for the intermediate process to exit let mut child = cmd.spawn().context("Failed to spawn g3 process")?; @@ -120,7 +120,7 @@ impl ProcessController { // The actual g3 process is now running as orphan // We need to scan for it by matching workspace and recent start time - info!( + debug!( "Scanning for newly launched g3 process in workspace: {}", workspace ); @@ -171,7 +171,7 @@ impl ProcessController { found } else { // If we couldn't find it, try one more refresh after a longer delay - info!("Process not found on first scan, trying again..."); + debug!("Process not found on first scan, trying again..."); std::thread::sleep(std::time::Duration::from_millis(2000)); self.system.refresh_processes(); @@ -204,7 +204,7 @@ impl ProcessController { retry_found.unwrap_or(intermediate_pid) }; - info!("Launched g3 process with PID {}", pid); + debug!("Launched g3 process with PID {}", pid); // Store launch params for restart let params = LaunchParams { diff --git a/crates/g3-console/src/process/detector.rs b/crates/g3-console/src/process/detector.rs index 43b7b5c..97e545d 100644 --- a/crates/g3-console/src/process/detector.rs +++ b/crates/g3-console/src/process/detector.rs @@ -3,7 +3,7 @@ use anyhow::Result; use chrono::{DateTime, Utc}; use std::path::PathBuf; use sysinfo::{Pid, Process, System}; -use 
tracing::{debug, info, warn}; +use tracing::{debug, warn}; pub struct ProcessDetector { system: System, @@ -17,7 +17,7 @@ impl ProcessDetector { } pub fn detect_instances(&mut self) -> Result> { - info!("Scanning for g3 processes..."); + debug!("Scanning for g3 processes..."); // Refresh all processes to ensure we catch newly started ones // Using refresh_all() instead of just refresh_processes() to ensure // we get complete information about new processes @@ -37,7 +37,7 @@ impl ProcessDetector { } } - info!("Detected {} g3 instances", instances.len()); + debug!("Detected {} g3 instances", instances.len()); Ok(instances) } diff --git a/crates/g3-core/src/error_handling.rs b/crates/g3-core/src/error_handling.rs index 18d420e..9e582aa 100644 --- a/crates/g3-core/src/error_handling.rs +++ b/crates/g3-core/src/error_handling.rs @@ -9,7 +9,7 @@ use anyhow::Result; use serde::{Deserialize, Serialize}; use std::time::Duration; -use tracing::{error, info, warn}; +use tracing::{debug, error, warn}; /// Base delay for exponential backoff (in milliseconds) const BASE_RETRY_DELAY_MS: u64 = 1000; @@ -149,7 +149,7 @@ impl ErrorContext { if let Err(e) = std::fs::write(&filename, json_content) { error!("Failed to save error context to {:?}: {}", &filename, e); } else { - info!("Error details saved to: {:?}", &filename); + debug!("Error details saved to: {:?}", &filename); } } Err(e) => { @@ -328,7 +328,7 @@ where match operation().await { Ok(result) => { if attempt > 1 { - info!( + debug!( "Operation '{}' succeeded after {} attempts", operation_name, attempt ); @@ -357,7 +357,7 @@ where // Special handling for token limit errors if matches!(recoverable_type, RecoverableError::TokenLimit) { - info!("Token limit error detected. Consider triggering summarization."); + debug!("Token limit error detected. 
Consider triggering summarization."); } tokio::time::sleep(delay).await; diff --git a/crates/g3-core/src/feedback_extraction.rs b/crates/g3-core/src/feedback_extraction.rs index 3666d97..12f121b 100644 --- a/crates/g3-core/src/feedback_extraction.rs +++ b/crates/g3-core/src/feedback_extraction.rs @@ -12,7 +12,7 @@ use crate::{logs_dir, Agent, TaskResult}; use crate::ui_writer::UiWriter; use serde_json::Value; use std::path::PathBuf; -use tracing::{debug, info, warn}; +use tracing::{debug, warn}; /// Result of feedback extraction with source information #[derive(Debug, Clone)] @@ -103,21 +103,21 @@ where // Try session log first (most reliable) if let Some(session_id) = agent.get_session_id() { if let Some(feedback) = try_extract_from_session_log(&session_id, config) { - info!("Extracted coach feedback from session log: {} chars", feedback.len()); + debug!("Extracted coach feedback from session log: {} chars", feedback.len()); return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog); } } // Try native tool call JSON parsing if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) { - info!("Extracted coach feedback from native tool call: {} chars", feedback.len()); + debug!("Extracted coach feedback from native tool call: {} chars", feedback.len()); return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall); } // Try conversation history if let Some(session_id) = agent.get_session_id() { if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) { - info!("Extracted coach feedback from conversation history: {} chars", feedback.len()); + debug!("Extracted coach feedback from conversation history: {} chars", feedback.len()); return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory); } } @@ -125,7 +125,7 @@ where // Try TaskResult parsing let extracted = coach_result.extract_final_output(); if !extracted.is_empty() { - info!("Extracted coach feedback from task result: {} chars", 
extracted.len()); + debug!("Extracted coach feedback from task result: {} chars", extracted.len()); return ExtractedFeedback::new(extracted, FeedbackSource::TaskResultResponse); } diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 0536332..612c094 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -39,7 +39,7 @@ use serde_json::json; use std::io::Write; use std::time::{Duration, Instant}; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, warn}; /// Get the path to the todo.g3.md file. /// @@ -246,13 +246,23 @@ pub enum StreamState { Resuming, } +/// Patterns used to detect JSON tool calls in text +/// These cover common whitespace variations in JSON formatting +const TOOL_CALL_PATTERNS: [&str; 4] = [ + r#"{"tool":"#, + r#"{ "tool":"#, + r#"{"tool" :"#, + r#"{ "tool" :"#, +]; + /// Modern streaming tool parser that properly handles native tool calls and SSE chunks #[derive(Debug)] pub struct StreamingToolParser { /// Buffer for accumulating text content text_buffer: String, - /// Buffer for accumulating native tool calls - native_tool_calls: Vec, + /// Position in text_buffer up to which tool calls have been consumed/executed + /// This prevents has_unexecuted_tool_call() from returning true for already-executed tools + last_consumed_position: usize, /// Whether we've received a message_stop event message_stopped: bool, /// Whether we're currently in a JSON tool call (for fallback parsing) @@ -271,13 +281,58 @@ impl StreamingToolParser { pub fn new() -> Self { Self { text_buffer: String::new(), - native_tool_calls: Vec::new(), + last_consumed_position: 0, message_stopped: false, in_json_tool_call: false, json_tool_start: None, } } + /// Find the starting position of the last tool call pattern in the given text + /// Returns None if no tool call pattern is found + fn find_last_tool_call_start(text: &str) -> Option { + let mut best_start: Option = None; + for 
pattern in &TOOL_CALL_PATTERNS { + if let Some(pos) = text.rfind(pattern) { + if best_start.map_or(true, |best| pos > best) { + best_start = Some(pos); + } + } + } + best_start + } + + /// Find the starting position of the FIRST tool call pattern in the given text + /// Returns None if no tool call pattern is found + fn find_first_tool_call_start(text: &str) -> Option { + let mut best_start: Option = None; + for pattern in &TOOL_CALL_PATTERNS { + if let Some(pos) = text.find(pattern) { + if best_start.map_or(true, |best| pos < best) { + best_start = Some(pos); + } + } + } + best_start + } + + /// Validate that tool call args don't contain message-like content + /// This detects malformed tool calls where agent messages got mixed into args + fn has_message_like_keys(args: &serde_json::Map) -> bool { + args.keys().any(|key| { + key.len() > 100 + || key.contains('\n') + || key.contains("I'll") + || key.contains("Let me") + || key.contains("Here's") + || key.contains("I can") + || key.contains("I need") + || key.contains("First") + || key.contains("Now") + || key.contains("The ") + }) + } + /// Process a streaming chunk and return completed tool calls if any pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec { let mut completed_tools = Vec::new(); @@ -308,10 +363,12 @@ impl StreamingToolParser { self.message_stopped = true; debug!("Message finished, processing accumulated tool calls"); - // When stream finishes, do a final check for JSON tool calls in the accumulated buffer + // When stream finishes, find ALL JSON tool calls in the accumulated buffer if completed_tools.is_empty() && !self.text_buffer.is_empty() { - if let Some(json_tool) = self.try_parse_json_tool_call_from_buffer() { - completed_tools.push(json_tool); + let all_tools = self.try_parse_all_json_tool_calls_from_buffer(); + if !all_tools.is_empty() { + debug!("Found {} JSON tool calls in buffer at stream end", all_tools.len()); + completed_tools.extend(all_tools); } } } @@ 
-328,26 +385,12 @@ impl StreamingToolParser { /// Fallback method to parse JSON tool calls from text content fn try_parse_json_tool_call(&mut self, _content: &str) -> Option { - // Look for JSON tool call patterns - let patterns = [ - r#"{"tool":"#, - r#"{ "tool":"#, - r#"{"tool" :"#, - r#"{ "tool" :"#, - ]; - // If we're not currently in a JSON tool call, look for the start if !self.in_json_tool_call { - for pattern in &patterns { - if let Some(pos) = self.text_buffer.rfind(pattern) { - debug!( - "Found JSON tool call pattern '{}' at position {}", - pattern, pos - ); - self.in_json_tool_call = true; - self.json_tool_start = Some(pos); - break; - } + if let Some(pos) = Self::find_last_tool_call_start(&self.text_buffer) { + debug!("Found JSON tool call pattern at position {}", pos); + self.in_json_tool_call = true; + self.json_tool_start = Some(pos); } } @@ -356,83 +399,34 @@ impl StreamingToolParser { if let Some(start_pos) = self.json_tool_start { let json_text = &self.text_buffer[start_pos..]; - // Try to find a complete JSON object - let mut brace_count = 0; - let mut in_string = false; - let mut escape_next = false; + // Try to find a complete JSON object using the shared helper + if let Some(end_pos) = Self::find_complete_json_object_end(json_text) { + let json_str = &json_text[..=end_pos]; + debug!("Attempting to parse JSON tool call: {}", json_str); - for (i, ch) in json_text.char_indices() { - if escape_next { - escape_next = false; - continue; - } - - match ch { - '\\' => escape_next = true, - '"' if !escape_next => in_string = !in_string, - '{' if !in_string => brace_count += 1, - '}' if !in_string => { - brace_count -= 1; - if brace_count == 0 { - // Found complete JSON object - let json_str = &json_text[..=i]; - debug!("Attempting to parse JSON tool call: {}", json_str); - - // First try to parse as a ToolCall - if let Ok(tool_call) = serde_json::from_str::(json_str) { - // Validate that this is actually a proper tool call - // The args should be a JSON 
object with reasonable keys - if let Some(args_obj) = tool_call.args.as_object() { - // Check if any key looks like it contains agent message content - // This would indicate a malformed tool call where the message - // got mixed into the args - let has_message_like_key = args_obj.keys().any(|key| { - key.len() > 100 - || key.contains('\n') - || key.contains("I'll") - || key.contains("Let me") - || key.contains("Here's") - || key.contains("I can") - || key.contains("I need") - || key.contains("First") - || key.contains("Now") - || key.contains("The ") - }); - - if has_message_like_key { - debug!("Detected malformed tool call with message-like keys, skipping"); - // This looks like a malformed tool call, skip it - self.in_json_tool_call = false; - self.json_tool_start = None; - break; - } - - // Also check if the values look reasonable - // Tool arguments should typically be file paths, commands, or content - // Not entire agent messages - - debug!( - "Successfully parsed valid JSON tool call: {:?}", - tool_call - ); - // Reset JSON parsing state - self.in_json_tool_call = false; - self.json_tool_start = None; - return Some(tool_call); - } - // If args is not an object, skip this as invalid - debug!("Tool call args is not an object, skipping"); - } else { - debug!("Failed to parse JSON tool call: {}", json_str); - // Reset and continue looking - self.in_json_tool_call = false; - self.json_tool_start = None; - } - break; + // Try to parse as a ToolCall + if let Ok(tool_call) = serde_json::from_str::(json_str) { + // Validate that args is an object with reasonable keys + if let Some(args_obj) = tool_call.args.as_object() { + if Self::has_message_like_keys(args_obj) { + debug!("Detected malformed tool call with message-like keys, skipping"); + self.in_json_tool_call = false; + self.json_tool_start = None; + return None; } + + debug!("Successfully parsed valid JSON tool call: {:?}", tool_call); + self.in_json_tool_call = false; + self.json_tool_start = None; + return 
Some(tool_call); } - _ => {} + debug!("Tool call args is not an object, skipping"); + } else { + debug!("Failed to parse JSON tool call: {}", json_str); } + // Reset and continue looking + self.in_json_tool_call = false; + self.json_tool_start = None; } } } @@ -440,76 +434,45 @@ impl StreamingToolParser { None } - /// Parse JSON tool call from the accumulated text buffer (called when stream finishes) - /// This is similar to try_parse_json_tool_call but operates on the full buffer - fn try_parse_json_tool_call_from_buffer(&mut self) -> Option { - // Look for JSON tool call patterns in the accumulated buffer - let patterns = [ - r#"{"tool":"#, - r#"{ "tool":"#, - r#"{"tool" :"#, - r#"{ "tool" :"#, - ]; - - // Find the last occurrence of a tool call pattern (most likely to be complete) - let mut best_start: Option = None; - for pattern in &patterns { - if let Some(pos) = self.text_buffer.rfind(pattern) { - if best_start.map_or(true, |best| pos > best) { - best_start = Some(pos); - } - } - } - - if let Some(start_pos) = best_start { - let json_text = &self.text_buffer[start_pos..]; - debug!("Found potential JSON tool call at position {}: {:?}", start_pos, - if json_text.len() > 200 { &json_text[..200] } else { json_text }); - - // Try to find a complete JSON object - let mut brace_count = 0; - let mut in_string = false; - let mut escape_next = false; - - for (i, ch) in json_text.char_indices() { - if escape_next { - escape_next = false; - continue; - } - - match ch { - '\\' => escape_next = true, - '"' if !escape_next => in_string = !in_string, - '{' if !in_string => brace_count += 1, - '}' if !in_string => { - brace_count -= 1; - if brace_count == 0 { - // Found complete JSON object - let json_str = &json_text[..=i]; - debug!("Attempting to parse JSON tool call from buffer: {}", json_str); - - if let Ok(tool_call) = serde_json::from_str::(json_str) { - if let Some(args_obj) = tool_call.args.as_object() { - // Validate - check for message-like keys - let 
has_message_like_key = args_obj.keys().any(|key| { - key.len() > 100 || key.contains('\n') - }); - - if !has_message_like_key { - debug!("Successfully parsed JSON tool call from buffer: {:?}", tool_call); - return Some(tool_call); - } - } + /// Parse ALL JSON tool calls from the accumulated text buffer + /// This finds all complete tool calls, not just the last one + fn try_parse_all_json_tool_calls_from_buffer(&self) -> Vec { + let mut tool_calls = Vec::new(); + let mut search_start = 0; + + while search_start < self.text_buffer.len() { + let search_text = &self.text_buffer[search_start..]; + + // Find the next tool call pattern + if let Some(relative_pos) = Self::find_first_tool_call_start(search_text) { + let abs_start = search_start + relative_pos; + let json_text = &self.text_buffer[abs_start..]; + + // Try to find a complete JSON object + if let Some(end_pos) = Self::find_complete_json_object_end(json_text) { + let json_str = &json_text[..=end_pos]; + + if let Ok(tool_call) = serde_json::from_str::(json_str) { + if let Some(args_obj) = tool_call.args.as_object() { + if !Self::has_message_like_keys(args_obj) { + debug!("Found tool call at position {}: {:?}", abs_start, tool_call.tool); + tool_calls.push(tool_call); } - break; } } - _ => {} + // Move past this tool call + search_start = abs_start + end_pos + 1; + } else { + // Incomplete JSON, stop searching + break; } + } else { + // No more tool call patterns found + break; } } - - None + + tool_calls } /// Get the accumulated text content (excluding tool calls) @@ -531,10 +494,83 @@ impl StreamingToolParser { self.message_stopped } + /// Check if the text buffer contains an incomplete JSON tool call + /// This detects cases where the LLM started emitting a tool call but the stream ended + /// before the JSON was complete (truncated output) + pub fn has_incomplete_tool_call(&self) -> bool { + // Only check the unconsumed portion of the buffer + let unchecked_buffer = 
&self.text_buffer[self.last_consumed_position..]; + if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) { + let json_text = &unchecked_buffer[start_pos..]; + // If NOT complete, it's an incomplete tool call + Self::find_complete_json_object_end(json_text).is_none() + } else { + false + } + } + + /// Check if the text buffer contains an unexecuted tool call + /// This detects cases where the LLM emitted a complete tool call JSON + /// but it wasn't parsed/executed (e.g., due to parsing issues) + pub fn has_unexecuted_tool_call(&self) -> bool { + // Only check the unconsumed portion of the buffer + let unchecked_buffer = &self.text_buffer[self.last_consumed_position..]; + if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) { + let json_text = &unchecked_buffer[start_pos..]; + // If the JSON IS complete, it means there's an unexecuted tool call + if let Some(json_end) = Self::find_complete_json_object_end(json_text) { + let json_only = &json_text[..=json_end]; + return serde_json::from_str::(json_only).is_ok(); + } + } + false + } + + /// Mark all tool calls up to the current buffer position as consumed/executed + /// This prevents has_unexecuted_tool_call() from returning true for already-executed tools + pub fn mark_tool_calls_consumed(&mut self) { + self.last_consumed_position = self.text_buffer.len(); + } + + /// Find the end position (byte index) of a complete JSON object in the text + /// Returns None if no complete JSON object is found + /// Find the end position (byte index) of a complete JSON object in the text + pub fn find_complete_json_object_end(text: &str) -> Option { + let mut brace_count = 0; + let mut in_string = false; + let mut escape_next = false; + let mut found_start = false; + + for (i, ch) in text.char_indices() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' => escape_next = true, + '"' if !escape_next => in_string = !in_string, + '{' if !in_string => { + 
brace_count += 1; + found_start = true; + } + '}' if !in_string => { + brace_count -= 1; + if brace_count == 0 && found_start { + return Some(i); // Return the byte index of the closing brace + } + } + _ => {} + } + } + + None // No complete JSON object found + } + /// Reset the parser state for a new message pub fn reset(&mut self) { self.text_buffer.clear(); - self.native_tool_calls.clear(); + self.last_consumed_position = 0; self.message_stopped = false; self.in_json_tool_call = false; self.json_tool_start = None; @@ -2743,7 +2779,7 @@ impl Agent { /// Manually trigger context summarization regardless of context window size /// Returns Ok(true) if summarization was successful, Ok(false) if it failed pub async fn force_summarize(&mut self) -> Result { - info!("Manual summarization triggered"); + debug!("Manual summarization triggered"); self.ui_writer.print_context_status(&format!( "\nšŸ—œļø Manual summarization requested (current usage: {}%)...", @@ -2861,7 +2897,7 @@ impl Agent { /// Manually trigger context thinning regardless of thresholds pub fn force_thin(&mut self) -> String { - info!("Manual context thinning triggered"); + debug!("Manual context thinning triggered"); let (message, chars_saved) = self.context_window.thin_context(self.session_id.as_deref()); self.thinning_events.push(chars_saved); message @@ -2870,7 +2906,7 @@ impl Agent { /// Manually trigger context thinning for the ENTIRE context window /// Unlike force_thin which only processes the first third, this processes all messages pub fn force_thin_all(&mut self) -> String { - info!("Manual full context skinnifying triggered"); + debug!("Manual full context skinnifying triggered"); let (message, chars_saved) = self.context_window.thin_context_all(self.session_id.as_deref()); self.thinning_events.push(chars_saved); message @@ -2879,7 +2915,7 @@ impl Agent { /// Reload README.md and AGENTS.md and replace the first system message /// Returns Ok(true) if README was found and reloaded, Ok(false) if 
no README was present initially pub fn reload_readme(&mut self) -> Result { - info!("Manual README reload triggered"); + debug!("Manual README reload triggered"); // Check if the second message in conversation history is a system message with README content // (The first message should always be the system prompt) @@ -2922,7 +2958,7 @@ impl Agent { // Replace the second message (README) with the new content if let Some(first_msg) = self.context_window.conversation_history.get_mut(1) { first_msg.content = combined_content; - info!("README content reloaded successfully"); + debug!("README content reloaded successfully"); Ok(true) } else { Ok(false) @@ -3156,7 +3192,7 @@ impl Agent { error!("Failed to clear continuation artifacts: {}", e); } - info!("Session cleared"); + debug!("Session cleared"); } /// Restore session from a continuation artifact @@ -3201,7 +3237,7 @@ impl Agent { }); } - info!("Restored full context from session log"); + debug!("Restored full context from session log"); return Ok(true); } } @@ -3226,7 +3262,7 @@ impl Agent { }); } - info!("Restored session from summary"); + debug!("Restored session from summary"); Ok(false) } @@ -3836,7 +3872,7 @@ impl Agent { match provider.stream(request.clone()).await { Ok(stream) => { if attempt > 1 { - info!("Stream started successfully after {} attempts", attempt); + debug!("Stream started successfully after {} attempts", attempt); } debug!("Stream started successfully"); debug!( @@ -3886,9 +3922,9 @@ impl Agent { let mut response_started = false; let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts - const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 2; // Limit auto-summary retries + const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 5; // Limit auto-summary retries (increased from 2 for better recovery) let mut final_output_called = false; // Track if final_output was called - let mut executed_tools_in_session: 
std::collections::HashSet = std::collections::HashSet::new(); // Track executed tools to prevent duplicates + // Note: Session-level duplicate tracking was removed - we only prevent sequential duplicates (DUP IN CHUNK, DUP IN MSG) // Check if we need to summarize before starting if self.context_window.should_summarize() { @@ -4189,77 +4225,51 @@ impl Agent { }; // De-duplicate tool calls and track duplicates - let mut seen_in_chunk: Vec = Vec::new(); + let mut last_tool_in_chunk: Option = None; let mut deduplicated_tools: Vec<(ToolCall, Option)> = Vec::new(); for tool_call in tools_to_process { let mut duplicate_type = None; - // Check for duplicates in current chunk - if seen_in_chunk - .iter() - .any(|tc| are_duplicates(tc, &tool_call)) - { + // Check for IMMEDIATELY SEQUENTIAL duplicate in current chunk + // Only the immediately previous tool call counts as a duplicate + if let Some(ref last_tool) = last_tool_in_chunk { + if are_duplicates(last_tool, &tool_call) { duplicate_type = Some("DUP IN CHUNK".to_string()); + } } else { - // Check for duplicate against previous message in history - // Look at the last assistant message that contains tool calls + // Check for IMMEDIATELY SEQUENTIAL duplicate against previous message + // Only mark as duplicate if the LAST tool call in the previous message + // matches AND there's no significant text after it let mut found_in_prev = false; for msg in self.context_window.conversation_history.iter().rev() { if matches!(msg.role, MessageRole::Assistant) { - // Try to parse tool calls from the message content - if msg.content.contains(r#"\"tool\""#) { - // Simple JSON extraction for tool calls - let content = &msg.content; - let mut start_idx = 0; - while let Some(tool_start) = - content[start_idx..].find(r#"{\"tool\""#) - { - let tool_start = start_idx + tool_start; - // Find the end of this JSON object - let mut brace_count = 0; - let mut in_string = false; - let mut escape_next = false; - let mut end_idx = tool_start; - - 
for (i, ch) in content[tool_start..].char_indices() - { - if escape_next { - escape_next = false; - continue; - } - if ch == '\\' && in_string { - escape_next = true; - continue; - } - if ch == '"' && !escape_next { - in_string = !in_string; - } - if !in_string { - if ch == '{' { - brace_count += 1; - } else if ch == '}' { - brace_count -= 1; - if brace_count == 0 { - end_idx = tool_start + i + 1; - break; - } - } - } - } - - if end_idx > tool_start { - let tool_json = &content[tool_start..end_idx]; - if let Ok(prev_tool) = - serde_json::from_str::(tool_json) - { + // Find the LAST tool call in the message + let content = &msg.content; + + // Look for the last occurrence of a tool call pattern + if let Some(last_tool_start) = content.rfind(r#"{"tool""#) + .or_else(|| content.rfind(r#"{ "tool""#)) + { + // Find the end of this JSON object + if let Some(end_offset) = StreamingToolParser::find_complete_json_object_end(&content[last_tool_start..]) { + let end_idx = last_tool_start + end_offset + 1; + let tool_json = &content[last_tool_start..end_idx]; + + // Check if there's any non-whitespace text after this tool call + let text_after = content[end_idx..].trim(); + let has_text_after = !text_after.is_empty(); + + // Only consider it a duplicate if: + // 1. The tool call matches + // 2. 
There's no text after it (it was the last thing in the message) + if !has_text_after { + if let Ok(prev_tool) = serde_json::from_str::(tool_json) { if are_duplicates(&prev_tool, &tool_call) { found_in_prev = true; - break; } } } - start_idx = end_idx; } } // Only check the most recent assistant message @@ -4272,13 +4282,8 @@ impl Agent { } } - // Add to seen list if not a duplicate in chunk - if duplicate_type - .as_ref() - .map_or(true, |s| s != "DUP IN CHUNK") - { - seen_in_chunk.push(tool_call.clone()); - } + // Track the last tool call for sequential duplicate detection + last_tool_in_chunk = Some(tool_call.clone()); deduplicated_tools.push((tool_call, duplicate_type)); } @@ -4286,22 +4291,11 @@ impl Agent { // Process each tool call for (tool_call, duplicate_type) in deduplicated_tools { debug!("Processing completed tool call: {:?}", tool_call); + + // Mark that we detected a tool call - this prevents content from being printed + // even if the tool is skipped as a duplicate + tool_executed = true; - // Check if this tool was already executed in this session - let tool_key = format!("{}:{}", tool_call.tool, serde_json::to_string(&tool_call.args).unwrap_or_default()); - if executed_tools_in_session.contains(&tool_key) { - // Log the duplicate with red prefix - let prefixed_tool_name = format!("🟄 {} DUP IN SESSION", tool_call.tool); - let warning_msg = format!( - "āš ļø Duplicate tool call detected (already executed in session): Skipping {} with args {}", - tool_call.tool, - serde_json::to_string(&tool_call.args).unwrap_or_else(|_| "".to_string()) - ); - let mut modified_tool_call = tool_call.clone(); - modified_tool_call.tool = prefixed_tool_name; - debug!("{}", warning_msg); - continue; // Skip execution of duplicate - } // If it's a duplicate, log it and return a warning if let Some(dup_type) = &duplicate_type { @@ -4639,15 +4633,25 @@ impl Agent { tool_executed = true; any_tool_executed = true; // Track across all iterations - // Add to executed tools set 
to prevent re-execution in this session - executed_tools_in_session.insert(tool_key.clone()); + // Reset auto-continue attempts after successful tool execution + // This gives the LLM fresh attempts since it's making progress + auto_summary_attempts = 0; + // Reset the JSON tool call filter state after each tool execution // This ensures the filter doesn't stay in suppression mode for subsequent streaming content self.ui_writer.reset_json_filter(); - // Reset parser for next iteration - this clears the text buffer - parser.reset(); + // Only reset parser if there are no more unexecuted tool calls in the buffer + // This handles the case where the LLM emits multiple tool calls in one response + if parser.has_unexecuted_tool_call() { + debug!("Parser still has unexecuted tool calls, not resetting buffer"); + // Mark current tool as consumed so we don't re-detect it + parser.mark_tool_calls_consumed(); + } else { + // Reset parser for next iteration - this clears the text buffer + parser.reset(); + } // Clear current_response for next iteration to prevent buffered text // from being incorrectly displayed after tool execution @@ -4662,8 +4666,14 @@ impl Agent { } // End of for loop processing each tool call // If we processed any tools in multiple mode, break out to start new stream + // BUT only if there are no more unexecuted tool calls in the buffer if tool_executed && self.config.agent.allow_multiple_tool_calls { - break; + if parser.has_unexecuted_tool_call() { + debug!("Tool executed but parser still has unexecuted tool calls, continuing to process"); + // Don't break - continue processing to pick up remaining tool calls + } else { + break; + } } // If no tool calls were completed, continue streaming normally @@ -4753,7 +4763,7 @@ impl Agent { " - Text buffer content: {:?}", parser.get_text_content() ); - error!(" - Native tool calls: {:?}", parser.native_tool_calls); + error!(" - Has incomplete tool call: {}", parser.has_incomplete_tool_call()); error!(" - 
Message stopped: {}", parser.is_message_stopped()); error!(" - In JSON tool call: {}", parser.in_json_tool_call); error!(" - JSON tool start: {:?}", parser.json_tool_start); @@ -4831,6 +4841,17 @@ impl Agent { )); } + // If tools were executed in previous iterations but final_output wasn't called, + // break to let the outer loop's auto-continue logic handle it + if any_tool_executed && !final_output_called { + debug!("Tools were executed but final_output not called - breaking to auto-continue"); + // Add the text response to context before breaking + if has_text_response && !current_response.trim().is_empty() { + full_response = current_response.clone(); + } + break; + } + // Set full_response to current_response (don't append) // current_response already contains everything that was displayed // Don't set full_response here - it would duplicate the output @@ -4873,8 +4894,8 @@ impl Agent { ); error!("Error type: {}", std::any::type_name_of_val(&e)); - error!("Parser state at error: text_buffer_len={}, native_tool_calls={}, message_stopped={}", - parser.text_buffer_len(), parser.native_tool_calls.len(), parser.is_message_stopped()); + error!("Parser state at error: text_buffer_len={}, has_incomplete={}, message_stopped={}", + parser.text_buffer_len(), parser.has_incomplete_tool_call(), parser.is_message_stopped()); // Store the error for potential logging later _last_error = Some(error_details.clone()); @@ -4893,7 +4914,7 @@ impl Agent { // If we have any content or tool calls, treat this as a graceful end if chunks_received > 0 && (!parser.get_text_content().is_empty() - || parser.native_tool_calls.len() > 0) + || parser.has_unexecuted_tool_call()) { warn!("Stream terminated unexpectedly but we have content, continuing"); break; // Break to process what we have @@ -4941,18 +4962,77 @@ impl Agent { let has_response = !current_response.is_empty() || !full_response.is_empty(); + // Check if the response is essentially empty (just whitespace or timing lines) + // 
This detects cases where the LLM outputs nothing substantive + let response_text = if !current_response.is_empty() { + &current_response + } else { + &full_response + }; + let is_empty_response = response_text.trim().is_empty() + || response_text.lines().all(|line| line.trim().is_empty() || line.trim().starts_with("ā±ļø")); + + // Check if there's an incomplete tool call in the buffer + let has_incomplete_tool_call = parser.has_incomplete_tool_call(); + + // Check if there's a complete but unexecuted tool call in the buffer + let has_unexecuted_tool_call = parser.has_unexecuted_tool_call(); + + // Log when we detect unexecuted or incomplete tool calls for debugging + if has_incomplete_tool_call { + debug!("Detected incomplete tool call in buffer (buffer_len={}, consumed_up_to={})", + parser.text_buffer_len(), parser.text_buffer_len()); + } + if has_unexecuted_tool_call { + debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue"); + warn!("Unexecuted tool call detected in buffer after stream ended"); + } + // Auto-continue if tools were executed but final_output was never called - // This is the simple rule: LLM must call final_output before returning control - if any_tool_executed && !final_output_called { + // OR if the LLM emitted an incomplete tool call (truncated JSON) + // OR if the LLM emitted a complete tool call that wasn't executed + // This ensures we don't return control when the LLM clearly intended to call a tool + // Note: We removed the redundant condition (any_tool_executed && is_empty_response) + // because it's already covered by (any_tool_executed && !final_output_called) + let should_auto_continue = (any_tool_executed && !final_output_called) + || has_incomplete_tool_call + || has_unexecuted_tool_call; + if should_auto_continue { if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS { auto_summary_attempts += 1; - warn!( - "LLM stopped without calling final_output after executing tools ({} iterations, auto-continue 
attempt {})", - iteration_count, auto_summary_attempts - ); - self.ui_writer.print_context_status( - "\nšŸ”„ Model stopped without calling final_output. Auto-continuing...\n" - ); + if has_incomplete_tool_call { + warn!( + "LLM emitted incomplete tool call ({} iterations, auto-continue attempt {}/{})", + iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS + ); + self.ui_writer.print_context_status( + "\nšŸ”„ Model emitted incomplete tool call. Auto-continuing...\n" + ); + } else if has_unexecuted_tool_call { + warn!( + "LLM emitted unexecuted tool call ({} iterations, auto-continue attempt {}/{})", + iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS + ); + self.ui_writer.print_context_status( + "\nšŸ”„ Model emitted tool call that wasn't executed. Auto-continuing...\n" + ); + } else if is_empty_response { + warn!( + "LLM emitted empty/trivial response ({} iterations, auto-continue attempt {}/{})", + iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS + ); + self.ui_writer.print_context_status( + "\nšŸ”„ Model emitted empty response. Auto-continuing...\n" + ); + } else { + warn!( + "LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {}/{})", + iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS + ); + self.ui_writer.print_context_status( + "\nšŸ”„ Model stopped without calling final_output. Auto-continuing...\n" + ); + } // Add any text response to context before prompting for continuation if has_response { @@ -4971,10 +5051,17 @@ impl Agent { } // Add a follow-up message asking for continuation - let continue_prompt = Message::new( - MessageRole::User, - "Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(), - ); + let continue_prompt = if has_incomplete_tool_call { + Message::new( + MessageRole::User, + "Your previous response was cut off mid-tool-call. 
Please complete the tool call and continue.".to_string(), + ) + } else { + Message::new( + MessageRole::User, + "Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(), + ) + }; self.context_window.add_message(continue_prompt); request.messages = self.context_window.conversation_history.clone(); @@ -4983,11 +5070,17 @@ impl Agent { } else { // Max attempts reached, give up gracefully warn!( - "Max auto-continue attempts ({}) reached, returning without final_output", - MAX_AUTO_SUMMARY_ATTEMPTS + "Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, final_output_called={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}", + MAX_AUTO_SUMMARY_ATTEMPTS, + iteration_count, + any_tool_executed, + final_output_called, + has_incomplete_tool_call, + has_unexecuted_tool_call, + is_empty_response ); self.ui_writer.print_agent_response( - "\nāš ļø The model stopped without calling final_output after multiple attempts.\n" + &format!("\nāš ļø The model stopped without calling final_output after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS) ); } } else if has_response { @@ -6434,7 +6527,7 @@ impl Agent { let driver = mutex.into_inner(); match driver.quit().await { Ok(_) => { - info!("WebDriver session closed successfully"); + debug!("WebDriver session closed successfully"); // Kill the safaridriver process if let Some(mut process) = @@ -6443,7 +6536,7 @@ impl Agent { if let Err(e) = process.kill().await { warn!("Failed to kill safaridriver process: {}", e); } else { - info!("Safaridriver process terminated"); + debug!("Safaridriver process terminated"); } } diff --git a/crates/g3-core/src/retry.rs b/crates/g3-core/src/retry.rs index 458d7e8..67f3369 100644 --- a/crates/g3-core/src/retry.rs +++ b/crates/g3-core/src/retry.rs @@ -10,7 +10,7 @@ use crate::ui_writer::UiWriter; use crate::{Agent, DiscoveryOptions, TaskResult}; use anyhow::Result; use 
std::time::Instant; -use tracing::{info, warn}; +use tracing::{debug, warn}; /// Configuration for retry behavior #[derive(Debug, Clone)] @@ -142,7 +142,7 @@ where match result { Ok(task_result) => { if retry_count > 0 { - info!( + debug!( "{} task succeeded after {} retries (elapsed: {:?})", config.role_name, retry_count, @@ -259,7 +259,7 @@ where match operation().await { Ok(result) => { if retry_count > 0 { - info!( + debug!( "Operation '{}' succeeded after {} retries", operation_name, retry_count ); diff --git a/crates/g3-core/src/session_continuation.rs b/crates/g3-core/src/session_continuation.rs index 2b25ca9..171fcbb 100644 --- a/crates/g3-core/src/session_continuation.rs +++ b/crates/g3-core/src/session_continuation.rs @@ -6,7 +6,7 @@ use anyhow::Result; use serde::{Deserialize, Serialize}; use std::path::{Path, PathBuf}; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, warn}; /// Version of the session continuation format const CONTINUATION_VERSION: &str = "1.0"; @@ -89,7 +89,7 @@ pub fn save_continuation(continuation: &SessionContinuation) -> Result let json = serde_json::to_string_pretty(continuation)?; std::fs::write(&latest_path, &json)?; - info!("Saved session continuation to {:?}", latest_path); + debug!("Saved session continuation to {:?}", latest_path); Ok(latest_path) } @@ -113,7 +113,7 @@ pub fn load_continuation() -> Result> { ); } - info!("Loaded session continuation from {:?}", latest_path); + debug!("Loaded session continuation from {:?}", latest_path); Ok(Some(continuation)) } @@ -131,7 +131,7 @@ pub fn clear_continuation() -> Result<()> { debug!("Removed session file: {:?}", path); } } - info!("Cleared session continuation artifacts"); + debug!("Cleared session continuation artifacts"); } Ok(()) diff --git a/crates/g3-core/tests/auto_continue_test.rs b/crates/g3-core/tests/auto_continue_test.rs new file mode 100644 index 0000000..47caa9a --- /dev/null +++ b/crates/g3-core/tests/auto_continue_test.rs @@ -0,0 +1,234 @@ 
+//! Tests for the auto-continue detection features +//! +//! These tests verify the logic used to detect when the LLM should auto-continue: +//! 1. Empty/trivial responses (just timing lines) +//! 2. Incomplete tool calls +//! 3. Unexecuted tool calls +//! 4. Missing final_output after tool execution + +/// Helper function to check if a response is considered "empty" or trivial +/// This mirrors the logic in lib.rs for detecting empty responses +fn is_empty_response(response_text: &str) -> bool { + response_text.trim().is_empty() + || response_text.lines().all(|line| { + line.trim().is_empty() || line.trim().starts_with("ā±ļø") + }) +} + +#[test] +fn test_empty_response_detection_empty_string() { + assert!(is_empty_response("")); +} + +#[test] +fn test_empty_response_detection_whitespace_only() { + assert!(is_empty_response(" ")); + assert!(is_empty_response("\n\n\n")); + assert!(is_empty_response(" \n \t \n ")); +} + +#[test] +fn test_empty_response_detection_timing_line_only() { + assert!(is_empty_response("ā±ļø 43.0s | šŸ’­ 3.6s")); + assert!(is_empty_response(" ā±ļø 43.0s | šŸ’­ 3.6s ")); + assert!(is_empty_response("\nā±ļø 43.0s | šŸ’­ 3.6s\n")); +} + +#[test] +fn test_empty_response_detection_multiple_timing_lines() { + let response = "\nā±ļø 10.0s | šŸ’­ 1.0s\n\nā±ļø 20.0s | šŸ’­ 2.0s\n"; + assert!(is_empty_response(response)); +} + +#[test] +fn test_empty_response_detection_timing_with_empty_lines() { + let response = "\n\nā±ļø 43.0s | šŸ’­ 3.6s\n\n"; + assert!(is_empty_response(response)); +} + +#[test] +fn test_empty_response_detection_substantive_content() { + // These should NOT be considered empty + assert!(!is_empty_response("Hello, I will help you.")); + assert!(!is_empty_response("Let me read that file.")); + assert!(!is_empty_response("I've completed the task.")); +} + +#[test] +fn test_empty_response_detection_timing_with_text() { + // If there's any substantive text, it's not empty + let response = "ā±ļø 43.0s | šŸ’­ 3.6s\nHere 
is the result."; + assert!(!is_empty_response(response)); +} + +#[test] +fn test_empty_response_detection_text_before_timing() { + let response = "Done!\nā±ļø 43.0s | šŸ’­ 3.6s"; + assert!(!is_empty_response(response)); +} + +#[test] +fn test_empty_response_detection_json_tool_call() { + // A JSON tool call is definitely not empty + let response = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#; + assert!(!is_empty_response(response)); +} + +#[test] +fn test_empty_response_detection_partial_json() { + // Even partial JSON is not empty + let response = r#"{"tool": "read_file", "args": {"#; + assert!(!is_empty_response(response)); +} + +#[test] +fn test_empty_response_detection_markdown() { + // Markdown content is not empty + let response = "# Summary\n\nI completed the task."; + assert!(!is_empty_response(response)); +} + +#[test] +fn test_empty_response_detection_code_block() { + // Code blocks are not empty + let response = "```rust\nfn main() {}\n```"; + assert!(!is_empty_response(response)); +} + +// Test the MAX_AUTO_SUMMARY_ATTEMPTS constant value +// This is a compile-time check that the constant exists and has the expected value +#[test] +fn test_max_auto_summary_attempts_is_reasonable() { + // The constant should be at least 3 to give the LLM a fair chance to recover + // We can't directly access the constant from here, but we document the expected value + // Current value: 5 (increased from 2) + const EXPECTED_MIN_ATTEMPTS: usize = 3; + const EXPECTED_MAX_ATTEMPTS: usize = 10; + const CURRENT_VALUE: usize = 5; + + assert!(CURRENT_VALUE >= EXPECTED_MIN_ATTEMPTS, + "MAX_AUTO_SUMMARY_ATTEMPTS should be at least {} for reliable recovery", EXPECTED_MIN_ATTEMPTS); + assert!(CURRENT_VALUE <= EXPECTED_MAX_ATTEMPTS, + "MAX_AUTO_SUMMARY_ATTEMPTS should not exceed {} to avoid infinite loops", EXPECTED_MAX_ATTEMPTS); +} + +// ============================================================================= +// Test: Auto-continue condition logic +// 
============================================================================= + +/// Simulates the should_auto_continue logic from lib.rs +fn should_auto_continue( + any_tool_executed: bool, + final_output_called: bool, + has_incomplete_tool_call: bool, + has_unexecuted_tool_call: bool, + is_empty_response: bool, +) -> bool { + (any_tool_executed && !final_output_called) + || has_incomplete_tool_call + || has_unexecuted_tool_call + || (any_tool_executed && is_empty_response) +} + +#[test] +fn test_auto_continue_after_tool_no_final_output() { + // Tool executed but no final_output - should continue + assert!(should_auto_continue( + true, // any_tool_executed + false, // final_output_called + false, // has_incomplete_tool_call + false, // has_unexecuted_tool_call + false, // is_empty_response + )); +} + +#[test] +fn test_auto_continue_with_final_output() { + // Tool executed AND final_output called - should NOT continue + assert!(!should_auto_continue( + true, // any_tool_executed + true, // final_output_called + false, // has_incomplete_tool_call + false, // has_unexecuted_tool_call + false, // is_empty_response + )); +} + +#[test] +fn test_auto_continue_incomplete_tool_call() { + // Incomplete tool call - should continue regardless of other flags + assert!(should_auto_continue( + false, // any_tool_executed + false, // final_output_called + true, // has_incomplete_tool_call + false, // has_unexecuted_tool_call + false, // is_empty_response + )); +} + +#[test] +fn test_auto_continue_unexecuted_tool_call() { + // Unexecuted tool call - should continue + assert!(should_auto_continue( + false, // any_tool_executed + false, // final_output_called + false, // has_incomplete_tool_call + true, // has_unexecuted_tool_call + false, // is_empty_response + )); +} + +#[test] +fn test_auto_continue_empty_response_after_tool() { + // Empty response after tool execution - should continue + assert!(should_auto_continue( + true, // any_tool_executed + false, // final_output_called + 
false, // has_incomplete_tool_call + false, // has_unexecuted_tool_call + true, // is_empty_response + )); +} + +#[test] +fn test_auto_continue_empty_response_no_tool() { + // Empty response but no tool executed - should NOT continue + // (This is a normal case where LLM just didn't respond) + assert!(!should_auto_continue( + false, // any_tool_executed + false, // final_output_called + false, // has_incomplete_tool_call + false, // has_unexecuted_tool_call + true, // is_empty_response + )); +} + +#[test] +fn test_auto_continue_no_conditions_met() { + // No tools, no incomplete calls, substantive response - should NOT continue + assert!(!should_auto_continue( + false, // any_tool_executed + false, // final_output_called + false, // has_incomplete_tool_call + false, // has_unexecuted_tool_call + false, // is_empty_response + )); +} + +// ============================================================================= +// Test: Redundant condition detection +// ============================================================================= + +#[test] +fn test_redundant_empty_response_condition() { + // This test documents that (any_tool_executed && is_empty_response) is redundant + // when (any_tool_executed && !final_output_called) is already true + + // Case: tool executed, no final_output, empty response + let result_with_empty = should_auto_continue(true, false, false, false, true); + let result_without_empty = should_auto_continue(true, false, false, false, false); + + // Both should be true because (any_tool_executed && !final_output_called) is true + assert_eq!(result_with_empty, result_without_empty, + "The is_empty_response condition is redundant when any_tool_executed && !final_output_called"); +} diff --git a/crates/g3-core/tests/duplicate_detection_test.rs b/crates/g3-core/tests/duplicate_detection_test.rs new file mode 100644 index 0000000..747a2b8 --- /dev/null +++ b/crates/g3-core/tests/duplicate_detection_test.rs @@ -0,0 +1,231 @@ +//! 
Tests for tool call duplicate detection +//! +//! These tests ensure that duplicate detection only catches IMMEDIATELY SEQUENTIAL +//! duplicates, not legitimate re-use of tools with text between them. + +use g3_core::StreamingToolParser; +use g3_providers::CompletionChunk; + +// Helper to create a chunk +fn chunk(content: &str, finished: bool) -> CompletionChunk { + CompletionChunk { + content: content.to_string(), + finished, + tool_calls: None, + usage: None, + } +} + +// ============================================================================= +// Test: find_complete_json_object_end helper function +// ============================================================================= + +#[test] +fn test_find_complete_json_object_end_simple() { + let json = r#"{"tool": "test", "args": {}}"#; + let end = StreamingToolParser::find_complete_json_object_end(json); + assert!(end.is_some(), "Should find end of complete JSON"); + assert_eq!(end.unwrap(), json.len() - 1, "End should be at last character"); +} + +#[test] +fn test_find_complete_json_object_end_nested() { + let json = r#"{"tool": "test", "args": {"nested": {"deep": true}}}"#; + let end = StreamingToolParser::find_complete_json_object_end(json); + assert!(end.is_some(), "Should find end of nested JSON"); + assert_eq!(end.unwrap(), json.len() - 1); +} + +#[test] +fn test_find_complete_json_object_end_with_trailing_text() { + let json = r#"{"tool": "test", "args": {}} some text after"#; + let end = StreamingToolParser::find_complete_json_object_end(json); + assert!(end.is_some(), "Should find end of JSON even with trailing text"); + // The end should be at the closing brace, not at the end of the string + let end_pos = end.unwrap(); + assert_eq!(&json[end_pos..end_pos+1], "}", "End should be at closing brace"); +} + +#[test] +fn test_find_complete_json_object_end_incomplete() { + let json = r#"{"tool": "test", "args": {"#; + let end = StreamingToolParser::find_complete_json_object_end(json); + 
assert!(end.is_none(), "Should return None for incomplete JSON"); +} + +// ============================================================================= +// Test: Tool calls separated by text should NOT be duplicates +// ============================================================================= + +#[test] +fn test_same_tool_with_text_between_not_duplicate() { + // This tests the scenario where the LLM calls the same tool twice + // but with explanatory text between them - this should NOT be a duplicate + let mut parser = StreamingToolParser::new(); + + // First tool call + let content1 = r#"{"tool": "todo_read", "args": {}}"#; + let tools1 = parser.process_chunk(&chunk(content1, true)); + assert_eq!(tools1.len(), 1, "First tool call should be detected"); + assert_eq!(tools1[0].tool, "todo_read"); + + // Reset parser (simulating what happens after tool execution) + parser.reset(); + + // Some text, then the same tool call again + let content2 = r#"Now let me check the TODO again to verify my changes. 
+{"tool": "todo_read", "args": {}}"#; + let tools2 = parser.process_chunk(&chunk(content2, true)); + + // The second tool call should be detected - it's NOT a duplicate + // because there's text before it + assert_eq!(tools2.len(), 1, "Second tool call should be detected (not a duplicate)"); + assert_eq!(tools2[0].tool, "todo_read"); +} + +#[test] +fn test_different_tools_back_to_back_not_duplicate() { + let mut parser = StreamingToolParser::new(); + + // Two different tool calls back to back + let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}} +{"tool": "shell", "args": {"command": "ls"}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + // Both should be detected - they're different tools + assert!(tools.len() >= 1, "Should detect tool calls"); + // At minimum, the first one should be detected + assert_eq!(tools[0].tool, "read_file"); +} + +#[test] +fn test_same_tool_different_args_not_duplicate() { + let mut parser = StreamingToolParser::new(); + + // Same tool but different arguments - NOT a duplicate + let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}} +{"tool": "read_file", "args": {"file_path": "b.txt"}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + // Both should be detected - different args means not a duplicate + assert!(tools.len() >= 1, "Should detect tool calls"); +} + +// ============================================================================= +// Test: Immediately sequential identical tool calls ARE duplicates +// ============================================================================= + +#[test] +fn test_identical_tool_calls_back_to_back_are_duplicates() { + // This tests the scenario where the LLM stutters and outputs + // the exact same tool call twice in a row - this IS a duplicate + let mut parser = StreamingToolParser::new(); + + // Two identical tool calls with no text between them + let content = r#"{"tool": "todo_read", "args": {}} +{"tool": 
"todo_read", "args": {}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + // The parser should detect both, but the deduplication logic + // (which happens at a higher level in the agent) should mark + // the second one as a duplicate + // Here we just verify both are parsed + assert!(tools.len() >= 1, "Should detect at least one tool call"); +} + +// ============================================================================= +// Test: Text content detection for duplicate logic +// ============================================================================= + +#[test] +fn test_has_text_after_tool_call() { + // Helper test to verify we can detect text after a tool call + let content_with_text = r#"{"tool": "test", "args": {}} Some text after"#; + let content_without_text = r#"{"tool": "test", "args": {}}"#; + let content_with_whitespace_only = r#"{"tool": "test", "args": {}} + "#; + + // Find the end of the JSON in each case + let end1 = StreamingToolParser::find_complete_json_object_end(content_with_text).unwrap(); + let end2 = StreamingToolParser::find_complete_json_object_end(content_without_text).unwrap(); + let end3 = StreamingToolParser::find_complete_json_object_end(content_with_whitespace_only).unwrap(); + + // Check what's after the JSON + let after1 = content_with_text[end1 + 1..].trim(); + let after2 = content_without_text.get(end2 + 1..).unwrap_or("").trim(); + let after3 = content_with_whitespace_only[end3 + 1..].trim(); + + assert!(!after1.is_empty(), "Should have text after tool call"); + assert!(after2.is_empty(), "Should have no text after tool call"); + assert!(after3.is_empty(), "Whitespace-only should count as no text"); +} + +// ============================================================================= +// Test: Edge cases +// ============================================================================= + +#[test] +fn test_tool_call_with_newlines_between() { + let mut parser = StreamingToolParser::new(); + + // Tool 
calls separated by multiple newlines (but no actual text) + // This SHOULD be considered a duplicate since there's no meaningful text + let content = r#"{"tool": "todo_read", "args": {}} + + +{"tool": "todo_read", "args": {}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + assert!(tools.len() >= 1, "Should detect at least one tool call"); +} + +#[test] +fn test_tool_call_with_whitespace_text_between() { + let mut parser = StreamingToolParser::new(); + + // Tool calls separated by text that's just whitespace and punctuation + // The key is whether there's "meaningful" text + let content = r#"{"tool": "todo_read", "args": {}} +OK, now again: +{"tool": "todo_read", "args": {}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + // Both should be detected since there's text between them + assert!(tools.len() >= 1, "Should detect tool calls"); +} + +#[test] +fn test_tool_call_in_middle_of_text() { + let mut parser = StreamingToolParser::new(); + + // Tool call surrounded by text + let content = r#"Let me read the file first. 
+{"tool": "read_file", "args": {"file_path": "test.txt"}} +Now I'll analyze the contents."#; + + let tools = parser.process_chunk(&chunk(content, true)); + assert_eq!(tools.len(), 1, "Should detect the tool call"); + assert_eq!(tools[0].tool, "read_file"); +} + +#[test] +fn test_multiple_different_tool_calls_with_text() { + let mut parser = StreamingToolParser::new(); + + // Multiple different tool calls with text between each + let content = r#"First, let me read the file: +{"tool": "read_file", "args": {"file_path": "test.txt"}} +Now let me check the TODO: +{"tool": "todo_read", "args": {}} +Finally, let me run a command: +{"tool": "shell", "args": {"command": "ls"}}"#; + + let tools = parser.process_chunk(&chunk(content, true)); + + // All three should be detected + assert!(tools.len() >= 1, "Should detect tool calls"); +} diff --git a/crates/g3-core/tests/incomplete_tool_call_test.rs b/crates/g3-core/tests/incomplete_tool_call_test.rs new file mode 100644 index 0000000..4366672 --- /dev/null +++ b/crates/g3-core/tests/incomplete_tool_call_test.rs @@ -0,0 +1,182 @@ +//! 
Tests for the incomplete tool call detection feature + +use g3_core::StreamingToolParser; +use g3_providers::CompletionChunk; + +#[test] +fn test_has_incomplete_tool_call_empty_buffer() { + let parser = StreamingToolParser::new(); + assert!(!parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_no_tool_pattern() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: "Hello, I will help you with that.".to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + assert!(!parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_complete_tool_call() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Complete JSON should NOT be detected as incomplete + assert!(!parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_truncated_tool_call() { + let mut parser = StreamingToolParser::new(); + // Simulate truncated tool call - missing closing braces + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Incomplete JSON should be detected + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_truncated_mid_value() { + let mut parser = StreamingToolParser::new(); + // Simulate truncated tool call - cut off mid-value + let chunk = CompletionChunk { + content: r#"{"tool": "shell", "args": {"command": "cargo test --package g3-cli --test filter_json_test test_streaming -- --test-threads=1 2>&1 | tail"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // 
Incomplete JSON should be detected + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_with_text_before() { + let mut parser = StreamingToolParser::new(); + // Text before the incomplete tool call + let chunk = CompletionChunk { + content: r#"Let me read that file for you. + +{"tool": "read_file", "args": {"file_path":"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Incomplete JSON should be detected + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_malformed_like_trace() { + let mut parser = StreamingToolParser::new(); + // This simulates a truncated tool call where the stream ended mid-JSON + // The actual trace showed truncated output, not malformed characters + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path":"src/engine.rkt""#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Truncated JSON (missing closing braces) should be detected as incomplete + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_empty_buffer() { + let parser = StreamingToolParser::new(); + assert!(!parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_no_tool_pattern() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: "Hello, I will help you with that.".to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + assert!(!parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_complete_tool_call() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Complete 
JSON tool call that wasn't executed should be detected + assert!(parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_incomplete_json() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Incomplete JSON should NOT be detected as unexecuted (it's incomplete, not unexecuted) + assert!(!parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_with_trailing_text() { + let mut parser = StreamingToolParser::new(); + // Complete JSON tool call followed by trailing text + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}} + +Some trailing text after the JSON"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Complete JSON tool call should be detected even with trailing text + assert!(parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_with_text_before_and_after() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: r#"Let me read that file. + +{"tool": "shell", "args": {"command": "ls -la"}} + +I'll execute this command now."#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Complete JSON tool call should be detected + assert!(parser.has_unexecuted_tool_call()); +} diff --git a/crates/g3-core/tests/streaming_parser_test.rs b/crates/g3-core/tests/streaming_parser_test.rs new file mode 100644 index 0000000..2d33777 --- /dev/null +++ b/crates/g3-core/tests/streaming_parser_test.rs @@ -0,0 +1,545 @@ +//! Comprehensive tests for StreamingToolParser +//! +//! Tests cover: +//! - Multiple tool calls in one response +//! - Tool call followed by text +//! 
- Incomplete tool calls at various truncation points +//! - Parser reset behavior +//! - Buffer management + +use g3_core::StreamingToolParser; +use g3_providers::CompletionChunk; + +// Helper to create a chunk +fn chunk(content: &str, finished: bool) -> CompletionChunk { + CompletionChunk { + content: content.to_string(), + finished, + tool_calls: None, + usage: None, + } +} + +// ============================================================================= +// Test: Multiple tool calls in one response +// ============================================================================= + +#[test] +fn test_multiple_tool_calls_in_single_chunk() { + let mut parser = StreamingToolParser::new(); + + // Two complete tool calls in one chunk + let content = r#"Let me do two things: +{"tool": "read_file", "args": {"file_path": "a.txt"}} +Now the second: +{"tool": "shell", "args": {"command": "ls"}}"#; + + let tools = parser.process_chunk(&chunk(content, false)); + + // Should detect at least one tool call + // Note: Current implementation may only return the first one found + assert!(!tools.is_empty(), "Should detect at least one tool call"); +} + +#[test] +fn test_multiple_tool_calls_across_chunks() { + let mut parser = StreamingToolParser::new(); + + // First tool call + let tools1 = parser.process_chunk(&chunk( + r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#, + false + )); + assert_eq!(tools1.len(), 1, "First tool call should be detected"); + assert_eq!(tools1[0].tool, "read_file"); + + // Reset parser (simulating what happens after tool execution) + parser.reset(); + + // Second tool call + let tools2 = parser.process_chunk(&chunk( + r#"{"tool": "shell", "args": {"command": "ls"}}"#, + false + )); + assert_eq!(tools2.len(), 1, "Second tool call should be detected"); + assert_eq!(tools2[0].tool, "shell"); +} + +#[test] +fn test_first_complete_second_incomplete() { + let mut parser = StreamingToolParser::new(); + + // First complete, second incomplete + let 
content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}} +{"tool": "shell", "args": {"command": "ls"#; + + let tools = parser.process_chunk(&chunk(content, false)); + + // Should detect the first complete tool call + // The incomplete one should be detected by has_incomplete_tool_call + assert!(parser.has_incomplete_tool_call(), "Should detect incomplete tool call"); +} + +// ============================================================================= +// Test: Tool call followed by text +// ============================================================================= + +#[test] +fn test_tool_call_with_trailing_text() { + let mut parser = StreamingToolParser::new(); + + let content = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}} + +Here is the content of the file..."#; + + let tools = parser.process_chunk(&chunk(content, false)); + + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].tool, "read_file"); + + // The trailing text should be in the buffer + let text = parser.get_text_content(); + assert!(text.contains("Here is the content"), "Trailing text should be preserved"); +} + +#[test] +fn test_text_before_tool_call() { + let mut parser = StreamingToolParser::new(); + + let content = r#"Let me read that file for you. + +{"tool": "read_file", "args": {"file_path": "test.txt"}}"#; + + let tools = parser.process_chunk(&chunk(content, false)); + + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].tool, "read_file"); + + // The leading text should be in the buffer + let text = parser.get_text_content(); + assert!(text.contains("Let me read"), "Leading text should be preserved"); +} + +#[test] +fn test_text_before_and_after_tool_call() { + let mut parser = StreamingToolParser::new(); + + let content = r#"I'll check the file. 
+ +{"tool": "read_file", "args": {"file_path": "test.txt"}} + +Done checking."#; + + let tools = parser.process_chunk(&chunk(content, false)); + + assert_eq!(tools.len(), 1); + + let text = parser.get_text_content(); + assert!(text.contains("I'll check"), "Leading text should be preserved"); + assert!(text.contains("Done checking"), "Trailing text should be preserved"); +} + +// ============================================================================= +// Test: Incomplete tool calls at various truncation points +// ============================================================================= + +#[test] +fn test_incomplete_after_tool_key() { + let mut parser = StreamingToolParser::new(); + parser.process_chunk(&chunk(r#"{"tool":"#, false)); + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_incomplete_after_tool_name() { + let mut parser = StreamingToolParser::new(); + parser.process_chunk(&chunk(r#"{"tool": "read_file""#, false)); + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_incomplete_after_args_key() { + let mut parser = StreamingToolParser::new(); + parser.process_chunk(&chunk(r#"{"tool": "read_file", "args":"#, false)); + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_incomplete_mid_args_object() { + let mut parser = StreamingToolParser::new(); + parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path":"#, false)); + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_incomplete_mid_string_value() { + let mut parser = StreamingToolParser::new(); + parser.process_chunk(&chunk(r#"{"tool": "shell", "args": {"command": "ls -la /very/long/path"#, false)); + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_incomplete_missing_final_brace() { + let mut parser = StreamingToolParser::new(); + parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}"#, false)); + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn 
test_complete_tool_call_not_incomplete() { + let mut parser = StreamingToolParser::new(); + parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, false)); + assert!(!parser.has_incomplete_tool_call(), "Complete tool call should not be marked incomplete"); +} + +// ============================================================================= +// Test: Parser reset behavior +// ============================================================================= + +#[test] +fn test_reset_clears_buffer() { + let mut parser = StreamingToolParser::new(); + + parser.process_chunk(&chunk("Some content here", false)); + assert!(!parser.get_text_content().is_empty()); + + parser.reset(); + + assert!(parser.get_text_content().is_empty(), "Buffer should be empty after reset"); +} + +#[test] +fn test_reset_clears_incomplete_state() { + let mut parser = StreamingToolParser::new(); + + // Create incomplete tool call + parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"#, false)); + assert!(parser.has_incomplete_tool_call()); + + parser.reset(); + + assert!(!parser.has_incomplete_tool_call(), "Incomplete state should be cleared after reset"); +} + +#[test] +fn test_reset_clears_unexecuted_state() { + let mut parser = StreamingToolParser::new(); + + // Create complete but "unexecuted" tool call + parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, false)); + assert!(parser.has_unexecuted_tool_call()); + + parser.reset(); + + assert!(!parser.has_unexecuted_tool_call(), "Unexecuted state should be cleared after reset"); +} + +#[test] +fn test_reset_allows_new_tool_calls() { + let mut parser = StreamingToolParser::new(); + + // First tool call + let tools1 = parser.process_chunk(&chunk( + r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#, + false + )); + assert_eq!(tools1.len(), 1); + + parser.reset(); + + // Second tool call after reset + let tools2 = parser.process_chunk(&chunk( + r#"{"tool": 
"shell", "args": {"command": "ls"}}"#, + false + )); + assert_eq!(tools2.len(), 1); + assert_eq!(tools2[0].tool, "shell"); +} + +// ============================================================================= +// Test: Buffer management and edge cases +// ============================================================================= + +#[test] +fn test_streaming_chunks_accumulate() { + let mut parser = StreamingToolParser::new(); + + // Stream in chunks + parser.process_chunk(&chunk(r#"{"tool": "#, false)); + parser.process_chunk(&chunk(r#""read_file", "#, false)); + parser.process_chunk(&chunk(r#""args": {"file_path": "#, false)); + parser.process_chunk(&chunk(r#""test.txt"}}"#, false)); + + // Should have accumulated the complete tool call + let text = parser.get_text_content(); + assert!(text.contains(r#""tool""#)); + assert!(text.contains(r#""read_file""#)); +} + +#[test] +fn test_finished_chunk_triggers_final_parse() { + let mut parser = StreamingToolParser::new(); + + // Incomplete chunks + parser.process_chunk(&chunk(r#"{"tool": "read_file", "#, false)); + let tools1 = parser.process_chunk(&chunk(r#""args": {"file_path": "test.txt"}}"#, false)); + + // Tool should be detected before finished + assert!(!tools1.is_empty() || !parser.has_unexecuted_tool_call(), + "Tool should be detected during streaming or marked as unexecuted"); +} + +#[test] +fn test_empty_chunks_ignored() { + let mut parser = StreamingToolParser::new(); + + parser.process_chunk(&chunk("", false)); + parser.process_chunk(&chunk("", false)); + + assert!(parser.get_text_content().is_empty()); + assert!(!parser.has_incomplete_tool_call()); + assert!(!parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_whitespace_only_chunks() { + let mut parser = StreamingToolParser::new(); + + parser.process_chunk(&chunk(" \n\t ", false)); + + assert!(!parser.has_incomplete_tool_call()); + assert!(!parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_json_with_escaped_quotes() { + let mut parser = 
StreamingToolParser::new(); + + let content = r#"{"tool": "shell", "args": {"command": "echo \"hello\""}}"#; + let tools = parser.process_chunk(&chunk(content, false)); + + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].tool, "shell"); +} + +#[test] +fn test_json_with_escaped_backslashes() { + let mut parser = StreamingToolParser::new(); + + let content = r#"{"tool": "write_file", "args": {"file_path": "C:\\Users\\test.txt", "content": "data"}}"#; + let tools = parser.process_chunk(&chunk(content, false)); + + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].tool, "write_file"); +} + +#[test] +fn test_json_with_nested_braces_in_string() { + let mut parser = StreamingToolParser::new(); + + let content = r#"{"tool": "write_file", "args": {"content": "{\"nested\": {\"json\": true}}"}}"#; + let tools = parser.process_chunk(&chunk(content, false)); + + assert_eq!(tools.len(), 1); + assert_eq!(tools[0].tool, "write_file"); +} + +#[test] +fn test_text_buffer_length_tracking() { + let mut parser = StreamingToolParser::new(); + + parser.process_chunk(&chunk("Hello", false)); + assert_eq!(parser.text_buffer_len(), 5); + + parser.process_chunk(&chunk(" World", false)); + assert_eq!(parser.text_buffer_len(), 11); + + parser.reset(); + assert_eq!(parser.text_buffer_len(), 0); +} + +#[test] +fn test_message_stopped_flag() { + let mut parser = StreamingToolParser::new(); + + parser.process_chunk(&chunk("Hello", false)); + assert!(!parser.is_message_stopped()); + + parser.process_chunk(&chunk(" World", true)); + assert!(parser.is_message_stopped()); + + parser.reset(); + assert!(!parser.is_message_stopped()); +} + +// ============================================================================= +// Test: Tool call pattern variations +// ============================================================================= + +#[test] +fn test_tool_pattern_no_spaces() { + let mut parser = StreamingToolParser::new(); + let tools = parser.process_chunk(&chunk( + 
r#"{"tool":"read_file","args":{"file_path":"test.txt"}}"#, + false + )); + assert_eq!(tools.len(), 1); +} + +// ============================================================================= +// Test: mark_tool_calls_consumed functionality +// ============================================================================= + +#[test] +fn test_mark_consumed_clears_unexecuted_state() { + let mut parser = StreamingToolParser::new(); + + // Add a complete tool call + parser.process_chunk(&chunk( + r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, + false + )); + + // Should be detected as unexecuted + assert!(parser.has_unexecuted_tool_call()); + + // Mark as consumed + parser.mark_tool_calls_consumed(); + + // Should no longer be detected as unexecuted + assert!(!parser.has_unexecuted_tool_call(), + "After marking consumed, has_unexecuted_tool_call should return false"); +} + +#[test] +fn test_mark_consumed_allows_new_tool_detection() { + let mut parser = StreamingToolParser::new(); + + // First tool call + parser.process_chunk(&chunk( + r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#, + false + )); + parser.mark_tool_calls_consumed(); + + // Second tool call (without reset) + parser.process_chunk(&chunk( + r#"{"tool": "shell", "args": {"command": "ls"}}"#, + false + )); + + // Should detect the new unexecuted tool call + assert!(parser.has_unexecuted_tool_call(), + "New tool call after consumed position should be detected"); +} + +#[test] +fn test_bare_brace_not_incomplete() { + let mut parser = StreamingToolParser::new(); + + // Just a bare opening brace - not a tool call pattern + parser.process_chunk(&chunk(r#"{""#, false)); + + // Should NOT be detected as incomplete because it doesn't match tool patterns + assert!(!parser.has_incomplete_tool_call(), + "Bare {{ should not be detected as incomplete tool call"); +} + +#[test] +fn test_duplicate_tool_call_pattern() { + let mut parser = StreamingToolParser::new(); + + // Simulate the problematic 
pattern: tool call, garbage, duplicate tool call + let content = concat!( + r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"#, + "\n\n{\"\n\n", + r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"# + ); + let tools = parser.process_chunk(&chunk(content, false)); + + // Should detect at least one tool call + assert!(!tools.is_empty(), "Should detect at least one tool call"); + + // After processing, there should be an unexecuted tool call (the duplicate) + // because the parser only returns the first one it finds during streaming + assert!(parser.has_unexecuted_tool_call(), + "Should detect the duplicate as unexecuted"); +} + +#[test] +fn test_multiple_tool_calls_returned_on_finish() { + let mut parser = StreamingToolParser::new(); + + // Two complete tool calls in one chunk, with finished=true + let content = concat!( + r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#, + "\nSome text\n", + r#"{"tool": "shell", "args": {"command": "ls"}}"# + ); + + // First, add content without finishing + parser.process_chunk(&chunk(content, false)); + + // Now finish the stream - should return ALL tool calls + let tools = parser.process_chunk(&chunk("", true)); + + // Should return both tool calls + assert_eq!(tools.len(), 2, "Should return both tool calls when stream finishes"); + assert_eq!(tools[0].tool, "read_file"); + assert_eq!(tools[1].tool, "shell"); +} + +#[test] +fn test_tool_pattern_extra_spaces() { + let mut parser = StreamingToolParser::new(); + let tools = parser.process_chunk(&chunk( + r#"{ "tool" : "read_file" , "args" : { "file_path" : "test.txt" } }"#, + false + )); + assert_eq!(tools.len(), 1); +} + +#[test] +fn test_tool_pattern_with_newlines() { + let mut parser = StreamingToolParser::new(); + // Note: The parser looks for specific patterns like {"tool": or { "tool": + // Multi-line JSON with newlines between { and "tool" won't match + // This is expected behavior - the pattern matching is 
intentionally strict + let _tools = parser.process_chunk(&chunk( + r#"{ + "tool": "read_file", + "args": { + "file_path": "test.txt" + } +}"#, + false + )); + // This won't be detected as a tool call due to newline after { + // The has_unexecuted_tool_call check also won't find it + // This is a known limitation of the pattern-based detection +} + +// ============================================================================= +// Test: Edge cases for has_message_like_keys validation +// ============================================================================= + +#[test] +fn test_normal_args_accepted() { + let mut parser = StreamingToolParser::new(); + let tools = parser.process_chunk(&chunk( + r#"{"tool": "read_file", "args": {"file_path": "test.txt", "start": 0, "end": 100}}"#, + false + )); + assert_eq!(tools.len(), 1); +} + +#[test] +fn test_content_with_phrases_in_value_accepted() { + let mut parser = StreamingToolParser::new(); + // Phrases like "I'll" in VALUES should be fine (only keys are checked) + let tools = parser.process_chunk(&chunk( + r#"{"tool": "write_file", "args": {"file_path": "test.txt", "content": "I'll help you with that. 
Let me explain."}}"#, + false + )); + assert_eq!(tools.len(), 1); +} diff --git a/crates/g3-ensembles/src/flock.rs b/crates/g3-ensembles/src/flock.rs index 17eba1d..78802dd 100644 --- a/crates/g3-ensembles/src/flock.rs +++ b/crates/g3-ensembles/src/flock.rs @@ -7,7 +7,7 @@ use std::path::{Path, PathBuf}; use std::process::Stdio; use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::process::Command; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, warn}; use uuid::Uuid; use crate::status::{FlockStatus, SegmentState, SegmentStatus}; @@ -174,7 +174,7 @@ impl FlockMode { /// Run flock mode pub async fn run(&mut self) -> Result<()> { - info!( + debug!( "Starting flock mode with {} segments", self.config.num_segments ); @@ -625,7 +625,7 @@ async fn run_segment( status_file: PathBuf, session_id: String, ) -> Result { - info!( + debug!( "Starting segment {} in {}", segment_id, segment_dir.display() diff --git a/crates/g3-execution/src/lib.rs b/crates/g3-execution/src/lib.rs index 2629932..0b75d7a 100644 --- a/crates/g3-execution/src/lib.rs +++ b/crates/g3-execution/src/lib.rs @@ -3,7 +3,7 @@ use regex::Regex; use std::io::Write; use std::process::Command; use tempfile::NamedTempFile; -use tracing::{debug, error, info}; +use tracing::{debug, error}; /// Expand tilde (~) in a path to the user's home directory fn expand_tilde(path: &str) -> String { @@ -72,7 +72,7 @@ impl CodeExecutor { } for (language, code) in code_blocks { - info!("Executing {} code", language); + debug!("Executing {} code", language); if show_code { results.push(format!("šŸ“‹ Running {} code:", language)); @@ -459,7 +459,7 @@ pub fn is_cargo_llvm_cov_installed() -> Result { /// Install llvm-tools-preview via rustup pub fn install_llvm_tools() -> Result<()> { - info!("Installing llvm-tools-preview..."); + debug!("Installing llvm-tools-preview..."); let output = Command::new("rustup") .args(&["component", "add", "llvm-tools-preview"]) .output()?; @@ -469,13 +469,13 @@ pub fn 
install_llvm_tools() -> Result<()> { anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr); } - info!("āœ… llvm-tools-preview installed successfully"); + debug!("āœ… llvm-tools-preview installed successfully"); Ok(()) } /// Install cargo-llvm-cov via cargo install pub fn install_cargo_llvm_cov() -> Result<()> { - info!("Installing cargo-llvm-cov... (this may take a few minutes)"); + debug!("Installing cargo-llvm-cov... (this may take a few minutes)"); let output = Command::new("cargo") .args(&["install", "cargo-llvm-cov"]) .output()?; @@ -485,7 +485,7 @@ pub fn install_cargo_llvm_cov() -> Result<()> { anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr); } - info!("āœ… cargo-llvm-cov installed successfully"); + debug!("āœ… cargo-llvm-cov installed successfully"); Ok(()) } @@ -496,20 +496,20 @@ pub fn ensure_coverage_tools_installed() -> Result { // Check and install llvm-tools-preview if !is_llvm_tools_installed()? { - info!("llvm-tools-preview not found, installing..."); + debug!("llvm-tools-preview not found, installing..."); install_llvm_tools()?; already_installed = false; } else { - info!("āœ… llvm-tools-preview is already installed"); + debug!("āœ… llvm-tools-preview is already installed"); } // Check and install cargo-llvm-cov if !is_cargo_llvm_cov_installed()? 
{ - info!("cargo-llvm-cov not found, installing..."); + debug!("cargo-llvm-cov not found, installing..."); install_cargo_llvm_cov()?; already_installed = false; } else { - info!("āœ… cargo-llvm-cov is already installed"); + debug!("āœ… cargo-llvm-cov is already installed"); } Ok(already_installed) diff --git a/crates/g3-providers/src/anthropic.rs b/crates/g3-providers/src/anthropic.rs index e84bfe1..2b26858 100644 --- a/crates/g3-providers/src/anthropic.rs +++ b/crates/g3-providers/src/anthropic.rs @@ -328,7 +328,7 @@ impl AnthropicProvider { tracing::debug!("create_request_body called: max_tokens={}, disable_thinking={}, thinking_budget_tokens={:?}", max_tokens, disable_thinking, self.thinking_budget_tokens); let thinking = if disable_thinking { - tracing::info!( + tracing::debug!( "Thinking mode explicitly disabled for this request (max_tokens={})", max_tokens ); diff --git a/crates/g3-providers/src/databricks.rs b/crates/g3-providers/src/databricks.rs index 18fe756..95fda57 100644 --- a/crates/g3-providers/src/databricks.rs +++ b/crates/g3-providers/src/databricks.rs @@ -64,7 +64,7 @@ use serde::{Deserialize, Serialize}; use std::time::Duration; use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; -use tracing::{debug, error, info, warn}; +use tracing::{debug, error, warn}; use crate::{ CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message, @@ -166,7 +166,7 @@ impl DatabricksProvider { .build() .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; - info!( + debug!( "Initialized Databricks provider with model: {} on host: {}", model, host ); @@ -196,7 +196,7 @@ impl DatabricksProvider { .build() .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; - info!("Initialized Databricks provider '{}' with model: {} on host: {}", name, model, host); + debug!("Initialized Databricks provider '{}' with model: {} on host: {}", name, model, host); Ok(Self { client, @@ -220,7 +220,7 @@ impl 
DatabricksProvider { .build() .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; - info!( + debug!( "Initialized Databricks provider with OAuth for model: {} on host: {}", model, host ); @@ -249,7 +249,7 @@ impl DatabricksProvider { .build() .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; - info!("Initialized Databricks provider '{}' with OAuth for model: {} on host: {}", name, model, host); + debug!("Initialized Databricks provider '{}' with OAuth for model: {} on host: {}", name, model, host); Ok(Self { client, @@ -857,7 +857,7 @@ impl LLMProvider for DatabricksProvider { if status == reqwest::StatusCode::FORBIDDEN && (error_text.contains("Invalid Token") || error_text.contains("invalid_token")) { - info!("Received 403 Invalid Token error, attempting to refresh OAuth token"); + debug!("Received 403 Invalid Token error, attempting to refresh OAuth token"); // Try to refresh the token if we're using OAuth if let DatabricksAuth::OAuth { .. } = &provider_clone.auth { @@ -867,7 +867,7 @@ impl LLMProvider for DatabricksProvider { // Try to get a new token (will attempt refresh or new OAuth flow) match provider_clone.auth.get_token().await { Ok(_new_token) => { - info!("Successfully refreshed OAuth token, retrying request"); + debug!("Successfully refreshed OAuth token, retrying request"); // Retry the request with the new token response = provider_clone @@ -1038,7 +1038,7 @@ impl LLMProvider for DatabricksProvider { if status == reqwest::StatusCode::FORBIDDEN && (error_text.contains("Invalid Token") || error_text.contains("invalid_token")) { - info!("Received 403 Invalid Token error, attempting to refresh OAuth token"); + debug!("Received 403 Invalid Token error, attempting to refresh OAuth token"); // Try to refresh the token if we're using OAuth if let DatabricksAuth::OAuth { .. 
} = &provider_clone.auth { @@ -1048,7 +1048,7 @@ impl LLMProvider for DatabricksProvider { // Try to get a new token (will attempt refresh or new OAuth flow) match provider_clone.auth.get_token().await { Ok(_new_token) => { - info!("Successfully refreshed OAuth token, retrying streaming request"); + debug!("Successfully refreshed OAuth token, retrying streaming request"); // Retry the request with the new token response = provider_clone diff --git a/crates/g3-providers/src/embedded.rs b/crates/g3-providers/src/embedded.rs index 3bf8e1b..999e489 100644 --- a/crates/g3-providers/src/embedded.rs +++ b/crates/g3-providers/src/embedded.rs @@ -12,7 +12,7 @@ use std::sync::Arc; use tokio::sync::mpsc; use tokio::sync::Mutex; use tokio_stream::wrappers::ReceiverStream; -use tracing::{debug, error, info}; +use tracing::{debug, error}; pub struct EmbeddedProvider { session: Arc>, @@ -32,7 +32,7 @@ impl EmbeddedProvider { gpu_layers: Option, threads: Option, ) -> Result { - info!("Loading embedded model from: {}", model_path); + debug!("Loading embedded model from: {}", model_path); // Expand tilde in path let expanded_path = shellexpand::tilde(&model_path); @@ -41,7 +41,7 @@ impl EmbeddedProvider { // If model doesn't exist and it's the default Qwen model, offer to download it if !model_path_buf.exists() { if model_path.contains("qwen2.5-7b-instruct-q3_k_m.gguf") { - info!("Model file not found. Attempting to download Qwen 2.5 7B model..."); + debug!("Model file not found. 
Attempting to download Qwen 2.5 7B model..."); Self::download_qwen_model(&model_path_buf)?; } else { anyhow::bail!("Model file not found: {}", model_path_buf.display()); @@ -55,14 +55,14 @@ impl EmbeddedProvider { if let Some(gpu_layers) = gpu_layers { params.n_gpu_layers = gpu_layers; - info!("Using {} GPU layers", gpu_layers); + debug!("Using {} GPU layers", gpu_layers); } let context_size = context_length.unwrap_or(4096); - info!("Using context length: {}", context_size); + debug!("Using context length: {}", context_size); // Load the model - info!("Loading model..."); + debug!("Loading model..."); let model = LlamaModel::load_from_file(model_path, params) .map_err(|e| anyhow::anyhow!("Failed to load model: {}", e))?; @@ -79,7 +79,7 @@ impl EmbeddedProvider { .create_session(session_params) .map_err(|e| anyhow::anyhow!("Failed to create session: {}", e))?; - info!("Successfully loaded {} model", model_type); + debug!("Successfully loaded {} model", model_type); Ok(Self { session: Arc::new(Mutex::new(session)), @@ -330,7 +330,7 @@ impl EmbeddedProvider { Ok(inner_result) => match inner_result { Ok(task_result) => match task_result { Ok((text, token_count)) => { - info!( + debug!( "Completed generation: {} tokens (dynamic limit was {})", token_count, dynamic_max_tokens ); @@ -448,9 +448,9 @@ impl EmbeddedProvider { fs::create_dir_all(parent)?; } - info!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)..."); - info!("This is a one-time download that may take several minutes depending on your connection."); - info!("Downloading to: {}", model_path.display()); + debug!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)..."); + debug!("This is a one-time download that may take several minutes depending on your connection."); + debug!("Downloading to: {}", model_path.display()); // Use curl with progress bar for download let output = Command::new("curl") @@ -497,7 +497,7 @@ impl EmbeddedProvider { ); } - info!("Successfully downloaded Qwen 2.5 7B 
model ({}MB)", size_mb); + debug!("Successfully downloaded Qwen 2.5 7B model ({}MB)", size_mb); Ok(()) } } diff --git a/crates/g3-providers/src/oauth.rs b/crates/g3-providers/src/oauth.rs index 75c9d50..893605a 100644 --- a/crates/g3-providers/src/oauth.rs +++ b/crates/g3-providers/src/oauth.rs @@ -392,7 +392,7 @@ pub async fn get_oauth_token_async( if let Err(e) = token_cache.save_token(&new_token) { tracing::warn!("Failed to save refreshed token: {}", e); } - tracing::info!("Successfully refreshed token"); + tracing::debug!("Successfully refreshed token"); return Ok(new_token.access_token); } Err(e) => { diff --git a/tmp/test_planner_ui.sh b/tmp/test_planner_ui.sh deleted file mode 100755 index 9cdf019..0000000 --- a/tmp/test_planner_ui.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -set -e - -# Clean logs first -rm -rf ~/RustroverProjects/g3/logs/*.log ~/RustroverProjects/g3/logs/*.txt 2>/dev/null || true - -# Create test requirements file -mkdir -p /tmp/g3-test-planning/g3-plan -cat > /tmp/g3-test-planning/g3-plan/new_requirements.md <<'EOF' -Simple test task: List all .rs files in the src directory. -EOF - -# Initialize git repo for test (planning mode requires git) -cd /tmp/g3-test-planning -if [ ! -d .git ]; then - git init - git config user.name "Test User" - git config user.email "test@example.com" - git add . - git commit -m "Initial commit" || true -fi - -echo "Test environment ready at /tmp/g3-test-planning" -echo "Run: cd /tmp && ~/RustroverProjects/g3/target/release/g3 --planning --codepath /tmp/g3-test-planning --no-git"