Merge branch 'dhanji/fix-auto-continue': Fix auto-continue and duplicate detection bugs

This commit is contained in:
Dhanji R. Prasanna
2025-12-22 17:12:24 +11:00
25 changed files with 1656 additions and 383 deletions

View File

@@ -267,7 +267,7 @@ use std::path::Path;
use std::path::PathBuf; use std::path::PathBuf;
use std::process::exit; use std::process::exit;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{error, info}; use tracing::{debug, error};
use g3_core::error_handling::{classify_error, ErrorType, RecoverableError}; use g3_core::error_handling::{classify_error, ErrorType, RecoverableError};
mod simple_output; mod simple_output;
@@ -2693,7 +2693,7 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?; extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?;
// Log the size of the feedback for debugging // Log the size of the feedback for debugging
info!( debug!(
"Coach feedback extracted: {} characters (from {} total)", "Coach feedback extracted: {} characters (from {} total)",
coach_feedback_text.len(), coach_feedback_text.len(),
coach_result.response.len() coach_result.response.len()

View File

@@ -68,6 +68,18 @@ fn main() {
dylib_dst.display() dylib_dst.display()
); );
// Re-sign the dylib with ad-hoc signature to fix code signing issues on Apple Silicon
// This is necessary because incremental compilation can invalidate signatures
let codesign_status = Command::new("codesign")
.args(&["-f", "-s", "-", dylib_dst.to_str().unwrap()])
.status();
if let Ok(status) = codesign_status {
if !status.success() {
println!("cargo:warning=Failed to codesign libVisionBridge.dylib (non-fatal)");
}
}
// Add rpath so the dylib can be found at runtime // Add rpath so the dylib can be found at runtime
println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path"); println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path");
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path"); println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");

View File

@@ -24,7 +24,7 @@ impl MacOSController {
pub fn new() -> Result<Self> { pub fn new() -> Result<Self> {
let ocr = Box::new(DefaultOCR::new()?); let ocr = Box::new(DefaultOCR::new()?);
let ocr_name = ocr.name().to_string(); let ocr_name = ocr.name().to_string();
tracing::info!("Initialized macOS controller with OCR engine: {}", ocr_name); tracing::debug!("Initialized macOS controller with OCR engine: {}", ocr_name);
Ok(Self { Ok(Self {
ocr_engine: ocr, ocr_engine: ocr,
ocr_name, ocr_name,
@@ -155,7 +155,7 @@ impl ComputerController for MacOSController {
// 1. At layer 0 (normal windows, not menu bar) // 1. At layer 0 (normal windows, not menu bar)
// 2. Have real bounds (width and height >= 100) // 2. Have real bounds (width and height >= 100)
if layer == 0 && has_real_bounds { if layer == 0 && has_real_bounds {
tracing::info!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer); tracing::debug!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer);
found_window_id = Some((id as u32, owner.clone())); found_window_id = Some((id as u32, owner.clone()));
break; break;
} else { } else {
@@ -178,7 +178,7 @@ impl ComputerController for MacOSController {
let (cg_window_id, matched_owner) = cg_window_id.ok_or_else(|| { let (cg_window_id, matched_owner) = cg_window_id.ok_or_else(|| {
anyhow::anyhow!("Could not find window for application '{}'. Use list_windows to see available windows.", app_name) anyhow::anyhow!("Could not find window for application '{}'. Use list_windows to see available windows.", app_name)
})?; })?;
tracing::info!( tracing::debug!(
"Taking screenshot of window ID {} for app '{}'", "Taking screenshot of window ID {} for app '{}'",
cg_window_id, cg_window_id,
matched_owner matched_owner
@@ -468,7 +468,7 @@ impl MacOSController {
// Only accept windows with real bounds (>= 100x100 pixels) // Only accept windows with real bounds (>= 100x100 pixels)
if w >= 100 && h >= 100 { if w >= 100 && h >= 100 {
tracing::info!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer); tracing::debug!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer);
return Ok((x, y, w, h)); return Ok((x, y, w, h));
} else { } else {
tracing::debug!( tracing::debug!(

View File

@@ -3,7 +3,7 @@ use crate::process::ProcessController;
use axum::{extract::State, http::StatusCode, Json}; use axum::{extract::State, http::StatusCode, Json};
use std::sync::Arc; use std::sync::Arc;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tracing::{error, info}; use tracing::{debug, error};
pub type ControllerState = Arc<Mutex<ProcessController>>; pub type ControllerState = Arc<Mutex<ProcessController>>;
@@ -22,7 +22,7 @@ pub async fn kill_instance(
match controller.kill_process(pid) { match controller.kill_process(pid) {
Ok(_) => { Ok(_) => {
info!("Successfully killed process {}", pid); debug!("Successfully killed process {}", pid);
Ok(Json(serde_json::json!({ Ok(Json(serde_json::json!({
"status": "terminating" "status": "terminating"
}))) })))
@@ -38,7 +38,7 @@ pub async fn restart_instance(
State(controller): State<ControllerState>, State(controller): State<ControllerState>,
axum::extract::Path(id): axum::extract::Path<String>, axum::extract::Path(id): axum::extract::Path<String>,
) -> Result<Json<LaunchResponse>, StatusCode> { ) -> Result<Json<LaunchResponse>, StatusCode> {
info!("Restarting instance: {}", id); debug!("Restarting instance: {}", id);
// Extract PID from instance ID (format: pid_timestamp) // Extract PID from instance ID (format: pid_timestamp)
let pid: u32 = id let pid: u32 = id
@@ -81,7 +81,7 @@ pub async fn launch_instance(
State(controller): State<ControllerState>, State(controller): State<ControllerState>,
Json(request): Json<LaunchRequest>, Json(request): Json<LaunchRequest>,
) -> Result<Json<LaunchResponse>, (StatusCode, Json<serde_json::Value>)> { ) -> Result<Json<LaunchResponse>, (StatusCode, Json<serde_json::Value>)> {
info!("Launching new g3 instance: {:?}", request); debug!("Launching new g3 instance: {:?}", request);
// Validate binary path if provided // Validate binary path if provided
if let Some(ref binary_path) = request.g3_binary_path { if let Some(ref binary_path) = request.g3_binary_path {
@@ -149,7 +149,7 @@ pub async fn launch_instance(
) { ) {
Ok(pid) => { Ok(pid) => {
let id = format!("{}_{}", pid, chrono::Utc::now().timestamp()); let id = format!("{}_{}", pid, chrono::Utc::now().timestamp());
info!("Successfully launched g3 instance with PID {}", pid); debug!("Successfully launched g3 instance with PID {}", pid);
Ok(Json(LaunchResponse { Ok(Json(LaunchResponse {
id, id,
status: "starting".to_string(), status: "starting".to_string(),

View File

@@ -3,7 +3,7 @@ use axum::{http::StatusCode, Json};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::os::unix::fs::PermissionsExt; use std::os::unix::fs::PermissionsExt;
use std::path::PathBuf; use std::path::PathBuf;
use tracing::{error, info}; use tracing::{debug, error};
pub async fn get_state() -> Result<Json<ConsoleState>, StatusCode> { pub async fn get_state() -> Result<Json<ConsoleState>, StatusCode> {
let state = ConsoleState::load(); let state = ConsoleState::load();
@@ -15,7 +15,7 @@ pub async fn save_state(
) -> Result<Json<serde_json::Value>, StatusCode> { ) -> Result<Json<serde_json::Value>, StatusCode> {
match state.save() { match state.save() {
Ok(_) => { Ok(_) => {
info!("Console state saved successfully"); debug!("Console state saved successfully");
Ok(Json(serde_json::json!({ Ok(Json(serde_json::json!({
"status": "saved" "status": "saved"
}))) })))

View File

@@ -1,7 +1,7 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::PathBuf;
use tracing::info; use tracing::debug;
#[derive(Debug, Clone, Serialize, Deserialize)] #[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsoleState { pub struct ConsoleState {
@@ -42,7 +42,7 @@ impl ConsoleState {
pub fn save(&self) -> anyhow::Result<()> { pub fn save(&self) -> anyhow::Result<()> {
let config_path = Self::config_path(); let config_path = Self::config_path();
info!("Saving console state to: {:?}", config_path); debug!("Saving console state to: {:?}", config_path);
// Create parent directory if it doesn't exist // Create parent directory if it doesn't exist
if let Some(parent) = config_path.parent() { if let Some(parent) = config_path.parent() {
@@ -51,7 +51,7 @@ impl ConsoleState {
let content = serde_json::to_string_pretty(self)?; let content = serde_json::to_string_pretty(self)?;
fs::write(&config_path, content)?; fs::write(&config_path, content)?;
info!("Console state saved successfully to: {:?}", config_path); debug!("Console state saved successfully to: {:?}", config_path);
Ok(()) Ok(())
} }

View File

@@ -16,7 +16,7 @@ use std::sync::Arc;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tower_http::cors::CorsLayer; use tower_http::cors::CorsLayer;
use tower_http::services::ServeDir; use tower_http::services::ServeDir;
use tracing::{info, Level}; use tracing::{debug, Level};
use tracing_subscriber; use tracing_subscriber;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
@@ -84,12 +84,12 @@ async fn main() -> anyhow::Result<()> {
.layer(CorsLayer::permissive()); .layer(CorsLayer::permissive());
let addr = format!("{}:{}", args.host, args.port); let addr = format!("{}:{}", args.host, args.port);
info!("Starting g3-console on http://{}", addr); debug!("Starting g3-console on http://{}", addr);
// Auto-open browser if requested // Auto-open browser if requested
if args.open { if args.open {
let url = format!("http://{}", addr); let url = format!("http://{}", addr);
info!("Opening browser to {}", url); debug!("Opening browser to {}", url);
let _ = open::that(&url); let _ = open::that(&url);
} }

View File

@@ -6,7 +6,7 @@ use std::path::PathBuf;
use std::process::{Command, Stdio}; use std::process::{Command, Stdio};
use std::sync::Mutex; use std::sync::Mutex;
use sysinfo::{Pid, Process, Signal, System}; use sysinfo::{Pid, Process, Signal, System};
use tracing::{debug, info}; use tracing::debug;
pub struct ProcessController { pub struct ProcessController {
system: System, system: System,
@@ -26,7 +26,7 @@ impl ProcessController {
self.system.refresh_processes(); self.system.refresh_processes();
if let Some(process) = self.system.process(sysinfo_pid) { if let Some(process) = self.system.process(sysinfo_pid) {
info!("Killing process {} ({})", pid, process.name()); debug!("Killing process {} ({})", pid, process.name());
// Try SIGTERM first // Try SIGTERM first
if process.kill_with(Signal::Term).is_some() { if process.kill_with(Signal::Term).is_some() {
@@ -107,7 +107,7 @@ impl ProcessController {
}); });
} }
info!("Launching g3: {:?}", cmd); debug!("Launching g3: {:?}", cmd);
// Spawn and wait for the intermediate process to exit // Spawn and wait for the intermediate process to exit
let mut child = cmd.spawn().context("Failed to spawn g3 process")?; let mut child = cmd.spawn().context("Failed to spawn g3 process")?;
@@ -120,7 +120,7 @@ impl ProcessController {
// The actual g3 process is now running as orphan // The actual g3 process is now running as orphan
// We need to scan for it by matching workspace and recent start time // We need to scan for it by matching workspace and recent start time
info!( debug!(
"Scanning for newly launched g3 process in workspace: {}", "Scanning for newly launched g3 process in workspace: {}",
workspace workspace
); );
@@ -171,7 +171,7 @@ impl ProcessController {
found found
} else { } else {
// If we couldn't find it, try one more refresh after a longer delay // If we couldn't find it, try one more refresh after a longer delay
info!("Process not found on first scan, trying again..."); debug!("Process not found on first scan, trying again...");
std::thread::sleep(std::time::Duration::from_millis(2000)); std::thread::sleep(std::time::Duration::from_millis(2000));
self.system.refresh_processes(); self.system.refresh_processes();
@@ -204,7 +204,7 @@ impl ProcessController {
retry_found.unwrap_or(intermediate_pid) retry_found.unwrap_or(intermediate_pid)
}; };
info!("Launched g3 process with PID {}", pid); debug!("Launched g3 process with PID {}", pid);
// Store launch params for restart // Store launch params for restart
let params = LaunchParams { let params = LaunchParams {

View File

@@ -3,7 +3,7 @@ use anyhow::Result;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use std::path::PathBuf; use std::path::PathBuf;
use sysinfo::{Pid, Process, System}; use sysinfo::{Pid, Process, System};
use tracing::{debug, info, warn}; use tracing::{debug, warn};
pub struct ProcessDetector { pub struct ProcessDetector {
system: System, system: System,
@@ -17,7 +17,7 @@ impl ProcessDetector {
} }
pub fn detect_instances(&mut self) -> Result<Vec<Instance>> { pub fn detect_instances(&mut self) -> Result<Vec<Instance>> {
info!("Scanning for g3 processes..."); debug!("Scanning for g3 processes...");
// Refresh all processes to ensure we catch newly started ones // Refresh all processes to ensure we catch newly started ones
// Using refresh_all() instead of just refresh_processes() to ensure // Using refresh_all() instead of just refresh_processes() to ensure
// we get complete information about new processes // we get complete information about new processes
@@ -37,7 +37,7 @@ impl ProcessDetector {
} }
} }
info!("Detected {} g3 instances", instances.len()); debug!("Detected {} g3 instances", instances.len());
Ok(instances) Ok(instances)
} }

View File

@@ -9,7 +9,7 @@
use anyhow::Result; use anyhow::Result;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::time::Duration; use std::time::Duration;
use tracing::{error, info, warn}; use tracing::{debug, error, warn};
/// Base delay for exponential backoff (in milliseconds) /// Base delay for exponential backoff (in milliseconds)
const BASE_RETRY_DELAY_MS: u64 = 1000; const BASE_RETRY_DELAY_MS: u64 = 1000;
@@ -149,7 +149,7 @@ impl ErrorContext {
if let Err(e) = std::fs::write(&filename, json_content) { if let Err(e) = std::fs::write(&filename, json_content) {
error!("Failed to save error context to {:?}: {}", &filename, e); error!("Failed to save error context to {:?}: {}", &filename, e);
} else { } else {
info!("Error details saved to: {:?}", &filename); debug!("Error details saved to: {:?}", &filename);
} }
} }
Err(e) => { Err(e) => {
@@ -328,7 +328,7 @@ where
match operation().await { match operation().await {
Ok(result) => { Ok(result) => {
if attempt > 1 { if attempt > 1 {
info!( debug!(
"Operation '{}' succeeded after {} attempts", "Operation '{}' succeeded after {} attempts",
operation_name, attempt operation_name, attempt
); );
@@ -357,7 +357,7 @@ where
// Special handling for token limit errors // Special handling for token limit errors
if matches!(recoverable_type, RecoverableError::TokenLimit) { if matches!(recoverable_type, RecoverableError::TokenLimit) {
info!("Token limit error detected. Consider triggering summarization."); debug!("Token limit error detected. Consider triggering summarization.");
} }
tokio::time::sleep(delay).await; tokio::time::sleep(delay).await;

View File

@@ -12,7 +12,7 @@ use crate::{logs_dir, Agent, TaskResult};
use crate::ui_writer::UiWriter; use crate::ui_writer::UiWriter;
use serde_json::Value; use serde_json::Value;
use std::path::PathBuf; use std::path::PathBuf;
use tracing::{debug, info, warn}; use tracing::{debug, warn};
/// Result of feedback extraction with source information /// Result of feedback extraction with source information
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -103,21 +103,21 @@ where
// Try session log first (most reliable) // Try session log first (most reliable)
if let Some(session_id) = agent.get_session_id() { if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_from_session_log(&session_id, config) { if let Some(feedback) = try_extract_from_session_log(&session_id, config) {
info!("Extracted coach feedback from session log: {} chars", feedback.len()); debug!("Extracted coach feedback from session log: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog); return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog);
} }
} }
// Try native tool call JSON parsing // Try native tool call JSON parsing
if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) { if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) {
info!("Extracted coach feedback from native tool call: {} chars", feedback.len()); debug!("Extracted coach feedback from native tool call: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall); return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall);
} }
// Try conversation history // Try conversation history
if let Some(session_id) = agent.get_session_id() { if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) { if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) {
info!("Extracted coach feedback from conversation history: {} chars", feedback.len()); debug!("Extracted coach feedback from conversation history: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory); return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory);
} }
} }
@@ -125,7 +125,7 @@ where
// Try TaskResult parsing // Try TaskResult parsing
let extracted = coach_result.extract_final_output(); let extracted = coach_result.extract_final_output();
if !extracted.is_empty() { if !extracted.is_empty() {
info!("Extracted coach feedback from task result: {} chars", extracted.len()); debug!("Extracted coach feedback from task result: {} chars", extracted.len());
return ExtractedFeedback::new(extracted, FeedbackSource::TaskResultResponse); return ExtractedFeedback::new(extracted, FeedbackSource::TaskResultResponse);
} }

View File

@@ -39,7 +39,7 @@ use serde_json::json;
use std::io::Write; use std::io::Write;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, warn};
/// Get the path to the todo.g3.md file. /// Get the path to the todo.g3.md file.
/// ///
@@ -246,13 +246,23 @@ pub enum StreamState {
Resuming, Resuming,
} }
/// Patterns used to detect JSON tool calls embedded in model text output.
/// These cover common whitespace variations around the `"tool"` key in JSON
/// formatting. Matching is a plain substring search over the text buffer
/// (see `find_last_tool_call_start` / `find_first_tool_call_start`); parsing
/// and validation of the candidate JSON happens separately.
const TOOL_CALL_PATTERNS: [&str; 4] = [
r#"{"tool":"#,
r#"{ "tool":"#,
r#"{"tool" :"#,
r#"{ "tool" :"#,
];
/// Modern streaming tool parser that properly handles native tool calls and SSE chunks /// Modern streaming tool parser that properly handles native tool calls and SSE chunks
#[derive(Debug)] #[derive(Debug)]
pub struct StreamingToolParser { pub struct StreamingToolParser {
/// Buffer for accumulating text content /// Buffer for accumulating text content
text_buffer: String, text_buffer: String,
/// Buffer for accumulating native tool calls /// Position in text_buffer up to which tool calls have been consumed/executed
native_tool_calls: Vec<g3_providers::ToolCall>, /// This prevents has_unexecuted_tool_call() from returning true for already-executed tools
last_consumed_position: usize,
/// Whether we've received a message_stop event /// Whether we've received a message_stop event
message_stopped: bool, message_stopped: bool,
/// Whether we're currently in a JSON tool call (for fallback parsing) /// Whether we're currently in a JSON tool call (for fallback parsing)
@@ -271,13 +281,58 @@ impl StreamingToolParser {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
text_buffer: String::new(), text_buffer: String::new(),
native_tool_calls: Vec::new(), last_consumed_position: 0,
message_stopped: false, message_stopped: false,
in_json_tool_call: false, in_json_tool_call: false,
json_tool_start: None, json_tool_start: None,
} }
} }
/// Find the starting byte position of the LAST tool call pattern in `text`.
///
/// Every pattern in `TOOL_CALL_PATTERNS` is searched from the end of the
/// text; the greatest match position wins. Returns `None` when no pattern
/// occurs at all.
fn find_last_tool_call_start(text: &str) -> Option<usize> {
    TOOL_CALL_PATTERNS
        .iter()
        .filter_map(|pattern| text.rfind(pattern))
        .max()
}
/// Find the starting byte position of the FIRST tool call pattern in `text`.
///
/// Every pattern in `TOOL_CALL_PATTERNS` is searched from the start of the
/// text; the smallest match position wins. Returns `None` when no pattern
/// occurs at all.
fn find_first_tool_call_start(text: &str) -> Option<usize> {
    TOOL_CALL_PATTERNS
        .iter()
        .filter_map(|pattern| text.find(pattern))
        .min()
}
/// Heuristic check for malformed tool calls where agent prose leaked into
/// the `args` object.
///
/// A key is considered "message-like" when it is implausibly long, spans
/// multiple lines, or contains a phrase that typically opens an assistant
/// message. Returns `true` if ANY key looks like message content.
fn has_message_like_keys(args: &serde_json::Map<String, serde_json::Value>) -> bool {
    // Phrases that usually start agent messages rather than argument names.
    const MESSAGE_PHRASES: [&str; 8] = [
        "I'll", "Let me", "Here's", "I can", "I need", "First", "Now", "The ",
    ];
    args.keys().any(|key| {
        key.len() > 100
            || key.contains('\n')
            || MESSAGE_PHRASES.iter().any(|phrase| key.contains(phrase))
    })
}
/// Process a streaming chunk and return completed tool calls if any /// Process a streaming chunk and return completed tool calls if any
pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec<ToolCall> { pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec<ToolCall> {
let mut completed_tools = Vec::new(); let mut completed_tools = Vec::new();
@@ -308,10 +363,12 @@ impl StreamingToolParser {
self.message_stopped = true; self.message_stopped = true;
debug!("Message finished, processing accumulated tool calls"); debug!("Message finished, processing accumulated tool calls");
// When stream finishes, do a final check for JSON tool calls in the accumulated buffer // When stream finishes, find ALL JSON tool calls in the accumulated buffer
if completed_tools.is_empty() && !self.text_buffer.is_empty() { if completed_tools.is_empty() && !self.text_buffer.is_empty() {
if let Some(json_tool) = self.try_parse_json_tool_call_from_buffer() { let all_tools = self.try_parse_all_json_tool_calls_from_buffer();
completed_tools.push(json_tool); if !all_tools.is_empty() {
debug!("Found {} JSON tool calls in buffer at stream end", all_tools.len());
completed_tools.extend(all_tools);
} }
} }
} }
@@ -328,26 +385,12 @@ impl StreamingToolParser {
/// Fallback method to parse JSON tool calls from text content /// Fallback method to parse JSON tool calls from text content
fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> { fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
// Look for JSON tool call patterns
let patterns = [
r#"{"tool":"#,
r#"{ "tool":"#,
r#"{"tool" :"#,
r#"{ "tool" :"#,
];
// If we're not currently in a JSON tool call, look for the start // If we're not currently in a JSON tool call, look for the start
if !self.in_json_tool_call { if !self.in_json_tool_call {
for pattern in &patterns { if let Some(pos) = Self::find_last_tool_call_start(&self.text_buffer) {
if let Some(pos) = self.text_buffer.rfind(pattern) { debug!("Found JSON tool call pattern at position {}", pos);
debug!( self.in_json_tool_call = true;
"Found JSON tool call pattern '{}' at position {}", self.json_tool_start = Some(pos);
pattern, pos
);
self.in_json_tool_call = true;
self.json_tool_start = Some(pos);
break;
}
} }
} }
@@ -356,83 +399,34 @@ impl StreamingToolParser {
if let Some(start_pos) = self.json_tool_start { if let Some(start_pos) = self.json_tool_start {
let json_text = &self.text_buffer[start_pos..]; let json_text = &self.text_buffer[start_pos..];
// Try to find a complete JSON object // Try to find a complete JSON object using the shared helper
let mut brace_count = 0; if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
let mut in_string = false; let json_str = &json_text[..=end_pos];
let mut escape_next = false; debug!("Attempting to parse JSON tool call: {}", json_str);
for (i, ch) in json_text.char_indices() { // Try to parse as a ToolCall
if escape_next { if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
escape_next = false; // Validate that args is an object with reasonable keys
continue; if let Some(args_obj) = tool_call.args.as_object() {
} if Self::has_message_like_keys(args_obj) {
debug!("Detected malformed tool call with message-like keys, skipping");
match ch { self.in_json_tool_call = false;
'\\' => escape_next = true, self.json_tool_start = None;
'"' if !escape_next => in_string = !in_string, return None;
'{' if !in_string => brace_count += 1,
'}' if !in_string => {
brace_count -= 1;
if brace_count == 0 {
// Found complete JSON object
let json_str = &json_text[..=i];
debug!("Attempting to parse JSON tool call: {}", json_str);
// First try to parse as a ToolCall
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
// Validate that this is actually a proper tool call
// The args should be a JSON object with reasonable keys
if let Some(args_obj) = tool_call.args.as_object() {
// Check if any key looks like it contains agent message content
// This would indicate a malformed tool call where the message
// got mixed into the args
let has_message_like_key = args_obj.keys().any(|key| {
key.len() > 100
|| key.contains('\n')
|| key.contains("I'll")
|| key.contains("Let me")
|| key.contains("Here's")
|| key.contains("I can")
|| key.contains("I need")
|| key.contains("First")
|| key.contains("Now")
|| key.contains("The ")
});
if has_message_like_key {
debug!("Detected malformed tool call with message-like keys, skipping");
// This looks like a malformed tool call, skip it
self.in_json_tool_call = false;
self.json_tool_start = None;
break;
}
// Also check if the values look reasonable
// Tool arguments should typically be file paths, commands, or content
// Not entire agent messages
debug!(
"Successfully parsed valid JSON tool call: {:?}",
tool_call
);
// Reset JSON parsing state
self.in_json_tool_call = false;
self.json_tool_start = None;
return Some(tool_call);
}
// If args is not an object, skip this as invalid
debug!("Tool call args is not an object, skipping");
} else {
debug!("Failed to parse JSON tool call: {}", json_str);
// Reset and continue looking
self.in_json_tool_call = false;
self.json_tool_start = None;
}
break;
} }
debug!("Successfully parsed valid JSON tool call: {:?}", tool_call);
self.in_json_tool_call = false;
self.json_tool_start = None;
return Some(tool_call);
} }
_ => {} debug!("Tool call args is not an object, skipping");
} else {
debug!("Failed to parse JSON tool call: {}", json_str);
} }
// Reset and continue looking
self.in_json_tool_call = false;
self.json_tool_start = None;
} }
} }
} }
@@ -440,76 +434,45 @@ impl StreamingToolParser {
None None
} }
/// Parse JSON tool call from the accumulated text buffer (called when stream finishes) /// Parse ALL JSON tool calls from the accumulated text buffer
/// This is similar to try_parse_json_tool_call but operates on the full buffer /// This finds all complete tool calls, not just the last one
fn try_parse_json_tool_call_from_buffer(&mut self) -> Option<ToolCall> { fn try_parse_all_json_tool_calls_from_buffer(&self) -> Vec<ToolCall> {
// Look for JSON tool call patterns in the accumulated buffer let mut tool_calls = Vec::new();
let patterns = [ let mut search_start = 0;
r#"{"tool":"#,
r#"{ "tool":"#, while search_start < self.text_buffer.len() {
r#"{"tool" :"#, let search_text = &self.text_buffer[search_start..];
r#"{ "tool" :"#,
]; // Find the next tool call pattern
if let Some(relative_pos) = Self::find_first_tool_call_start(search_text) {
// Find the last occurrence of a tool call pattern (most likely to be complete) let abs_start = search_start + relative_pos;
let mut best_start: Option<usize> = None; let json_text = &self.text_buffer[abs_start..];
for pattern in &patterns {
if let Some(pos) = self.text_buffer.rfind(pattern) { // Try to find a complete JSON object
if best_start.map_or(true, |best| pos > best) { if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
best_start = Some(pos); let json_str = &json_text[..=end_pos];
}
} if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
} if let Some(args_obj) = tool_call.args.as_object() {
if !Self::has_message_like_keys(args_obj) {
if let Some(start_pos) = best_start { debug!("Found tool call at position {}: {:?}", abs_start, tool_call.tool);
let json_text = &self.text_buffer[start_pos..]; tool_calls.push(tool_call);
debug!("Found potential JSON tool call at position {}: {:?}", start_pos,
if json_text.len() > 200 { &json_text[..200] } else { json_text });
// Try to find a complete JSON object
let mut brace_count = 0;
let mut in_string = false;
let mut escape_next = false;
for (i, ch) in json_text.char_indices() {
if escape_next {
escape_next = false;
continue;
}
match ch {
'\\' => escape_next = true,
'"' if !escape_next => in_string = !in_string,
'{' if !in_string => brace_count += 1,
'}' if !in_string => {
brace_count -= 1;
if brace_count == 0 {
// Found complete JSON object
let json_str = &json_text[..=i];
debug!("Attempting to parse JSON tool call from buffer: {}", json_str);
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
if let Some(args_obj) = tool_call.args.as_object() {
// Validate - check for message-like keys
let has_message_like_key = args_obj.keys().any(|key| {
key.len() > 100 || key.contains('\n')
});
if !has_message_like_key {
debug!("Successfully parsed JSON tool call from buffer: {:?}", tool_call);
return Some(tool_call);
}
}
} }
break;
} }
} }
_ => {} // Move past this tool call
search_start = abs_start + end_pos + 1;
} else {
// Incomplete JSON, stop searching
break;
} }
} else {
// No more tool call patterns found
break;
} }
} }
None tool_calls
} }
/// Get the accumulated text content (excluding tool calls) /// Get the accumulated text content (excluding tool calls)
@@ -531,10 +494,83 @@ impl StreamingToolParser {
self.message_stopped self.message_stopped
} }
/// Check whether the unconsumed tail of the text buffer contains a tool call
/// that was started but never completed.
///
/// This detects truncated output: the LLM began emitting a tool-call JSON
/// object but the stream ended before its closing brace arrived. Only text
/// after `last_consumed_position` is inspected, so already-handled tool
/// calls never re-trigger this check.
pub fn has_incomplete_tool_call(&self) -> bool {
    let pending = &self.text_buffer[self.last_consumed_position..];
    Self::find_last_tool_call_start(pending).map_or(false, |start| {
        // A pattern with no matching complete JSON object means truncation.
        Self::find_complete_json_object_end(&pending[start..]).is_none()
    })
}
/// Check whether the unconsumed tail of the text buffer contains a COMPLETE
/// tool-call JSON object that was never parsed/executed (e.g. due to a
/// parsing hiccup mid-stream).
///
/// Only text after `last_consumed_position` is inspected, so tool calls
/// already marked consumed via `mark_tool_calls_consumed()` are ignored.
pub fn has_unexecuted_tool_call(&self) -> bool {
    let pending = &self.text_buffer[self.last_consumed_position..];
    let start = match Self::find_last_tool_call_start(pending) {
        Some(pos) => pos,
        None => return false,
    };
    let candidate = &pending[start..];
    match Self::find_complete_json_object_end(candidate) {
        // Complete braces alone aren't enough — it must also be valid JSON.
        Some(end) => serde_json::from_str::<serde_json::Value>(&candidate[..=end]).is_ok(),
        None => false,
    }
}
/// Mark all tool calls up to the current buffer position as consumed/executed
/// This prevents has_unexecuted_tool_call() from returning true for already-executed tools
pub fn mark_tool_calls_consumed(&mut self) {
    // Advance the consumed watermark to the end of the buffer: everything
    // currently buffered is treated as handled, so has_incomplete_tool_call()
    // and has_unexecuted_tool_call() only look at content streamed after this point.
    self.last_consumed_position = self.text_buffer.len();
}
/// Find the end position (byte index) of a complete JSON object in the text.
///
/// Scans for a balanced `{ ... }` pair while ignoring braces that appear
/// inside JSON string literals (including after escaped quotes like `\"`).
/// Returns the byte index of the closing brace of the first complete
/// top-level object, or `None` if no complete object is found.
///
/// NOTE: callers are expected to pass text starting at (or before) the
/// object's opening brace; a stray `}` before the first `{` makes the
/// depth counter unbalance and the object will not be reported.
pub fn find_complete_json_object_end(text: &str) -> Option<usize> {
    let mut depth: i32 = 0;
    let mut in_string = false;
    let mut escape_next = false;
    let mut found_start = false;

    for (i, ch) in text.char_indices() {
        if escape_next {
            // Previous char was a backslash inside a string: this char is escaped.
            escape_next = false;
            continue;
        }
        match ch {
            // A backslash only acts as an escape inside a string literal.
            // (Previously a backslash outside a string also entered escape
            // mode, which could silently skip a following brace.)
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            '{' if !in_string => {
                depth += 1;
                found_start = true;
            }
            '}' if !in_string => {
                depth -= 1;
                if depth == 0 && found_start {
                    return Some(i); // byte index of the matching closing brace
                }
            }
            _ => {}
        }
    }
    None // no complete JSON object found
}
/// Reset the parser state for a new message /// Reset the parser state for a new message
pub fn reset(&mut self) { pub fn reset(&mut self) {
self.text_buffer.clear(); self.text_buffer.clear();
self.native_tool_calls.clear(); self.last_consumed_position = 0;
self.message_stopped = false; self.message_stopped = false;
self.in_json_tool_call = false; self.in_json_tool_call = false;
self.json_tool_start = None; self.json_tool_start = None;
@@ -2743,7 +2779,7 @@ impl<W: UiWriter> Agent<W> {
/// Manually trigger context summarization regardless of context window size /// Manually trigger context summarization regardless of context window size
/// Returns Ok(true) if summarization was successful, Ok(false) if it failed /// Returns Ok(true) if summarization was successful, Ok(false) if it failed
pub async fn force_summarize(&mut self) -> Result<bool> { pub async fn force_summarize(&mut self) -> Result<bool> {
info!("Manual summarization triggered"); debug!("Manual summarization triggered");
self.ui_writer.print_context_status(&format!( self.ui_writer.print_context_status(&format!(
"\n🗜️ Manual summarization requested (current usage: {}%)...", "\n🗜️ Manual summarization requested (current usage: {}%)...",
@@ -2861,7 +2897,7 @@ impl<W: UiWriter> Agent<W> {
/// Manually trigger context thinning regardless of thresholds /// Manually trigger context thinning regardless of thresholds
pub fn force_thin(&mut self) -> String { pub fn force_thin(&mut self) -> String {
info!("Manual context thinning triggered"); debug!("Manual context thinning triggered");
let (message, chars_saved) = self.context_window.thin_context(self.session_id.as_deref()); let (message, chars_saved) = self.context_window.thin_context(self.session_id.as_deref());
self.thinning_events.push(chars_saved); self.thinning_events.push(chars_saved);
message message
@@ -2870,7 +2906,7 @@ impl<W: UiWriter> Agent<W> {
/// Manually trigger context thinning for the ENTIRE context window /// Manually trigger context thinning for the ENTIRE context window
/// Unlike force_thin which only processes the first third, this processes all messages /// Unlike force_thin which only processes the first third, this processes all messages
pub fn force_thin_all(&mut self) -> String { pub fn force_thin_all(&mut self) -> String {
info!("Manual full context skinnifying triggered"); debug!("Manual full context skinnifying triggered");
let (message, chars_saved) = self.context_window.thin_context_all(self.session_id.as_deref()); let (message, chars_saved) = self.context_window.thin_context_all(self.session_id.as_deref());
self.thinning_events.push(chars_saved); self.thinning_events.push(chars_saved);
message message
@@ -2879,7 +2915,7 @@ impl<W: UiWriter> Agent<W> {
/// Reload README.md and AGENTS.md and replace the first system message /// Reload README.md and AGENTS.md and replace the first system message
/// Returns Ok(true) if README was found and reloaded, Ok(false) if no README was present initially /// Returns Ok(true) if README was found and reloaded, Ok(false) if no README was present initially
pub fn reload_readme(&mut self) -> Result<bool> { pub fn reload_readme(&mut self) -> Result<bool> {
info!("Manual README reload triggered"); debug!("Manual README reload triggered");
// Check if the second message in conversation history is a system message with README content // Check if the second message in conversation history is a system message with README content
// (The first message should always be the system prompt) // (The first message should always be the system prompt)
@@ -2922,7 +2958,7 @@ impl<W: UiWriter> Agent<W> {
// Replace the second message (README) with the new content // Replace the second message (README) with the new content
if let Some(first_msg) = self.context_window.conversation_history.get_mut(1) { if let Some(first_msg) = self.context_window.conversation_history.get_mut(1) {
first_msg.content = combined_content; first_msg.content = combined_content;
info!("README content reloaded successfully"); debug!("README content reloaded successfully");
Ok(true) Ok(true)
} else { } else {
Ok(false) Ok(false)
@@ -3156,7 +3192,7 @@ impl<W: UiWriter> Agent<W> {
error!("Failed to clear continuation artifacts: {}", e); error!("Failed to clear continuation artifacts: {}", e);
} }
info!("Session cleared"); debug!("Session cleared");
} }
/// Restore session from a continuation artifact /// Restore session from a continuation artifact
@@ -3201,7 +3237,7 @@ impl<W: UiWriter> Agent<W> {
}); });
} }
info!("Restored full context from session log"); debug!("Restored full context from session log");
return Ok(true); return Ok(true);
} }
} }
@@ -3226,7 +3262,7 @@ impl<W: UiWriter> Agent<W> {
}); });
} }
info!("Restored session from summary"); debug!("Restored session from summary");
Ok(false) Ok(false)
} }
@@ -3836,7 +3872,7 @@ impl<W: UiWriter> Agent<W> {
match provider.stream(request.clone()).await { match provider.stream(request.clone()).await {
Ok(stream) => { Ok(stream) => {
if attempt > 1 { if attempt > 1 {
info!("Stream started successfully after {} attempts", attempt); debug!("Stream started successfully after {} attempts", attempt);
} }
debug!("Stream started successfully"); debug!("Stream started successfully");
debug!( debug!(
@@ -3886,9 +3922,9 @@ impl<W: UiWriter> Agent<W> {
let mut response_started = false; let mut response_started = false;
let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts
const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 2; // Limit auto-summary retries const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 5; // Limit auto-summary retries (increased from 2 for better recovery)
let mut final_output_called = false; // Track if final_output was called let mut final_output_called = false; // Track if final_output was called
let mut executed_tools_in_session: std::collections::HashSet<String> = std::collections::HashSet::new(); // Track executed tools to prevent duplicates // Note: Session-level duplicate tracking was removed - we only prevent sequential duplicates (DUP IN CHUNK, DUP IN MSG)
// Check if we need to summarize before starting // Check if we need to summarize before starting
if self.context_window.should_summarize() { if self.context_window.should_summarize() {
@@ -4189,77 +4225,51 @@ impl<W: UiWriter> Agent<W> {
}; };
// De-duplicate tool calls and track duplicates // De-duplicate tool calls and track duplicates
let mut seen_in_chunk: Vec<ToolCall> = Vec::new(); let mut last_tool_in_chunk: Option<ToolCall> = None;
let mut deduplicated_tools: Vec<(ToolCall, Option<String>)> = Vec::new(); let mut deduplicated_tools: Vec<(ToolCall, Option<String>)> = Vec::new();
for tool_call in tools_to_process { for tool_call in tools_to_process {
let mut duplicate_type = None; let mut duplicate_type = None;
// Check for duplicates in current chunk // Check for IMMEDIATELY SEQUENTIAL duplicate in current chunk
if seen_in_chunk // Only the immediately previous tool call counts as a duplicate
.iter() if let Some(ref last_tool) = last_tool_in_chunk {
.any(|tc| are_duplicates(tc, &tool_call)) if are_duplicates(last_tool, &tool_call) {
{
duplicate_type = Some("DUP IN CHUNK".to_string()); duplicate_type = Some("DUP IN CHUNK".to_string());
}
} else { } else {
// Check for duplicate against previous message in history // Check for IMMEDIATELY SEQUENTIAL duplicate against previous message
// Look at the last assistant message that contains tool calls // Only mark as duplicate if the LAST tool call in the previous message
// matches AND there's no significant text after it
let mut found_in_prev = false; let mut found_in_prev = false;
for msg in self.context_window.conversation_history.iter().rev() { for msg in self.context_window.conversation_history.iter().rev() {
if matches!(msg.role, MessageRole::Assistant) { if matches!(msg.role, MessageRole::Assistant) {
// Try to parse tool calls from the message content // Find the LAST tool call in the message
if msg.content.contains(r#"\"tool\""#) { let content = &msg.content;
// Simple JSON extraction for tool calls
let content = &msg.content; // Look for the last occurrence of a tool call pattern
let mut start_idx = 0; if let Some(last_tool_start) = content.rfind(r#"{"tool""#)
while let Some(tool_start) = .or_else(|| content.rfind(r#"{ "tool""#))
content[start_idx..].find(r#"{\"tool\""#) {
{ // Find the end of this JSON object
let tool_start = start_idx + tool_start; if let Some(end_offset) = StreamingToolParser::find_complete_json_object_end(&content[last_tool_start..]) {
// Find the end of this JSON object let end_idx = last_tool_start + end_offset + 1;
let mut brace_count = 0; let tool_json = &content[last_tool_start..end_idx];
let mut in_string = false;
let mut escape_next = false; // Check if there's any non-whitespace text after this tool call
let mut end_idx = tool_start; let text_after = content[end_idx..].trim();
let has_text_after = !text_after.is_empty();
for (i, ch) in content[tool_start..].char_indices()
{ // Only consider it a duplicate if:
if escape_next { // 1. The tool call matches
escape_next = false; // 2. There's no text after it (it was the last thing in the message)
continue; if !has_text_after {
} if let Ok(prev_tool) = serde_json::from_str::<ToolCall>(tool_json) {
if ch == '\\' && in_string {
escape_next = true;
continue;
}
if ch == '"' && !escape_next {
in_string = !in_string;
}
if !in_string {
if ch == '{' {
brace_count += 1;
} else if ch == '}' {
brace_count -= 1;
if brace_count == 0 {
end_idx = tool_start + i + 1;
break;
}
}
}
}
if end_idx > tool_start {
let tool_json = &content[tool_start..end_idx];
if let Ok(prev_tool) =
serde_json::from_str::<ToolCall>(tool_json)
{
if are_duplicates(&prev_tool, &tool_call) { if are_duplicates(&prev_tool, &tool_call) {
found_in_prev = true; found_in_prev = true;
break;
} }
} }
} }
start_idx = end_idx;
} }
} }
// Only check the most recent assistant message // Only check the most recent assistant message
@@ -4272,13 +4282,8 @@ impl<W: UiWriter> Agent<W> {
} }
} }
// Add to seen list if not a duplicate in chunk // Track the last tool call for sequential duplicate detection
if duplicate_type last_tool_in_chunk = Some(tool_call.clone());
.as_ref()
.map_or(true, |s| s != "DUP IN CHUNK")
{
seen_in_chunk.push(tool_call.clone());
}
deduplicated_tools.push((tool_call, duplicate_type)); deduplicated_tools.push((tool_call, duplicate_type));
} }
@@ -4286,22 +4291,11 @@ impl<W: UiWriter> Agent<W> {
// Process each tool call // Process each tool call
for (tool_call, duplicate_type) in deduplicated_tools { for (tool_call, duplicate_type) in deduplicated_tools {
debug!("Processing completed tool call: {:?}", tool_call); debug!("Processing completed tool call: {:?}", tool_call);
// Mark that we detected a tool call - this prevents content from being printed
// even if the tool is skipped as a duplicate
tool_executed = true;
// Check if this tool was already executed in this session
let tool_key = format!("{}:{}", tool_call.tool, serde_json::to_string(&tool_call.args).unwrap_or_default());
if executed_tools_in_session.contains(&tool_key) {
// Log the duplicate with red prefix
let prefixed_tool_name = format!("🟥 {} DUP IN SESSION", tool_call.tool);
let warning_msg = format!(
"⚠️ Duplicate tool call detected (already executed in session): Skipping {} with args {}",
tool_call.tool,
serde_json::to_string(&tool_call.args).unwrap_or_else(|_| "<unserializable>".to_string())
);
let mut modified_tool_call = tool_call.clone();
modified_tool_call.tool = prefixed_tool_name;
debug!("{}", warning_msg);
continue; // Skip execution of duplicate
}
// If it's a duplicate, log it and return a warning // If it's a duplicate, log it and return a warning
if let Some(dup_type) = &duplicate_type { if let Some(dup_type) = &duplicate_type {
@@ -4639,15 +4633,25 @@ impl<W: UiWriter> Agent<W> {
tool_executed = true; tool_executed = true;
any_tool_executed = true; // Track across all iterations any_tool_executed = true; // Track across all iterations
// Add to executed tools set to prevent re-execution in this session // Reset auto-continue attempts after successful tool execution
executed_tools_in_session.insert(tool_key.clone()); // This gives the LLM fresh attempts since it's making progress
auto_summary_attempts = 0;
// Reset the JSON tool call filter state after each tool execution // Reset the JSON tool call filter state after each tool execution
// This ensures the filter doesn't stay in suppression mode for subsequent streaming content // This ensures the filter doesn't stay in suppression mode for subsequent streaming content
self.ui_writer.reset_json_filter(); self.ui_writer.reset_json_filter();
// Reset parser for next iteration - this clears the text buffer // Only reset parser if there are no more unexecuted tool calls in the buffer
parser.reset(); // This handles the case where the LLM emits multiple tool calls in one response
if parser.has_unexecuted_tool_call() {
debug!("Parser still has unexecuted tool calls, not resetting buffer");
// Mark current tool as consumed so we don't re-detect it
parser.mark_tool_calls_consumed();
} else {
// Reset parser for next iteration - this clears the text buffer
parser.reset();
}
// Clear current_response for next iteration to prevent buffered text // Clear current_response for next iteration to prevent buffered text
// from being incorrectly displayed after tool execution // from being incorrectly displayed after tool execution
@@ -4662,8 +4666,14 @@ impl<W: UiWriter> Agent<W> {
} // End of for loop processing each tool call } // End of for loop processing each tool call
// If we processed any tools in multiple mode, break out to start new stream // If we processed any tools in multiple mode, break out to start new stream
// BUT only if there are no more unexecuted tool calls in the buffer
if tool_executed && self.config.agent.allow_multiple_tool_calls { if tool_executed && self.config.agent.allow_multiple_tool_calls {
break; if parser.has_unexecuted_tool_call() {
debug!("Tool executed but parser still has unexecuted tool calls, continuing to process");
// Don't break - continue processing to pick up remaining tool calls
} else {
break;
}
} }
// If no tool calls were completed, continue streaming normally // If no tool calls were completed, continue streaming normally
@@ -4753,7 +4763,7 @@ impl<W: UiWriter> Agent<W> {
" - Text buffer content: {:?}", " - Text buffer content: {:?}",
parser.get_text_content() parser.get_text_content()
); );
error!(" - Native tool calls: {:?}", parser.native_tool_calls); error!(" - Has incomplete tool call: {}", parser.has_incomplete_tool_call());
error!(" - Message stopped: {}", parser.is_message_stopped()); error!(" - Message stopped: {}", parser.is_message_stopped());
error!(" - In JSON tool call: {}", parser.in_json_tool_call); error!(" - In JSON tool call: {}", parser.in_json_tool_call);
error!(" - JSON tool start: {:?}", parser.json_tool_start); error!(" - JSON tool start: {:?}", parser.json_tool_start);
@@ -4831,6 +4841,17 @@ impl<W: UiWriter> Agent<W> {
)); ));
} }
// If tools were executed in previous iterations but final_output wasn't called,
// break to let the outer loop's auto-continue logic handle it
if any_tool_executed && !final_output_called {
debug!("Tools were executed but final_output not called - breaking to auto-continue");
// Add the text response to context before breaking
if has_text_response && !current_response.trim().is_empty() {
full_response = current_response.clone();
}
break;
}
// Set full_response to current_response (don't append) // Set full_response to current_response (don't append)
// current_response already contains everything that was displayed // current_response already contains everything that was displayed
// Don't set full_response here - it would duplicate the output // Don't set full_response here - it would duplicate the output
@@ -4873,8 +4894,8 @@ impl<W: UiWriter> Agent<W> {
); );
error!("Error type: {}", std::any::type_name_of_val(&e)); error!("Error type: {}", std::any::type_name_of_val(&e));
error!("Parser state at error: text_buffer_len={}, native_tool_calls={}, message_stopped={}", error!("Parser state at error: text_buffer_len={}, has_incomplete={}, message_stopped={}",
parser.text_buffer_len(), parser.native_tool_calls.len(), parser.is_message_stopped()); parser.text_buffer_len(), parser.has_incomplete_tool_call(), parser.is_message_stopped());
// Store the error for potential logging later // Store the error for potential logging later
_last_error = Some(error_details.clone()); _last_error = Some(error_details.clone());
@@ -4893,7 +4914,7 @@ impl<W: UiWriter> Agent<W> {
// If we have any content or tool calls, treat this as a graceful end // If we have any content or tool calls, treat this as a graceful end
if chunks_received > 0 if chunks_received > 0
&& (!parser.get_text_content().is_empty() && (!parser.get_text_content().is_empty()
|| parser.native_tool_calls.len() > 0) || parser.has_unexecuted_tool_call())
{ {
warn!("Stream terminated unexpectedly but we have content, continuing"); warn!("Stream terminated unexpectedly but we have content, continuing");
break; // Break to process what we have break; // Break to process what we have
@@ -4941,18 +4962,77 @@ impl<W: UiWriter> Agent<W> {
let has_response = !current_response.is_empty() || !full_response.is_empty(); let has_response = !current_response.is_empty() || !full_response.is_empty();
// Check if the response is essentially empty (just whitespace or timing lines)
// This detects cases where the LLM outputs nothing substantive
let response_text = if !current_response.is_empty() {
&current_response
} else {
&full_response
};
let is_empty_response = response_text.trim().is_empty()
|| response_text.lines().all(|line| line.trim().is_empty() || line.trim().starts_with("⏱️"));
// Check if there's an incomplete tool call in the buffer
let has_incomplete_tool_call = parser.has_incomplete_tool_call();
// Check if there's a complete but unexecuted tool call in the buffer
let has_unexecuted_tool_call = parser.has_unexecuted_tool_call();
// Log when we detect unexecuted or incomplete tool calls for debugging
if has_incomplete_tool_call {
debug!("Detected incomplete tool call in buffer (buffer_len={}, consumed_up_to={})",
parser.text_buffer_len(), parser.text_buffer_len());
}
if has_unexecuted_tool_call {
debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue");
warn!("Unexecuted tool call detected in buffer after stream ended");
}
// Auto-continue if tools were executed but final_output was never called // Auto-continue if tools were executed but final_output was never called
// This is the simple rule: LLM must call final_output before returning control // OR if the LLM emitted an incomplete tool call (truncated JSON)
if any_tool_executed && !final_output_called { // OR if the LLM emitted a complete tool call that wasn't executed
// This ensures we don't return control when the LLM clearly intended to call a tool
// Note: We removed the redundant condition (any_tool_executed && is_empty_response)
// because it's already covered by (any_tool_executed && !final_output_called)
let should_auto_continue = (any_tool_executed && !final_output_called)
|| has_incomplete_tool_call
|| has_unexecuted_tool_call;
if should_auto_continue {
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS { if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
auto_summary_attempts += 1; auto_summary_attempts += 1;
warn!( if has_incomplete_tool_call {
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})", warn!(
iteration_count, auto_summary_attempts "LLM emitted incomplete tool call ({} iterations, auto-continue attempt {}/{})",
); iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
self.ui_writer.print_context_status( );
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n" self.ui_writer.print_context_status(
); "\n🔄 Model emitted incomplete tool call. Auto-continuing...\n"
);
} else if has_unexecuted_tool_call {
warn!(
"LLM emitted unexecuted tool call ({} iterations, auto-continue attempt {}/{})",
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_context_status(
"\n🔄 Model emitted tool call that wasn't executed. Auto-continuing...\n"
);
} else if is_empty_response {
warn!(
"LLM emitted empty/trivial response ({} iterations, auto-continue attempt {}/{})",
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_context_status(
"\n🔄 Model emitted empty response. Auto-continuing...\n"
);
} else {
warn!(
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {}/{})",
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
);
self.ui_writer.print_context_status(
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
);
}
// Add any text response to context before prompting for continuation // Add any text response to context before prompting for continuation
if has_response { if has_response {
@@ -4971,10 +5051,17 @@ impl<W: UiWriter> Agent<W> {
} }
// Add a follow-up message asking for continuation // Add a follow-up message asking for continuation
let continue_prompt = Message::new( let continue_prompt = if has_incomplete_tool_call {
MessageRole::User, Message::new(
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(), MessageRole::User,
); "Your previous response was cut off mid-tool-call. Please complete the tool call and continue.".to_string(),
)
} else {
Message::new(
MessageRole::User,
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
)
};
self.context_window.add_message(continue_prompt); self.context_window.add_message(continue_prompt);
request.messages = self.context_window.conversation_history.clone(); request.messages = self.context_window.conversation_history.clone();
@@ -4983,11 +5070,17 @@ impl<W: UiWriter> Agent<W> {
} else { } else {
// Max attempts reached, give up gracefully // Max attempts reached, give up gracefully
warn!( warn!(
"Max auto-continue attempts ({}) reached, returning without final_output", "Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, final_output_called={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}",
MAX_AUTO_SUMMARY_ATTEMPTS MAX_AUTO_SUMMARY_ATTEMPTS,
iteration_count,
any_tool_executed,
final_output_called,
has_incomplete_tool_call,
has_unexecuted_tool_call,
is_empty_response
); );
self.ui_writer.print_agent_response( self.ui_writer.print_agent_response(
"\n⚠️ The model stopped without calling final_output after multiple attempts.\n" &format!("\n⚠️ The model stopped without calling final_output after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS)
); );
} }
} else if has_response { } else if has_response {
@@ -6434,7 +6527,7 @@ impl<W: UiWriter> Agent<W> {
let driver = mutex.into_inner(); let driver = mutex.into_inner();
match driver.quit().await { match driver.quit().await {
Ok(_) => { Ok(_) => {
info!("WebDriver session closed successfully"); debug!("WebDriver session closed successfully");
// Kill the safaridriver process // Kill the safaridriver process
if let Some(mut process) = if let Some(mut process) =
@@ -6443,7 +6536,7 @@ impl<W: UiWriter> Agent<W> {
if let Err(e) = process.kill().await { if let Err(e) = process.kill().await {
warn!("Failed to kill safaridriver process: {}", e); warn!("Failed to kill safaridriver process: {}", e);
} else { } else {
info!("Safaridriver process terminated"); debug!("Safaridriver process terminated");
} }
} }

View File

@@ -10,7 +10,7 @@ use crate::ui_writer::UiWriter;
use crate::{Agent, DiscoveryOptions, TaskResult}; use crate::{Agent, DiscoveryOptions, TaskResult};
use anyhow::Result; use anyhow::Result;
use std::time::Instant; use std::time::Instant;
use tracing::{info, warn}; use tracing::{debug, warn};
/// Configuration for retry behavior /// Configuration for retry behavior
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -142,7 +142,7 @@ where
match result { match result {
Ok(task_result) => { Ok(task_result) => {
if retry_count > 0 { if retry_count > 0 {
info!( debug!(
"{} task succeeded after {} retries (elapsed: {:?})", "{} task succeeded after {} retries (elapsed: {:?})",
config.role_name, config.role_name,
retry_count, retry_count,
@@ -259,7 +259,7 @@ where
match operation().await { match operation().await {
Ok(result) => { Ok(result) => {
if retry_count > 0 { if retry_count > 0 {
info!( debug!(
"Operation '{}' succeeded after {} retries", "Operation '{}' succeeded after {} retries",
operation_name, retry_count operation_name, retry_count
); );

View File

@@ -6,7 +6,7 @@
use anyhow::Result; use anyhow::Result;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use tracing::{debug, error, info, warn}; use tracing::{debug, error, warn};
/// Version of the session continuation format /// Version of the session continuation format
const CONTINUATION_VERSION: &str = "1.0"; const CONTINUATION_VERSION: &str = "1.0";
@@ -89,7 +89,7 @@ pub fn save_continuation(continuation: &SessionContinuation) -> Result<PathBuf>
let json = serde_json::to_string_pretty(continuation)?; let json = serde_json::to_string_pretty(continuation)?;
std::fs::write(&latest_path, &json)?; std::fs::write(&latest_path, &json)?;
info!("Saved session continuation to {:?}", latest_path); debug!("Saved session continuation to {:?}", latest_path);
Ok(latest_path) Ok(latest_path)
} }
@@ -113,7 +113,7 @@ pub fn load_continuation() -> Result<Option<SessionContinuation>> {
); );
} }
info!("Loaded session continuation from {:?}", latest_path); debug!("Loaded session continuation from {:?}", latest_path);
Ok(Some(continuation)) Ok(Some(continuation))
} }
@@ -131,7 +131,7 @@ pub fn clear_continuation() -> Result<()> {
debug!("Removed session file: {:?}", path); debug!("Removed session file: {:?}", path);
} }
} }
info!("Cleared session continuation artifacts"); debug!("Cleared session continuation artifacts");
} }
Ok(()) Ok(())

View File

@@ -0,0 +1,234 @@
//! Tests for the auto-continue detection features
//!
//! These tests verify the logic used to detect when the LLM should auto-continue:
//! 1. Empty/trivial responses (just timing lines)
//! 2. Incomplete tool calls
//! 3. Unexecuted tool calls
//! 4. Missing final_output after tool execution
/// Helper function to check if a response is considered "empty" or trivial.
/// Mirrors the detection logic in lib.rs: a response counts as empty when it
/// is all whitespace, or when every line is blank or a "⏱️" timing line.
fn is_empty_response(response_text: &str) -> bool {
    if response_text.trim().is_empty() {
        return true;
    }
    response_text
        .lines()
        .map(str::trim)
        .all(|line| line.is_empty() || line.starts_with("⏱️"))
}
#[test]
fn test_empty_response_detection_empty_string() {
    assert!(is_empty_response(""));
}

#[test]
fn test_empty_response_detection_whitespace_only() {
    // Pure whitespace in any mix must register as empty.
    for input in &["   ", "\n\n\n", "  \n  \t  \n  "] {
        assert!(is_empty_response(input), "{:?} should be empty", input);
    }
}

#[test]
fn test_empty_response_detection_timing_line_only() {
    // A lone timing line, with or without surrounding padding, is trivial output.
    for input in &[
        "⏱️ 43.0s | 💭 3.6s",
        "  ⏱️ 43.0s | 💭 3.6s  ",
        "\n⏱️ 43.0s | 💭 3.6s\n",
    ] {
        assert!(is_empty_response(input), "{:?} should be empty", input);
    }
}

#[test]
fn test_empty_response_detection_multiple_timing_lines() {
    assert!(is_empty_response("\n⏱️ 10.0s | 💭 1.0s\n\n⏱️ 20.0s | 💭 2.0s\n"));
}

#[test]
fn test_empty_response_detection_timing_with_empty_lines() {
    assert!(is_empty_response("\n\n⏱️ 43.0s | 💭 3.6s\n\n"));
}
#[test]
fn test_empty_response_detection_substantive_content() {
    // Any real prose means the response is NOT empty.
    for input in &[
        "Hello, I will help you.",
        "Let me read that file.",
        "I've completed the task.",
    ] {
        assert!(!is_empty_response(input), "{:?} should NOT be empty", input);
    }
}

#[test]
fn test_empty_response_detection_timing_with_text() {
    // Substantive text after a timing line makes the response non-empty.
    assert!(!is_empty_response("⏱️ 43.0s | 💭 3.6s\nHere is the result."));
}

#[test]
fn test_empty_response_detection_text_before_timing() {
    // Substantive text before a timing line also counts.
    assert!(!is_empty_response("Done!\n⏱️ 43.0s | 💭 3.6s"));
}

#[test]
fn test_empty_response_detection_json_tool_call() {
    // A complete JSON tool call is definitely content.
    assert!(!is_empty_response(
        r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#
    ));
}

#[test]
fn test_empty_response_detection_partial_json() {
    // Even a truncated JSON tool call is content.
    assert!(!is_empty_response(r#"{"tool": "read_file", "args": {"#));
}

#[test]
fn test_empty_response_detection_markdown() {
    assert!(!is_empty_response("# Summary\n\nI completed the task."));
}

#[test]
fn test_empty_response_detection_code_block() {
    assert!(!is_empty_response("```rust\nfn main() {}\n```"));
}
// Guard test documenting the expected range of MAX_AUTO_SUMMARY_ATTEMPTS.
// The constant itself is private to lib.rs, so its current value is mirrored here.
#[test]
fn test_max_auto_summary_attempts_is_reasonable() {
    // At least 3 attempts give the LLM a fair chance to recover; more than 10
    // risks long auto-continue loops. Current value in lib.rs: 5 (was 2).
    const EXPECTED_MIN_ATTEMPTS: usize = 3;
    const EXPECTED_MAX_ATTEMPTS: usize = 10;
    const CURRENT_VALUE: usize = 5; // keep in sync with lib.rs
    assert!(
        (EXPECTED_MIN_ATTEMPTS..=EXPECTED_MAX_ATTEMPTS).contains(&CURRENT_VALUE),
        "MAX_AUTO_SUMMARY_ATTEMPTS should be within {}..={} for reliable recovery without infinite loops",
        EXPECTED_MIN_ATTEMPTS,
        EXPECTED_MAX_ATTEMPTS
    );
}
// =============================================================================
// Test: Auto-continue condition logic
// =============================================================================

/// Simulates the should_auto_continue logic from lib.rs.
///
/// Mirrors the production rule exactly: continue when tools ran but
/// `final_output` was never called, or when the buffer holds an incomplete
/// or unexecuted tool call. The `(any_tool_executed && is_empty_response)`
/// clause was removed in lib.rs as redundant (it is subsumed by
/// `any_tool_executed && !final_output_called`), so it is not part of the
/// decision here either — keeping it would wrongly continue when
/// final_output *was* called with an empty display. The `is_empty_response`
/// parameter is retained so existing call sites remain valid.
fn should_auto_continue(
    any_tool_executed: bool,
    final_output_called: bool,
    has_incomplete_tool_call: bool,
    has_unexecuted_tool_call: bool,
    is_empty_response: bool,
) -> bool {
    // Intentionally unused in the decision; see doc comment above.
    let _ = is_empty_response;
    (any_tool_executed && !final_output_called)
        || has_incomplete_tool_call
        || has_unexecuted_tool_call
}
// Argument order for should_auto_continue in all tests below:
// (any_tool_executed, final_output_called, has_incomplete_tool_call,
//  has_unexecuted_tool_call, is_empty_response)

#[test]
fn test_auto_continue_after_tool_no_final_output() {
    // Tool executed but final_output never called -> must continue.
    assert!(should_auto_continue(true, false, false, false, false));
}

#[test]
fn test_auto_continue_with_final_output() {
    // Tool executed AND final_output called -> the turn is complete.
    assert!(!should_auto_continue(true, true, false, false, false));
}

#[test]
fn test_auto_continue_incomplete_tool_call() {
    // A truncated tool call forces continuation regardless of other flags.
    assert!(should_auto_continue(false, false, true, false, false));
}

#[test]
fn test_auto_continue_unexecuted_tool_call() {
    // A complete-but-unexecuted tool call forces continuation on its own.
    assert!(should_auto_continue(false, false, false, true, false));
}

#[test]
fn test_auto_continue_empty_response_after_tool() {
    // Empty response after tool execution -> continue.
    assert!(should_auto_continue(true, false, false, false, true));
}

#[test]
fn test_auto_continue_empty_response_no_tool() {
    // Empty response with no tool activity is a normal end of turn
    // (the LLM simply didn't respond) -> do NOT continue.
    assert!(!should_auto_continue(false, false, false, false, true));
}
#[test]
fn test_auto_continue_no_conditions_met() {
// No tools, no incomplete calls, substantive response - should NOT continue
assert!(!should_auto_continue(
false, // any_tool_executed
false, // final_output_called
false, // has_incomplete_tool_call
false, // has_unexecuted_tool_call
false, // is_empty_response
));
}
// =============================================================================
// Test: Redundant condition detection
// =============================================================================
#[test]
fn test_redundant_empty_response_condition() {
    // Documents that the (any_tool_executed && is_empty_response) clause adds
    // nothing once (any_tool_executed && !final_output_called) already holds:
    // flipping is_empty_response cannot change the outcome in that state.
    let with_empty = should_auto_continue(true, false, false, false, true);
    let without_empty = should_auto_continue(true, false, false, false, false);
    assert_eq!(with_empty, without_empty,
        "The is_empty_response condition is redundant when any_tool_executed && !final_output_called");
}

View File

@@ -0,0 +1,231 @@
//! Tests for tool call duplicate detection
//!
//! These tests ensure that duplicate detection only catches IMMEDIATELY SEQUENTIAL
//! duplicates, not legitimate re-use of tools with text between them.
use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;
// Build a CompletionChunk carrying `content`, with no tool calls or usage data.
fn chunk(content: &str, finished: bool) -> CompletionChunk {
    CompletionChunk {
        finished,
        content: String::from(content),
        tool_calls: None,
        usage: None,
    }
}
// =============================================================================
// Test: find_complete_json_object_end helper function
// =============================================================================
#[test]
fn test_find_complete_json_object_end_simple() {
    // Flat object: the end index is the final character of the string.
    let input = r#"{"tool": "test", "args": {}}"#;
    let pos = StreamingToolParser::find_complete_json_object_end(input);
    assert!(pos.is_some(), "Should find end of complete JSON");
    assert_eq!(pos.unwrap(), input.len() - 1, "End should be at last character");
}
#[test]
fn test_find_complete_json_object_end_nested() {
    // Deep nesting must not confuse the brace matching.
    let input = r#"{"tool": "test", "args": {"nested": {"deep": true}}}"#;
    let pos = StreamingToolParser::find_complete_json_object_end(input);
    assert!(pos.is_some(), "Should find end of nested JSON");
    assert_eq!(pos.unwrap(), input.len() - 1);
}
#[test]
fn test_find_complete_json_object_end_with_trailing_text() {
    let input = r#"{"tool": "test", "args": {}} some text after"#;
    let pos = StreamingToolParser::find_complete_json_object_end(input);
    assert!(pos.is_some(), "Should find end of JSON even with trailing text");
    // The reported index must land on the object's closing brace, not at EOF.
    let idx = pos.unwrap();
    assert_eq!(&input[idx..idx + 1], "}", "End should be at closing brace");
}
#[test]
fn test_find_complete_json_object_end_incomplete() {
    // An object that never closes has no end position.
    let input = r#"{"tool": "test", "args": {"#;
    assert!(
        StreamingToolParser::find_complete_json_object_end(input).is_none(),
        "Should return None for incomplete JSON"
    );
}
// =============================================================================
// Test: Tool calls separated by text should NOT be duplicates
// =============================================================================
#[test]
fn test_same_tool_with_text_between_not_duplicate() {
    // The LLM may legitimately invoke the same tool twice when it writes
    // explanatory text in between — that must not be flagged as a duplicate.
    let mut p = StreamingToolParser::new();
    // First invocation.
    let first = r#"{"tool": "todo_read", "args": {}}"#;
    let found1 = p.process_chunk(&chunk(first, true));
    assert_eq!(found1.len(), 1, "First tool call should be detected");
    assert_eq!(found1[0].tool, "todo_read");
    // The agent resets the parser after executing a tool.
    p.reset();
    // Prose followed by the identical call again.
    let second = r#"Now let me check the TODO again to verify my changes.
{"tool": "todo_read", "args": {}}"#;
    let found2 = p.process_chunk(&chunk(second, true));
    // Text precedes the call, so this is a fresh invocation, not a stutter.
    assert_eq!(found2.len(), 1, "Second tool call should be detected (not a duplicate)");
    assert_eq!(found2[0].tool, "todo_read");
}
#[test]
fn test_different_tools_back_to_back_not_duplicate() {
    let mut p = StreamingToolParser::new();
    // Two distinct tools, immediately adjacent.
    let payload = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "shell", "args": {"command": "ls"}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    // Different tools can never be duplicates of each other.
    assert!(!found.is_empty(), "Should detect tool calls");
    // The first call, at minimum, must come through.
    assert_eq!(found[0].tool, "read_file");
}
#[test]
fn test_same_tool_different_args_not_duplicate() {
    let mut p = StreamingToolParser::new();
    // Identical tool name but different arguments — a legitimate re-use.
    let payload = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "read_file", "args": {"file_path": "b.txt"}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect tool calls");
}
// =============================================================================
// Test: Immediately sequential identical tool calls ARE duplicates
// =============================================================================
#[test]
fn test_identical_tool_calls_back_to_back_are_duplicates() {
    // When the LLM stutters and emits the exact same call twice with nothing
    // between them, that IS a duplicate. The dedup decision itself lives at a
    // higher level in the agent; the parser only surfaces the calls, so this
    // test just confirms parsing succeeds.
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "todo_read", "args": {}}
{"tool": "todo_read", "args": {}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect at least one tool call");
}
// =============================================================================
// Test: Text content detection for duplicate logic
// =============================================================================
#[test]
fn test_has_text_after_tool_call() {
    // Verifies we can classify what follows a complete tool-call JSON object:
    // real text, nothing, or whitespace only. This mirrors the duplicate-
    // detection logic, which treats whitespace-only gaps as "no text between".
    let content_with_text = r#"{"tool": "test", "args": {}} Some text after"#;
    let content_without_text = r#"{"tool": "test", "args": {}}"#;
    let content_with_whitespace_only = r#"{"tool": "test", "args": {}}
"#;
    // Find the end of the JSON in each case
    let end1 = StreamingToolParser::find_complete_json_object_end(content_with_text).unwrap();
    let end2 = StreamingToolParser::find_complete_json_object_end(content_without_text).unwrap();
    let end3 = StreamingToolParser::find_complete_json_object_end(content_with_whitespace_only).unwrap();
    // Inspect the remainder after the closing brace. Use checked slicing
    // (`get`) uniformly: when the object ends at the last byte, `end + 1`
    // equals the string length, and mixing checked and unchecked indexing
    // here (as before) invited an out-of-bounds panic if the helper's
    // contract ever changes.
    let after1 = content_with_text.get(end1 + 1..).unwrap_or("").trim();
    let after2 = content_without_text.get(end2 + 1..).unwrap_or("").trim();
    let after3 = content_with_whitespace_only.get(end3 + 1..).unwrap_or("").trim();
    assert!(!after1.is_empty(), "Should have text after tool call");
    assert!(after2.is_empty(), "Should have no text after tool call");
    assert!(after3.is_empty(), "Whitespace-only should count as no text");
}
// =============================================================================
// Test: Edge cases
// =============================================================================
#[test]
fn test_tool_call_with_newlines_between() {
    // Only newlines separate the two identical calls — no meaningful text —
    // so downstream logic should treat the second one as a duplicate.
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "todo_read", "args": {}}
{"tool": "todo_read", "args": {}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect at least one tool call");
}
#[test]
fn test_tool_call_with_whitespace_text_between() {
    // Filler text sits between the calls; what matters is whether the text is
    // "meaningful", and here it is, so both invocations are legitimate.
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "todo_read", "args": {}}
OK, now again:
{"tool": "todo_read", "args": {}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect tool calls");
}
#[test]
fn test_tool_call_in_middle_of_text() {
    // Prose on both sides of a single call.
    let mut p = StreamingToolParser::new();
    let payload = r#"Let me read the file first.
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Now I'll analyze the contents."#;
    let found = p.process_chunk(&chunk(payload, true));
    assert_eq!(found.len(), 1, "Should detect the tool call");
    assert_eq!(found[0].tool, "read_file");
}
#[test]
fn test_multiple_different_tool_calls_with_text() {
    // Three distinct calls, each introduced by narration.
    let mut p = StreamingToolParser::new();
    let payload = r#"First, let me read the file:
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Now let me check the TODO:
{"tool": "todo_read", "args": {}}
Finally, let me run a command:
{"tool": "shell", "args": {"command": "ls"}}"#;
    let found = p.process_chunk(&chunk(payload, true));
    assert!(!found.is_empty(), "Should detect tool calls");
}

View File

@@ -0,0 +1,182 @@
//! Tests for the incomplete tool call detection feature
use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;
#[test]
fn test_has_incomplete_tool_call_empty_buffer() {
    // A freshly constructed parser has nothing buffered at all.
    let p = StreamingToolParser::new();
    assert!(!p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_no_tool_pattern() {
    // Plain prose never matches the tool-call pattern.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: "Hello, I will help you with that.".to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(!p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_complete_tool_call() {
    // A fully closed JSON object is complete, hence not "incomplete".
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(!p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_truncated_tool_call() {
    // The stream was cut off before the closing braces arrived.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_truncated_mid_value() {
    // The stream was cut off in the middle of a long string value.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "shell", "args": {"command": "cargo test --package g3-cli --test filter_json_test test_streaming -- --test-threads=1 2>&1 | tail"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_with_text_before() {
    // Leading prose must not mask the dangling tool call behind it.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"Let me read that file for you.
{"tool": "read_file", "args": {"file_path":"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_has_incomplete_tool_call_malformed_like_trace() {
    // Reproduces a real trace where the stream ended mid-JSON (truncated
    // output, not garbage characters): the closing braces never arrived.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path":"src/engine.rkt""#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_empty_buffer() {
    // Nothing buffered means nothing can be pending execution.
    let p = StreamingToolParser::new();
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_no_tool_pattern() {
    // Plain prose contains no tool call at all.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: "Hello, I will help you with that.".to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_complete_tool_call() {
    // A complete JSON tool call that nobody ran yet must be reported.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_incomplete_json() {
    // Truncated JSON is the "incomplete" case, not the "unexecuted" one.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_with_trailing_text() {
    // Trailing prose after a complete call must not hide it.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}
Some trailing text after the JSON"#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_unexecuted_tool_call());
}
#[test]
fn test_has_unexecuted_tool_call_with_text_before_and_after() {
    // Prose on both sides of a complete call must not hide it either.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&CompletionChunk {
        finished: false,
        content: r#"Let me read that file.
{"tool": "shell", "args": {"command": "ls -la"}}
I'll execute this command now."#.to_string(),
        tool_calls: None,
        usage: None,
    });
    assert!(p.has_unexecuted_tool_call());
}

View File

@@ -0,0 +1,545 @@
//! Comprehensive tests for StreamingToolParser
//!
//! Tests cover:
//! - Multiple tool calls in one response
//! - Tool call followed by text
//! - Incomplete tool calls at various truncation points
//! - Parser reset behavior
//! - Buffer management
use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;
// Build a CompletionChunk carrying `content`, with no tool calls or usage data.
fn chunk(content: &str, finished: bool) -> CompletionChunk {
    CompletionChunk {
        finished,
        content: String::from(content),
        tool_calls: None,
        usage: None,
    }
}
// =============================================================================
// Test: Multiple tool calls in one response
// =============================================================================
#[test]
fn test_multiple_tool_calls_in_single_chunk() {
    // Two complete calls arrive in a single chunk. The streaming parser may
    // hand back only the first one it sees; at least one must be reported.
    let mut p = StreamingToolParser::new();
    let payload = r#"Let me do two things:
{"tool": "read_file", "args": {"file_path": "a.txt"}}
Now the second:
{"tool": "shell", "args": {"command": "ls"}}"#;
    let found = p.process_chunk(&chunk(payload, false));
    assert!(!found.is_empty(), "Should detect at least one tool call");
}
#[test]
fn test_multiple_tool_calls_across_chunks() {
    let mut p = StreamingToolParser::new();
    // First call arrives and is detected.
    let found1 = p.process_chunk(&chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    ));
    assert_eq!(found1.len(), 1, "First tool call should be detected");
    assert_eq!(found1[0].tool, "read_file");
    // The agent resets the parser after executing a tool.
    p.reset();
    // Second call arrives and is detected in turn.
    let found2 = p.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "ls"}}"#,
        false,
    ));
    assert_eq!(found2.len(), 1, "Second tool call should be detected");
    assert_eq!(found2[0].tool, "shell");
}
#[test]
fn test_first_complete_second_incomplete() {
    // A complete call followed by one cut off mid-stream: the truncated one
    // must surface through has_incomplete_tool_call.
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "shell", "args": {"command": "ls"#;
    let _found = p.process_chunk(&chunk(payload, false));
    assert!(p.has_incomplete_tool_call(), "Should detect incomplete tool call");
}
// =============================================================================
// Test: Tool call followed by text
// =============================================================================
#[test]
fn test_tool_call_with_trailing_text() {
    let mut p = StreamingToolParser::new();
    let payload = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}
Here is the content of the file..."#;
    let found = p.process_chunk(&chunk(payload, false));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "read_file");
    // The prose after the call must survive in the text buffer.
    assert!(
        p.get_text_content().contains("Here is the content"),
        "Trailing text should be preserved"
    );
}
#[test]
fn test_text_before_tool_call() {
    let mut p = StreamingToolParser::new();
    let payload = r#"Let me read that file for you.
{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
    let found = p.process_chunk(&chunk(payload, false));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "read_file");
    // The prose before the call must survive in the text buffer.
    assert!(
        p.get_text_content().contains("Let me read"),
        "Leading text should be preserved"
    );
}
#[test]
fn test_text_before_and_after_tool_call() {
    let mut p = StreamingToolParser::new();
    let payload = r#"I'll check the file.
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Done checking."#;
    let found = p.process_chunk(&chunk(payload, false));
    assert_eq!(found.len(), 1);
    let text = p.get_text_content();
    assert!(text.contains("I'll check"), "Leading text should be preserved");
    assert!(text.contains("Done checking"), "Trailing text should be preserved");
}
// =============================================================================
// Test: Incomplete tool calls at various truncation points
// =============================================================================
#[test]
fn test_incomplete_after_tool_key() {
    // Cut right after the "tool" key.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool":"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_after_tool_name() {
    // Cut right after the tool name.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file""#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_after_args_key() {
    // Cut right after the "args" key.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args":"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_mid_args_object() {
    // Cut inside the args object.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path":"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_mid_string_value() {
    // Cut in the middle of a string value.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "shell", "args": {"command": "ls -la /very/long/path"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_incomplete_missing_final_brace() {
    // Everything present except the outermost closing brace.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}"#, false));
    assert!(p.has_incomplete_tool_call());
}
#[test]
fn test_complete_tool_call_not_incomplete() {
    // Control case: a fully closed call is never flagged.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, false));
    assert!(!p.has_incomplete_tool_call(), "Complete tool call should not be marked incomplete");
}
// =============================================================================
// Test: Parser reset behavior
// =============================================================================
#[test]
fn test_reset_clears_buffer() {
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk("Some content here", false));
    assert!(!p.get_text_content().is_empty());
    p.reset();
    assert!(p.get_text_content().is_empty(), "Buffer should be empty after reset");
}
#[test]
fn test_reset_clears_incomplete_state() {
    let mut p = StreamingToolParser::new();
    // Leave a dangling tool call in the buffer, then reset.
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"#, false));
    assert!(p.has_incomplete_tool_call());
    p.reset();
    assert!(!p.has_incomplete_tool_call(), "Incomplete state should be cleared after reset");
}
#[test]
fn test_reset_clears_unexecuted_state() {
    let mut p = StreamingToolParser::new();
    // Leave a complete-but-unexecuted call in the buffer, then reset.
    p.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, false));
    assert!(p.has_unexecuted_tool_call());
    p.reset();
    assert!(!p.has_unexecuted_tool_call(), "Unexecuted state should be cleared after reset");
}
#[test]
fn test_reset_allows_new_tool_calls() {
    let mut p = StreamingToolParser::new();
    // Detect one call...
    let found1 = p.process_chunk(&chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    ));
    assert_eq!(found1.len(), 1);
    p.reset();
    // ...and a different one after the reset.
    let found2 = p.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "ls"}}"#,
        false,
    ));
    assert_eq!(found2.len(), 1);
    assert_eq!(found2[0].tool, "shell");
}
// =============================================================================
// Test: Buffer management and edge cases
// =============================================================================
#[test]
fn test_streaming_chunks_accumulate() {
    // A tool call split across four chunks must assemble in the buffer.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{"tool": "#, false));
    p.process_chunk(&chunk(r#""read_file", "#, false));
    p.process_chunk(&chunk(r#""args": {"file_path": "#, false));
    p.process_chunk(&chunk(r#""test.txt"}}"#, false));
    let text = p.get_text_content();
    assert!(text.contains(r#""tool""#));
    assert!(text.contains(r#""read_file""#));
}
#[test]
fn test_finished_chunk_triggers_final_parse() {
    let mut p = StreamingToolParser::new();
    // Two partial chunks that complete a call between them.
    p.process_chunk(&chunk(r#"{"tool": "read_file", "#, false));
    let found = p.process_chunk(&chunk(r#""args": {"file_path": "test.txt"}}"#, false));
    // Either the call is returned while streaming, or it is no longer pending.
    assert!(!found.is_empty() || !p.has_unexecuted_tool_call(),
        "Tool should be detected during streaming or marked as unexecuted");
}
#[test]
fn test_empty_chunks_ignored() {
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk("", false));
    p.process_chunk(&chunk("", false));
    assert!(p.get_text_content().is_empty());
    assert!(!p.has_incomplete_tool_call());
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_whitespace_only_chunks() {
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(" \n\t ", false));
    assert!(!p.has_incomplete_tool_call());
    assert!(!p.has_unexecuted_tool_call());
}
#[test]
fn test_json_with_escaped_quotes() {
    // Escaped quotes inside a value must not break brace matching.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "echo \"hello\""}}"#,
        false,
    ));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "shell");
}
#[test]
fn test_json_with_escaped_backslashes() {
    // Windows-style paths with escaped backslashes must parse.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{"tool": "write_file", "args": {"file_path": "C:\\Users\\test.txt", "content": "data"}}"#,
        false,
    ));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "write_file");
}
#[test]
fn test_json_with_nested_braces_in_string() {
    // Braces inside an escaped JSON string value must not confuse the parser.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{"tool": "write_file", "args": {"content": "{\"nested\": {\"json\": true}}"}}"#,
        false,
    ));
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].tool, "write_file");
}
#[test]
fn test_text_buffer_length_tracking() {
    // text_buffer_len tracks accumulation and is zeroed by reset.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk("Hello", false));
    assert_eq!(p.text_buffer_len(), 5);
    p.process_chunk(&chunk(" World", false));
    assert_eq!(p.text_buffer_len(), 11);
    p.reset();
    assert_eq!(p.text_buffer_len(), 0);
}
#[test]
fn test_message_stopped_flag() {
    // The stopped flag flips on the finished chunk and clears on reset.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk("Hello", false));
    assert!(!p.is_message_stopped());
    p.process_chunk(&chunk(" World", true));
    assert!(p.is_message_stopped());
    p.reset();
    assert!(!p.is_message_stopped());
}
// =============================================================================
// Test: Tool call pattern variations
// =============================================================================
#[test]
fn test_tool_pattern_no_spaces() {
    // Fully compact JSON (no spaces after colons or commas) must still match.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{"tool":"read_file","args":{"file_path":"test.txt"}}"#,
        false,
    ));
    assert_eq!(found.len(), 1);
}
// =============================================================================
// Test: mark_tool_calls_consumed functionality
// =============================================================================
#[test]
fn test_mark_consumed_clears_unexecuted_state() {
    let mut p = StreamingToolParser::new();
    // Buffer a complete call; it is now pending execution.
    p.process_chunk(&chunk(
        r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#,
        false,
    ));
    assert!(p.has_unexecuted_tool_call());
    // Consuming it must clear the pending flag.
    p.mark_tool_calls_consumed();
    assert!(!p.has_unexecuted_tool_call(),
        "After marking consumed, has_unexecuted_tool_call should return false");
}
#[test]
fn test_mark_consumed_allows_new_tool_detection() {
    let mut p = StreamingToolParser::new();
    // First call is buffered and then consumed.
    p.process_chunk(&chunk(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        false,
    ));
    p.mark_tool_calls_consumed();
    // A second call arrives without any reset in between.
    p.process_chunk(&chunk(
        r#"{"tool": "shell", "args": {"command": "ls"}}"#,
        false,
    ));
    assert!(p.has_unexecuted_tool_call(),
        "New tool call after consumed position should be detected");
}
#[test]
fn test_bare_brace_not_incomplete() {
    // A lone opening brace does not match any tool-call pattern.
    let mut p = StreamingToolParser::new();
    p.process_chunk(&chunk(r#"{""#, false));
    assert!(!p.has_incomplete_tool_call(),
        "Bare {{ should not be detected as incomplete tool call");
}
#[test]
fn test_duplicate_tool_call_pattern() {
    // Problematic real-world shape: call, garbage, then the same call again.
    // The streaming parser returns the first call it finds; the duplicate
    // stays in the buffer and must surface as unexecuted.
    let mut p = StreamingToolParser::new();
    let payload = concat!(
        r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"#,
        "\n\n{\"\n\n",
        r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"#
    );
    let found = p.process_chunk(&chunk(payload, false));
    assert!(!found.is_empty(), "Should detect at least one tool call");
    assert!(p.has_unexecuted_tool_call(),
        "Should detect the duplicate as unexecuted");
}
#[test]
fn test_multiple_tool_calls_returned_on_finish() {
    // When the stream finishes, every buffered call must come back at once.
    let mut p = StreamingToolParser::new();
    let payload = concat!(
        r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
        "\nSome text\n",
        r#"{"tool": "shell", "args": {"command": "ls"}}"#
    );
    // Feed the content first, without ending the stream.
    p.process_chunk(&chunk(payload, false));
    // Then close the stream: both calls are expected back.
    let found = p.process_chunk(&chunk("", true));
    assert_eq!(found.len(), 2, "Should return both tool calls when stream finishes");
    assert_eq!(found[0].tool, "read_file");
    assert_eq!(found[1].tool, "shell");
}
#[test]
fn test_tool_pattern_extra_spaces() {
    // Generous whitespace around keys and values must still match.
    let mut p = StreamingToolParser::new();
    let found = p.process_chunk(&chunk(
        r#"{ "tool" : "read_file" , "args" : { "file_path" : "test.txt" } }"#,
        false,
    ));
    assert_eq!(found.len(), 1);
}
#[test]
fn test_tool_pattern_with_newlines() {
    // Known limitation: the pattern matching is deliberately strict, so JSON
    // with a newline between { and "tool" is not recognized as a tool call —
    // neither here nor by has_unexecuted_tool_call.
    let mut p = StreamingToolParser::new();
    let _found = p.process_chunk(&chunk(
        r#"{
"tool": "read_file",
"args": {
"file_path": "test.txt"
}
}"#,
        false,
    ));
}
// =============================================================================
// Test: Edge cases for has_message_like_keys validation
// =============================================================================
#[test]
fn test_normal_args_accepted() {
    // An ordinary tool call with plain argument keys must pass validation
    // and be detected as exactly one tool call.
    let mut parser = StreamingToolParser::new();
    let payload =
        r#"{"tool": "read_file", "args": {"file_path": "test.txt", "start": 0, "end": 100}}"#;
    let detected = parser.process_chunk(&chunk(payload, false));
    assert_eq!(detected.len(), 1);
}
#[test]
fn test_content_with_phrases_in_value_accepted() {
    // Conversational phrases such as "I'll" appearing inside argument VALUES
    // must not trip the message-like-key validation, which inspects keys only.
    let mut parser = StreamingToolParser::new();
    let payload = r#"{"tool": "write_file", "args": {"file_path": "test.txt", "content": "I'll help you with that. Let me explain."}}"#;
    let detected = parser.process_chunk(&chunk(payload, false));
    assert_eq!(detected.len(), 1);
}

View File

@@ -7,7 +7,7 @@ use std::path::{Path, PathBuf};
use std::process::Stdio; use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader}; use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command; use tokio::process::Command;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, warn};
use uuid::Uuid; use uuid::Uuid;
use crate::status::{FlockStatus, SegmentState, SegmentStatus}; use crate::status::{FlockStatus, SegmentState, SegmentStatus};
@@ -174,7 +174,7 @@ impl FlockMode {
/// Run flock mode /// Run flock mode
pub async fn run(&mut self) -> Result<()> { pub async fn run(&mut self) -> Result<()> {
info!( debug!(
"Starting flock mode with {} segments", "Starting flock mode with {} segments",
self.config.num_segments self.config.num_segments
); );
@@ -625,7 +625,7 @@ async fn run_segment(
status_file: PathBuf, status_file: PathBuf,
session_id: String, session_id: String,
) -> Result<SegmentStatus> { ) -> Result<SegmentStatus> {
info!( debug!(
"Starting segment {} in {}", "Starting segment {} in {}",
segment_id, segment_id,
segment_dir.display() segment_dir.display()

View File

@@ -3,7 +3,7 @@ use regex::Regex;
use std::io::Write; use std::io::Write;
use std::process::Command; use std::process::Command;
use tempfile::NamedTempFile; use tempfile::NamedTempFile;
use tracing::{debug, error, info}; use tracing::{debug, error};
/// Expand tilde (~) in a path to the user's home directory /// Expand tilde (~) in a path to the user's home directory
fn expand_tilde(path: &str) -> String { fn expand_tilde(path: &str) -> String {
@@ -72,7 +72,7 @@ impl CodeExecutor {
} }
for (language, code) in code_blocks { for (language, code) in code_blocks {
info!("Executing {} code", language); debug!("Executing {} code", language);
if show_code { if show_code {
results.push(format!("📋 Running {} code:", language)); results.push(format!("📋 Running {} code:", language));
@@ -459,7 +459,7 @@ pub fn is_cargo_llvm_cov_installed() -> Result<bool> {
/// Install llvm-tools-preview via rustup /// Install llvm-tools-preview via rustup
pub fn install_llvm_tools() -> Result<()> { pub fn install_llvm_tools() -> Result<()> {
info!("Installing llvm-tools-preview..."); debug!("Installing llvm-tools-preview...");
let output = Command::new("rustup") let output = Command::new("rustup")
.args(&["component", "add", "llvm-tools-preview"]) .args(&["component", "add", "llvm-tools-preview"])
.output()?; .output()?;
@@ -469,13 +469,13 @@ pub fn install_llvm_tools() -> Result<()> {
anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr); anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr);
} }
info!("✅ llvm-tools-preview installed successfully"); debug!("✅ llvm-tools-preview installed successfully");
Ok(()) Ok(())
} }
/// Install cargo-llvm-cov via cargo install /// Install cargo-llvm-cov via cargo install
pub fn install_cargo_llvm_cov() -> Result<()> { pub fn install_cargo_llvm_cov() -> Result<()> {
info!("Installing cargo-llvm-cov... (this may take a few minutes)"); debug!("Installing cargo-llvm-cov... (this may take a few minutes)");
let output = Command::new("cargo") let output = Command::new("cargo")
.args(&["install", "cargo-llvm-cov"]) .args(&["install", "cargo-llvm-cov"])
.output()?; .output()?;
@@ -485,7 +485,7 @@ pub fn install_cargo_llvm_cov() -> Result<()> {
anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr); anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr);
} }
info!("✅ cargo-llvm-cov installed successfully"); debug!("✅ cargo-llvm-cov installed successfully");
Ok(()) Ok(())
} }
@@ -496,20 +496,20 @@ pub fn ensure_coverage_tools_installed() -> Result<bool> {
// Check and install llvm-tools-preview // Check and install llvm-tools-preview
if !is_llvm_tools_installed()? { if !is_llvm_tools_installed()? {
info!("llvm-tools-preview not found, installing..."); debug!("llvm-tools-preview not found, installing...");
install_llvm_tools()?; install_llvm_tools()?;
already_installed = false; already_installed = false;
} else { } else {
info!("✅ llvm-tools-preview is already installed"); debug!("✅ llvm-tools-preview is already installed");
} }
// Check and install cargo-llvm-cov // Check and install cargo-llvm-cov
if !is_cargo_llvm_cov_installed()? { if !is_cargo_llvm_cov_installed()? {
info!("cargo-llvm-cov not found, installing..."); debug!("cargo-llvm-cov not found, installing...");
install_cargo_llvm_cov()?; install_cargo_llvm_cov()?;
already_installed = false; already_installed = false;
} else { } else {
info!("✅ cargo-llvm-cov is already installed"); debug!("✅ cargo-llvm-cov is already installed");
} }
Ok(already_installed) Ok(already_installed)

View File

@@ -328,7 +328,7 @@ impl AnthropicProvider {
tracing::debug!("create_request_body called: max_tokens={}, disable_thinking={}, thinking_budget_tokens={:?}", max_tokens, disable_thinking, self.thinking_budget_tokens); tracing::debug!("create_request_body called: max_tokens={}, disable_thinking={}, thinking_budget_tokens={:?}", max_tokens, disable_thinking, self.thinking_budget_tokens);
let thinking = if disable_thinking { let thinking = if disable_thinking {
tracing::info!( tracing::debug!(
"Thinking mode explicitly disabled for this request (max_tokens={})", "Thinking mode explicitly disabled for this request (max_tokens={})",
max_tokens max_tokens
); );

View File

@@ -64,7 +64,7 @@ use serde::{Deserialize, Serialize};
use std::time::Duration; use std::time::Duration;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream; use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info, warn}; use tracing::{debug, error, warn};
use crate::{ use crate::{
CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message, CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
@@ -166,7 +166,7 @@ impl DatabricksProvider {
.build() .build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;
info!( debug!(
"Initialized Databricks provider with model: {} on host: {}", "Initialized Databricks provider with model: {} on host: {}",
model, host model, host
); );
@@ -196,7 +196,7 @@ impl DatabricksProvider {
.build() .build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;
info!("Initialized Databricks provider '{}' with model: {} on host: {}", name, model, host); debug!("Initialized Databricks provider '{}' with model: {} on host: {}", name, model, host);
Ok(Self { Ok(Self {
client, client,
@@ -220,7 +220,7 @@ impl DatabricksProvider {
.build() .build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;
info!( debug!(
"Initialized Databricks provider with OAuth for model: {} on host: {}", "Initialized Databricks provider with OAuth for model: {} on host: {}",
model, host model, host
); );
@@ -249,7 +249,7 @@ impl DatabricksProvider {
.build() .build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?; .map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;
info!("Initialized Databricks provider '{}' with OAuth for model: {} on host: {}", name, model, host); debug!("Initialized Databricks provider '{}' with OAuth for model: {} on host: {}", name, model, host);
Ok(Self { Ok(Self {
client, client,
@@ -857,7 +857,7 @@ impl LLMProvider for DatabricksProvider {
if status == reqwest::StatusCode::FORBIDDEN if status == reqwest::StatusCode::FORBIDDEN
&& (error_text.contains("Invalid Token") || error_text.contains("invalid_token")) && (error_text.contains("Invalid Token") || error_text.contains("invalid_token"))
{ {
info!("Received 403 Invalid Token error, attempting to refresh OAuth token"); debug!("Received 403 Invalid Token error, attempting to refresh OAuth token");
// Try to refresh the token if we're using OAuth // Try to refresh the token if we're using OAuth
if let DatabricksAuth::OAuth { .. } = &provider_clone.auth { if let DatabricksAuth::OAuth { .. } = &provider_clone.auth {
@@ -867,7 +867,7 @@ impl LLMProvider for DatabricksProvider {
// Try to get a new token (will attempt refresh or new OAuth flow) // Try to get a new token (will attempt refresh or new OAuth flow)
match provider_clone.auth.get_token().await { match provider_clone.auth.get_token().await {
Ok(_new_token) => { Ok(_new_token) => {
info!("Successfully refreshed OAuth token, retrying request"); debug!("Successfully refreshed OAuth token, retrying request");
// Retry the request with the new token // Retry the request with the new token
response = provider_clone response = provider_clone
@@ -1038,7 +1038,7 @@ impl LLMProvider for DatabricksProvider {
if status == reqwest::StatusCode::FORBIDDEN if status == reqwest::StatusCode::FORBIDDEN
&& (error_text.contains("Invalid Token") || error_text.contains("invalid_token")) && (error_text.contains("Invalid Token") || error_text.contains("invalid_token"))
{ {
info!("Received 403 Invalid Token error, attempting to refresh OAuth token"); debug!("Received 403 Invalid Token error, attempting to refresh OAuth token");
// Try to refresh the token if we're using OAuth // Try to refresh the token if we're using OAuth
if let DatabricksAuth::OAuth { .. } = &provider_clone.auth { if let DatabricksAuth::OAuth { .. } = &provider_clone.auth {
@@ -1048,7 +1048,7 @@ impl LLMProvider for DatabricksProvider {
// Try to get a new token (will attempt refresh or new OAuth flow) // Try to get a new token (will attempt refresh or new OAuth flow)
match provider_clone.auth.get_token().await { match provider_clone.auth.get_token().await {
Ok(_new_token) => { Ok(_new_token) => {
info!("Successfully refreshed OAuth token, retrying streaming request"); debug!("Successfully refreshed OAuth token, retrying streaming request");
// Retry the request with the new token // Retry the request with the new token
response = provider_clone response = provider_clone

View File

@@ -12,7 +12,7 @@ use std::sync::Arc;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio::sync::Mutex; use tokio::sync::Mutex;
use tokio_stream::wrappers::ReceiverStream; use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info}; use tracing::{debug, error};
pub struct EmbeddedProvider { pub struct EmbeddedProvider {
session: Arc<Mutex<LlamaSession>>, session: Arc<Mutex<LlamaSession>>,
@@ -32,7 +32,7 @@ impl EmbeddedProvider {
gpu_layers: Option<u32>, gpu_layers: Option<u32>,
threads: Option<u32>, threads: Option<u32>,
) -> Result<Self> { ) -> Result<Self> {
info!("Loading embedded model from: {}", model_path); debug!("Loading embedded model from: {}", model_path);
// Expand tilde in path // Expand tilde in path
let expanded_path = shellexpand::tilde(&model_path); let expanded_path = shellexpand::tilde(&model_path);
@@ -41,7 +41,7 @@ impl EmbeddedProvider {
// If model doesn't exist and it's the default Qwen model, offer to download it // If model doesn't exist and it's the default Qwen model, offer to download it
if !model_path_buf.exists() { if !model_path_buf.exists() {
if model_path.contains("qwen2.5-7b-instruct-q3_k_m.gguf") { if model_path.contains("qwen2.5-7b-instruct-q3_k_m.gguf") {
info!("Model file not found. Attempting to download Qwen 2.5 7B model..."); debug!("Model file not found. Attempting to download Qwen 2.5 7B model...");
Self::download_qwen_model(&model_path_buf)?; Self::download_qwen_model(&model_path_buf)?;
} else { } else {
anyhow::bail!("Model file not found: {}", model_path_buf.display()); anyhow::bail!("Model file not found: {}", model_path_buf.display());
@@ -55,14 +55,14 @@ impl EmbeddedProvider {
if let Some(gpu_layers) = gpu_layers { if let Some(gpu_layers) = gpu_layers {
params.n_gpu_layers = gpu_layers; params.n_gpu_layers = gpu_layers;
info!("Using {} GPU layers", gpu_layers); debug!("Using {} GPU layers", gpu_layers);
} }
let context_size = context_length.unwrap_or(4096); let context_size = context_length.unwrap_or(4096);
info!("Using context length: {}", context_size); debug!("Using context length: {}", context_size);
// Load the model // Load the model
info!("Loading model..."); debug!("Loading model...");
let model = LlamaModel::load_from_file(model_path, params) let model = LlamaModel::load_from_file(model_path, params)
.map_err(|e| anyhow::anyhow!("Failed to load model: {}", e))?; .map_err(|e| anyhow::anyhow!("Failed to load model: {}", e))?;
@@ -79,7 +79,7 @@ impl EmbeddedProvider {
.create_session(session_params) .create_session(session_params)
.map_err(|e| anyhow::anyhow!("Failed to create session: {}", e))?; .map_err(|e| anyhow::anyhow!("Failed to create session: {}", e))?;
info!("Successfully loaded {} model", model_type); debug!("Successfully loaded {} model", model_type);
Ok(Self { Ok(Self {
session: Arc::new(Mutex::new(session)), session: Arc::new(Mutex::new(session)),
@@ -330,7 +330,7 @@ impl EmbeddedProvider {
Ok(inner_result) => match inner_result { Ok(inner_result) => match inner_result {
Ok(task_result) => match task_result { Ok(task_result) => match task_result {
Ok((text, token_count)) => { Ok((text, token_count)) => {
info!( debug!(
"Completed generation: {} tokens (dynamic limit was {})", "Completed generation: {} tokens (dynamic limit was {})",
token_count, dynamic_max_tokens token_count, dynamic_max_tokens
); );
@@ -448,9 +448,9 @@ impl EmbeddedProvider {
fs::create_dir_all(parent)?; fs::create_dir_all(parent)?;
} }
info!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)..."); debug!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)...");
info!("This is a one-time download that may take several minutes depending on your connection."); debug!("This is a one-time download that may take several minutes depending on your connection.");
info!("Downloading to: {}", model_path.display()); debug!("Downloading to: {}", model_path.display());
// Use curl with progress bar for download // Use curl with progress bar for download
let output = Command::new("curl") let output = Command::new("curl")
@@ -497,7 +497,7 @@ impl EmbeddedProvider {
); );
} }
info!("Successfully downloaded Qwen 2.5 7B model ({}MB)", size_mb); debug!("Successfully downloaded Qwen 2.5 7B model ({}MB)", size_mb);
Ok(()) Ok(())
} }
} }

View File

@@ -392,7 +392,7 @@ pub async fn get_oauth_token_async(
if let Err(e) = token_cache.save_token(&new_token) { if let Err(e) = token_cache.save_token(&new_token) {
tracing::warn!("Failed to save refreshed token: {}", e); tracing::warn!("Failed to save refreshed token: {}", e);
} }
tracing::info!("Successfully refreshed token"); tracing::debug!("Successfully refreshed token");
return Ok(new_token.access_token); return Ok(new_token.access_token);
} }
Err(e) => { Err(e) => {

View File

@@ -1,24 +0,0 @@
#!/bin/bash
# Sets up a throwaway environment under /tmp/g3-test-planning for manually
# exercising g3's planning mode. (Recorded here as a deleted file in the diff.)
set -e
# Clean logs first so a subsequent run starts from an empty log directory.
# NOTE(review): assumes the repo lives at ~/RustroverProjects/g3 — confirm.
rm -rf ~/RustroverProjects/g3/logs/*.log ~/RustroverProjects/g3/logs/*.txt 2>/dev/null || true
# Create test requirements file that planning mode will pick up.
mkdir -p /tmp/g3-test-planning/g3-plan
cat > /tmp/g3-test-planning/g3-plan/new_requirements.md <<'EOF'
Simple test task: List all .rs files in the src directory.
EOF
# Initialize git repo for test (planning mode requires git)
cd /tmp/g3-test-planning
if [ ! -d .git ]; then
git init
# Local identity so the initial commit succeeds on machines without git config.
git config user.name "Test User"
git config user.email "test@example.com"
git add .
# "|| true" tolerates an empty tree (nothing staged) without aborting under set -e.
git commit -m "Initial commit" || true
fi
echo "Test environment ready at /tmp/g3-test-planning"
echo "Run: cd /tmp && ~/RustroverProjects/g3/target/release/g3 --planning --codepath /tmp/g3-test-planning --no-git"