Merge branch 'dhanji/fix-auto-continue': Fix auto-continue and duplicate detection bugs
@@ -267,7 +267,7 @@ use std::path::Path;
use std::path::PathBuf;
use std::process::exit;
use tokio_util::sync::CancellationToken;
-use tracing::{error, info};
+use tracing::{debug, error};

use g3_core::error_handling::{classify_error, ErrorType, RecoverableError};
mod simple_output;

@@ -2693,7 +2693,7 @@ Remember: Be clear in your review and concise in your feedback. APPROVE iff the
extract_coach_feedback_from_logs(&coach_result, &coach_agent, &output)?;

// Log the size of the feedback for debugging
-info!(
+debug!(
"Coach feedback extracted: {} characters (from {} total)",
coach_feedback_text.len(),
coach_result.response.len()
@@ -68,6 +68,18 @@ fn main() {
dylib_dst.display()
);

+// Re-sign the dylib with ad-hoc signature to fix code signing issues on Apple Silicon
+// This is necessary because incremental compilation can invalidate signatures
+let codesign_status = Command::new("codesign")
+.args(&["-f", "-s", "-", dylib_dst.to_str().unwrap()])
+.status();
+
+if let Ok(status) = codesign_status {
+if !status.success() {
+println!("cargo:warning=Failed to codesign libVisionBridge.dylib (non-fatal)");
+}
+}
+
// Add rpath so the dylib can be found at runtime
println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path");
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
@@ -24,7 +24,7 @@ impl MacOSController {
pub fn new() -> Result<Self> {
let ocr = Box::new(DefaultOCR::new()?);
let ocr_name = ocr.name().to_string();
-tracing::info!("Initialized macOS controller with OCR engine: {}", ocr_name);
+tracing::debug!("Initialized macOS controller with OCR engine: {}", ocr_name);
Ok(Self {
ocr_engine: ocr,
ocr_name,

@@ -155,7 +155,7 @@ impl ComputerController for MacOSController {
// 1. At layer 0 (normal windows, not menu bar)
// 2. Have real bounds (width and height >= 100)
if layer == 0 && has_real_bounds {
-tracing::info!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer);
+tracing::debug!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer);
found_window_id = Some((id as u32, owner.clone()));
break;
} else {

@@ -178,7 +178,7 @@ impl ComputerController for MacOSController {
let (cg_window_id, matched_owner) = cg_window_id.ok_or_else(|| {
anyhow::anyhow!("Could not find window for application '{}'. Use list_windows to see available windows.", app_name)
})?;
-tracing::info!(
+tracing::debug!(
"Taking screenshot of window ID {} for app '{}'",
cg_window_id,
matched_owner

@@ -468,7 +468,7 @@ impl MacOSController {

// Only accept windows with real bounds (>= 100x100 pixels)
if w >= 100 && h >= 100 {
-tracing::info!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer);
+tracing::debug!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer);
return Ok((x, y, w, h));
} else {
tracing::debug!(
@@ -3,7 +3,7 @@ use crate::process::ProcessController;
use axum::{extract::State, http::StatusCode, Json};
use std::sync::Arc;
use tokio::sync::Mutex;
-use tracing::{error, info};
+use tracing::{debug, error};

pub type ControllerState = Arc<Mutex<ProcessController>>;

@@ -22,7 +22,7 @@ pub async fn kill_instance(

match controller.kill_process(pid) {
Ok(_) => {
-info!("Successfully killed process {}", pid);
+debug!("Successfully killed process {}", pid);
Ok(Json(serde_json::json!({
"status": "terminating"
})))

@@ -38,7 +38,7 @@ pub async fn restart_instance(
State(controller): State<ControllerState>,
axum::extract::Path(id): axum::extract::Path<String>,
) -> Result<Json<LaunchResponse>, StatusCode> {
-info!("Restarting instance: {}", id);
+debug!("Restarting instance: {}", id);

// Extract PID from instance ID (format: pid_timestamp)
let pid: u32 = id

@@ -81,7 +81,7 @@ pub async fn launch_instance(
State(controller): State<ControllerState>,
Json(request): Json<LaunchRequest>,
) -> Result<Json<LaunchResponse>, (StatusCode, Json<serde_json::Value>)> {
-info!("Launching new g3 instance: {:?}", request);
+debug!("Launching new g3 instance: {:?}", request);

// Validate binary path if provided
if let Some(ref binary_path) = request.g3_binary_path {

@@ -149,7 +149,7 @@ pub async fn launch_instance(
) {
Ok(pid) => {
let id = format!("{}_{}", pid, chrono::Utc::now().timestamp());
-info!("Successfully launched g3 instance with PID {}", pid);
+debug!("Successfully launched g3 instance with PID {}", pid);
Ok(Json(LaunchResponse {
id,
status: "starting".to_string(),
@@ -3,7 +3,7 @@ use axum::{http::StatusCode, Json};
use serde::{Deserialize, Serialize};
use std::os::unix::fs::PermissionsExt;
use std::path::PathBuf;
-use tracing::{error, info};
+use tracing::{debug, error};

pub async fn get_state() -> Result<Json<ConsoleState>, StatusCode> {
let state = ConsoleState::load();

@@ -15,7 +15,7 @@ pub async fn save_state(
) -> Result<Json<serde_json::Value>, StatusCode> {
match state.save() {
Ok(_) => {
-info!("Console state saved successfully");
+debug!("Console state saved successfully");
Ok(Json(serde_json::json!({
"status": "saved"
})))
@@ -1,7 +1,7 @@
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::PathBuf;
-use tracing::info;
+use tracing::debug;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConsoleState {

@@ -42,7 +42,7 @@ impl ConsoleState {

pub fn save(&self) -> anyhow::Result<()> {
let config_path = Self::config_path();
-info!("Saving console state to: {:?}", config_path);
+debug!("Saving console state to: {:?}", config_path);

// Create parent directory if it doesn't exist
if let Some(parent) = config_path.parent() {

@@ -51,7 +51,7 @@ impl ConsoleState {

let content = serde_json::to_string_pretty(self)?;
fs::write(&config_path, content)?;
-info!("Console state saved successfully to: {:?}", config_path);
+debug!("Console state saved successfully to: {:?}", config_path);

Ok(())
}
@@ -16,7 +16,7 @@ use std::sync::Arc;
use tokio::sync::Mutex;
use tower_http::cors::CorsLayer;
use tower_http::services::ServeDir;
-use tracing::{info, Level};
+use tracing::{debug, Level};
use tracing_subscriber;

#[derive(Parser, Debug)]

@@ -84,12 +84,12 @@ async fn main() -> anyhow::Result<()> {
.layer(CorsLayer::permissive());

let addr = format!("{}:{}", args.host, args.port);
-info!("Starting g3-console on http://{}", addr);
+debug!("Starting g3-console on http://{}", addr);

// Auto-open browser if requested
if args.open {
let url = format!("http://{}", addr);
-info!("Opening browser to {}", url);
+debug!("Opening browser to {}", url);
let _ = open::that(&url);
}
@@ -6,7 +6,7 @@ use std::path::PathBuf;
use std::process::{Command, Stdio};
use std::sync::Mutex;
use sysinfo::{Pid, Process, Signal, System};
-use tracing::{debug, info};
+use tracing::debug;

pub struct ProcessController {
system: System,

@@ -26,7 +26,7 @@ impl ProcessController {
self.system.refresh_processes();

if let Some(process) = self.system.process(sysinfo_pid) {
-info!("Killing process {} ({})", pid, process.name());
+debug!("Killing process {} ({})", pid, process.name());

// Try SIGTERM first
if process.kill_with(Signal::Term).is_some() {

@@ -107,7 +107,7 @@ impl ProcessController {
});
}

-info!("Launching g3: {:?}", cmd);
+debug!("Launching g3: {:?}", cmd);

// Spawn and wait for the intermediate process to exit
let mut child = cmd.spawn().context("Failed to spawn g3 process")?;

@@ -120,7 +120,7 @@ impl ProcessController {

// The actual g3 process is now running as orphan
// We need to scan for it by matching workspace and recent start time
-info!(
+debug!(
"Scanning for newly launched g3 process in workspace: {}",
workspace
);

@@ -171,7 +171,7 @@ impl ProcessController {
found
} else {
// If we couldn't find it, try one more refresh after a longer delay
-info!("Process not found on first scan, trying again...");
+debug!("Process not found on first scan, trying again...");
std::thread::sleep(std::time::Duration::from_millis(2000));
self.system.refresh_processes();

@@ -204,7 +204,7 @@ impl ProcessController {
retry_found.unwrap_or(intermediate_pid)
};

-info!("Launched g3 process with PID {}", pid);
+debug!("Launched g3 process with PID {}", pid);

// Store launch params for restart
let params = LaunchParams {
@@ -3,7 +3,7 @@ use anyhow::Result;
use chrono::{DateTime, Utc};
use std::path::PathBuf;
use sysinfo::{Pid, Process, System};
-use tracing::{debug, info, warn};
+use tracing::{debug, warn};

pub struct ProcessDetector {
system: System,

@@ -17,7 +17,7 @@ impl ProcessDetector {
}

pub fn detect_instances(&mut self) -> Result<Vec<Instance>> {
-info!("Scanning for g3 processes...");
+debug!("Scanning for g3 processes...");
// Refresh all processes to ensure we catch newly started ones
// Using refresh_all() instead of just refresh_processes() to ensure
// we get complete information about new processes

@@ -37,7 +37,7 @@ impl ProcessDetector {
}
}

-info!("Detected {} g3 instances", instances.len());
+debug!("Detected {} g3 instances", instances.len());
Ok(instances)
}
@@ -9,7 +9,7 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::time::Duration;
-use tracing::{error, info, warn};
+use tracing::{debug, error, warn};

/// Base delay for exponential backoff (in milliseconds)
const BASE_RETRY_DELAY_MS: u64 = 1000;

@@ -149,7 +149,7 @@ impl ErrorContext {
if let Err(e) = std::fs::write(&filename, json_content) {
error!("Failed to save error context to {:?}: {}", &filename, e);
} else {
-info!("Error details saved to: {:?}", &filename);
+debug!("Error details saved to: {:?}", &filename);
}
}
Err(e) => {

@@ -328,7 +328,7 @@ where
match operation().await {
Ok(result) => {
if attempt > 1 {
-info!(
+debug!(
"Operation '{}' succeeded after {} attempts",
operation_name, attempt
);

@@ -357,7 +357,7 @@ where

// Special handling for token limit errors
if matches!(recoverable_type, RecoverableError::TokenLimit) {
-info!("Token limit error detected. Consider triggering summarization.");
+debug!("Token limit error detected. Consider triggering summarization.");
}

tokio::time::sleep(delay).await;
@@ -12,7 +12,7 @@ use crate::{logs_dir, Agent, TaskResult};
use crate::ui_writer::UiWriter;
use serde_json::Value;
use std::path::PathBuf;
-use tracing::{debug, info, warn};
+use tracing::{debug, warn};

/// Result of feedback extraction with source information
#[derive(Debug, Clone)]

@@ -103,21 +103,21 @@ where
// Try session log first (most reliable)
if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_from_session_log(&session_id, config) {
-info!("Extracted coach feedback from session log: {} chars", feedback.len());
+debug!("Extracted coach feedback from session log: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::SessionLog);
}
}

// Try native tool call JSON parsing
if let Some(feedback) = try_extract_from_native_tool_call(&coach_result.response) {
-info!("Extracted coach feedback from native tool call: {} chars", feedback.len());
+debug!("Extracted coach feedback from native tool call: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::NativeToolCall);
}

// Try conversation history
if let Some(session_id) = agent.get_session_id() {
if let Some(feedback) = try_extract_from_conversation_history(&session_id, config) {
-info!("Extracted coach feedback from conversation history: {} chars", feedback.len());
+debug!("Extracted coach feedback from conversation history: {} chars", feedback.len());
return ExtractedFeedback::new(feedback, FeedbackSource::ConversationHistory);
}
}

@@ -125,7 +125,7 @@ where
// Try TaskResult parsing
let extracted = coach_result.extract_final_output();
if !extracted.is_empty() {
-info!("Extracted coach feedback from task result: {} chars", extracted.len());
+debug!("Extracted coach feedback from task result: {} chars", extracted.len());
return ExtractedFeedback::new(extracted, FeedbackSource::TaskResultResponse);
}
@@ -39,7 +39,7 @@ use serde_json::json;
use std::io::Write;
use std::time::{Duration, Instant};
use tokio_util::sync::CancellationToken;
-use tracing::{debug, error, info, warn};
+use tracing::{debug, error, warn};

/// Get the path to the todo.g3.md file.
///
@@ -246,13 +246,23 @@ pub enum StreamState {
Resuming,
}

/// Patterns used to detect JSON tool calls in text
/// These cover common whitespace variations in JSON formatting
const TOOL_CALL_PATTERNS: [&str; 4] = [
r#"{"tool":"#,
r#"{ "tool":"#,
r#"{"tool" :"#,
r#"{ "tool" :"#,
];

/// Modern streaming tool parser that properly handles native tool calls and SSE chunks
#[derive(Debug)]
pub struct StreamingToolParser {
/// Buffer for accumulating text content
text_buffer: String,
/// Buffer for accumulating native tool calls
native_tool_calls: Vec<g3_providers::ToolCall>,
/// Position in text_buffer up to which tool calls have been consumed/executed
/// This prevents has_unexecuted_tool_call() from returning true for already-executed tools
last_consumed_position: usize,
/// Whether we've received a message_stop event
message_stopped: bool,
/// Whether we're currently in a JSON tool call (for fallback parsing)
@@ -271,13 +281,58 @@ impl StreamingToolParser {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
text_buffer: String::new(),
|
||||
native_tool_calls: Vec::new(),
|
||||
last_consumed_position: 0,
|
||||
message_stopped: false,
|
||||
in_json_tool_call: false,
|
||||
json_tool_start: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the starting position of the last tool call pattern in the given text
|
||||
/// Returns None if no tool call pattern is found
|
||||
fn find_last_tool_call_start(text: &str) -> Option<usize> {
|
||||
let mut best_start: Option<usize> = None;
|
||||
for pattern in &TOOL_CALL_PATTERNS {
|
||||
if let Some(pos) = text.rfind(pattern) {
|
||||
if best_start.map_or(true, |best| pos > best) {
|
||||
best_start = Some(pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
best_start
|
||||
}
|
||||
|
||||
/// Find the starting position of the FIRST tool call pattern in the given text
|
||||
/// Returns None if no tool call pattern is found
|
||||
fn find_first_tool_call_start(text: &str) -> Option<usize> {
|
||||
let mut best_start: Option<usize> = None;
|
||||
for pattern in &TOOL_CALL_PATTERNS {
|
||||
if let Some(pos) = text.find(pattern) {
|
||||
if best_start.map_or(true, |best| pos < best) {
|
||||
best_start = Some(pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
best_start
|
||||
}
|
||||
|
||||
/// Validate that tool call args don't contain message-like content
|
||||
/// This detects malformed tool calls where agent messages got mixed into args
|
||||
fn has_message_like_keys(args: &serde_json::Map<String, serde_json::Value>) -> bool {
|
||||
args.keys().any(|key| {
|
||||
key.len() > 100
|
||||
|| key.contains('\n')
|
||||
|| key.contains("I'll")
|
||||
|| key.contains("Let me")
|
||||
|| key.contains("Here's")
|
||||
|| key.contains("I can")
|
||||
|| key.contains("I need")
|
||||
|| key.contains("First")
|
||||
|| key.contains("Now")
|
||||
|| key.contains("The ")
|
||||
})
|
||||
}
|
||||
|
||||
/// Process a streaming chunk and return completed tool calls if any
|
||||
pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec<ToolCall> {
|
||||
let mut completed_tools = Vec::new();
|
||||
@@ -308,10 +363,12 @@ impl StreamingToolParser {
|
||||
self.message_stopped = true;
|
||||
debug!("Message finished, processing accumulated tool calls");
|
||||
|
||||
// When stream finishes, do a final check for JSON tool calls in the accumulated buffer
|
||||
// When stream finishes, find ALL JSON tool calls in the accumulated buffer
|
||||
if completed_tools.is_empty() && !self.text_buffer.is_empty() {
|
||||
if let Some(json_tool) = self.try_parse_json_tool_call_from_buffer() {
|
||||
completed_tools.push(json_tool);
|
||||
let all_tools = self.try_parse_all_json_tool_calls_from_buffer();
|
||||
if !all_tools.is_empty() {
|
||||
debug!("Found {} JSON tool calls in buffer at stream end", all_tools.len());
|
||||
completed_tools.extend(all_tools);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -328,26 +385,12 @@ impl StreamingToolParser {
|
||||
|
||||
/// Fallback method to parse JSON tool calls from text content
|
||||
fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
|
||||
// Look for JSON tool call patterns
|
||||
let patterns = [
|
||||
r#"{"tool":"#,
|
||||
r#"{ "tool":"#,
|
||||
r#"{"tool" :"#,
|
||||
r#"{ "tool" :"#,
|
||||
];
|
||||
|
||||
// If we're not currently in a JSON tool call, look for the start
|
||||
if !self.in_json_tool_call {
|
||||
for pattern in &patterns {
|
||||
if let Some(pos) = self.text_buffer.rfind(pattern) {
|
||||
debug!(
|
||||
"Found JSON tool call pattern '{}' at position {}",
|
||||
pattern, pos
|
||||
);
|
||||
self.in_json_tool_call = true;
|
||||
self.json_tool_start = Some(pos);
|
||||
break;
|
||||
}
|
||||
if let Some(pos) = Self::find_last_tool_call_start(&self.text_buffer) {
|
||||
debug!("Found JSON tool call pattern at position {}", pos);
|
||||
self.in_json_tool_call = true;
|
||||
self.json_tool_start = Some(pos);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -356,83 +399,34 @@ impl StreamingToolParser {
|
||||
if let Some(start_pos) = self.json_tool_start {
|
||||
let json_text = &self.text_buffer[start_pos..];
|
||||
|
||||
// Try to find a complete JSON object
|
||||
let mut brace_count = 0;
|
||||
let mut in_string = false;
|
||||
let mut escape_next = false;
|
||||
// Try to find a complete JSON object using the shared helper
|
||||
if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
|
||||
let json_str = &json_text[..=end_pos];
|
||||
debug!("Attempting to parse JSON tool call: {}", json_str);
|
||||
|
||||
for (i, ch) in json_text.char_indices() {
|
||||
if escape_next {
|
||||
escape_next = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
'\\' => escape_next = true,
|
||||
'"' if !escape_next => in_string = !in_string,
|
||||
'{' if !in_string => brace_count += 1,
|
||||
'}' if !in_string => {
|
||||
brace_count -= 1;
|
||||
if brace_count == 0 {
|
||||
// Found complete JSON object
|
||||
let json_str = &json_text[..=i];
|
||||
debug!("Attempting to parse JSON tool call: {}", json_str);
|
||||
|
||||
// First try to parse as a ToolCall
|
||||
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
||||
// Validate that this is actually a proper tool call
|
||||
// The args should be a JSON object with reasonable keys
|
||||
if let Some(args_obj) = tool_call.args.as_object() {
|
||||
// Check if any key looks like it contains agent message content
|
||||
// This would indicate a malformed tool call where the message
|
||||
// got mixed into the args
|
||||
let has_message_like_key = args_obj.keys().any(|key| {
|
||||
key.len() > 100
|
||||
|| key.contains('\n')
|
||||
|| key.contains("I'll")
|
||||
|| key.contains("Let me")
|
||||
|| key.contains("Here's")
|
||||
|| key.contains("I can")
|
||||
|| key.contains("I need")
|
||||
|| key.contains("First")
|
||||
|| key.contains("Now")
|
||||
|| key.contains("The ")
|
||||
});
|
||||
|
||||
if has_message_like_key {
|
||||
debug!("Detected malformed tool call with message-like keys, skipping");
|
||||
// This looks like a malformed tool call, skip it
|
||||
self.in_json_tool_call = false;
|
||||
self.json_tool_start = None;
|
||||
break;
|
||||
}
|
||||
|
||||
// Also check if the values look reasonable
|
||||
// Tool arguments should typically be file paths, commands, or content
|
||||
// Not entire agent messages
|
||||
|
||||
debug!(
|
||||
"Successfully parsed valid JSON tool call: {:?}",
|
||||
tool_call
|
||||
);
|
||||
// Reset JSON parsing state
|
||||
self.in_json_tool_call = false;
|
||||
self.json_tool_start = None;
|
||||
return Some(tool_call);
|
||||
}
|
||||
// If args is not an object, skip this as invalid
|
||||
debug!("Tool call args is not an object, skipping");
|
||||
} else {
|
||||
debug!("Failed to parse JSON tool call: {}", json_str);
|
||||
// Reset and continue looking
|
||||
self.in_json_tool_call = false;
|
||||
self.json_tool_start = None;
|
||||
}
|
||||
break;
|
||||
// Try to parse as a ToolCall
|
||||
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
||||
// Validate that args is an object with reasonable keys
|
||||
if let Some(args_obj) = tool_call.args.as_object() {
|
||||
if Self::has_message_like_keys(args_obj) {
|
||||
debug!("Detected malformed tool call with message-like keys, skipping");
|
||||
self.in_json_tool_call = false;
|
||||
self.json_tool_start = None;
|
||||
return None;
|
||||
}
|
||||
|
||||
debug!("Successfully parsed valid JSON tool call: {:?}", tool_call);
|
||||
self.in_json_tool_call = false;
|
||||
self.json_tool_start = None;
|
||||
return Some(tool_call);
|
||||
}
|
||||
_ => {}
|
||||
debug!("Tool call args is not an object, skipping");
|
||||
} else {
|
||||
debug!("Failed to parse JSON tool call: {}", json_str);
|
||||
}
|
||||
// Reset and continue looking
|
||||
self.in_json_tool_call = false;
|
||||
self.json_tool_start = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -440,76 +434,45 @@ impl StreamingToolParser {
|
||||
None
|
||||
}
|
||||
|
||||
/// Parse JSON tool call from the accumulated text buffer (called when stream finishes)
|
||||
/// This is similar to try_parse_json_tool_call but operates on the full buffer
|
||||
fn try_parse_json_tool_call_from_buffer(&mut self) -> Option<ToolCall> {
|
||||
// Look for JSON tool call patterns in the accumulated buffer
|
||||
let patterns = [
|
||||
r#"{"tool":"#,
|
||||
r#"{ "tool":"#,
|
||||
r#"{"tool" :"#,
|
||||
r#"{ "tool" :"#,
|
||||
];
|
||||
|
||||
// Find the last occurrence of a tool call pattern (most likely to be complete)
|
||||
let mut best_start: Option<usize> = None;
|
||||
for pattern in &patterns {
|
||||
if let Some(pos) = self.text_buffer.rfind(pattern) {
|
||||
if best_start.map_or(true, |best| pos > best) {
|
||||
best_start = Some(pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(start_pos) = best_start {
|
||||
let json_text = &self.text_buffer[start_pos..];
|
||||
debug!("Found potential JSON tool call at position {}: {:?}", start_pos,
|
||||
if json_text.len() > 200 { &json_text[..200] } else { json_text });
|
||||
|
||||
// Try to find a complete JSON object
|
||||
let mut brace_count = 0;
|
||||
let mut in_string = false;
|
||||
let mut escape_next = false;
|
||||
|
||||
for (i, ch) in json_text.char_indices() {
|
||||
if escape_next {
|
||||
escape_next = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
'\\' => escape_next = true,
|
||||
'"' if !escape_next => in_string = !in_string,
|
||||
'{' if !in_string => brace_count += 1,
|
||||
'}' if !in_string => {
|
||||
brace_count -= 1;
|
||||
if brace_count == 0 {
|
||||
// Found complete JSON object
|
||||
let json_str = &json_text[..=i];
|
||||
debug!("Attempting to parse JSON tool call from buffer: {}", json_str);
|
||||
|
||||
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
||||
if let Some(args_obj) = tool_call.args.as_object() {
|
||||
// Validate - check for message-like keys
|
||||
let has_message_like_key = args_obj.keys().any(|key| {
|
||||
key.len() > 100 || key.contains('\n')
|
||||
});
|
||||
|
||||
if !has_message_like_key {
|
||||
debug!("Successfully parsed JSON tool call from buffer: {:?}", tool_call);
|
||||
return Some(tool_call);
|
||||
}
|
||||
}
|
||||
/// Parse ALL JSON tool calls from the accumulated text buffer
|
||||
/// This finds all complete tool calls, not just the last one
|
||||
fn try_parse_all_json_tool_calls_from_buffer(&self) -> Vec<ToolCall> {
|
||||
let mut tool_calls = Vec::new();
|
||||
let mut search_start = 0;
|
||||
|
||||
while search_start < self.text_buffer.len() {
|
||||
let search_text = &self.text_buffer[search_start..];
|
||||
|
||||
// Find the next tool call pattern
|
||||
if let Some(relative_pos) = Self::find_first_tool_call_start(search_text) {
|
||||
let abs_start = search_start + relative_pos;
|
||||
let json_text = &self.text_buffer[abs_start..];
|
||||
|
||||
// Try to find a complete JSON object
|
||||
if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
|
||||
let json_str = &json_text[..=end_pos];
|
||||
|
||||
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
||||
if let Some(args_obj) = tool_call.args.as_object() {
|
||||
if !Self::has_message_like_keys(args_obj) {
|
||||
debug!("Found tool call at position {}: {:?}", abs_start, tool_call.tool);
|
||||
tool_calls.push(tool_call);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
// Move past this tool call
|
||||
search_start = abs_start + end_pos + 1;
|
||||
} else {
|
||||
// Incomplete JSON, stop searching
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// No more tool call patterns found
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
|
||||
tool_calls
|
||||
}
|
||||
|
||||
/// Get the accumulated text content (excluding tool calls)
|
||||
@@ -531,10 +494,83 @@ impl StreamingToolParser {
|
||||
self.message_stopped
|
||||
}
|
||||
|
||||
/// Check if the text buffer contains an incomplete JSON tool call
|
||||
/// This detects cases where the LLM started emitting a tool call but the stream ended
|
||||
/// before the JSON was complete (truncated output)
|
||||
pub fn has_incomplete_tool_call(&self) -> bool {
|
||||
// Only check the unconsumed portion of the buffer
|
||||
let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
|
||||
if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) {
|
||||
let json_text = &unchecked_buffer[start_pos..];
|
||||
// If NOT complete, it's an incomplete tool call
|
||||
Self::find_complete_json_object_end(json_text).is_none()
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the text buffer contains an unexecuted tool call
|
||||
/// This detects cases where the LLM emitted a complete tool call JSON
|
||||
/// but it wasn't parsed/executed (e.g., due to parsing issues)
|
||||
pub fn has_unexecuted_tool_call(&self) -> bool {
|
||||
// Only check the unconsumed portion of the buffer
|
||||
let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
|
||||
if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) {
|
||||
let json_text = &unchecked_buffer[start_pos..];
|
||||
// If the JSON IS complete, it means there's an unexecuted tool call
|
||||
if let Some(json_end) = Self::find_complete_json_object_end(json_text) {
|
||||
let json_only = &json_text[..=json_end];
|
||||
return serde_json::from_str::<serde_json::Value>(json_only).is_ok();
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Mark all tool calls up to the current buffer position as consumed/executed
|
||||
/// This prevents has_unexecuted_tool_call() from returning true for already-executed tools
|
||||
pub fn mark_tool_calls_consumed(&mut self) {
|
||||
self.last_consumed_position = self.text_buffer.len();
|
||||
}
|
||||
|
||||
/// Find the end position (byte index) of a complete JSON object in the text
|
||||
/// Returns None if no complete JSON object is found
|
||||
/// Find the end position (byte index) of a complete JSON object in the text
|
||||
pub fn find_complete_json_object_end(text: &str) -> Option<usize> {
|
||||
let mut brace_count = 0;
|
||||
let mut in_string = false;
|
||||
let mut escape_next = false;
|
||||
let mut found_start = false;
|
||||
|
||||
for (i, ch) in text.char_indices() {
|
||||
if escape_next {
|
||||
escape_next = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
'\\' => escape_next = true,
|
||||
'"' if !escape_next => in_string = !in_string,
|
||||
'{' if !in_string => {
|
||||
brace_count += 1;
|
||||
found_start = true;
|
||||
}
|
||||
'}' if !in_string => {
|
||||
brace_count -= 1;
|
||||
if brace_count == 0 && found_start {
|
||||
return Some(i); // Return the byte index of the closing brace
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
None // No complete JSON object found
|
||||
}
|
||||
|
||||
/// Reset the parser state for a new message
|
||||
pub fn reset(&mut self) {
|
||||
self.text_buffer.clear();
|
||||
self.native_tool_calls.clear();
|
||||
self.last_consumed_position = 0;
|
||||
self.message_stopped = false;
|
||||
self.in_json_tool_call = false;
|
||||
self.json_tool_start = None;
|
||||
@@ -2743,7 +2779,7 @@ impl<W: UiWriter> Agent<W> {
/// Manually trigger context summarization regardless of context window size
/// Returns Ok(true) if summarization was successful, Ok(false) if it failed
pub async fn force_summarize(&mut self) -> Result<bool> {
-info!("Manual summarization triggered");
+debug!("Manual summarization triggered");

self.ui_writer.print_context_status(&format!(
"\n🗜️ Manual summarization requested (current usage: {}%)...",

@@ -2861,7 +2897,7 @@ impl<W: UiWriter> Agent<W> {

/// Manually trigger context thinning regardless of thresholds
pub fn force_thin(&mut self) -> String {
-info!("Manual context thinning triggered");
+debug!("Manual context thinning triggered");
let (message, chars_saved) = self.context_window.thin_context(self.session_id.as_deref());
self.thinning_events.push(chars_saved);
message

@@ -2870,7 +2906,7 @@ impl<W: UiWriter> Agent<W> {
/// Manually trigger context thinning for the ENTIRE context window
/// Unlike force_thin which only processes the first third, this processes all messages
pub fn force_thin_all(&mut self) -> String {
-info!("Manual full context skinnifying triggered");
+debug!("Manual full context skinnifying triggered");
let (message, chars_saved) = self.context_window.thin_context_all(self.session_id.as_deref());
self.thinning_events.push(chars_saved);
message

@@ -2879,7 +2915,7 @@ impl<W: UiWriter> Agent<W> {
/// Reload README.md and AGENTS.md and replace the first system message
/// Returns Ok(true) if README was found and reloaded, Ok(false) if no README was present initially
pub fn reload_readme(&mut self) -> Result<bool> {
-info!("Manual README reload triggered");
+debug!("Manual README reload triggered");

// Check if the second message in conversation history is a system message with README content
// (The first message should always be the system prompt)

@@ -2922,7 +2958,7 @@ impl<W: UiWriter> Agent<W> {
// Replace the second message (README) with the new content
if let Some(first_msg) = self.context_window.conversation_history.get_mut(1) {
first_msg.content = combined_content;
-info!("README content reloaded successfully");
+debug!("README content reloaded successfully");
Ok(true)
} else {
Ok(false)

@@ -3156,7 +3192,7 @@ impl<W: UiWriter> Agent<W> {
error!("Failed to clear continuation artifacts: {}", e);
}

-info!("Session cleared");
+debug!("Session cleared");
}

/// Restore session from a continuation artifact

@@ -3201,7 +3237,7 @@ impl<W: UiWriter> Agent<W> {
});
}

-info!("Restored full context from session log");
+debug!("Restored full context from session log");
return Ok(true);
}
}

@@ -3226,7 +3262,7 @@ impl<W: UiWriter> Agent<W> {
});
}

-info!("Restored session from summary");
+debug!("Restored session from summary");
Ok(false)
}

@@ -3836,7 +3872,7 @@ impl<W: UiWriter> Agent<W> {
match provider.stream(request.clone()).await {
Ok(stream) => {
if attempt > 1 {
-info!("Stream started successfully after {} attempts", attempt);
+debug!("Stream started successfully after {} attempts", attempt);
}
debug!("Stream started successfully");
debug!(

@@ -3886,9 +3922,9 @@ impl<W: UiWriter> Agent<W> {
let mut response_started = false;
let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts
-const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 2; // Limit auto-summary retries
+const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 5; // Limit auto-summary retries (increased from 2 for better recovery)
let mut final_output_called = false; // Track if final_output was called
-let mut executed_tools_in_session: std::collections::HashSet<String> = std::collections::HashSet::new(); // Track executed tools to prevent duplicates
+// Note: Session-level duplicate tracking was removed - we only prevent sequential duplicates (DUP IN CHUNK, DUP IN MSG)

// Check if we need to summarize before starting
if self.context_window.should_summarize() {
@@ -4189,77 +4225,51 @@ impl<W: UiWriter> Agent<W> {
|
||||
};
|
||||
|
||||
// De-duplicate tool calls and track duplicates
|
||||
let mut seen_in_chunk: Vec<ToolCall> = Vec::new();
|
||||
let mut last_tool_in_chunk: Option<ToolCall> = None;
|
||||
let mut deduplicated_tools: Vec<(ToolCall, Option<String>)> = Vec::new();
|
||||
|
||||
for tool_call in tools_to_process {
|
||||
let mut duplicate_type = None;
|
||||
|
||||
// Check for duplicates in current chunk
|
||||
if seen_in_chunk
|
||||
.iter()
|
||||
.any(|tc| are_duplicates(tc, &tool_call))
|
||||
{
|
||||
// Check for IMMEDIATELY SEQUENTIAL duplicate in current chunk
|
||||
// Only the immediately previous tool call counts as a duplicate
|
||||
if let Some(ref last_tool) = last_tool_in_chunk {
|
||||
if are_duplicates(last_tool, &tool_call) {
|
||||
duplicate_type = Some("DUP IN CHUNK".to_string());
|
||||
}
|
||||
} else {
|
||||
// Check for duplicate against previous message in history
|
||||
// Look at the last assistant message that contains tool calls
|
||||
// Check for IMMEDIATELY SEQUENTIAL duplicate against previous message
|
||||
// Only mark as duplicate if the LAST tool call in the previous message
|
||||
// matches AND there's no significant text after it
|
||||
let mut found_in_prev = false;
|
||||
for msg in self.context_window.conversation_history.iter().rev() {
|
||||
if matches!(msg.role, MessageRole::Assistant) {
|
||||
// Try to parse tool calls from the message content
|
||||
if msg.content.contains(r#"\"tool\""#) {
|
||||
// Simple JSON extraction for tool calls
|
||||
let content = &msg.content;
|
||||
let mut start_idx = 0;
|
||||
while let Some(tool_start) =
|
||||
content[start_idx..].find(r#"{\"tool\""#)
|
||||
{
|
||||
let tool_start = start_idx + tool_start;
|
||||
// Find the end of this JSON object
|
||||
let mut brace_count = 0;
|
||||
let mut in_string = false;
|
||||
let mut escape_next = false;
|
||||
let mut end_idx = tool_start;
|
||||
|
||||
for (i, ch) in content[tool_start..].char_indices()
|
||||
{
|
||||
if escape_next {
|
||||
escape_next = false;
|
||||
continue;
|
||||
}
|
||||
if ch == '\\' && in_string {
|
||||
escape_next = true;
|
||||
continue;
|
||||
}
|
||||
if ch == '"' && !escape_next {
|
||||
in_string = !in_string;
|
||||
}
|
||||
if !in_string {
|
||||
if ch == '{' {
|
||||
brace_count += 1;
|
||||
} else if ch == '}' {
|
||||
brace_count -= 1;
|
||||
if brace_count == 0 {
|
||||
end_idx = tool_start + i + 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if end_idx > tool_start {
|
||||
let tool_json = &content[tool_start..end_idx];
|
||||
if let Ok(prev_tool) =
|
||||
serde_json::from_str::<ToolCall>(tool_json)
|
||||
{
|
||||
// Find the LAST tool call in the message
|
||||
let content = &msg.content;
|
||||
|
||||
// Look for the last occurrence of a tool call pattern
|
||||
if let Some(last_tool_start) = content.rfind(r#"{"tool""#)
|
||||
.or_else(|| content.rfind(r#"{ "tool""#))
|
||||
{
|
||||
// Find the end of this JSON object
|
||||
if let Some(end_offset) = StreamingToolParser::find_complete_json_object_end(&content[last_tool_start..]) {
|
||||
let end_idx = last_tool_start + end_offset + 1;
|
||||
let tool_json = &content[last_tool_start..end_idx];
|
||||
|
||||
// Check if there's any non-whitespace text after this tool call
|
||||
let text_after = content[end_idx..].trim();
|
||||
let has_text_after = !text_after.is_empty();
|
||||
|
||||
// Only consider it a duplicate if:
|
||||
// 1. The tool call matches
|
||||
// 2. There's no text after it (it was the last thing in the message)
|
||||
if !has_text_after {
|
||||
if let Ok(prev_tool) = serde_json::from_str::<ToolCall>(tool_json) {
|
||||
if are_duplicates(&prev_tool, &tool_call) {
|
||||
found_in_prev = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
start_idx = end_idx;
|
||||
}
|
||||
}
|
||||
// Only check the most recent assistant message
|
||||
@@ -4272,13 +4282,8 @@ impl<W: UiWriter> Agent<W> {
|
||||
}
|
||||
}
|
||||
|
||||
// Add to seen list if not a duplicate in chunk
|
||||
if duplicate_type
|
||||
.as_ref()
|
||||
.map_or(true, |s| s != "DUP IN CHUNK")
|
||||
{
|
||||
seen_in_chunk.push(tool_call.clone());
|
||||
}
|
||||
// Track the last tool call for sequential duplicate detection
|
||||
last_tool_in_chunk = Some(tool_call.clone());
|
||||
|
||||
deduplicated_tools.push((tool_call, duplicate_type));
|
||||
}
|
||||
@@ -4286,22 +4291,11 @@ impl<W: UiWriter> Agent<W> {
|
||||
// Process each tool call
|
||||
for (tool_call, duplicate_type) in deduplicated_tools {
|
||||
debug!("Processing completed tool call: {:?}", tool_call);
|
||||
|
||||
// Mark that we detected a tool call - this prevents content from being printed
|
||||
// even if the tool is skipped as a duplicate
|
||||
tool_executed = true;
|
||||
|
||||
// Check if this tool was already executed in this session
|
||||
let tool_key = format!("{}:{}", tool_call.tool, serde_json::to_string(&tool_call.args).unwrap_or_default());
|
||||
if executed_tools_in_session.contains(&tool_key) {
|
||||
// Log the duplicate with red prefix
|
||||
let prefixed_tool_name = format!("🟥 {} DUP IN SESSION", tool_call.tool);
|
||||
let warning_msg = format!(
|
||||
"⚠️ Duplicate tool call detected (already executed in session): Skipping {} with args {}",
|
||||
tool_call.tool,
|
||||
serde_json::to_string(&tool_call.args).unwrap_or_else(|_| "<unserializable>".to_string())
|
||||
);
|
||||
let mut modified_tool_call = tool_call.clone();
|
||||
modified_tool_call.tool = prefixed_tool_name;
|
||||
debug!("{}", warning_msg);
|
||||
continue; // Skip execution of duplicate
|
||||
}
|
||||
|
||||
// If it's a duplicate, log it and return a warning
|
||||
if let Some(dup_type) = &duplicate_type {
|
||||
@@ -4639,15 +4633,25 @@ impl<W: UiWriter> Agent<W> {
|
||||
tool_executed = true;
|
||||
any_tool_executed = true; // Track across all iterations
|
||||
|
||||
// Add to executed tools set to prevent re-execution in this session
|
||||
executed_tools_in_session.insert(tool_key.clone());
|
||||
// Reset auto-continue attempts after successful tool execution
|
||||
// This gives the LLM fresh attempts since it's making progress
|
||||
auto_summary_attempts = 0;
|
||||
|
||||
|
||||
// Reset the JSON tool call filter state after each tool execution
|
||||
// This ensures the filter doesn't stay in suppression mode for subsequent streaming content
|
||||
self.ui_writer.reset_json_filter();
|
||||
|
||||
// Reset parser for next iteration - this clears the text buffer
|
||||
parser.reset();
|
||||
// Only reset parser if there are no more unexecuted tool calls in the buffer
|
||||
// This handles the case where the LLM emits multiple tool calls in one response
|
||||
if parser.has_unexecuted_tool_call() {
|
||||
debug!("Parser still has unexecuted tool calls, not resetting buffer");
|
||||
// Mark current tool as consumed so we don't re-detect it
|
||||
parser.mark_tool_calls_consumed();
|
||||
} else {
|
||||
// Reset parser for next iteration - this clears the text buffer
|
||||
parser.reset();
|
||||
}
|
||||
|
||||
// Clear current_response for next iteration to prevent buffered text
|
||||
// from being incorrectly displayed after tool execution
|
||||
@@ -4662,8 +4666,14 @@ impl<W: UiWriter> Agent<W> {
|
||||
} // End of for loop processing each tool call
|
||||
|
||||
// If we processed any tools in multiple mode, break out to start new stream
|
||||
// BUT only if there are no more unexecuted tool calls in the buffer
|
||||
if tool_executed && self.config.agent.allow_multiple_tool_calls {
|
||||
break;
|
||||
if parser.has_unexecuted_tool_call() {
|
||||
debug!("Tool executed but parser still has unexecuted tool calls, continuing to process");
|
||||
// Don't break - continue processing to pick up remaining tool calls
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// If no tool calls were completed, continue streaming normally
|
||||
@@ -4753,7 +4763,7 @@ impl<W: UiWriter> Agent<W> {
|
||||
" - Text buffer content: {:?}",
|
||||
parser.get_text_content()
|
||||
);
|
||||
error!(" - Native tool calls: {:?}", parser.native_tool_calls);
|
||||
error!(" - Has incomplete tool call: {}", parser.has_incomplete_tool_call());
|
||||
error!(" - Message stopped: {}", parser.is_message_stopped());
|
||||
error!(" - In JSON tool call: {}", parser.in_json_tool_call);
|
||||
error!(" - JSON tool start: {:?}", parser.json_tool_start);
|
||||
@@ -4831,6 +4841,17 @@ impl<W: UiWriter> Agent<W> {
|
||||
));
|
||||
}
|
||||
|
||||
// If tools were executed in previous iterations but final_output wasn't called,
|
||||
// break to let the outer loop's auto-continue logic handle it
|
||||
if any_tool_executed && !final_output_called {
|
||||
debug!("Tools were executed but final_output not called - breaking to auto-continue");
|
||||
// Add the text response to context before breaking
|
||||
if has_text_response && !current_response.trim().is_empty() {
|
||||
full_response = current_response.clone();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// Set full_response to current_response (don't append)
|
||||
// current_response already contains everything that was displayed
|
||||
// Don't set full_response here - it would duplicate the output
|
||||
@@ -4873,8 +4894,8 @@ impl<W: UiWriter> Agent<W> {
|
||||
);
|
||||
|
||||
error!("Error type: {}", std::any::type_name_of_val(&e));
|
||||
error!("Parser state at error: text_buffer_len={}, native_tool_calls={}, message_stopped={}",
|
||||
parser.text_buffer_len(), parser.native_tool_calls.len(), parser.is_message_stopped());
|
||||
error!("Parser state at error: text_buffer_len={}, has_incomplete={}, message_stopped={}",
|
||||
parser.text_buffer_len(), parser.has_incomplete_tool_call(), parser.is_message_stopped());
|
||||
|
||||
// Store the error for potential logging later
|
||||
_last_error = Some(error_details.clone());
|
||||
@@ -4893,7 +4914,7 @@ impl<W: UiWriter> Agent<W> {
|
||||
// If we have any content or tool calls, treat this as a graceful end
|
||||
if chunks_received > 0
|
||||
&& (!parser.get_text_content().is_empty()
|
||||
|| parser.native_tool_calls.len() > 0)
|
||||
|| parser.has_unexecuted_tool_call())
|
||||
{
|
||||
warn!("Stream terminated unexpectedly but we have content, continuing");
|
||||
break; // Break to process what we have
|
||||
@@ -4941,18 +4962,77 @@ impl<W: UiWriter> Agent<W> {
|
||||
|
||||
let has_response = !current_response.is_empty() || !full_response.is_empty();
|
||||
|
||||
// Check if the response is essentially empty (just whitespace or timing lines)
|
||||
// This detects cases where the LLM outputs nothing substantive
|
||||
let response_text = if !current_response.is_empty() {
|
||||
¤t_response
|
||||
} else {
|
||||
&full_response
|
||||
};
|
||||
let is_empty_response = response_text.trim().is_empty()
|
||||
|| response_text.lines().all(|line| line.trim().is_empty() || line.trim().starts_with("⏱️"));
|
||||
|
||||
// Check if there's an incomplete tool call in the buffer
|
||||
let has_incomplete_tool_call = parser.has_incomplete_tool_call();
|
||||
|
||||
// Check if there's a complete but unexecuted tool call in the buffer
|
||||
let has_unexecuted_tool_call = parser.has_unexecuted_tool_call();
|
||||
|
||||
// Log when we detect unexecuted or incomplete tool calls for debugging
|
||||
if has_incomplete_tool_call {
|
||||
debug!("Detected incomplete tool call in buffer (buffer_len={}, consumed_up_to={})",
|
||||
parser.text_buffer_len(), parser.text_buffer_len());
|
||||
}
|
||||
if has_unexecuted_tool_call {
|
||||
debug!("Detected unexecuted tool call in buffer - this may indicate a parsing issue");
|
||||
warn!("Unexecuted tool call detected in buffer after stream ended");
|
||||
}
|
||||
|
||||
// Auto-continue if tools were executed but final_output was never called
|
||||
// This is the simple rule: LLM must call final_output before returning control
|
||||
if any_tool_executed && !final_output_called {
|
||||
// OR if the LLM emitted an incomplete tool call (truncated JSON)
|
||||
// OR if the LLM emitted a complete tool call that wasn't executed
|
||||
// This ensures we don't return control when the LLM clearly intended to call a tool
|
||||
// Note: We removed the redundant condition (any_tool_executed && is_empty_response)
|
||||
// because it's already covered by (any_tool_executed && !final_output_called)
|
||||
let should_auto_continue = (any_tool_executed && !final_output_called)
|
||||
|| has_incomplete_tool_call
|
||||
|| has_unexecuted_tool_call;
|
||||
if should_auto_continue {
|
||||
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
|
||||
auto_summary_attempts += 1;
|
||||
warn!(
|
||||
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})",
|
||||
iteration_count, auto_summary_attempts
|
||||
);
|
||||
self.ui_writer.print_context_status(
|
||||
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
|
||||
);
|
||||
if has_incomplete_tool_call {
|
||||
warn!(
|
||||
"LLM emitted incomplete tool call ({} iterations, auto-continue attempt {}/{})",
|
||||
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
|
||||
);
|
||||
self.ui_writer.print_context_status(
|
||||
"\n🔄 Model emitted incomplete tool call. Auto-continuing...\n"
|
||||
);
|
||||
} else if has_unexecuted_tool_call {
|
||||
warn!(
|
||||
"LLM emitted unexecuted tool call ({} iterations, auto-continue attempt {}/{})",
|
||||
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
|
||||
);
|
||||
self.ui_writer.print_context_status(
|
||||
"\n🔄 Model emitted tool call that wasn't executed. Auto-continuing...\n"
|
||||
);
|
||||
} else if is_empty_response {
|
||||
warn!(
|
||||
"LLM emitted empty/trivial response ({} iterations, auto-continue attempt {}/{})",
|
||||
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
|
||||
);
|
||||
self.ui_writer.print_context_status(
|
||||
"\n🔄 Model emitted empty response. Auto-continuing...\n"
|
||||
);
|
||||
} else {
|
||||
warn!(
|
||||
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {}/{})",
|
||||
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
|
||||
);
|
||||
self.ui_writer.print_context_status(
|
||||
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
|
||||
);
|
||||
}
|
||||
|
||||
// Add any text response to context before prompting for continuation
|
||||
if has_response {
|
||||
@@ -4971,10 +5051,17 @@ impl<W: UiWriter> Agent<W> {
|
||||
}
|
||||
|
||||
// Add a follow-up message asking for continuation
|
||||
let continue_prompt = Message::new(
|
||||
MessageRole::User,
|
||||
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
|
||||
);
|
||||
let continue_prompt = if has_incomplete_tool_call {
|
||||
Message::new(
|
||||
MessageRole::User,
|
||||
"Your previous response was cut off mid-tool-call. Please complete the tool call and continue.".to_string(),
|
||||
)
|
||||
} else {
|
||||
Message::new(
|
||||
MessageRole::User,
|
||||
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
|
||||
)
|
||||
};
|
||||
self.context_window.add_message(continue_prompt);
|
||||
request.messages = self.context_window.conversation_history.clone();
|
||||
|
||||
@@ -4983,11 +5070,17 @@ impl<W: UiWriter> Agent<W> {
|
||||
} else {
|
||||
// Max attempts reached, give up gracefully
|
||||
warn!(
|
||||
"Max auto-continue attempts ({}) reached, returning without final_output",
|
||||
MAX_AUTO_SUMMARY_ATTEMPTS
|
||||
"Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, final_output_called={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}",
|
||||
MAX_AUTO_SUMMARY_ATTEMPTS,
|
||||
iteration_count,
|
||||
any_tool_executed,
|
||||
final_output_called,
|
||||
has_incomplete_tool_call,
|
||||
has_unexecuted_tool_call,
|
||||
is_empty_response
|
||||
);
|
||||
self.ui_writer.print_agent_response(
|
||||
"\n⚠️ The model stopped without calling final_output after multiple attempts.\n"
|
||||
&format!("\n⚠️ The model stopped without calling final_output after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS)
|
||||
);
|
||||
}
|
||||
} else if has_response {
|
||||
@@ -6434,7 +6527,7 @@ impl<W: UiWriter> Agent<W> {
let driver = mutex.into_inner();
match driver.quit().await {
Ok(_) => {
-info!("WebDriver session closed successfully");
+debug!("WebDriver session closed successfully");

// Kill the safaridriver process
if let Some(mut process) =

@@ -6443,7 +6536,7 @@ impl<W: UiWriter> Agent<W> {
if let Err(e) = process.kill().await {
warn!("Failed to kill safaridriver process: {}", e);
} else {
-info!("Safaridriver process terminated");
+debug!("Safaridriver process terminated");
}
}
@@ -10,7 +10,7 @@ use crate::ui_writer::UiWriter;
use crate::{Agent, DiscoveryOptions, TaskResult};
use anyhow::Result;
use std::time::Instant;
-use tracing::{info, warn};
+use tracing::{debug, warn};

/// Configuration for retry behavior
#[derive(Debug, Clone)]

@@ -142,7 +142,7 @@ where
match result {
Ok(task_result) => {
if retry_count > 0 {
-info!(
+debug!(
"{} task succeeded after {} retries (elapsed: {:?})",
config.role_name,
retry_count,

@@ -259,7 +259,7 @@ where
match operation().await {
Ok(result) => {
if retry_count > 0 {
-info!(
+debug!(
"Operation '{}' succeeded after {} retries",
operation_name, retry_count
);
@@ -6,7 +6,7 @@
use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
-use tracing::{debug, error, info, warn};
+use tracing::{debug, error, warn};

/// Version of the session continuation format
const CONTINUATION_VERSION: &str = "1.0";

@@ -89,7 +89,7 @@ pub fn save_continuation(continuation: &SessionContinuation) -> Result<PathBuf>
let json = serde_json::to_string_pretty(continuation)?;
std::fs::write(&latest_path, &json)?;

-info!("Saved session continuation to {:?}", latest_path);
+debug!("Saved session continuation to {:?}", latest_path);
Ok(latest_path)
}

@@ -113,7 +113,7 @@ pub fn load_continuation() -> Result<Option<SessionContinuation>> {
);
}

-info!("Loaded session continuation from {:?}", latest_path);
+debug!("Loaded session continuation from {:?}", latest_path);
Ok(Some(continuation))
}

@@ -131,7 +131,7 @@ pub fn clear_continuation() -> Result<()> {
debug!("Removed session file: {:?}", path);
}
}
-info!("Cleared session continuation artifacts");
+debug!("Cleared session continuation artifacts");
}

Ok(())
crates/g3-core/tests/auto_continue_test.rs (new file, 234 lines)
@@ -0,0 +1,234 @@
//! Tests for the auto-continue detection features
//!
//! These tests verify the logic used to detect when the LLM should auto-continue:
//! 1. Empty/trivial responses (just timing lines)
//! 2. Incomplete tool calls
//! 3. Unexecuted tool calls
//! 4. Missing final_output after tool execution

/// Helper function to check if a response is considered "empty" or trivial
/// This mirrors the logic in lib.rs for detecting empty responses
fn is_empty_response(response_text: &str) -> bool {
response_text.trim().is_empty()
|| response_text.lines().all(|line| {
line.trim().is_empty() || line.trim().starts_with("⏱️")
})
}

#[test]
fn test_empty_response_detection_empty_string() {
assert!(is_empty_response(""));
}

#[test]
fn test_empty_response_detection_whitespace_only() {
assert!(is_empty_response(" "));
assert!(is_empty_response("\n\n\n"));
assert!(is_empty_response(" \n \t \n "));
}

#[test]
fn test_empty_response_detection_timing_line_only() {
assert!(is_empty_response("⏱️ 43.0s | 💭 3.6s"));
assert!(is_empty_response(" ⏱️ 43.0s | 💭 3.6s "));
assert!(is_empty_response("\n⏱️ 43.0s | 💭 3.6s\n"));
}

#[test]
fn test_empty_response_detection_multiple_timing_lines() {
let response = "\n⏱️ 10.0s | 💭 1.0s\n\n⏱️ 20.0s | 💭 2.0s\n";
assert!(is_empty_response(response));
}

#[test]
fn test_empty_response_detection_timing_with_empty_lines() {
let response = "\n\n⏱️ 43.0s | 💭 3.6s\n\n";
assert!(is_empty_response(response));
}

#[test]
fn test_empty_response_detection_substantive_content() {
// These should NOT be considered empty
assert!(!is_empty_response("Hello, I will help you."));
assert!(!is_empty_response("Let me read that file."));
assert!(!is_empty_response("I've completed the task."));
}

#[test]
fn test_empty_response_detection_timing_with_text() {
// If there's any substantive text, it's not empty
let response = "⏱️ 43.0s | 💭 3.6s\nHere is the result.";
assert!(!is_empty_response(response));
}

#[test]
fn test_empty_response_detection_text_before_timing() {
let response = "Done!\n⏱️ 43.0s | 💭 3.6s";
assert!(!is_empty_response(response));
}
#[test]
|
||||
fn test_empty_response_detection_json_tool_call() {
|
||||
// A JSON tool call is definitely not empty
|
||||
let response = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
|
||||
assert!(!is_empty_response(response));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_response_detection_partial_json() {
|
||||
// Even partial JSON is not empty
|
||||
let response = r#"{"tool": "read_file", "args": {"#;
|
||||
assert!(!is_empty_response(response));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_response_detection_markdown() {
|
||||
// Markdown content is not empty
|
||||
let response = "# Summary\n\nI completed the task.";
|
||||
assert!(!is_empty_response(response));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_empty_response_detection_code_block() {
|
||||
// Code blocks are not empty
|
||||
let response = "```rust\nfn main() {}\n```";
|
||||
assert!(!is_empty_response(response));
|
||||
}
|
||||
|
||||
// Test the MAX_AUTO_SUMMARY_ATTEMPTS constant value
|
||||
// This is a compile-time check that the constant exists and has the expected value
|
||||
#[test]
|
||||
fn test_max_auto_summary_attempts_is_reasonable() {
|
||||
// The constant should be at least 3 to give the LLM a fair chance to recover
|
||||
// We can't directly access the constant from here, but we document the expected value
|
||||
// Current value: 5 (increased from 2)
|
||||
const EXPECTED_MIN_ATTEMPTS: usize = 3;
|
||||
const EXPECTED_MAX_ATTEMPTS: usize = 10;
|
||||
const CURRENT_VALUE: usize = 5;
|
||||
|
||||
assert!(CURRENT_VALUE >= EXPECTED_MIN_ATTEMPTS,
|
||||
"MAX_AUTO_SUMMARY_ATTEMPTS should be at least {} for reliable recovery", EXPECTED_MIN_ATTEMPTS);
|
||||
assert!(CURRENT_VALUE <= EXPECTED_MAX_ATTEMPTS,
|
||||
"MAX_AUTO_SUMMARY_ATTEMPTS should not exceed {} to avoid infinite loops", EXPECTED_MAX_ATTEMPTS);
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Auto-continue condition logic
|
||||
// =============================================================================
|
||||
|
||||
/// Simulates the should_auto_continue logic from lib.rs
|
||||
fn should_auto_continue(
|
||||
any_tool_executed: bool,
|
||||
final_output_called: bool,
|
||||
has_incomplete_tool_call: bool,
|
||||
has_unexecuted_tool_call: bool,
|
||||
is_empty_response: bool,
|
||||
) -> bool {
|
||||
(any_tool_executed && !final_output_called)
|
||||
|| has_incomplete_tool_call
|
||||
|| has_unexecuted_tool_call
|
||||
|| (any_tool_executed && is_empty_response)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_continue_after_tool_no_final_output() {
|
||||
// Tool executed but no final_output - should continue
|
||||
assert!(should_auto_continue(
|
||||
true, // any_tool_executed
|
||||
false, // final_output_called
|
||||
false, // has_incomplete_tool_call
|
||||
false, // has_unexecuted_tool_call
|
||||
false, // is_empty_response
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_continue_with_final_output() {
|
||||
// Tool executed AND final_output called - should NOT continue
|
||||
assert!(!should_auto_continue(
|
||||
true, // any_tool_executed
|
||||
true, // final_output_called
|
||||
false, // has_incomplete_tool_call
|
||||
false, // has_unexecuted_tool_call
|
||||
false, // is_empty_response
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_continue_incomplete_tool_call() {
|
||||
// Incomplete tool call - should continue regardless of other flags
|
||||
assert!(should_auto_continue(
|
||||
false, // any_tool_executed
|
||||
false, // final_output_called
|
||||
true, // has_incomplete_tool_call
|
||||
false, // has_unexecuted_tool_call
|
||||
false, // is_empty_response
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_continue_unexecuted_tool_call() {
|
||||
// Unexecuted tool call - should continue
|
||||
assert!(should_auto_continue(
|
||||
false, // any_tool_executed
|
||||
false, // final_output_called
|
||||
false, // has_incomplete_tool_call
|
||||
true, // has_unexecuted_tool_call
|
||||
false, // is_empty_response
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_continue_empty_response_after_tool() {
|
||||
// Empty response after tool execution - should continue
|
||||
assert!(should_auto_continue(
|
||||
true, // any_tool_executed
|
||||
false, // final_output_called
|
||||
false, // has_incomplete_tool_call
|
||||
false, // has_unexecuted_tool_call
|
||||
true, // is_empty_response
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_continue_empty_response_no_tool() {
|
||||
// Empty response but no tool executed - should NOT continue
|
||||
// (This is a normal case where LLM just didn't respond)
|
||||
assert!(!should_auto_continue(
|
||||
false, // any_tool_executed
|
||||
false, // final_output_called
|
||||
false, // has_incomplete_tool_call
|
||||
false, // has_unexecuted_tool_call
|
||||
true, // is_empty_response
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_auto_continue_no_conditions_met() {
|
||||
// No tools, no incomplete calls, substantive response - should NOT continue
|
||||
assert!(!should_auto_continue(
|
||||
false, // any_tool_executed
|
||||
false, // final_output_called
|
||||
false, // has_incomplete_tool_call
|
||||
false, // has_unexecuted_tool_call
|
||||
false, // is_empty_response
|
||||
));
|
||||
}
|
||||
|
||||
// =============================================================================
|
||||
// Test: Redundant condition detection
|
||||
// =============================================================================
|
||||
|
||||
#[test]
|
||||
fn test_redundant_empty_response_condition() {
|
||||
// This test documents that (any_tool_executed && is_empty_response) is redundant
|
||||
// when (any_tool_executed && !final_output_called) is already true
|
||||
|
||||
// Case: tool executed, no final_output, empty response
|
||||
let result_with_empty = should_auto_continue(true, false, false, false, true);
|
||||
let result_without_empty = should_auto_continue(true, false, false, false, false);
|
||||
|
||||
// Both should be true because (any_tool_executed && !final_output_called) is true
|
||||
assert_eq!(result_with_empty, result_without_empty,
|
||||
"The is_empty_response condition is redundant when any_tool_executed && !final_output_called");
|
||||
}
|
||||
231 crates/g3-core/tests/duplicate_detection_test.rs Normal file
@@ -0,0 +1,231 @@
//! Tests for tool call duplicate detection
//!
//! These tests ensure that duplicate detection only catches IMMEDIATELY SEQUENTIAL
//! duplicates, not legitimate re-use of tools with text between them.

use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;

// Helper to create a chunk
fn chunk(content: &str, finished: bool) -> CompletionChunk {
CompletionChunk {
content: content.to_string(),
finished,
tool_calls: None,
usage: None,
}
}

// =============================================================================
// Test: find_complete_json_object_end helper function
// =============================================================================

#[test]
fn test_find_complete_json_object_end_simple() {
let json = r#"{"tool": "test", "args": {}}"#;
let end = StreamingToolParser::find_complete_json_object_end(json);
assert!(end.is_some(), "Should find end of complete JSON");
assert_eq!(end.unwrap(), json.len() - 1, "End should be at last character");
}

#[test]
fn test_find_complete_json_object_end_nested() {
let json = r#"{"tool": "test", "args": {"nested": {"deep": true}}}"#;
let end = StreamingToolParser::find_complete_json_object_end(json);
assert!(end.is_some(), "Should find end of nested JSON");
assert_eq!(end.unwrap(), json.len() - 1);
}

#[test]
fn test_find_complete_json_object_end_with_trailing_text() {
let json = r#"{"tool": "test", "args": {}} some text after"#;
let end = StreamingToolParser::find_complete_json_object_end(json);
assert!(end.is_some(), "Should find end of JSON even with trailing text");
// The end should be at the closing brace, not at the end of the string
let end_pos = end.unwrap();
assert_eq!(&json[end_pos..end_pos+1], "}", "End should be at closing brace");
}

#[test]
fn test_find_complete_json_object_end_incomplete() {
let json = r#"{"tool": "test", "args": {"#;
let end = StreamingToolParser::find_complete_json_object_end(json);
assert!(end.is_none(), "Should return None for incomplete JSON");
}

// =============================================================================
// Test: Tool calls separated by text should NOT be duplicates
// =============================================================================

#[test]
fn test_same_tool_with_text_between_not_duplicate() {
// This tests the scenario where the LLM calls the same tool twice
// but with explanatory text between them - this should NOT be a duplicate
let mut parser = StreamingToolParser::new();

// First tool call
let content1 = r#"{"tool": "todo_read", "args": {}}"#;
let tools1 = parser.process_chunk(&chunk(content1, true));
assert_eq!(tools1.len(), 1, "First tool call should be detected");
assert_eq!(tools1[0].tool, "todo_read");

// Reset parser (simulating what happens after tool execution)
parser.reset();

// Some text, then the same tool call again
let content2 = r#"Now let me check the TODO again to verify my changes.
{"tool": "todo_read", "args": {}}"#;
let tools2 = parser.process_chunk(&chunk(content2, true));

// The second tool call should be detected - it's NOT a duplicate
// because there's text before it
assert_eq!(tools2.len(), 1, "Second tool call should be detected (not a duplicate)");
assert_eq!(tools2[0].tool, "todo_read");
}

#[test]
fn test_different_tools_back_to_back_not_duplicate() {
let mut parser = StreamingToolParser::new();

// Two different tool calls back to back
let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "shell", "args": {"command": "ls"}}"#;

let tools = parser.process_chunk(&chunk(content, true));

// Both should be detected - they're different tools
assert!(tools.len() >= 1, "Should detect tool calls");
// At minimum, the first one should be detected
assert_eq!(tools[0].tool, "read_file");
}

#[test]
fn test_same_tool_different_args_not_duplicate() {
let mut parser = StreamingToolParser::new();

// Same tool but different arguments - NOT a duplicate
let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "read_file", "args": {"file_path": "b.txt"}}"#;

let tools = parser.process_chunk(&chunk(content, true));

// Both should be detected - different args means not a duplicate
assert!(tools.len() >= 1, "Should detect tool calls");
}

// =============================================================================
// Test: Immediately sequential identical tool calls ARE duplicates
// =============================================================================

#[test]
fn test_identical_tool_calls_back_to_back_are_duplicates() {
// This tests the scenario where the LLM stutters and outputs
// the exact same tool call twice in a row - this IS a duplicate
let mut parser = StreamingToolParser::new();

// Two identical tool calls with no text between them
let content = r#"{"tool": "todo_read", "args": {}}
{"tool": "todo_read", "args": {}}"#;

let tools = parser.process_chunk(&chunk(content, true));

// The parser should detect both, but the deduplication logic
// (which happens at a higher level in the agent) should mark
// the second one as a duplicate
// Here we just verify both are parsed
assert!(tools.len() >= 1, "Should detect at least one tool call");
}

// =============================================================================
// Test: Text content detection for duplicate logic
// =============================================================================

#[test]
fn test_has_text_after_tool_call() {
// Helper test to verify we can detect text after a tool call
let content_with_text = r#"{"tool": "test", "args": {}} Some text after"#;
let content_without_text = r#"{"tool": "test", "args": {}}"#;
let content_with_whitespace_only = r#"{"tool": "test", "args": {}}
"#;

// Find the end of the JSON in each case
let end1 = StreamingToolParser::find_complete_json_object_end(content_with_text).unwrap();
let end2 = StreamingToolParser::find_complete_json_object_end(content_without_text).unwrap();
let end3 = StreamingToolParser::find_complete_json_object_end(content_with_whitespace_only).unwrap();

// Check what's after the JSON
let after1 = content_with_text[end1 + 1..].trim();
let after2 = content_without_text.get(end2 + 1..).unwrap_or("").trim();
let after3 = content_with_whitespace_only[end3 + 1..].trim();

assert!(!after1.is_empty(), "Should have text after tool call");
assert!(after2.is_empty(), "Should have no text after tool call");
assert!(after3.is_empty(), "Whitespace-only should count as no text");
}

// =============================================================================
// Test: Edge cases
// =============================================================================

#[test]
fn test_tool_call_with_newlines_between() {
let mut parser = StreamingToolParser::new();

// Tool calls separated by multiple newlines (but no actual text)
// This SHOULD be considered a duplicate since there's no meaningful text
let content = r#"{"tool": "todo_read", "args": {}}


{"tool": "todo_read", "args": {}}"#;

let tools = parser.process_chunk(&chunk(content, true));
assert!(tools.len() >= 1, "Should detect at least one tool call");
}

#[test]
fn test_tool_call_with_whitespace_text_between() {
let mut parser = StreamingToolParser::new();

// Tool calls separated by text that's just whitespace and punctuation
// The key is whether there's "meaningful" text
let content = r#"{"tool": "todo_read", "args": {}}
OK, now again:
{"tool": "todo_read", "args": {}}"#;

let tools = parser.process_chunk(&chunk(content, true));

// Both should be detected since there's text between them
assert!(tools.len() >= 1, "Should detect tool calls");
}

#[test]
fn test_tool_call_in_middle_of_text() {
let mut parser = StreamingToolParser::new();

// Tool call surrounded by text
let content = r#"Let me read the file first.
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Now I'll analyze the contents."#;

let tools = parser.process_chunk(&chunk(content, true));
assert_eq!(tools.len(), 1, "Should detect the tool call");
assert_eq!(tools[0].tool, "read_file");
}

#[test]
fn test_multiple_different_tool_calls_with_text() {
let mut parser = StreamingToolParser::new();

// Multiple different tool calls with text between each
let content = r#"First, let me read the file:
{"tool": "read_file", "args": {"file_path": "test.txt"}}
Now let me check the TODO:
{"tool": "todo_read", "args": {}}
Finally, let me run a command:
{"tool": "shell", "args": {"command": "ls"}}"#;

let tools = parser.process_chunk(&chunk(content, true));

// All three should be detected
assert!(tools.len() >= 1, "Should detect tool calls");
}
182 crates/g3-core/tests/incomplete_tool_call_test.rs Normal file
@@ -0,0 +1,182 @@
//! Tests for the incomplete tool call detection feature

use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;

#[test]
fn test_has_incomplete_tool_call_empty_buffer() {
let parser = StreamingToolParser::new();
assert!(!parser.has_incomplete_tool_call());
}

#[test]
fn test_has_incomplete_tool_call_no_tool_pattern() {
let mut parser = StreamingToolParser::new();
let chunk = CompletionChunk {
content: "Hello, I will help you with that.".to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
assert!(!parser.has_incomplete_tool_call());
}

#[test]
fn test_has_incomplete_tool_call_complete_tool_call() {
let mut parser = StreamingToolParser::new();
let chunk = CompletionChunk {
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Complete JSON should NOT be detected as incomplete
assert!(!parser.has_incomplete_tool_call());
}

#[test]
fn test_has_incomplete_tool_call_truncated_tool_call() {
let mut parser = StreamingToolParser::new();
// Simulate truncated tool call - missing closing braces
let chunk = CompletionChunk {
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Incomplete JSON should be detected
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_has_incomplete_tool_call_truncated_mid_value() {
let mut parser = StreamingToolParser::new();
// Simulate truncated tool call - cut off mid-value
let chunk = CompletionChunk {
content: r#"{"tool": "shell", "args": {"command": "cargo test --package g3-cli --test filter_json_test test_streaming -- --test-threads=1 2>&1 | tail"#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Incomplete JSON should be detected
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_has_incomplete_tool_call_with_text_before() {
let mut parser = StreamingToolParser::new();
// Text before the incomplete tool call
let chunk = CompletionChunk {
content: r#"Let me read that file for you.

{"tool": "read_file", "args": {"file_path":"#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Incomplete JSON should be detected
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_has_incomplete_tool_call_malformed_like_trace() {
let mut parser = StreamingToolParser::new();
// This simulates a truncated tool call where the stream ended mid-JSON
// The actual trace showed truncated output, not malformed characters
let chunk = CompletionChunk {
content: r#"{"tool": "read_file", "args": {"file_path":"src/engine.rkt""#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Truncated JSON (missing closing braces) should be detected as incomplete
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_has_unexecuted_tool_call_empty_buffer() {
let parser = StreamingToolParser::new();
assert!(!parser.has_unexecuted_tool_call());
}

#[test]
fn test_has_unexecuted_tool_call_no_tool_pattern() {
let mut parser = StreamingToolParser::new();
let chunk = CompletionChunk {
content: "Hello, I will help you with that.".to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
assert!(!parser.has_unexecuted_tool_call());
}

#[test]
fn test_has_unexecuted_tool_call_complete_tool_call() {
let mut parser = StreamingToolParser::new();
let chunk = CompletionChunk {
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Complete JSON tool call that wasn't executed should be detected
assert!(parser.has_unexecuted_tool_call());
}

#[test]
fn test_has_unexecuted_tool_call_incomplete_json() {
let mut parser = StreamingToolParser::new();
let chunk = CompletionChunk {
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Incomplete JSON should NOT be detected as unexecuted (it's incomplete, not unexecuted)
assert!(!parser.has_unexecuted_tool_call());
}

#[test]
fn test_has_unexecuted_tool_call_with_trailing_text() {
let mut parser = StreamingToolParser::new();
// Complete JSON tool call followed by trailing text
let chunk = CompletionChunk {
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}

Some trailing text after the JSON"#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Complete JSON tool call should be detected even with trailing text
assert!(parser.has_unexecuted_tool_call());
}

#[test]
fn test_has_unexecuted_tool_call_with_text_before_and_after() {
let mut parser = StreamingToolParser::new();
let chunk = CompletionChunk {
content: r#"Let me read that file.

{"tool": "shell", "args": {"command": "ls -la"}}

I'll execute this command now."#.to_string(),
finished: false,
tool_calls: None,
usage: None,
};
parser.process_chunk(&chunk);
// Complete JSON tool call should be detected
assert!(parser.has_unexecuted_tool_call());
}
545 crates/g3-core/tests/streaming_parser_test.rs Normal file
@@ -0,0 +1,545 @@
//! Comprehensive tests for StreamingToolParser
//!
//! Tests cover:
//! - Multiple tool calls in one response
//! - Tool call followed by text
//! - Incomplete tool calls at various truncation points
//! - Parser reset behavior
//! - Buffer management

use g3_core::StreamingToolParser;
use g3_providers::CompletionChunk;

// Helper to create a chunk
fn chunk(content: &str, finished: bool) -> CompletionChunk {
CompletionChunk {
content: content.to_string(),
finished,
tool_calls: None,
usage: None,
}
}

// =============================================================================
// Test: Multiple tool calls in one response
// =============================================================================

#[test]
fn test_multiple_tool_calls_in_single_chunk() {
let mut parser = StreamingToolParser::new();

// Two complete tool calls in one chunk
let content = r#"Let me do two things:
{"tool": "read_file", "args": {"file_path": "a.txt"}}
Now the second:
{"tool": "shell", "args": {"command": "ls"}}"#;

let tools = parser.process_chunk(&chunk(content, false));

// Should detect at least one tool call
// Note: Current implementation may only return the first one found
assert!(!tools.is_empty(), "Should detect at least one tool call");
}

#[test]
fn test_multiple_tool_calls_across_chunks() {
let mut parser = StreamingToolParser::new();

// First tool call
let tools1 = parser.process_chunk(&chunk(
r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
false
));
assert_eq!(tools1.len(), 1, "First tool call should be detected");
assert_eq!(tools1[0].tool, "read_file");

// Reset parser (simulating what happens after tool execution)
parser.reset();

// Second tool call
let tools2 = parser.process_chunk(&chunk(
r#"{"tool": "shell", "args": {"command": "ls"}}"#,
false
));
assert_eq!(tools2.len(), 1, "Second tool call should be detected");
assert_eq!(tools2[0].tool, "shell");
}

#[test]
fn test_first_complete_second_incomplete() {
let mut parser = StreamingToolParser::new();

// First complete, second incomplete
let content = r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}
{"tool": "shell", "args": {"command": "ls"#;

let tools = parser.process_chunk(&chunk(content, false));

// Should detect the first complete tool call
// The incomplete one should be detected by has_incomplete_tool_call
assert!(parser.has_incomplete_tool_call(), "Should detect incomplete tool call");
}

// =============================================================================
// Test: Tool call followed by text
// =============================================================================

#[test]
fn test_tool_call_with_trailing_text() {
let mut parser = StreamingToolParser::new();

let content = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}

Here is the content of the file..."#;

let tools = parser.process_chunk(&chunk(content, false));

assert_eq!(tools.len(), 1);
assert_eq!(tools[0].tool, "read_file");

// The trailing text should be in the buffer
let text = parser.get_text_content();
assert!(text.contains("Here is the content"), "Trailing text should be preserved");
}

#[test]
fn test_text_before_tool_call() {
let mut parser = StreamingToolParser::new();

let content = r#"Let me read that file for you.

{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;

let tools = parser.process_chunk(&chunk(content, false));

assert_eq!(tools.len(), 1);
assert_eq!(tools[0].tool, "read_file");

// The leading text should be in the buffer
let text = parser.get_text_content();
assert!(text.contains("Let me read"), "Leading text should be preserved");
}

#[test]
fn test_text_before_and_after_tool_call() {
let mut parser = StreamingToolParser::new();

let content = r#"I'll check the file.

{"tool": "read_file", "args": {"file_path": "test.txt"}}

Done checking."#;

let tools = parser.process_chunk(&chunk(content, false));

assert_eq!(tools.len(), 1);

let text = parser.get_text_content();
assert!(text.contains("I'll check"), "Leading text should be preserved");
assert!(text.contains("Done checking"), "Trailing text should be preserved");
}

// =============================================================================
// Test: Incomplete tool calls at various truncation points
// =============================================================================

#[test]
fn test_incomplete_after_tool_key() {
let mut parser = StreamingToolParser::new();
parser.process_chunk(&chunk(r#"{"tool":"#, false));
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_incomplete_after_tool_name() {
let mut parser = StreamingToolParser::new();
parser.process_chunk(&chunk(r#"{"tool": "read_file""#, false));
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_incomplete_after_args_key() {
let mut parser = StreamingToolParser::new();
parser.process_chunk(&chunk(r#"{"tool": "read_file", "args":"#, false));
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_incomplete_mid_args_object() {
let mut parser = StreamingToolParser::new();
parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path":"#, false));
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_incomplete_mid_string_value() {
let mut parser = StreamingToolParser::new();
parser.process_chunk(&chunk(r#"{"tool": "shell", "args": {"command": "ls -la /very/long/path"#, false));
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_incomplete_missing_final_brace() {
let mut parser = StreamingToolParser::new();
parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}"#, false));
assert!(parser.has_incomplete_tool_call());
}

#[test]
fn test_complete_tool_call_not_incomplete() {
let mut parser = StreamingToolParser::new();
parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, false));
assert!(!parser.has_incomplete_tool_call(), "Complete tool call should not be marked incomplete");
}

// =============================================================================
// Test: Parser reset behavior
// =============================================================================

#[test]
fn test_reset_clears_buffer() {
let mut parser = StreamingToolParser::new();

parser.process_chunk(&chunk("Some content here", false));
assert!(!parser.get_text_content().is_empty());

parser.reset();

assert!(parser.get_text_content().is_empty(), "Buffer should be empty after reset");
}

#[test]
fn test_reset_clears_incomplete_state() {
let mut parser = StreamingToolParser::new();

// Create incomplete tool call
parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"#, false));
assert!(parser.has_incomplete_tool_call());

parser.reset();

assert!(!parser.has_incomplete_tool_call(), "Incomplete state should be cleared after reset");
}

#[test]
fn test_reset_clears_unexecuted_state() {
let mut parser = StreamingToolParser::new();

// Create complete but "unexecuted" tool call
parser.process_chunk(&chunk(r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#, false));
assert!(parser.has_unexecuted_tool_call());

parser.reset();

assert!(!parser.has_unexecuted_tool_call(), "Unexecuted state should be cleared after reset");
}

#[test]
fn test_reset_allows_new_tool_calls() {
let mut parser = StreamingToolParser::new();

// First tool call
let tools1 = parser.process_chunk(&chunk(
r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
false
));
assert_eq!(tools1.len(), 1);

parser.reset();

// Second tool call after reset
let tools2 = parser.process_chunk(&chunk(
r#"{"tool": "shell", "args": {"command": "ls"}}"#,
false
));
assert_eq!(tools2.len(), 1);
assert_eq!(tools2[0].tool, "shell");
}

// =============================================================================
// Test: Buffer management and edge cases
// =============================================================================

#[test]
fn test_streaming_chunks_accumulate() {
let mut parser = StreamingToolParser::new();

// Stream in chunks
parser.process_chunk(&chunk(r#"{"tool": "#, false));
parser.process_chunk(&chunk(r#""read_file", "#, false));
parser.process_chunk(&chunk(r#""args": {"file_path": "#, false));
parser.process_chunk(&chunk(r#""test.txt"}}"#, false));

// Should have accumulated the complete tool call
let text = parser.get_text_content();
assert!(text.contains(r#""tool""#));
assert!(text.contains(r#""read_file""#));
}

#[test]
fn test_finished_chunk_triggers_final_parse() {
let mut parser = StreamingToolParser::new();

// Incomplete chunks
parser.process_chunk(&chunk(r#"{"tool": "read_file", "#, false));
let tools1 = parser.process_chunk(&chunk(r#""args": {"file_path": "test.txt"}}"#, false));

// Tool should be detected before finished
assert!(!tools1.is_empty() || !parser.has_unexecuted_tool_call(),
"Tool should be detected during streaming or marked as unexecuted");
}

#[test]
fn test_empty_chunks_ignored() {
let mut parser = StreamingToolParser::new();

parser.process_chunk(&chunk("", false));
parser.process_chunk(&chunk("", false));

assert!(parser.get_text_content().is_empty());
assert!(!parser.has_incomplete_tool_call());
assert!(!parser.has_unexecuted_tool_call());
}

#[test]
fn test_whitespace_only_chunks() {
let mut parser = StreamingToolParser::new();

parser.process_chunk(&chunk(" \n\t ", false));

assert!(!parser.has_incomplete_tool_call());
assert!(!parser.has_unexecuted_tool_call());
}

#[test]
fn test_json_with_escaped_quotes() {
let mut parser = StreamingToolParser::new();

let content = r#"{"tool": "shell", "args": {"command": "echo \"hello\""}}"#;
let tools = parser.process_chunk(&chunk(content, false));

assert_eq!(tools.len(), 1);
assert_eq!(tools[0].tool, "shell");
}

#[test]
fn test_json_with_escaped_backslashes() {
let mut parser = StreamingToolParser::new();

let content = r#"{"tool": "write_file", "args": {"file_path": "C:\\Users\\test.txt", "content": "data"}}"#;
let tools = parser.process_chunk(&chunk(content, false));

assert_eq!(tools.len(), 1);
assert_eq!(tools[0].tool, "write_file");
}

#[test]
fn test_json_with_nested_braces_in_string() {
let mut parser = StreamingToolParser::new();

let content = r#"{"tool": "write_file", "args": {"content": "{\"nested\": {\"json\": true}}"}}"#;
let tools = parser.process_chunk(&chunk(content, false));

assert_eq!(tools.len(), 1);
assert_eq!(tools[0].tool, "write_file");
}

#[test]
fn test_text_buffer_length_tracking() {
let mut parser = StreamingToolParser::new();

parser.process_chunk(&chunk("Hello", false));
assert_eq!(parser.text_buffer_len(), 5);

parser.process_chunk(&chunk(" World", false));
assert_eq!(parser.text_buffer_len(), 11);

parser.reset();
assert_eq!(parser.text_buffer_len(), 0);
}

#[test]
fn test_message_stopped_flag() {
let mut parser = StreamingToolParser::new();

parser.process_chunk(&chunk("Hello", false));
assert!(!parser.is_message_stopped());

parser.process_chunk(&chunk(" World", true));
assert!(parser.is_message_stopped());

parser.reset();
assert!(!parser.is_message_stopped());
}

// =============================================================================
// Test: Tool call pattern variations
// =============================================================================

#[test]
fn test_tool_pattern_no_spaces() {
let mut parser = StreamingToolParser::new();
let tools = parser.process_chunk(&chunk(
r#"{"tool":"read_file","args":{"file_path":"test.txt"}}"#,
false
));
assert_eq!(tools.len(), 1);
}

// =============================================================================
// Test: mark_tool_calls_consumed functionality
// =============================================================================

#[test]
fn test_mark_consumed_clears_unexecuted_state() {
let mut parser = StreamingToolParser::new();

// Add a complete tool call
parser.process_chunk(&chunk(
r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#,
false
));

// Should be detected as unexecuted
assert!(parser.has_unexecuted_tool_call());

// Mark as consumed
parser.mark_tool_calls_consumed();

// Should no longer be detected as unexecuted
assert!(!parser.has_unexecuted_tool_call(),
"After marking consumed, has_unexecuted_tool_call should return false");
}

#[test]
fn test_mark_consumed_allows_new_tool_detection() {
let mut parser = StreamingToolParser::new();

// First tool call
parser.process_chunk(&chunk(
r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
false
));
parser.mark_tool_calls_consumed();

// Second tool call (without reset)
parser.process_chunk(&chunk(
r#"{"tool": "shell", "args": {"command": "ls"}}"#,
false
));

// Should detect the new unexecuted tool call
assert!(parser.has_unexecuted_tool_call(),
"New tool call after consumed position should be detected");
}

#[test]
fn test_bare_brace_not_incomplete() {
let mut parser = StreamingToolParser::new();

// Just a bare opening brace - not a tool call pattern
parser.process_chunk(&chunk(r#"{""#, false));

// Should NOT be detected as incomplete because it doesn't match tool patterns
assert!(!parser.has_incomplete_tool_call(),
"Bare {{ should not be detected as incomplete tool call");
}

#[test]
fn test_duplicate_tool_call_pattern() {
let mut parser = StreamingToolParser::new();

// Simulate the problematic pattern: tool call, garbage, duplicate tool call
let content = concat!(
r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"#,
"\n\n{\"\n\n",
r#"{"tool": "str_replace", "args": {"file_path": "test.rs", "diff": "test"}}"#
);
let tools = parser.process_chunk(&chunk(content, false));

// Should detect at least one tool call
assert!(!tools.is_empty(), "Should detect at least one tool call");

// After processing, there should be an unexecuted tool call (the duplicate)
// because the parser only returns the first one it finds during streaming
assert!(parser.has_unexecuted_tool_call(),
"Should detect the duplicate as unexecuted");
}

#[test]
fn test_multiple_tool_calls_returned_on_finish() {
let mut parser = StreamingToolParser::new();

// Two complete tool calls in one chunk, with finished=true
let content = concat!(
r#"{"tool": "read_file", "args": {"file_path": "a.txt"}}"#,
"\nSome text\n",
r#"{"tool": "shell", "args": {"command": "ls"}}"#
);

// First, add content without finishing
parser.process_chunk(&chunk(content, false));

// Now finish the stream - should return ALL tool calls
let tools = parser.process_chunk(&chunk("", true));

// Should return both tool calls
assert_eq!(tools.len(), 2, "Should return both tool calls when stream finishes");
assert_eq!(tools[0].tool, "read_file");
assert_eq!(tools[1].tool, "shell");
}

#[test]
fn test_tool_pattern_extra_spaces() {
let mut parser = StreamingToolParser::new();
let tools = parser.process_chunk(&chunk(
r#"{ "tool" : "read_file" , "args" : { "file_path" : "test.txt" } }"#,
false
));
assert_eq!(tools.len(), 1);
}

#[test]
fn test_tool_pattern_with_newlines() {
let mut parser = StreamingToolParser::new();
// Note: The parser looks for specific patterns like {"tool": or { "tool":
// Multi-line JSON with newlines between { and "tool" won't match
// This is expected behavior - the pattern matching is intentionally strict
let _tools = parser.process_chunk(&chunk(
r#"{
"tool": "read_file",
"args": {
"file_path": "test.txt"
}
}"#,
false
));
// This won't be detected as a tool call due to newline after {
// The has_unexecuted_tool_call check also won't find it
// This is a known limitation of the pattern-based detection
}

// =============================================================================
// Test: Edge cases for has_message_like_keys validation
// =============================================================================

#[test]
fn test_normal_args_accepted() {
let mut parser = StreamingToolParser::new();
let tools = parser.process_chunk(&chunk(
r#"{"tool": "read_file", "args": {"file_path": "test.txt", "start": 0, "end": 100}}"#,
false
));
assert_eq!(tools.len(), 1);
}

#[test]
fn test_content_with_phrases_in_value_accepted() {
let mut parser = StreamingToolParser::new();
// Phrases like "I'll" in VALUES should be fine (only keys are checked)
let tools = parser.process_chunk(&chunk(
r#"{"tool": "write_file", "args": {"file_path": "test.txt", "content": "I'll help you with that. Let me explain."}}"#,
false
));
assert_eq!(tools.len(), 1);
}
@@ -7,7 +7,7 @@ use std::path::{Path, PathBuf};
use std::process::Stdio;
use tokio::io::{AsyncBufReadExt, BufReader};
use tokio::process::Command;
use tracing::{debug, error, info, warn};
use tracing::{debug, error, warn};
use uuid::Uuid;

use crate::status::{FlockStatus, SegmentState, SegmentStatus};
@@ -174,7 +174,7 @@ impl FlockMode {

/// Run flock mode
pub async fn run(&mut self) -> Result<()> {
info!(
debug!(
"Starting flock mode with {} segments",
self.config.num_segments
);
@@ -625,7 +625,7 @@ async fn run_segment(
status_file: PathBuf,
session_id: String,
) -> Result<SegmentStatus> {
info!(
debug!(
"Starting segment {} in {}",
segment_id,
segment_dir.display()

@@ -3,7 +3,7 @@ use regex::Regex;
use std::io::Write;
use std::process::Command;
use tempfile::NamedTempFile;
use tracing::{debug, error, info};
use tracing::{debug, error};

/// Expand tilde (~) in a path to the user's home directory
fn expand_tilde(path: &str) -> String {
@@ -72,7 +72,7 @@ impl CodeExecutor {
}

for (language, code) in code_blocks {
info!("Executing {} code", language);
debug!("Executing {} code", language);

if show_code {
results.push(format!("📋 Running {} code:", language));
@@ -459,7 +459,7 @@ pub fn is_cargo_llvm_cov_installed() -> Result<bool> {

/// Install llvm-tools-preview via rustup
pub fn install_llvm_tools() -> Result<()> {
info!("Installing llvm-tools-preview...");
debug!("Installing llvm-tools-preview...");
let output = Command::new("rustup")
.args(&["component", "add", "llvm-tools-preview"])
.output()?;
@@ -469,13 +469,13 @@ pub fn install_llvm_tools() -> Result<()> {
anyhow::bail!("Failed to install llvm-tools-preview: {}", stderr);
}

info!("✅ llvm-tools-preview installed successfully");
debug!("✅ llvm-tools-preview installed successfully");
Ok(())
}

/// Install cargo-llvm-cov via cargo install
pub fn install_cargo_llvm_cov() -> Result<()> {
info!("Installing cargo-llvm-cov... (this may take a few minutes)");
debug!("Installing cargo-llvm-cov... (this may take a few minutes)");
let output = Command::new("cargo")
.args(&["install", "cargo-llvm-cov"])
.output()?;
@@ -485,7 +485,7 @@ pub fn install_cargo_llvm_cov() -> Result<()> {
anyhow::bail!("Failed to install cargo-llvm-cov: {}", stderr);
}

info!("✅ cargo-llvm-cov installed successfully");
debug!("✅ cargo-llvm-cov installed successfully");
Ok(())
}

@@ -496,20 +496,20 @@ pub fn ensure_coverage_tools_installed() -> Result<bool> {

// Check and install llvm-tools-preview
if !is_llvm_tools_installed()? {
info!("llvm-tools-preview not found, installing...");
debug!("llvm-tools-preview not found, installing...");
install_llvm_tools()?;
already_installed = false;
} else {
info!("✅ llvm-tools-preview is already installed");
debug!("✅ llvm-tools-preview is already installed");
}

// Check and install cargo-llvm-cov
if !is_cargo_llvm_cov_installed()? {
info!("cargo-llvm-cov not found, installing...");
debug!("cargo-llvm-cov not found, installing...");
install_cargo_llvm_cov()?;
already_installed = false;
} else {
info!("✅ cargo-llvm-cov is already installed");
debug!("✅ cargo-llvm-cov is already installed");
}

Ok(already_installed)

@@ -328,7 +328,7 @@ impl AnthropicProvider {
tracing::debug!("create_request_body called: max_tokens={}, disable_thinking={}, thinking_budget_tokens={:?}", max_tokens, disable_thinking, self.thinking_budget_tokens);

let thinking = if disable_thinking {
tracing::info!(
tracing::debug!(
"Thinking mode explicitly disabled for this request (max_tokens={})",
max_tokens
);

@@ -64,7 +64,7 @@ use serde::{Deserialize, Serialize};
use std::time::Duration;
use tokio::sync::mpsc;
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info, warn};
use tracing::{debug, error, warn};

use crate::{
CompletionChunk, CompletionRequest, CompletionResponse, CompletionStream, LLMProvider, Message,
@@ -166,7 +166,7 @@ impl DatabricksProvider {
.build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;

info!(
debug!(
"Initialized Databricks provider with model: {} on host: {}",
model, host
);
@@ -196,7 +196,7 @@ impl DatabricksProvider {
.build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;

info!("Initialized Databricks provider '{}' with model: {} on host: {}", name, model, host);
debug!("Initialized Databricks provider '{}' with model: {} on host: {}", name, model, host);

Ok(Self {
client,
@@ -220,7 +220,7 @@ impl DatabricksProvider {
.build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;

info!(
debug!(
"Initialized Databricks provider with OAuth for model: {} on host: {}",
model, host
);
@@ -249,7 +249,7 @@ impl DatabricksProvider {
.build()
.map_err(|e| anyhow!("Failed to create HTTP client: {}", e))?;

info!("Initialized Databricks provider '{}' with OAuth for model: {} on host: {}", name, model, host);
debug!("Initialized Databricks provider '{}' with OAuth for model: {} on host: {}", name, model, host);

Ok(Self {
client,
@@ -857,7 +857,7 @@ impl LLMProvider for DatabricksProvider {
if status == reqwest::StatusCode::FORBIDDEN
&& (error_text.contains("Invalid Token") || error_text.contains("invalid_token"))
{
info!("Received 403 Invalid Token error, attempting to refresh OAuth token");
debug!("Received 403 Invalid Token error, attempting to refresh OAuth token");

// Try to refresh the token if we're using OAuth
if let DatabricksAuth::OAuth { .. } = &provider_clone.auth {
@@ -867,7 +867,7 @@ impl LLMProvider for DatabricksProvider {
// Try to get a new token (will attempt refresh or new OAuth flow)
match provider_clone.auth.get_token().await {
Ok(_new_token) => {
info!("Successfully refreshed OAuth token, retrying request");
debug!("Successfully refreshed OAuth token, retrying request");

// Retry the request with the new token
response = provider_clone
@@ -1038,7 +1038,7 @@ impl LLMProvider for DatabricksProvider {
if status == reqwest::StatusCode::FORBIDDEN
&& (error_text.contains("Invalid Token") || error_text.contains("invalid_token"))
{
info!("Received 403 Invalid Token error, attempting to refresh OAuth token");
debug!("Received 403 Invalid Token error, attempting to refresh OAuth token");

// Try to refresh the token if we're using OAuth
if let DatabricksAuth::OAuth { .. } = &provider_clone.auth {
@@ -1048,7 +1048,7 @@ impl LLMProvider for DatabricksProvider {
// Try to get a new token (will attempt refresh or new OAuth flow)
match provider_clone.auth.get_token().await {
Ok(_new_token) => {
info!("Successfully refreshed OAuth token, retrying streaming request");
debug!("Successfully refreshed OAuth token, retrying streaming request");

// Retry the request with the new token
response = provider_clone

@@ -12,7 +12,7 @@ use std::sync::Arc;
use tokio::sync::mpsc;
use tokio::sync::Mutex;
use tokio_stream::wrappers::ReceiverStream;
use tracing::{debug, error, info};
use tracing::{debug, error};

pub struct EmbeddedProvider {
session: Arc<Mutex<LlamaSession>>,
@@ -32,7 +32,7 @@ impl EmbeddedProvider {
gpu_layers: Option<u32>,
threads: Option<u32>,
) -> Result<Self> {
info!("Loading embedded model from: {}", model_path);
debug!("Loading embedded model from: {}", model_path);

// Expand tilde in path
let expanded_path = shellexpand::tilde(&model_path);
@@ -41,7 +41,7 @@ impl EmbeddedProvider {
// If model doesn't exist and it's the default Qwen model, offer to download it
if !model_path_buf.exists() {
if model_path.contains("qwen2.5-7b-instruct-q3_k_m.gguf") {
info!("Model file not found. Attempting to download Qwen 2.5 7B model...");
debug!("Model file not found. Attempting to download Qwen 2.5 7B model...");
Self::download_qwen_model(&model_path_buf)?;
} else {
anyhow::bail!("Model file not found: {}", model_path_buf.display());
@@ -55,14 +55,14 @@ impl EmbeddedProvider {

if let Some(gpu_layers) = gpu_layers {
params.n_gpu_layers = gpu_layers;
info!("Using {} GPU layers", gpu_layers);
debug!("Using {} GPU layers", gpu_layers);
}

let context_size = context_length.unwrap_or(4096);
info!("Using context length: {}", context_size);
debug!("Using context length: {}", context_size);

// Load the model
info!("Loading model...");
debug!("Loading model...");
let model = LlamaModel::load_from_file(model_path, params)
.map_err(|e| anyhow::anyhow!("Failed to load model: {}", e))?;

@@ -79,7 +79,7 @@ impl EmbeddedProvider {
.create_session(session_params)
.map_err(|e| anyhow::anyhow!("Failed to create session: {}", e))?;

info!("Successfully loaded {} model", model_type);
debug!("Successfully loaded {} model", model_type);

Ok(Self {
session: Arc::new(Mutex::new(session)),
@@ -330,7 +330,7 @@ impl EmbeddedProvider {
Ok(inner_result) => match inner_result {
Ok(task_result) => match task_result {
Ok((text, token_count)) => {
info!(
debug!(
"Completed generation: {} tokens (dynamic limit was {})",
token_count, dynamic_max_tokens
);
@@ -448,9 +448,9 @@ impl EmbeddedProvider {
fs::create_dir_all(parent)?;
}

info!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)...");
info!("This is a one-time download that may take several minutes depending on your connection.");
info!("Downloading to: {}", model_path.display());
debug!("Downloading Qwen 2.5 7B model (Q3_K_M quantization, ~3.5GB)...");
debug!("This is a one-time download that may take several minutes depending on your connection.");
debug!("Downloading to: {}", model_path.display());

// Use curl with progress bar for download
let output = Command::new("curl")
@@ -497,7 +497,7 @@ impl EmbeddedProvider {
);
}

info!("Successfully downloaded Qwen 2.5 7B model ({}MB)", size_mb);
debug!("Successfully downloaded Qwen 2.5 7B model ({}MB)", size_mb);
Ok(())
}
}

@@ -392,7 +392,7 @@ pub async fn get_oauth_token_async(
if let Err(e) = token_cache.save_token(&new_token) {
tracing::warn!("Failed to save refreshed token: {}", e);
}
tracing::info!("Successfully refreshed token");
tracing::debug!("Successfully refreshed token");
return Ok(new_token.access_token);
}
Err(e) => {

@@ -1,24 +0,0 @@
#!/bin/bash
set -e

# Clean logs first
rm -rf ~/RustroverProjects/g3/logs/*.log ~/RustroverProjects/g3/logs/*.txt 2>/dev/null || true

# Create test requirements file
mkdir -p /tmp/g3-test-planning/g3-plan
cat > /tmp/g3-test-planning/g3-plan/new_requirements.md <<'EOF'
Simple test task: List all .rs files in the src directory.
EOF

# Initialize git repo for test (planning mode requires git)
cd /tmp/g3-test-planning
if [ ! -d .git ]; then
git init
git config user.name "Test User"
git config user.email "test@example.com"
git add .
git commit -m "Initial commit" || true
fi

echo "Test environment ready at /tmp/g3-test-planning"
echo "Run: cd /tmp && ~/RustroverProjects/g3/target/release/g3 --planning --codepath /tmp/g3-test-planning --no-git"