attempt 2
This commit is contained in:
@@ -246,6 +246,15 @@ pub enum StreamState {
|
|||||||
Resuming,
|
Resuming,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Patterns used to detect JSON tool calls in text
|
||||||
|
/// These cover common whitespace variations in JSON formatting
|
||||||
|
const TOOL_CALL_PATTERNS: [&str; 4] = [
|
||||||
|
r#"{"tool":"#,
|
||||||
|
r#"{ "tool":"#,
|
||||||
|
r#"{"tool" :"#,
|
||||||
|
r#"{ "tool" :"#,
|
||||||
|
];
|
||||||
|
|
||||||
/// Modern streaming tool parser that properly handles native tool calls and SSE chunks
|
/// Modern streaming tool parser that properly handles native tool calls and SSE chunks
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct StreamingToolParser {
|
pub struct StreamingToolParser {
|
||||||
@@ -278,6 +287,37 @@ impl StreamingToolParser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Find the starting position of the last tool call pattern in the given text
|
||||||
|
/// Returns None if no tool call pattern is found
|
||||||
|
fn find_last_tool_call_start(text: &str) -> Option<usize> {
|
||||||
|
let mut best_start: Option<usize> = None;
|
||||||
|
for pattern in &TOOL_CALL_PATTERNS {
|
||||||
|
if let Some(pos) = text.rfind(pattern) {
|
||||||
|
if best_start.map_or(true, |best| pos > best) {
|
||||||
|
best_start = Some(pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
best_start
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Validate that tool call args don't contain message-like content
|
||||||
|
/// This detects malformed tool calls where agent messages got mixed into args
|
||||||
|
fn has_message_like_keys(args: &serde_json::Map<String, serde_json::Value>) -> bool {
|
||||||
|
args.keys().any(|key| {
|
||||||
|
key.len() > 100
|
||||||
|
|| key.contains('\n')
|
||||||
|
|| key.contains("I'll")
|
||||||
|
|| key.contains("Let me")
|
||||||
|
|| key.contains("Here's")
|
||||||
|
|| key.contains("I can")
|
||||||
|
|| key.contains("I need")
|
||||||
|
|| key.contains("First")
|
||||||
|
|| key.contains("Now")
|
||||||
|
|| key.contains("The ")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Process a streaming chunk and return completed tool calls if any
|
/// Process a streaming chunk and return completed tool calls if any
|
||||||
pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec<ToolCall> {
|
pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec<ToolCall> {
|
||||||
let mut completed_tools = Vec::new();
|
let mut completed_tools = Vec::new();
|
||||||
@@ -328,26 +368,12 @@ impl StreamingToolParser {
|
|||||||
|
|
||||||
/// Fallback method to parse JSON tool calls from text content
|
/// Fallback method to parse JSON tool calls from text content
|
||||||
fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
|
fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
|
||||||
// Look for JSON tool call patterns
|
|
||||||
let patterns = [
|
|
||||||
r#"{"tool":"#,
|
|
||||||
r#"{ "tool":"#,
|
|
||||||
r#"{"tool" :"#,
|
|
||||||
r#"{ "tool" :"#,
|
|
||||||
];
|
|
||||||
|
|
||||||
// If we're not currently in a JSON tool call, look for the start
|
// If we're not currently in a JSON tool call, look for the start
|
||||||
if !self.in_json_tool_call {
|
if !self.in_json_tool_call {
|
||||||
for pattern in &patterns {
|
if let Some(pos) = Self::find_last_tool_call_start(&self.text_buffer) {
|
||||||
if let Some(pos) = self.text_buffer.rfind(pattern) {
|
debug!("Found JSON tool call pattern at position {}", pos);
|
||||||
debug!(
|
|
||||||
"Found JSON tool call pattern '{}' at position {}",
|
|
||||||
pattern, pos
|
|
||||||
);
|
|
||||||
self.in_json_tool_call = true;
|
self.in_json_tool_call = true;
|
||||||
self.json_tool_start = Some(pos);
|
self.json_tool_start = Some(pos);
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -356,84 +382,35 @@ impl StreamingToolParser {
|
|||||||
if let Some(start_pos) = self.json_tool_start {
|
if let Some(start_pos) = self.json_tool_start {
|
||||||
let json_text = &self.text_buffer[start_pos..];
|
let json_text = &self.text_buffer[start_pos..];
|
||||||
|
|
||||||
// Try to find a complete JSON object
|
// Try to find a complete JSON object using the shared helper
|
||||||
let mut brace_count = 0;
|
if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
|
||||||
let mut in_string = false;
|
let json_str = &json_text[..=end_pos];
|
||||||
let mut escape_next = false;
|
|
||||||
|
|
||||||
for (i, ch) in json_text.char_indices() {
|
|
||||||
if escape_next {
|
|
||||||
escape_next = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
match ch {
|
|
||||||
'\\' => escape_next = true,
|
|
||||||
'"' if !escape_next => in_string = !in_string,
|
|
||||||
'{' if !in_string => brace_count += 1,
|
|
||||||
'}' if !in_string => {
|
|
||||||
brace_count -= 1;
|
|
||||||
if brace_count == 0 {
|
|
||||||
// Found complete JSON object
|
|
||||||
let json_str = &json_text[..=i];
|
|
||||||
debug!("Attempting to parse JSON tool call: {}", json_str);
|
debug!("Attempting to parse JSON tool call: {}", json_str);
|
||||||
|
|
||||||
// First try to parse as a ToolCall
|
// Try to parse as a ToolCall
|
||||||
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
||||||
// Validate that this is actually a proper tool call
|
// Validate that args is an object with reasonable keys
|
||||||
// The args should be a JSON object with reasonable keys
|
|
||||||
if let Some(args_obj) = tool_call.args.as_object() {
|
if let Some(args_obj) = tool_call.args.as_object() {
|
||||||
// Check if any key looks like it contains agent message content
|
if Self::has_message_like_keys(args_obj) {
|
||||||
// This would indicate a malformed tool call where the message
|
|
||||||
// got mixed into the args
|
|
||||||
let has_message_like_key = args_obj.keys().any(|key| {
|
|
||||||
key.len() > 100
|
|
||||||
|| key.contains('\n')
|
|
||||||
|| key.contains("I'll")
|
|
||||||
|| key.contains("Let me")
|
|
||||||
|| key.contains("Here's")
|
|
||||||
|| key.contains("I can")
|
|
||||||
|| key.contains("I need")
|
|
||||||
|| key.contains("First")
|
|
||||||
|| key.contains("Now")
|
|
||||||
|| key.contains("The ")
|
|
||||||
});
|
|
||||||
|
|
||||||
if has_message_like_key {
|
|
||||||
debug!("Detected malformed tool call with message-like keys, skipping");
|
debug!("Detected malformed tool call with message-like keys, skipping");
|
||||||
// This looks like a malformed tool call, skip it
|
|
||||||
self.in_json_tool_call = false;
|
self.in_json_tool_call = false;
|
||||||
self.json_tool_start = None;
|
self.json_tool_start = None;
|
||||||
break;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Also check if the values look reasonable
|
debug!("Successfully parsed valid JSON tool call: {:?}", tool_call);
|
||||||
// Tool arguments should typically be file paths, commands, or content
|
|
||||||
// Not entire agent messages
|
|
||||||
|
|
||||||
debug!(
|
|
||||||
"Successfully parsed valid JSON tool call: {:?}",
|
|
||||||
tool_call
|
|
||||||
);
|
|
||||||
// Reset JSON parsing state
|
|
||||||
self.in_json_tool_call = false;
|
self.in_json_tool_call = false;
|
||||||
self.json_tool_start = None;
|
self.json_tool_start = None;
|
||||||
return Some(tool_call);
|
return Some(tool_call);
|
||||||
}
|
}
|
||||||
// If args is not an object, skip this as invalid
|
|
||||||
debug!("Tool call args is not an object, skipping");
|
debug!("Tool call args is not an object, skipping");
|
||||||
} else {
|
} else {
|
||||||
debug!("Failed to parse JSON tool call: {}", json_str);
|
debug!("Failed to parse JSON tool call: {}", json_str);
|
||||||
|
}
|
||||||
// Reset and continue looking
|
// Reset and continue looking
|
||||||
self.in_json_tool_call = false;
|
self.in_json_tool_call = false;
|
||||||
self.json_tool_start = None;
|
self.json_tool_start = None;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -443,69 +420,25 @@ impl StreamingToolParser {
|
|||||||
/// Parse JSON tool call from the accumulated text buffer (called when stream finishes)
|
/// Parse JSON tool call from the accumulated text buffer (called when stream finishes)
|
||||||
/// This is similar to try_parse_json_tool_call but operates on the full buffer
|
/// This is similar to try_parse_json_tool_call but operates on the full buffer
|
||||||
fn try_parse_json_tool_call_from_buffer(&mut self) -> Option<ToolCall> {
|
fn try_parse_json_tool_call_from_buffer(&mut self) -> Option<ToolCall> {
|
||||||
// Look for JSON tool call patterns in the accumulated buffer
|
if let Some(start_pos) = Self::find_last_tool_call_start(&self.text_buffer) {
|
||||||
let patterns = [
|
|
||||||
r#"{"tool":"#,
|
|
||||||
r#"{ "tool":"#,
|
|
||||||
r#"{"tool" :"#,
|
|
||||||
r#"{ "tool" :"#,
|
|
||||||
];
|
|
||||||
|
|
||||||
// Find the last occurrence of a tool call pattern (most likely to be complete)
|
|
||||||
let mut best_start: Option<usize> = None;
|
|
||||||
for pattern in &patterns {
|
|
||||||
if let Some(pos) = self.text_buffer.rfind(pattern) {
|
|
||||||
if best_start.map_or(true, |best| pos > best) {
|
|
||||||
best_start = Some(pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(start_pos) = best_start {
|
|
||||||
let json_text = &self.text_buffer[start_pos..];
|
let json_text = &self.text_buffer[start_pos..];
|
||||||
debug!("Found potential JSON tool call at position {}: {:?}", start_pos,
|
debug!("Found potential JSON tool call at position {}: {:?}", start_pos,
|
||||||
if json_text.len() > 200 { &json_text[..200] } else { json_text });
|
if json_text.len() > 200 { &json_text[..200] } else { json_text });
|
||||||
|
|
||||||
// Try to find a complete JSON object
|
// Try to find a complete JSON object using the shared helper
|
||||||
let mut brace_count = 0;
|
if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
|
||||||
let mut in_string = false;
|
let json_str = &json_text[..=end_pos];
|
||||||
let mut escape_next = false;
|
|
||||||
|
|
||||||
for (i, ch) in json_text.char_indices() {
|
|
||||||
if escape_next {
|
|
||||||
escape_next = false;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
match ch {
|
|
||||||
'\\' => escape_next = true,
|
|
||||||
'"' if !escape_next => in_string = !in_string,
|
|
||||||
'{' if !in_string => brace_count += 1,
|
|
||||||
'}' if !in_string => {
|
|
||||||
brace_count -= 1;
|
|
||||||
if brace_count == 0 {
|
|
||||||
// Found complete JSON object
|
|
||||||
let json_str = &json_text[..=i];
|
|
||||||
debug!("Attempting to parse JSON tool call from buffer: {}", json_str);
|
debug!("Attempting to parse JSON tool call from buffer: {}", json_str);
|
||||||
|
|
||||||
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
||||||
if let Some(args_obj) = tool_call.args.as_object() {
|
if let Some(args_obj) = tool_call.args.as_object() {
|
||||||
// Validate - check for message-like keys
|
// Use the same validation as try_parse_json_tool_call
|
||||||
let has_message_like_key = args_obj.keys().any(|key| {
|
if !Self::has_message_like_keys(args_obj) {
|
||||||
key.len() > 100 || key.contains('\n')
|
|
||||||
});
|
|
||||||
|
|
||||||
if !has_message_like_key {
|
|
||||||
debug!("Successfully parsed JSON tool call from buffer: {:?}", tool_call);
|
debug!("Successfully parsed JSON tool call from buffer: {:?}", tool_call);
|
||||||
return Some(tool_call);
|
return Some(tool_call);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => {}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -535,28 +468,10 @@ impl StreamingToolParser {
|
|||||||
/// This detects cases where the LLM started emitting a tool call but the stream ended
|
/// This detects cases where the LLM started emitting a tool call but the stream ended
|
||||||
/// before the JSON was complete (truncated output)
|
/// before the JSON was complete (truncated output)
|
||||||
pub fn has_incomplete_tool_call(&self) -> bool {
|
pub fn has_incomplete_tool_call(&self) -> bool {
|
||||||
let patterns = [
|
if let Some(start_pos) = Self::find_last_tool_call_start(&self.text_buffer) {
|
||||||
r#"{"tool":"#,
|
|
||||||
r#"{ "tool":"#,
|
|
||||||
r#"{"tool" :"#,
|
|
||||||
r#"{ "tool" :"#,
|
|
||||||
];
|
|
||||||
|
|
||||||
// Find the last occurrence of a tool call pattern
|
|
||||||
let mut best_start: Option<usize> = None;
|
|
||||||
for pattern in &patterns {
|
|
||||||
if let Some(pos) = self.text_buffer.rfind(pattern) {
|
|
||||||
if best_start.map_or(true, |best| pos > best) {
|
|
||||||
best_start = Some(pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(start_pos) = best_start {
|
|
||||||
// Check if we can parse a complete JSON object from this position
|
|
||||||
// If NOT complete, it's an incomplete tool call
|
|
||||||
let json_text = &self.text_buffer[start_pos..];
|
let json_text = &self.text_buffer[start_pos..];
|
||||||
!Self::is_complete_json_object(json_text)
|
// If NOT complete, it's an incomplete tool call
|
||||||
|
Self::find_complete_json_object_end(json_text).is_none()
|
||||||
} else {
|
} else {
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
@@ -566,42 +481,15 @@ impl StreamingToolParser {
|
|||||||
/// This detects cases where the LLM emitted a complete tool call JSON
|
/// This detects cases where the LLM emitted a complete tool call JSON
|
||||||
/// but it wasn't parsed/executed (e.g., due to parsing issues)
|
/// but it wasn't parsed/executed (e.g., due to parsing issues)
|
||||||
pub fn has_unexecuted_tool_call(&self) -> bool {
|
pub fn has_unexecuted_tool_call(&self) -> bool {
|
||||||
let patterns = [
|
if let Some(start_pos) = Self::find_last_tool_call_start(&self.text_buffer) {
|
||||||
r#"{"tool":"#,
|
|
||||||
r#"{ "tool":"#,
|
|
||||||
r#"{"tool" :"#,
|
|
||||||
r#"{ "tool" :"#,
|
|
||||||
];
|
|
||||||
|
|
||||||
// Find the last occurrence of a tool call pattern
|
|
||||||
let mut best_start: Option<usize> = None;
|
|
||||||
for pattern in &patterns {
|
|
||||||
if let Some(pos) = self.text_buffer.rfind(pattern) {
|
|
||||||
if best_start.map_or(true, |best| pos > best) {
|
|
||||||
best_start = Some(pos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(start_pos) = best_start {
|
|
||||||
// Check if we can parse a complete JSON object from this position
|
|
||||||
let json_text = &self.text_buffer[start_pos..];
|
let json_text = &self.text_buffer[start_pos..];
|
||||||
// If the JSON IS complete, it means there's an unexecuted tool call
|
// If the JSON IS complete, it means there's an unexecuted tool call
|
||||||
if let Some(json_end) = Self::find_complete_json_object_end(json_text) {
|
if let Some(json_end) = Self::find_complete_json_object_end(json_text) {
|
||||||
// Extract just the JSON object (not any trailing text)
|
|
||||||
let json_only = &json_text[..=json_end];
|
let json_only = &json_text[..=json_end];
|
||||||
// Try to parse it as a tool call to confirm it's valid JSON
|
|
||||||
return serde_json::from_str::<serde_json::Value>(json_only).is_ok();
|
return serde_json::from_str::<serde_json::Value>(json_only).is_ok();
|
||||||
}
|
}
|
||||||
false
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
}
|
||||||
}
|
false
|
||||||
|
|
||||||
/// Check if a string contains a complete JSON object
|
|
||||||
fn is_complete_json_object(text: &str) -> bool {
|
|
||||||
Self::find_complete_json_object_end(text).is_some()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Find the end position (byte index) of a complete JSON object in the text
|
/// Find the end position (byte index) of a complete JSON object in the text
|
||||||
@@ -3993,7 +3881,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
let mut response_started = false;
|
let mut response_started = false;
|
||||||
let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
|
let mut any_tool_executed = false; // Track if ANY tool was executed across all iterations
|
||||||
let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts
|
let mut auto_summary_attempts = 0; // Track auto-summary prompt attempts
|
||||||
const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 2; // Limit auto-summary retries
|
const MAX_AUTO_SUMMARY_ATTEMPTS: usize = 5; // Limit auto-summary retries (increased from 2 for better recovery)
|
||||||
let mut final_output_called = false; // Track if final_output was called
|
let mut final_output_called = false; // Track if final_output was called
|
||||||
let mut executed_tools_in_session: std::collections::HashSet<String> = std::collections::HashSet::new(); // Track executed tools to prevent duplicates
|
let mut executed_tools_in_session: std::collections::HashSet<String> = std::collections::HashSet::new(); // Track executed tools to prevent duplicates
|
||||||
|
|
||||||
@@ -4746,6 +4634,10 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
tool_executed = true;
|
tool_executed = true;
|
||||||
any_tool_executed = true; // Track across all iterations
|
any_tool_executed = true; // Track across all iterations
|
||||||
|
|
||||||
|
// Reset auto-continue attempts after successful tool execution
|
||||||
|
// This gives the LLM fresh attempts since it's making progress
|
||||||
|
auto_summary_attempts = 0;
|
||||||
|
|
||||||
// Add to executed tools set to prevent re-execution in this session
|
// Add to executed tools set to prevent re-execution in this session
|
||||||
executed_tools_in_session.insert(tool_key.clone());
|
executed_tools_in_session.insert(tool_key.clone());
|
||||||
|
|
||||||
@@ -5048,6 +4940,16 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
|
|
||||||
let has_response = !current_response.is_empty() || !full_response.is_empty();
|
let has_response = !current_response.is_empty() || !full_response.is_empty();
|
||||||
|
|
||||||
|
// Check if the response is essentially empty (just whitespace or timing lines)
|
||||||
|
// This detects cases where the LLM outputs nothing substantive
|
||||||
|
let response_text = if !current_response.is_empty() {
|
||||||
|
¤t_response
|
||||||
|
} else {
|
||||||
|
&full_response
|
||||||
|
};
|
||||||
|
let is_empty_response = response_text.trim().is_empty()
|
||||||
|
|| response_text.lines().all(|line| line.trim().is_empty() || line.trim().starts_with("⏱️"));
|
||||||
|
|
||||||
// Check if there's an incomplete tool call in the buffer
|
// Check if there's an incomplete tool call in the buffer
|
||||||
let has_incomplete_tool_call = parser.has_incomplete_tool_call();
|
let has_incomplete_tool_call = parser.has_incomplete_tool_call();
|
||||||
|
|
||||||
@@ -5057,27 +4959,40 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
// Auto-continue if tools were executed but final_output was never called
|
// Auto-continue if tools were executed but final_output was never called
|
||||||
// OR if the LLM emitted an incomplete tool call (truncated JSON)
|
// OR if the LLM emitted an incomplete tool call (truncated JSON)
|
||||||
// OR if the LLM emitted a complete tool call that wasn't executed
|
// OR if the LLM emitted a complete tool call that wasn't executed
|
||||||
|
// OR if the LLM emitted an empty/trivial response (just timing lines)
|
||||||
// This ensures we don't return control when the LLM clearly intended to call a tool
|
// This ensures we don't return control when the LLM clearly intended to call a tool
|
||||||
if (any_tool_executed && !final_output_called) || has_incomplete_tool_call || has_unexecuted_tool_call {
|
let should_auto_continue = (any_tool_executed && !final_output_called) || has_incomplete_tool_call || has_unexecuted_tool_call || (any_tool_executed && is_empty_response);
|
||||||
|
if should_auto_continue {
|
||||||
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
|
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
|
||||||
auto_summary_attempts += 1;
|
auto_summary_attempts += 1;
|
||||||
if has_incomplete_tool_call || has_unexecuted_tool_call {
|
if has_incomplete_tool_call {
|
||||||
warn!(
|
warn!(
|
||||||
"LLM emitted {} tool call ({} iterations, auto-continue attempt {})",
|
"LLM emitted incomplete tool call ({} iterations, auto-continue attempt {}/{})",
|
||||||
if has_incomplete_tool_call { "incomplete" } else { "unexecuted" },
|
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
|
||||||
iteration_count, auto_summary_attempts
|
|
||||||
);
|
);
|
||||||
self.ui_writer.print_context_status(
|
self.ui_writer.print_context_status(
|
||||||
if has_incomplete_tool_call {
|
|
||||||
"\n🔄 Model emitted incomplete tool call. Auto-continuing...\n"
|
"\n🔄 Model emitted incomplete tool call. Auto-continuing...\n"
|
||||||
} else {
|
);
|
||||||
|
} else if has_unexecuted_tool_call {
|
||||||
|
warn!(
|
||||||
|
"LLM emitted unexecuted tool call ({} iterations, auto-continue attempt {}/{})",
|
||||||
|
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
|
||||||
|
);
|
||||||
|
self.ui_writer.print_context_status(
|
||||||
"\n🔄 Model emitted tool call that wasn't executed. Auto-continuing...\n"
|
"\n🔄 Model emitted tool call that wasn't executed. Auto-continuing...\n"
|
||||||
}
|
);
|
||||||
|
} else if is_empty_response {
|
||||||
|
warn!(
|
||||||
|
"LLM emitted empty/trivial response ({} iterations, auto-continue attempt {}/{})",
|
||||||
|
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
|
||||||
|
);
|
||||||
|
self.ui_writer.print_context_status(
|
||||||
|
"\n🔄 Model emitted empty response. Auto-continuing...\n"
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
warn!(
|
warn!(
|
||||||
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})",
|
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {}/{})",
|
||||||
iteration_count, auto_summary_attempts
|
iteration_count, auto_summary_attempts, MAX_AUTO_SUMMARY_ATTEMPTS
|
||||||
);
|
);
|
||||||
self.ui_writer.print_context_status(
|
self.ui_writer.print_context_status(
|
||||||
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
|
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
|
||||||
@@ -5120,11 +5035,17 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
} else {
|
} else {
|
||||||
// Max attempts reached, give up gracefully
|
// Max attempts reached, give up gracefully
|
||||||
warn!(
|
warn!(
|
||||||
"Max auto-continue attempts ({}) reached, returning without final_output",
|
"Max auto-continue attempts ({}) reached after {} iterations. Conditions: any_tool_executed={}, final_output_called={}, has_incomplete={}, has_unexecuted={}, is_empty_response={}",
|
||||||
MAX_AUTO_SUMMARY_ATTEMPTS
|
MAX_AUTO_SUMMARY_ATTEMPTS,
|
||||||
|
iteration_count,
|
||||||
|
any_tool_executed,
|
||||||
|
final_output_called,
|
||||||
|
has_incomplete_tool_call,
|
||||||
|
has_unexecuted_tool_call,
|
||||||
|
is_empty_response
|
||||||
);
|
);
|
||||||
self.ui_writer.print_agent_response(
|
self.ui_writer.print_agent_response(
|
||||||
"\n⚠️ The model stopped without calling final_output after multiple attempts.\n"
|
&format!("\n⚠️ The model stopped without calling final_output after {} auto-continue attempts.\n", MAX_AUTO_SUMMARY_ATTEMPTS)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else if has_response {
|
} else if has_response {
|
||||||
|
|||||||
113
crates/g3-core/tests/auto_continue_test.rs
Normal file
113
crates/g3-core/tests/auto_continue_test.rs
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
//! Tests for the auto-continue detection features
|
||||||
|
//!
|
||||||
|
//! These tests verify the logic used to detect when the LLM should auto-continue:
|
||||||
|
//! 1. Empty/trivial responses (just timing lines)
|
||||||
|
//! 2. Incomplete tool calls
|
||||||
|
//! 3. Unexecuted tool calls
|
||||||
|
//! 4. Missing final_output after tool execution
|
||||||
|
|
||||||
|
/// Helper function to check if a response is considered "empty" or trivial
|
||||||
|
/// This mirrors the logic in lib.rs for detecting empty responses
|
||||||
|
fn is_empty_response(response_text: &str) -> bool {
|
||||||
|
response_text.trim().is_empty()
|
||||||
|
|| response_text.lines().all(|line| {
|
||||||
|
line.trim().is_empty() || line.trim().starts_with("⏱️")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_empty_string() {
|
||||||
|
assert!(is_empty_response(""));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_whitespace_only() {
|
||||||
|
assert!(is_empty_response(" "));
|
||||||
|
assert!(is_empty_response("\n\n\n"));
|
||||||
|
assert!(is_empty_response(" \n \t \n "));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_timing_line_only() {
|
||||||
|
assert!(is_empty_response("⏱️ 43.0s | 💭 3.6s"));
|
||||||
|
assert!(is_empty_response(" ⏱️ 43.0s | 💭 3.6s "));
|
||||||
|
assert!(is_empty_response("\n⏱️ 43.0s | 💭 3.6s\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_multiple_timing_lines() {
|
||||||
|
let response = "\n⏱️ 10.0s | 💭 1.0s\n\n⏱️ 20.0s | 💭 2.0s\n";
|
||||||
|
assert!(is_empty_response(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_timing_with_empty_lines() {
|
||||||
|
let response = "\n\n⏱️ 43.0s | 💭 3.6s\n\n";
|
||||||
|
assert!(is_empty_response(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_substantive_content() {
|
||||||
|
// These should NOT be considered empty
|
||||||
|
assert!(!is_empty_response("Hello, I will help you."));
|
||||||
|
assert!(!is_empty_response("Let me read that file."));
|
||||||
|
assert!(!is_empty_response("I've completed the task."));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_timing_with_text() {
|
||||||
|
// If there's any substantive text, it's not empty
|
||||||
|
let response = "⏱️ 43.0s | 💭 3.6s\nHere is the result.";
|
||||||
|
assert!(!is_empty_response(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_text_before_timing() {
|
||||||
|
let response = "Done!\n⏱️ 43.0s | 💭 3.6s";
|
||||||
|
assert!(!is_empty_response(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_json_tool_call() {
|
||||||
|
// A JSON tool call is definitely not empty
|
||||||
|
let response = r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#;
|
||||||
|
assert!(!is_empty_response(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_partial_json() {
|
||||||
|
// Even partial JSON is not empty
|
||||||
|
let response = r#"{"tool": "read_file", "args": {"#;
|
||||||
|
assert!(!is_empty_response(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_markdown() {
|
||||||
|
// Markdown content is not empty
|
||||||
|
let response = "# Summary\n\nI completed the task.";
|
||||||
|
assert!(!is_empty_response(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_empty_response_detection_code_block() {
|
||||||
|
// Code blocks are not empty
|
||||||
|
let response = "```rust\nfn main() {}\n```";
|
||||||
|
assert!(!is_empty_response(response));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test the MAX_AUTO_SUMMARY_ATTEMPTS constant value
|
||||||
|
// This is a compile-time check that the constant exists and has the expected value
|
||||||
|
#[test]
|
||||||
|
fn test_max_auto_summary_attempts_is_reasonable() {
|
||||||
|
// The constant should be at least 3 to give the LLM a fair chance to recover
|
||||||
|
// We can't directly access the constant from here, but we document the expected value
|
||||||
|
// Current value: 5 (increased from 2)
|
||||||
|
const EXPECTED_MIN_ATTEMPTS: usize = 3;
|
||||||
|
const EXPECTED_MAX_ATTEMPTS: usize = 10;
|
||||||
|
const CURRENT_VALUE: usize = 5;
|
||||||
|
|
||||||
|
assert!(CURRENT_VALUE >= EXPECTED_MIN_ATTEMPTS,
|
||||||
|
"MAX_AUTO_SUMMARY_ATTEMPTS should be at least {} for reliable recovery", EXPECTED_MIN_ATTEMPTS);
|
||||||
|
assert!(CURRENT_VALUE <= EXPECTED_MAX_ATTEMPTS,
|
||||||
|
"MAX_AUTO_SUMMARY_ATTEMPTS should not exceed {} to avoid infinite loops", EXPECTED_MAX_ATTEMPTS);
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user