Attempted fix of auto-continue
This commit is contained in:
@@ -68,6 +68,18 @@ fn main() {
|
|||||||
dylib_dst.display()
|
dylib_dst.display()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Re-sign the dylib with ad-hoc signature to fix code signing issues on Apple Silicon
|
||||||
|
// This is necessary because incremental compilation can invalidate signatures
|
||||||
|
let codesign_status = Command::new("codesign")
|
||||||
|
.args(&["-f", "-s", "-", dylib_dst.to_str().unwrap()])
|
||||||
|
.status();
|
||||||
|
|
||||||
|
if let Ok(status) = codesign_status {
|
||||||
|
if !status.success() {
|
||||||
|
println!("cargo:warning=Failed to codesign libVisionBridge.dylib (non-fatal)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Add rpath so the dylib can be found at runtime
|
// Add rpath so the dylib can be found at runtime
|
||||||
println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path");
|
println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path");
|
||||||
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
|
||||||
|
|||||||
@@ -531,6 +531,113 @@ impl StreamingToolParser {
|
|||||||
self.message_stopped
|
self.message_stopped
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if the text buffer contains an incomplete JSON tool call
|
||||||
|
/// This detects cases where the LLM started emitting a tool call but the stream ended
|
||||||
|
/// before the JSON was complete (truncated output)
|
||||||
|
pub fn has_incomplete_tool_call(&self) -> bool {
|
||||||
|
let patterns = [
|
||||||
|
r#"{"tool":"#,
|
||||||
|
r#"{ "tool":"#,
|
||||||
|
r#"{"tool" :"#,
|
||||||
|
r#"{ "tool" :"#,
|
||||||
|
];
|
||||||
|
|
||||||
|
// Find the last occurrence of a tool call pattern
|
||||||
|
let mut best_start: Option<usize> = None;
|
||||||
|
for pattern in &patterns {
|
||||||
|
if let Some(pos) = self.text_buffer.rfind(pattern) {
|
||||||
|
if best_start.map_or(true, |best| pos > best) {
|
||||||
|
best_start = Some(pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(start_pos) = best_start {
|
||||||
|
// Check if we can parse a complete JSON object from this position
|
||||||
|
// If NOT complete, it's an incomplete tool call
|
||||||
|
let json_text = &self.text_buffer[start_pos..];
|
||||||
|
!Self::is_complete_json_object(json_text)
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if the text buffer contains an unexecuted tool call
|
||||||
|
/// This detects cases where the LLM emitted a complete tool call JSON
|
||||||
|
/// but it wasn't parsed/executed (e.g., due to parsing issues)
|
||||||
|
pub fn has_unexecuted_tool_call(&self) -> bool {
|
||||||
|
let patterns = [
|
||||||
|
r#"{"tool":"#,
|
||||||
|
r#"{ "tool":"#,
|
||||||
|
r#"{"tool" :"#,
|
||||||
|
r#"{ "tool" :"#,
|
||||||
|
];
|
||||||
|
|
||||||
|
// Find the last occurrence of a tool call pattern
|
||||||
|
let mut best_start: Option<usize> = None;
|
||||||
|
for pattern in &patterns {
|
||||||
|
if let Some(pos) = self.text_buffer.rfind(pattern) {
|
||||||
|
if best_start.map_or(true, |best| pos > best) {
|
||||||
|
best_start = Some(pos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(start_pos) = best_start {
|
||||||
|
// Check if we can parse a complete JSON object from this position
|
||||||
|
let json_text = &self.text_buffer[start_pos..];
|
||||||
|
// If the JSON IS complete, it means there's an unexecuted tool call
|
||||||
|
if let Some(json_end) = Self::find_complete_json_object_end(json_text) {
|
||||||
|
// Extract just the JSON object (not any trailing text)
|
||||||
|
let json_only = &json_text[..=json_end];
|
||||||
|
// Try to parse it as a tool call to confirm it's valid JSON
|
||||||
|
return serde_json::from_str::<serde_json::Value>(json_only).is_ok();
|
||||||
|
}
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Check if a string contains a complete JSON object
|
||||||
|
fn is_complete_json_object(text: &str) -> bool {
|
||||||
|
Self::find_complete_json_object_end(text).is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find the byte index of the `}` that closes the first complete JSON object
/// in `text`.
///
/// The scan tracks brace depth while skipping over string literals, so braces
/// inside `"..."` (including after escaped quotes such as `\"`) do not affect
/// the depth. The returned index is a byte offset into `text`, suitable for
/// an inclusive slice `&text[..=end]`.
///
/// Returns `None` if no complete (brace-balanced) object is found, e.g. when
/// the text is empty or the object is truncated.
fn find_complete_json_object_end(text: &str) -> Option<usize> {
    let mut depth: usize = 0;
    let mut in_string = false;
    let mut escaped = false;

    for (idx, ch) in text.char_indices() {
        if in_string {
            // Escapes only have meaning inside a string literal; a backslash
            // makes the following character (e.g. `"`) part of the string.
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        match ch {
            '"' => in_string = true,
            '{' => depth += 1,
            // A `}` seen before any `{` is stray text, not the end of an
            // object; ignoring it keeps the depth from going negative and
            // hiding a complete object that follows.
            '}' if depth > 0 => {
                depth -= 1;
                if depth == 0 {
                    return Some(idx);
                }
            }
            _ => {}
        }
    }

    None
}
|
||||||
|
|
||||||
/// Reset the parser state for a new message
|
/// Reset the parser state for a new message
|
||||||
pub fn reset(&mut self) {
|
pub fn reset(&mut self) {
|
||||||
self.text_buffer.clear();
|
self.text_buffer.clear();
|
||||||
@@ -4941,11 +5048,33 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
|
|
||||||
let has_response = !current_response.is_empty() || !full_response.is_empty();
|
let has_response = !current_response.is_empty() || !full_response.is_empty();
|
||||||
|
|
||||||
|
// Check if there's an incomplete tool call in the buffer
|
||||||
|
let has_incomplete_tool_call = parser.has_incomplete_tool_call();
|
||||||
|
|
||||||
|
// Check if there's a complete but unexecuted tool call in the buffer
|
||||||
|
let has_unexecuted_tool_call = parser.has_unexecuted_tool_call();
|
||||||
|
|
||||||
// Auto-continue if tools were executed but final_output was never called
|
// Auto-continue if tools were executed but final_output was never called
|
||||||
// This is the simple rule: LLM must call final_output before returning control
|
// OR if the LLM emitted an incomplete tool call (truncated JSON)
|
||||||
if any_tool_executed && !final_output_called {
|
// OR if the LLM emitted a complete tool call that wasn't executed
|
||||||
|
// This ensures we don't return control when the LLM clearly intended to call a tool
|
||||||
|
if (any_tool_executed && !final_output_called) || has_incomplete_tool_call || has_unexecuted_tool_call {
|
||||||
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
|
if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS {
|
||||||
auto_summary_attempts += 1;
|
auto_summary_attempts += 1;
|
||||||
|
if has_incomplete_tool_call || has_unexecuted_tool_call {
|
||||||
|
warn!(
|
||||||
|
"LLM emitted {} tool call ({} iterations, auto-continue attempt {})",
|
||||||
|
if has_incomplete_tool_call { "incomplete" } else { "unexecuted" },
|
||||||
|
iteration_count, auto_summary_attempts
|
||||||
|
);
|
||||||
|
self.ui_writer.print_context_status(
|
||||||
|
if has_incomplete_tool_call {
|
||||||
|
"\n🔄 Model emitted incomplete tool call. Auto-continuing...\n"
|
||||||
|
} else {
|
||||||
|
"\n🔄 Model emitted tool call that wasn't executed. Auto-continuing...\n"
|
||||||
|
}
|
||||||
|
);
|
||||||
|
} else {
|
||||||
warn!(
|
warn!(
|
||||||
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})",
|
"LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})",
|
||||||
iteration_count, auto_summary_attempts
|
iteration_count, auto_summary_attempts
|
||||||
@@ -4953,6 +5082,7 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
self.ui_writer.print_context_status(
|
self.ui_writer.print_context_status(
|
||||||
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
|
"\n🔄 Model stopped without calling final_output. Auto-continuing...\n"
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Add any text response to context before prompting for continuation
|
// Add any text response to context before prompting for continuation
|
||||||
if has_response {
|
if has_response {
|
||||||
@@ -4971,10 +5101,17 @@ impl<W: UiWriter> Agent<W> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Add a follow-up message asking for continuation
|
// Add a follow-up message asking for continuation
|
||||||
let continue_prompt = Message::new(
|
let continue_prompt = if has_incomplete_tool_call {
|
||||||
|
Message::new(
|
||||||
|
MessageRole::User,
|
||||||
|
"Your previous response was cut off mid-tool-call. Please complete the tool call and continue.".to_string(),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
Message::new(
|
||||||
MessageRole::User,
|
MessageRole::User,
|
||||||
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
|
"Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(),
|
||||||
);
|
)
|
||||||
|
};
|
||||||
self.context_window.add_message(continue_prompt);
|
self.context_window.add_message(continue_prompt);
|
||||||
request.messages = self.context_window.conversation_history.clone();
|
request.messages = self.context_window.conversation_history.clone();
|
||||||
|
|
||||||
|
|||||||
182
crates/g3-core/tests/incomplete_tool_call_test.rs
Normal file
182
crates/g3-core/tests/incomplete_tool_call_test.rs
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
//! Tests for the incomplete tool call detection feature
|
||||||
|
|
||||||
|
use g3_core::StreamingToolParser;
|
||||||
|
use g3_providers::CompletionChunk;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_incomplete_tool_call_empty_buffer() {
|
||||||
|
let parser = StreamingToolParser::new();
|
||||||
|
assert!(!parser.has_incomplete_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_incomplete_tool_call_no_tool_pattern() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: "Hello, I will help you with that.".to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
assert!(!parser.has_incomplete_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_incomplete_tool_call_complete_tool_call() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Complete JSON should NOT be detected as incomplete
|
||||||
|
assert!(!parser.has_incomplete_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_incomplete_tool_call_truncated_tool_call() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
// Simulate truncated tool call - missing closing braces
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Incomplete JSON should be detected
|
||||||
|
assert!(parser.has_incomplete_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_incomplete_tool_call_truncated_mid_value() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
// Simulate truncated tool call - cut off mid-value
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"{"tool": "shell", "args": {"command": "cargo test --package g3-cli --test filter_json_test test_streaming -- --test-threads=1 2>&1 | tail"#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Incomplete JSON should be detected
|
||||||
|
assert!(parser.has_incomplete_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_incomplete_tool_call_with_text_before() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
// Text before the incomplete tool call
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"Let me read that file for you.
|
||||||
|
|
||||||
|
{"tool": "read_file", "args": {"file_path":"#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Incomplete JSON should be detected
|
||||||
|
assert!(parser.has_incomplete_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_incomplete_tool_call_malformed_like_trace() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
// This simulates a truncated tool call where the stream ended mid-JSON
|
||||||
|
// The actual trace showed truncated output, not malformed characters
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"{"tool": "read_file", "args": {"file_path":"src/engine.rkt""#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Truncated JSON (missing closing braces) should be detected as incomplete
|
||||||
|
assert!(parser.has_incomplete_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_unexecuted_tool_call_empty_buffer() {
|
||||||
|
let parser = StreamingToolParser::new();
|
||||||
|
assert!(!parser.has_unexecuted_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_unexecuted_tool_call_no_tool_pattern() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: "Hello, I will help you with that.".to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
assert!(!parser.has_unexecuted_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_unexecuted_tool_call_complete_tool_call() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Complete JSON tool call that wasn't executed should be detected
|
||||||
|
assert!(parser.has_unexecuted_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_unexecuted_tool_call_incomplete_json() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Incomplete JSON should NOT be detected as unexecuted (it's incomplete, not unexecuted)
|
||||||
|
assert!(!parser.has_unexecuted_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_unexecuted_tool_call_with_trailing_text() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
// Complete JSON tool call followed by trailing text
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}
|
||||||
|
|
||||||
|
Some trailing text after the JSON"#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Complete JSON tool call should be detected even with trailing text
|
||||||
|
assert!(parser.has_unexecuted_tool_call());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_has_unexecuted_tool_call_with_text_before_and_after() {
|
||||||
|
let mut parser = StreamingToolParser::new();
|
||||||
|
let chunk = CompletionChunk {
|
||||||
|
content: r#"Let me read that file.
|
||||||
|
|
||||||
|
{"tool": "shell", "args": {"command": "ls -la"}}
|
||||||
|
|
||||||
|
I'll execute this command now."#.to_string(),
|
||||||
|
finished: false,
|
||||||
|
tool_calls: None,
|
||||||
|
usage: None,
|
||||||
|
};
|
||||||
|
parser.process_chunk(&chunk);
|
||||||
|
// Complete JSON tool call should be detected
|
||||||
|
assert!(parser.has_unexecuted_tool_call());
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user