From 0e4febc3fbca308787119cc6bbc8002376fa13dd Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Mon, 22 Dec 2025 15:01:27 +1100 Subject: [PATCH] attempted fix of autocontinue --- crates/g3-computer-control/build.rs | 12 ++ crates/g3-core/src/lib.rs | 163 ++++++++++++++-- .../tests/incomplete_tool_call_test.rs | 182 ++++++++++++++++++ 3 files changed, 344 insertions(+), 13 deletions(-) create mode 100644 crates/g3-core/tests/incomplete_tool_call_test.rs diff --git a/crates/g3-computer-control/build.rs b/crates/g3-computer-control/build.rs index b7760a2..60d5598 100644 --- a/crates/g3-computer-control/build.rs +++ b/crates/g3-computer-control/build.rs @@ -68,6 +68,18 @@ fn main() { dylib_dst.display() ); + // Re-sign the dylib with ad-hoc signature to fix code signing issues on Apple Silicon + // This is necessary because incremental compilation can invalidate signatures + let codesign_status = Command::new("codesign") + .args(&["-f", "-s", "-", dylib_dst.to_str().unwrap()]) + .status(); + + if let Ok(status) = codesign_status { + if !status.success() { + println!("cargo:warning=Failed to codesign libVisionBridge.dylib (non-fatal)"); + } + } + // Add rpath so the dylib can be found at runtime println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path"); println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path"); diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 0536332..eae704f 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -531,6 +531,113 @@ impl StreamingToolParser { self.message_stopped } + /// Check if the text buffer contains an incomplete JSON tool call + /// This detects cases where the LLM started emitting a tool call but the stream ended + /// before the JSON was complete (truncated output) + pub fn has_incomplete_tool_call(&self) -> bool { + let patterns = [ + r#"{"tool":"#, + r#"{ "tool":"#, + r#"{"tool" :"#, + r#"{ "tool" :"#, + ]; + + // Find the last occurrence of a tool call pattern + let mut best_start: Option = None; + for pattern in &patterns { + if let Some(pos) = self.text_buffer.rfind(pattern) { + if best_start.map_or(true, |best| pos > best) { + best_start = Some(pos); + } + } + } + + if let Some(start_pos) = best_start { + // Check if we can parse a complete JSON object from this position + // If NOT complete, it's an incomplete tool call + let json_text = &self.text_buffer[start_pos..]; + !Self::is_complete_json_object(json_text) + } else { + false + } + } + + /// Check if the text buffer contains an unexecuted tool call + /// This detects cases where the LLM emitted a complete tool call JSON + /// but it wasn't parsed/executed (e.g., due to parsing issues) + pub fn has_unexecuted_tool_call(&self) -> bool { + let patterns = [ + r#"{"tool":"#, + r#"{ "tool":"#, + r#"{"tool" :"#, + r#"{ "tool" :"#, + ]; + + // Find the last occurrence of a tool call pattern + let mut best_start: Option = None; + for pattern in &patterns { + if let Some(pos) = self.text_buffer.rfind(pattern) { + if best_start.map_or(true, |best| pos > best) { + best_start = Some(pos); + } + } + } + + if let Some(start_pos) = best_start { + // Check if we can parse a complete JSON object from this position + let json_text = &self.text_buffer[start_pos..]; + // If the JSON IS complete, it means there's an unexecuted tool call + if let Some(json_end) = Self::find_complete_json_object_end(json_text) { + // Extract just the JSON object (not any trailing text) + let json_only = &json_text[..=json_end]; + // Try to parse it as a tool call to confirm it's valid JSON + return serde_json::from_str::(json_only).is_ok(); + } + false + } else { + false + } + } + + /// Check if a string contains a complete JSON object + fn is_complete_json_object(text: &str) -> bool { + Self::find_complete_json_object_end(text).is_some() + } + + /// Find the end position (byte index) of a complete JSON object in the text + /// Returns None if no complete JSON object is found + fn find_complete_json_object_end(text: &str) -> Option { + let mut brace_count = 0; + let mut in_string = false; + let mut escape_next = false; + let mut found_start = false; + + for (i, ch) in text.char_indices() { + if escape_next { + escape_next = false; + continue; + } + + match ch { + '\\' => escape_next = true, + '"' if !escape_next => in_string = !in_string, + '{' if !in_string => { + brace_count += 1; + found_start = true; + } + '}' if !in_string => { + brace_count -= 1; + if brace_count == 0 && found_start { + return Some(i); // Return the byte index of the closing brace + } + } + _ => {} + } + } + + None // No complete JSON object found + } + /// Reset the parser state for a new message pub fn reset(&mut self) { self.text_buffer.clear(); @@ -4941,18 +5048,41 @@ impl Agent { let has_response = !current_response.is_empty() || !full_response.is_empty(); + // Check if there's an incomplete tool call in the buffer + let has_incomplete_tool_call = parser.has_incomplete_tool_call(); + + // Check if there's a complete but unexecuted tool call in the buffer + let has_unexecuted_tool_call = parser.has_unexecuted_tool_call(); + // Auto-continue if tools were executed but final_output was never called - // This is the simple rule: LLM must call final_output before returning control - if any_tool_executed && !final_output_called { + // OR if the LLM emitted an incomplete tool call (truncated JSON) + // OR if the LLM emitted a complete tool call that wasn't executed + // This ensures we don't return control when the LLM clearly intended to call a tool + if (any_tool_executed && !final_output_called) || has_incomplete_tool_call || has_unexecuted_tool_call { if auto_summary_attempts < MAX_AUTO_SUMMARY_ATTEMPTS { auto_summary_attempts += 1; - warn!( - "LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})", - iteration_count, auto_summary_attempts - ); - self.ui_writer.print_context_status( - "\nšŸ”„ Model stopped without calling final_output. Auto-continuing...\n" - ); + if has_incomplete_tool_call || has_unexecuted_tool_call { + warn!( + "LLM emitted {} tool call ({} iterations, auto-continue attempt {})", + if has_incomplete_tool_call { "incomplete" } else { "unexecuted" }, + iteration_count, auto_summary_attempts + ); + self.ui_writer.print_context_status( + if has_incomplete_tool_call { + "\nšŸ”„ Model emitted incomplete tool call. Auto-continuing...\n" + } else { + "\nšŸ”„ Model emitted tool call that wasn't executed. Auto-continuing...\n" + } + ); + } else { + warn!( + "LLM stopped without calling final_output after executing tools ({} iterations, auto-continue attempt {})", + iteration_count, auto_summary_attempts + ); + self.ui_writer.print_context_status( + "\nšŸ”„ Model stopped without calling final_output. Auto-continuing...\n" + ); + } // Add any text response to context before prompting for continuation if has_response { @@ -4971,10 +5101,17 @@ impl Agent { } // Add a follow-up message asking for continuation - let continue_prompt = Message::new( - MessageRole::User, - "Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(), - ); + let continue_prompt = if has_incomplete_tool_call { + Message::new( + MessageRole::User, + "Your previous response was cut off mid-tool-call. Please complete the tool call and continue.".to_string(), + ) + } else { + Message::new( + MessageRole::User, + "Please continue until you are done. You **MUST** call `final_output` with a summary when done.".to_string(), + ) + }; self.context_window.add_message(continue_prompt); request.messages = self.context_window.conversation_history.clone(); diff --git a/crates/g3-core/tests/incomplete_tool_call_test.rs b/crates/g3-core/tests/incomplete_tool_call_test.rs new file mode 100644 index 0000000..4366672 --- /dev/null +++ b/crates/g3-core/tests/incomplete_tool_call_test.rs @@ -0,0 +1,182 @@ +//! Tests for the incomplete tool call detection feature + +use g3_core::StreamingToolParser; +use g3_providers::CompletionChunk; + +#[test] +fn test_has_incomplete_tool_call_empty_buffer() { + let parser = StreamingToolParser::new(); + assert!(!parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_no_tool_pattern() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: "Hello, I will help you with that.".to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + assert!(!parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_complete_tool_call() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Complete JSON should NOT be detected as incomplete + assert!(!parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_truncated_tool_call() { + let mut parser = StreamingToolParser::new(); + // Simulate truncated tool call - missing closing braces + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Incomplete JSON should be detected + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_truncated_mid_value() { + let mut parser = StreamingToolParser::new(); + // Simulate truncated tool call - cut off mid-value + let chunk = CompletionChunk { + content: r#"{"tool": "shell", "args": {"command": "cargo test --package g3-cli --test filter_json_test test_streaming -- --test-threads=1 2>&1 | tail"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Incomplete JSON should be detected + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_with_text_before() { + let mut parser = StreamingToolParser::new(); + // Text before the incomplete tool call + let chunk = CompletionChunk { + content: r#"Let me read that file for you. + +{"tool": "read_file", "args": {"file_path":"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Incomplete JSON should be detected + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_incomplete_tool_call_malformed_like_trace() { + let mut parser = StreamingToolParser::new(); + // This simulates a truncated tool call where the stream ended mid-JSON + // The actual trace showed truncated output, not malformed characters + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path":"src/engine.rkt""#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Truncated JSON (missing closing braces) should be detected as incomplete + assert!(parser.has_incomplete_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_empty_buffer() { + let parser = StreamingToolParser::new(); + assert!(!parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_no_tool_pattern() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: "Hello, I will help you with that.".to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + assert!(!parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_complete_tool_call() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Complete JSON tool call that wasn't executed should be detected + assert!(parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_incomplete_json() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt""#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Incomplete JSON should NOT be detected as unexecuted (it's incomplete, not unexecuted) + assert!(!parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_with_trailing_text() { + let mut parser = StreamingToolParser::new(); + // Complete JSON tool call followed by trailing text + let chunk = CompletionChunk { + content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}} + +Some trailing text after the JSON"#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Complete JSON tool call should be detected even with trailing text + assert!(parser.has_unexecuted_tool_call()); +} + +#[test] +fn test_has_unexecuted_tool_call_with_text_before_and_after() { + let mut parser = StreamingToolParser::new(); + let chunk = CompletionChunk { + content: r#"Let me read that file. + +{"tool": "shell", "args": {"command": "ls -la"}} + +I'll execute this command now."#.to_string(), + finished: false, + tool_calls: None, + usage: None, + }; + parser.process_chunk(&chunk); + // Complete JSON tool call should be detected + assert!(parser.has_unexecuted_tool_call()); +}