fix: remove allow_multiple_tool_calls config and simplify tool execution flow

This fixes a bug where the agent would stop responding abruptly without calling final_output. The root cause was the allow_multiple_tool_calls config option (default: false), which caused the agent to break out of the streaming loop mid-stream after executing the first tool, losing any subsequent content.

Changes:
- Remove allow_multiple_tool_calls config option entirely
- Always process all tool calls without breaking mid-stream
- Simplify system prompt generation (no longer needs boolean param)
- Let the stream complete fully before continuing to next iteration
- Change find_last_tool_call_start to find_first_tool_call_start
- Remove parser.reset() call on duplicate detection

Benefits:
- Simpler logic with less conditional branching
- No lost content after tool calls
- Consistent behavior for all users
- Reduced config complexity
2026-01-09 13:28:07 +11:00
parent a72d5a650a
commit 67be0f20c7
11 changed files with 317 additions and 116 deletions
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -231,7 +231,7 @@ impl<W: UiWriter> Agent<W> {
            // Use default system prompt based on provider capabilities
            if provider_has_native_tool_calling {
                // For native tool calling providers, use a more explicit system prompt
-                get_system_prompt_for_native(config.agent.allow_multiple_tool_calls)
+                get_system_prompt_for_native()
            } else {
                // For non-native providers (embedded models), use JSON format instructions
                SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string()
@@ -1893,13 +1893,8 @@ impl<W: UiWriter> Agent<W> {
                        let completed_tools = parser.process_chunk(&chunk);

                        // Handle completed tool calls - process all if multiple calls enabled
-                        let tools_to_process: Vec<ToolCall> =
-                            if self.config.agent.allow_multiple_tool_calls {
-                                completed_tools
-                            } else {
-                                // Original behavior - only take the first tool
-                                completed_tools.into_iter().take(1).collect()
-                            };
+                        // Always process every tool call completed by this chunk (no single-tool cap)
+                        let tools_to_process: Vec<ToolCall> = completed_tools;

                        // Helper function to check if two tool calls are duplicates
                        let are_duplicates = |tc1: &ToolCall, tc2: &ToolCall| -> bool {
@@ -1953,11 +1948,10 @@ impl<W: UiWriter> Agent<W> {
                                modified_tool_call.tool = prefixed_tool_name;
                                debug!("{}", warning_msg);

-                                // Reset the parser to clear any partial/polluted state.
-                                // This prevents "example" tool calls in markdown or LLM stuttering
-                                // from polluting subsequent parsing.
-                                parser.reset();
-
+                                // NOTE: Do NOT call parser.reset() here!
+                                // Resetting the parser clears the entire text buffer, which would
+                                // lose any subsequent (non-duplicate) tool calls that haven't been
+                                // processed yet.
                                continue; // Skip execution of duplicate
                            }

@@ -2289,22 +2283,11 @@ impl<W: UiWriter> Agent<W> {
                            // Reset response_started flag for next iteration
                            response_started = false;

-                            // For single tool mode, break immediately
-                            if !self.config.agent.allow_multiple_tool_calls {
-                                break; // Break out of current stream to start a new one
-                            }
+                            // Continue processing - don't break mid-stream
                        } // End of for loop processing each tool call

-                        // If we processed any tools in multiple mode, break out to start new stream
-                        // BUT only if there are no more unexecuted tool calls in the buffer
-                        if tool_executed && self.config.agent.allow_multiple_tool_calls {
-                            if parser.has_unexecuted_tool_call() {
-                                debug!("Tool executed but parser still has unexecuted tool calls, continuing to process");
-                                // Don't break - continue processing to pick up remaining tool calls
-                            } else {
-                                break;
-                            }
-                        }
+                        // Note: We no longer break mid-stream after tool execution.
+                        // The stream is allowed to complete before the next iteration starts.

                        // If no tool calls were completed, continue streaming normally
                        if !tool_executed {
@@ -2782,6 +2765,8 @@ impl<W: UiWriter> Agent<W> {
            pending_images: &mut self.pending_images,
            is_autonomous: self.is_autonomous,
            requirements_sha: self.requirements_sha.as_deref(),
+            context_total_tokens: self.context_window.total_tokens,
+            context_used_tokens: self.context_window.used_tokens,
        };

        // Dispatch to the appropriate tool handler
--- a/crates/g3-core/src/prompts.rs
+++ b/crates/g3-core/src/prompts.rs
@@ -210,21 +210,17 @@ pub const SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE: &'static str =
    concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);

 /// Generate system prompt based on whether multiple tool calls are allowed
-pub fn get_system_prompt_for_native(allow_multiple: bool) -> String {
-    if allow_multiple {
-        // Replace the "ONE tool" instruction with multiple tools instruction
-        let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string();
-        base.replace(
-            "2. Call the appropriate tool with the required parameters",
-            "2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate. 
+pub fn get_system_prompt_for_native() -> String {
+    // Always advertise parallel tool calls: rewrite step 2 of the base prompt into its multi-tool form
+    let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string();
+    base.replace(
+        "2. Call the appropriate tool with the required parameters",
+        "2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate. 
              <use_parallel_tool_calls>
  For maximum efficiency, whenever you perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like `ls` or `list_dir`, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially.
  </use_parallel_tool_calls>
 "
-        )
-    } else {
-        SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
-    }
+    )
 }

 const SYSTEM_NON_NATIVE_TOOL_USE: &'static str =
@@ -410,12 +406,9 @@ const G3_IDENTITY_LINE: &str = "You are G3, an AI programming agent of the same
 /// The agent_prompt replaces only the G3 identity line at the start of the prompt.
 /// Everything else (tool instructions, coding guidelines, etc.) is preserved.
 pub fn get_agent_system_prompt(agent_prompt: &str, allow_multiple_tool_calls: bool) -> String {
-    // Get the full system prompt (with or without parallel tool calls)
-    let full_prompt = if allow_multiple_tool_calls {
-        get_system_prompt_for_native(true)
-    } else {
-        SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
-    };
+    // Get the full system prompt (always allows multiple tool calls now)
+    let _ = allow_multiple_tool_calls; // Parameter kept for API compatibility but ignored
+    let full_prompt = get_system_prompt_for_native();

    // Replace only the G3 identity line with the custom agent prompt
    full_prompt.replace(G3_IDENTITY_LINE, agent_prompt.trim())
--- a/crates/g3-core/src/streaming_parser.rs
+++ b/crates/g3-core/src/streaming_parser.rs
@@ -154,8 +154,14 @@ impl StreamingToolParser {
    fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
        // If we're not currently in a JSON tool call, look for the start
        if !self.in_json_tool_call {
-            if let Some(pos) = Self::find_last_tool_call_start(&self.text_buffer) {
-                debug!("Found JSON tool call pattern at position {}", pos);
+            // Only search in the unconsumed portion of the buffer to avoid
+            // re-parsing already-executed tool calls
+            let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
+            // Use find_first_tool_call_start to find the FIRST tool call, not the last.
+            // This ensures we process tool calls in order when multiple arrive together.
+            if let Some(relative_pos) = Self::find_first_tool_call_start(unchecked_buffer) {
+                let pos = self.last_consumed_position + relative_pos;
+                debug!("Found JSON tool call pattern at position {} (relative: {})", pos, relative_pos);
                self.in_json_tool_call = true;
                self.json_tool_start = Some(pos);
            }
@@ -413,4 +419,55 @@ mod tests {
        assert!(!parser.message_stopped);
        assert_eq!(parser.last_consumed_position, 0);
    }
+
+    #[test]
+    fn test_multiple_tool_calls_processed_in_order() {
+        // Two JSON tool calls delivered in one chunk must both come back from a single
+        // `process_chunk` call, in document order ("first" before "second")
+        let mut parser = StreamingToolParser::new();
+        
+        // Two tool-call objects separated by blank lines, surrounded by ordinary prose
+        let content = r#"Some text before
+
+{"tool": "shell", "args": {"command": "first"}}
+
+{"tool": "shell", "args": {"command": "second"}}
+
+Some text after"#;
+        
+        let chunk = g3_providers::CompletionChunk {
+            content: content.to_string(),
+            finished: true, // the whole payload is delivered as one final chunk
+            tool_calls: None, // both calls are present only as JSON text inside `content`
+            usage: None,
+        };
+        
+        let tools = parser.process_chunk(&chunk);
+        
+        // Neither call may be dropped
+        assert_eq!(tools.len(), 2, "Expected 2 tool calls, got {}", tools.len());
+        
+        // Index 0 must be the call that appears first in the text ("first", not "second")
+        assert_eq!(tools[0].tool, "shell");
+        assert_eq!(tools[0].args["command"], "first", 
+            "First tool call should have command 'first', got {:?}", tools[0].args);
+        
+        // Index 1 must be the call that appears second in the text
+        assert_eq!(tools[1].tool, "shell");
+        assert_eq!(tools[1].args["command"], "second",
+            "Second tool call should have command 'second', got {:?}", tools[1].args);
+    }
+
+    #[test]
+    fn test_find_first_vs_last_tool_call() { // the first/last start finders must disagree when two calls are present
+        let text = r#"{"tool": "first"} and {"tool": "second"}"#; // two tool-call openers on one line
+        
+        let first_pos = StreamingToolParser::find_first_tool_call_start(text);
+        let last_pos = StreamingToolParser::find_last_tool_call_start(text); // NOTE(review): needs find_last_tool_call_start to remain defined
+        
+        assert!(first_pos.is_some());
+        assert!(last_pos.is_some());
+        assert!(first_pos.unwrap() < last_pos.unwrap(), // the first match must sit at a strictly smaller offset
+            "First position ({:?}) should be less than last position ({:?})", first_pos, last_pos);
+    }
 }
--- a/crates/g3-core/src/tools/executor.rs
+++ b/crates/g3-core/src/tools/executor.rs
@@ -25,6 +25,8 @@ pub struct ToolContext<'a, W: UiWriter> {
    pub pending_images: &'a mut Vec<g3_providers::ImageContent>,
    pub is_autonomous: bool,
    pub requirements_sha: Option<&'a str>,
+    pub context_total_tokens: u32,
+    pub context_used_tokens: u32,
 }

 impl<'a, W: UiWriter> ToolContext<'a, W> {
--- a/crates/g3-core/src/tools/file_ops.rs
+++ b/crates/g3-core/src/tools/file_ops.rs
@@ -10,10 +10,50 @@ use crate::ToolCall;

 use super::executor::ToolContext;

+/// Bytes per token heuristic (conservative estimate for code/text mix)
+const BYTES_PER_TOKEN: f32 = 3.5;
+
+/// Maximum fraction of the context window a single file read can consume
+const MAX_FILE_READ_PERCENT: f32 = 0.20; // 20%
+
+/// Estimate token count from byte size: bytes / BYTES_PER_TOKEN, padded by 10% and rounded up
+fn estimate_tokens_from_bytes(bytes: usize) -> u32 {
+    ((bytes as f32 / BYTES_PER_TOKEN) * 1.1).ceil() as u32 // 10% safety buffer
+}
+
+/// Compute the byte cap for reading a `file_bytes`-long file, given the window size (`total_tokens`) and usage (`used_tokens`).
+/// Returns `None` when no cap applies (window size is 0, or the file is estimated under 20% of it), else `Some(max_bytes)`.
+fn calculate_read_limit(file_bytes: usize, total_tokens: u32, used_tokens: u32) -> Option<usize> {
+    let file_tokens = estimate_tokens_from_bytes(file_bytes);
+    let max_tokens_for_file = (total_tokens as f32 * MAX_FILE_READ_PERCENT) as u32;
+    
+    // Tier 1: No limit if the window size is 0 (otherwise every read is capped to 0 bytes) or the file is < 20% of context
+    if total_tokens == 0 || file_tokens < max_tokens_for_file {
+        return None;
+    }
+    
+    // Calculate available context
+    let available_tokens = total_tokens.saturating_sub(used_tokens); // saturates to 0 if usage exceeds the window
+    let half_available = available_tokens / 2;
+    
+    // Tier 3: If 20% would exceed half of available, cap at half available
+    let effective_max_tokens = if max_tokens_for_file > half_available {
+        half_available
+    } else {
+        // Tier 2: Cap at 20% of total context
+        max_tokens_for_file
+    };
+    
+    // Convert tokens back to bytes (inverse of estimate_tokens_from_bytes, rounded down)
+    let max_bytes = (effective_max_tokens as f32 * BYTES_PER_TOKEN / 1.1) as usize;
+    
+    Some(max_bytes)
+}
+
 /// Execute the `read_file` tool.
 pub async fn execute_read_file<W: UiWriter>(
    tool_call: &ToolCall,
-    _ctx: &ToolContext<'_, W>,
+    ctx: &ToolContext<'_, W>,
 ) -> Result<String> {
    debug!("Processing read_file tool call");
    
@@ -47,54 +87,83 @@ pub async fn execute_read_file<W: UiWriter>(

    match std::fs::read_to_string(path_str) {
        Ok(content) => {
-            // Validate and apply range if specified
-            let start = start_char.unwrap_or(0);
-            let end = end_char.unwrap_or(content.len());
+            let total_file_len = content.len();
+            
+            // Calculate token-aware limit for the content we're about to read
+            let read_limit = calculate_read_limit(
+                total_file_len,
+                ctx.context_total_tokens,
+                ctx.context_used_tokens,
+            );

-            // Validation
-            if start > content.len() {
+            // Validate user-specified range
+            let user_start = start_char.unwrap_or(0);
+            if user_start > total_file_len {
                return Ok(format!(
                    "❌ Start position {} exceeds file length {}",
-                    start,
-                    content.len()
+                    user_start,
+                    total_file_len
                ));
            }
-            if end > content.len() {
+            
+            let user_end = end_char.unwrap_or(total_file_len);
+            if user_end > total_file_len {
                return Ok(format!(
                    "❌ End position {} exceeds file length {}",
-                    end,
-                    content.len()
+                    user_end,
+                    total_file_len
                ));
            }
-            if start > end {
+            if user_start > user_end {
                return Ok(format!(
                    "❌ Start position {} is greater than end position {}",
-                    start, end
+                    user_start, user_end
                ));
            }

+            // Calculate the range we'll actually read
+            let user_range_len = user_end - user_start;
+            
+            // Determine if we need to apply token-aware limiting
+            let (effective_end, was_truncated) = match read_limit {
+                Some(max_bytes) if user_range_len > max_bytes => {
+                    // Truncate to max_bytes from the start position
+                    (user_start + max_bytes, true)
+                }
+                _ => (user_end, false),
+            };
+
            // Extract the requested portion, ensuring we're at char boundaries
-            let start_boundary = if start == 0 {
+            let start_boundary = if user_start == 0 {
                0
            } else {
                content
                    .char_indices()
-                    .find(|(i, _)| *i >= start)
+                    .find(|(i, _)| *i >= user_start)
                    .map(|(i, _)| i)
-                    .unwrap_or(start)
+                    .unwrap_or(user_start)
            };
            let end_boundary = content
                .char_indices()
-                .find(|(i, _)| *i >= end)
+                .find(|(i, _)| *i >= effective_end)
                .map(|(i, _)| i)
-                .unwrap_or(content.len());
+                .unwrap_or(total_file_len);

            let partial_content = &content[start_boundary..end_boundary];
            let line_count = partial_content.lines().count();
            let total_lines = content.lines().count();

-            // Format output with range info if partial
-            if start_char.is_some() || end_char.is_some() {
+            // Format output based on whether truncation occurred
+            if was_truncated {
+                // Token-aware truncation header
+                let context_pct = (ctx.context_used_tokens as f32 / ctx.context_total_tokens as f32 * 100.0) as u32;
+                Ok(format!(
+                    "⚠️ FILE TRUNCATED: Reading chars {}-{} of {} total (file exceeds 20% context window threshold, context at {}%)\n\
+                     📄 File content ({} lines of {} total):\n{}",
+                    start_boundary, end_boundary, total_file_len, context_pct,
+                    line_count, total_lines, partial_content
+                ))
+            } else if start_char.is_some() || end_char.is_some() {
                Ok(format!(
                    "📄 File content (chars {}-{}, {} lines of {} total):\n{}",
                    start_boundary, end_boundary, line_count, total_lines, partial_content