fix: remove allow_multiple_tool_calls config and simplify tool execution flow
This fixes a bug where the agent would stop responding abruptly without calling final_output. The root cause was the allow_multiple_tool_calls config option (default: false), which caused the agent to break out of the streaming loop mid-stream after executing the first tool, losing any subsequent content.

Changes:
- Remove the allow_multiple_tool_calls config option entirely
- Always process all tool calls without breaking mid-stream
- Simplify system prompt generation (no longer needs the boolean param)
- Let the stream complete fully before continuing to the next iteration
- Change find_last_tool_call_start to find_first_tool_call_start
- Remove the parser.reset() call on duplicate detection

Benefits:
- Simpler logic with less conditional branching
- No lost content after tool calls
- Consistent behavior for all users
- Reduced config complexity
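The shape of the fix, as a minimal runnable sketch (Parser and execute are stand-ins for the real g3 types, which appear in the diffs below; only the control flow is the point here):

// Old flow: take the first completed tool call, then break mid-stream,
// so anything still buffered - including final_output - was dropped.
// New flow: drain every chunk and execute all completed tool calls.
struct Parser;
impl Parser {
    fn process_chunk(&mut self, chunk: &str) -> Vec<String> {
        chunk.split_whitespace().map(str::to_string).collect()
    }
}

fn execute(tool: &str) {
    println!("executing {tool}");
}

fn main() {
    let chunks = ["read_file list_dir", "final_output"];
    let mut parser = Parser;
    for chunk in chunks {
        for tool in parser.process_chunk(chunk) {
            execute(&tool); // previously: execute the first, then break
        }
    }
}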
@@ -759,7 +759,8 @@ async fn run_agent_mode(
     let config = g3_config::Config::load(config_path)?;
 
     // Generate the combined system prompt (agent prompt + tool instructions)
-    let system_prompt = get_agent_system_prompt(&agent_prompt, config.agent.allow_multiple_tool_calls);
+    // Note: allow_multiple_tool_calls parameter is deprecated but kept for API compatibility
+    let system_prompt = get_agent_system_prompt(&agent_prompt, true);
 
     // Read README if present
     let readme_content = std::fs::read_to_string(workspace_dir.join("README.md")).ok();
@@ -94,7 +94,6 @@ pub struct AgentConfig {
     pub max_context_length: Option<u32>,
     pub fallback_default_max_tokens: usize,
     pub enable_streaming: bool,
-    pub allow_multiple_tool_calls: bool,
     pub timeout_seconds: u64,
     pub auto_compact: bool,
     pub max_retry_attempts: u32,
@@ -191,7 +190,6 @@ impl Default for Config {
             max_context_length: None,
             fallback_default_max_tokens: 8192,
             enable_streaming: true,
-            allow_multiple_tool_calls: false,
             timeout_seconds: 60,
             auto_compact: true,
             max_retry_attempts: 3,
@@ -48,7 +48,6 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
-allow_multiple_tool_calls = false
 max_retry_attempts = 3
 autonomous_max_retry_attempts = 6
 {}"#, test_config_footer());
@@ -93,7 +92,6 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
-allow_multiple_tool_calls = false
 max_retry_attempts = 3
 autonomous_max_retry_attempts = 6
 {}"#, test_config_footer());
@@ -138,7 +136,6 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
-allow_multiple_tool_calls = false
 max_retry_attempts = 3
 autonomous_max_retry_attempts = 6
 {}"#, test_config_footer());
@@ -176,7 +173,6 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
-allow_multiple_tool_calls = false
 max_retry_attempts = 3
 autonomous_max_retry_attempts = 6
 {}"#, test_config_footer());
@@ -218,7 +214,6 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
-allow_multiple_tool_calls = false
 max_retry_attempts = 3
 autonomous_max_retry_attempts = 6
 {}"#, test_config_footer());
@@ -257,7 +252,6 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
-allow_multiple_tool_calls = false
 max_retry_attempts = 3
 autonomous_max_retry_attempts = 6
 {}"#, test_config_footer());
@@ -1,40 +0,0 @@
-#[cfg(test)]
-mod test_multiple_tool_calls {
-    use g3_config::{AgentConfig, Config};
-
-    #[test]
-    fn test_config_has_multiple_tool_calls_field() {
-        let config = Config::default();
-
-        // Test that the field exists and defaults to false
-        assert_eq!(config.agent.allow_multiple_tool_calls, false);
-
-        // Test that we can create a config with the field set to true
-        let mut custom_config = Config::default();
-        custom_config.agent.allow_multiple_tool_calls = true;
-        assert_eq!(custom_config.agent.allow_multiple_tool_calls, true);
-    }
-
-    #[test]
-    fn test_agent_config_serialization() {
-        let agent_config = AgentConfig {
-            max_context_length: Some(100000),
-            fallback_default_max_tokens: 8192,
-            enable_streaming: true,
-            allow_multiple_tool_calls: true,
-            timeout_seconds: 60,
-            auto_compact: true,
-            max_retry_attempts: 3,
-            autonomous_max_retry_attempts: 6,
-            check_todo_staleness: true,
-        };
-
-        // Test serialization
-        let json = serde_json::to_string(&agent_config).unwrap();
-        assert!(json.contains("\"allow_multiple_tool_calls\":true"));
-
-        // Test deserialization
-        let deserialized: AgentConfig = serde_json::from_str(&json).unwrap();
-        assert_eq!(deserialized.allow_multiple_tool_calls, true);
-    }
-}
@@ -231,7 +231,7 @@ impl<W: UiWriter> Agent<W> {
             // Use default system prompt based on provider capabilities
             if provider_has_native_tool_calling {
                 // For native tool calling providers, use a more explicit system prompt
-                get_system_prompt_for_native(config.agent.allow_multiple_tool_calls)
+                get_system_prompt_for_native()
             } else {
                 // For non-native providers (embedded models), use JSON format instructions
                 SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string()
@@ -1893,13 +1893,8 @@ impl<W: UiWriter> Agent<W> {
             let completed_tools = parser.process_chunk(&chunk);
 
             // Handle completed tool calls - process all if multiple calls enabled
-            let tools_to_process: Vec<ToolCall> =
-                if self.config.agent.allow_multiple_tool_calls {
-                    completed_tools
-                } else {
-                    // Original behavior - only take the first tool
-                    completed_tools.into_iter().take(1).collect()
-                };
+            // Always process all tool calls - they will be executed after stream ends
+            let tools_to_process: Vec<ToolCall> = completed_tools;
 
             // Helper function to check if two tool calls are duplicates
             let are_duplicates = |tc1: &ToolCall, tc2: &ToolCall| -> bool {
@@ -1953,11 +1948,10 @@ impl<W: UiWriter> Agent<W> {
                 modified_tool_call.tool = prefixed_tool_name;
                 debug!("{}", warning_msg);
 
-                // Reset the parser to clear any partial/polluted state.
-                // This prevents "example" tool calls in markdown or LLM stuttering
-                // from polluting subsequent parsing.
-                parser.reset();
+                // NOTE: Do NOT call parser.reset() here!
+                // Resetting the parser clears the entire text buffer, which would
+                // lose any subsequent (non-duplicate) tool calls that haven't been
+                // processed yet.
 
                 continue; // Skip execution of duplicate
             }
 
@@ -2289,22 +2283,11 @@ impl<W: UiWriter> Agent<W> {
                 // Reset response_started flag for next iteration
                 response_started = false;
 
-                // For single tool mode, break immediately
-                if !self.config.agent.allow_multiple_tool_calls {
-                    break; // Break out of current stream to start a new one
-                }
+                // Continue processing - don't break mid-stream
             } // End of for loop processing each tool call
 
-            // If we processed any tools in multiple mode, break out to start new stream
-            // BUT only if there are no more unexecuted tool calls in the buffer
-            if tool_executed && self.config.agent.allow_multiple_tool_calls {
-                if parser.has_unexecuted_tool_call() {
-                    debug!("Tool executed but parser still has unexecuted tool calls, continuing to process");
-                    // Don't break - continue processing to pick up remaining tool calls
-                } else {
-                    break;
-                }
-            }
+            // Note: We no longer break mid-stream after tool execution.
+            // All tool calls are collected and executed after the stream ends.
 
             // If no tool calls were completed, continue streaming normally
             if !tool_executed {
@@ -2782,6 +2765,8 @@ impl<W: UiWriter> Agent<W> {
                 pending_images: &mut self.pending_images,
                 is_autonomous: self.is_autonomous,
                 requirements_sha: self.requirements_sha.as_deref(),
+                context_total_tokens: self.context_window.total_tokens,
+                context_used_tokens: self.context_window.used_tokens,
             };
 
             // Dispatch to the appropriate tool handler
@@ -210,21 +210,17 @@ pub const SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE: &'static str =
     concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);
 
 /// Generate system prompt based on whether multiple tool calls are allowed
-pub fn get_system_prompt_for_native(allow_multiple: bool) -> String {
-    if allow_multiple {
-        // Replace the "ONE tool" instruction with multiple tools instruction
-        let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string();
-        base.replace(
-            "2. Call the appropriate tool with the required parameters",
-            "2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate.
+pub fn get_system_prompt_for_native() -> String {
+    // Always allow multiple tool calls - they are processed sequentially after stream ends
+    let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string();
+    base.replace(
+        "2. Call the appropriate tool with the required parameters",
+        "2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate.
 
 <use_parallel_tool_calls>
 For maximum efficiency, whenever you perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like `ls` or `list_dir`, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially.
 </use_parallel_tool_calls>
 "
     )
-    } else {
-        SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
-    }
 }
 
 const SYSTEM_NON_NATIVE_TOOL_USE: &'static str =
@@ -410,12 +406,9 @@ const G3_IDENTITY_LINE: &str = "You are G3, an AI programming agent of the same
 /// The agent_prompt replaces only the G3 identity line at the start of the prompt.
 /// Everything else (tool instructions, coding guidelines, etc.) is preserved.
 pub fn get_agent_system_prompt(agent_prompt: &str, allow_multiple_tool_calls: bool) -> String {
-    // Get the full system prompt (with or without parallel tool calls)
-    let full_prompt = if allow_multiple_tool_calls {
-        get_system_prompt_for_native(true)
-    } else {
-        SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
-    };
+    // Get the full system prompt (always allows multiple tool calls now)
+    let _ = allow_multiple_tool_calls; // Parameter kept for API compatibility but ignored
+    let full_prompt = get_system_prompt_for_native();
 
     // Replace only the G3 identity line with the custom agent prompt
     full_prompt.replace(G3_IDENTITY_LINE, agent_prompt.trim())
@@ -154,8 +154,14 @@ impl StreamingToolParser {
     fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
         // If we're not currently in a JSON tool call, look for the start
        if !self.in_json_tool_call {
-            if let Some(pos) = Self::find_last_tool_call_start(&self.text_buffer) {
-                debug!("Found JSON tool call pattern at position {}", pos);
+            // Only search in the unconsumed portion of the buffer to avoid
+            // re-parsing already-executed tool calls
+            let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
+            // Use find_first_tool_call_start to find the FIRST tool call, not the last.
+            // This ensures we process tool calls in order when multiple arrive together.
+            if let Some(relative_pos) = Self::find_first_tool_call_start(unchecked_buffer) {
+                let pos = self.last_consumed_position + relative_pos;
+                debug!("Found JSON tool call pattern at position {} (relative: {})", pos, relative_pos);
                 self.in_json_tool_call = true;
                 self.json_tool_start = Some(pos);
             }
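A simplified, self-contained model of the new scanning rule (the {"tool" marker and the cursor arithmetic mirror the diff; everything else here is a stand-in, not the real parser):

// Scan only the unconsumed tail of the buffer and return the FIRST
// tool-call start as an absolute position - a simplified model of
// last_consumed_position + find_first_tool_call_start in the diff.
fn next_tool_call_start(buffer: &str, last_consumed: usize) -> Option<usize> {
    buffer[last_consumed..]
        .find("{\"tool\"")
        .map(|rel| last_consumed + rel)
}

fn main() {
    let buf = r#"text {"tool": "a"} more {"tool": "b"}"#;
    let first = next_tool_call_start(buf, 0).unwrap();
    // Advancing the cursor past the first call finds the second, so
    // calls are consumed in arrival order rather than last-first.
    let second = next_tool_call_start(buf, first + 1).unwrap();
    assert!(first < second);
}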
@@ -413,4 +419,55 @@ mod tests {
         assert!(!parser.message_stopped);
         assert_eq!(parser.last_consumed_position, 0);
     }
+
+    #[test]
+    fn test_multiple_tool_calls_processed_in_order() {
+        // Test that when multiple tool calls arrive together, they are processed
+        // in order (first one first, not last one first)
+        let mut parser = StreamingToolParser::new();
+
+        // Simulate two tool calls arriving in the same chunk
+        let content = r#"Some text before
+
+{"tool": "shell", "args": {"command": "first"}}
+
+{"tool": "shell", "args": {"command": "second"}}
+
+Some text after"#;
+
+        let chunk = g3_providers::CompletionChunk {
+            content: content.to_string(),
+            finished: true,
+            tool_calls: None,
+            usage: None,
+        };
+
+        let tools = parser.process_chunk(&chunk);
+
+        // Should find both tool calls
+        assert_eq!(tools.len(), 2, "Expected 2 tool calls, got {}", tools.len());
+
+        // First tool call should be "first", not "second"
+        assert_eq!(tools[0].tool, "shell");
+        assert_eq!(tools[0].args["command"], "first",
+            "First tool call should have command 'first', got {:?}", tools[0].args);
+
+        // Second tool call should be "second"
+        assert_eq!(tools[1].tool, "shell");
+        assert_eq!(tools[1].args["command"], "second",
+            "Second tool call should have command 'second', got {:?}", tools[1].args);
+    }
+
+    #[test]
+    fn test_find_first_vs_last_tool_call() {
+        let text = r#"{"tool": "first"} and {"tool": "second"}"#;
+
+        let first_pos = StreamingToolParser::find_first_tool_call_start(text);
+        let last_pos = StreamingToolParser::find_last_tool_call_start(text);
+
+        assert!(first_pos.is_some());
+        assert!(last_pos.is_some());
+        assert!(first_pos.unwrap() < last_pos.unwrap(),
+            "First position ({:?}) should be less than last position ({:?})", first_pos, last_pos);
+    }
 }
@@ -25,6 +25,8 @@ pub struct ToolContext<'a, W: UiWriter> {
     pub pending_images: &'a mut Vec<g3_providers::ImageContent>,
     pub is_autonomous: bool,
     pub requirements_sha: Option<&'a str>,
+    pub context_total_tokens: u32,
+    pub context_used_tokens: u32,
 }
 
 impl<'a, W: UiWriter> ToolContext<'a, W> {
@@ -10,10 +10,50 @@ use crate::ToolCall;
 
 use super::executor::ToolContext;
 
+/// Bytes per token heuristic (conservative estimate for code/text mix)
+const BYTES_PER_TOKEN: f32 = 3.5;
+
+/// Maximum percentage of context window a single file read can consume
+const MAX_FILE_READ_PERCENT: f32 = 0.20; // 20%
+
+/// Estimate token count from byte size
+fn estimate_tokens_from_bytes(bytes: usize) -> u32 {
+    ((bytes as f32 / BYTES_PER_TOKEN) * 1.1).ceil() as u32 // 10% safety buffer
+}
+
+/// Calculate the maximum bytes we should read based on context window state.
+/// Returns None if no limit needed, Some(max_bytes) if limiting required.
+fn calculate_read_limit(file_bytes: usize, total_tokens: u32, used_tokens: u32) -> Option<usize> {
+    let file_tokens = estimate_tokens_from_bytes(file_bytes);
+    let max_tokens_for_file = (total_tokens as f32 * MAX_FILE_READ_PERCENT) as u32;
+
+    // Tier 1: File is small enough (< 20% of context) - no limit
+    if file_tokens < max_tokens_for_file {
+        return None;
+    }
+
+    // Calculate available context
+    let available_tokens = total_tokens.saturating_sub(used_tokens);
+    let half_available = available_tokens / 2;
+
+    // Tier 3: If 20% would exceed half of available, cap at half available
+    let effective_max_tokens = if max_tokens_for_file > half_available {
+        half_available
+    } else {
+        // Tier 2: Cap at 20% of total context
+        max_tokens_for_file
+    };
+
+    // Convert tokens back to bytes
+    let max_bytes = (effective_max_tokens as f32 * BYTES_PER_TOKEN / 1.1) as usize;
+
+    Some(max_bytes)
+}
+
 /// Execute the `read_file` tool.
 pub async fn execute_read_file<W: UiWriter>(
     tool_call: &ToolCall,
-    _ctx: &ToolContext<'_, W>,
+    ctx: &ToolContext<'_, W>,
 ) -> Result<String> {
     debug!("Processing read_file tool call");
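Working the tiers with the same 100k-token window the new tests use: a 350,000-byte file estimates to 110,000 tokens, which exceeds the 20% cap of 20,000 tokens, so tier 2 limits the read to about 63 KB; once 70,000 tokens are already used, half of the remaining 30,000 is 15,000 tokens, so tier 3 tightens the cap to roughly 47 KB. A sketch of that arithmetic as a caller would see it (assuming the helpers above are in scope):

fn main() {
    // Tier 1: ~1.1k tokens, well under 20% of 100k - read the whole file.
    assert_eq!(calculate_read_limit(3_500, 100_000, 0), None);

    // Tier 2: fresh context - capped at 20% of the window (~63 KB).
    let fresh = calculate_read_limit(350_000, 100_000, 0).unwrap();

    // Tier 3: 70% used - capped at half the remaining context (~47 KB).
    let nearly_full = calculate_read_limit(350_000, 100_000, 70_000).unwrap();

    assert!(nearly_full < fresh);
}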
@@ -47,54 +87,83 @@ pub async fn execute_read_file<W: UiWriter>(
 
     match std::fs::read_to_string(path_str) {
         Ok(content) => {
-            // Validate and apply range if specified
-            let start = start_char.unwrap_or(0);
-            let end = end_char.unwrap_or(content.len());
+            let total_file_len = content.len();
 
-            // Validation
-            if start > content.len() {
+            // Calculate token-aware limit for the content we're about to read
+            let read_limit = calculate_read_limit(
+                total_file_len,
+                ctx.context_total_tokens,
+                ctx.context_used_tokens,
+            );
+
+            // Validate user-specified range
+            let user_start = start_char.unwrap_or(0);
+            if user_start > total_file_len {
                 return Ok(format!(
                     "❌ Start position {} exceeds file length {}",
-                    start,
-                    content.len()
-                ));
-            }
-            if end > content.len() {
-                return Ok(format!(
-                    "❌ End position {} exceeds file length {}",
-                    end,
-                    content.len()
-                ));
-            }
-            if start > end {
-                return Ok(format!(
-                    "❌ Start position {} is greater than end position {}",
-                    start, end
+                    user_start,
+                    total_file_len
                 ));
             }
 
+            let user_end = end_char.unwrap_or(total_file_len);
+            if user_end > total_file_len {
+                return Ok(format!(
+                    "❌ End position {} exceeds file length {}",
+                    user_end,
+                    total_file_len
+                ));
+            }
+            if user_start > user_end {
+                return Ok(format!(
+                    "❌ Start position {} is greater than end position {}",
+                    user_start, user_end
+                ));
+            }
+
+            // Calculate the range we'll actually read
+            let user_range_len = user_end - user_start;
+
+            // Determine if we need to apply token-aware limiting
+            let (effective_end, was_truncated) = match read_limit {
+                Some(max_bytes) if user_range_len > max_bytes => {
+                    // Truncate to max_bytes from the start position
+                    (user_start + max_bytes, true)
+                }
+                _ => (user_end, false),
+            };
+
             // Extract the requested portion, ensuring we're at char boundaries
-            let start_boundary = if start == 0 {
+            let start_boundary = if user_start == 0 {
                 0
             } else {
                 content
                     .char_indices()
-                    .find(|(i, _)| *i >= start)
+                    .find(|(i, _)| *i >= user_start)
                     .map(|(i, _)| i)
-                    .unwrap_or(start)
+                    .unwrap_or(user_start)
             };
             let end_boundary = content
                 .char_indices()
-                .find(|(i, _)| *i >= end)
+                .find(|(i, _)| *i >= effective_end)
                 .map(|(i, _)| i)
-                .unwrap_or(content.len());
+                .unwrap_or(total_file_len);
 
             let partial_content = &content[start_boundary..end_boundary];
             let line_count = partial_content.lines().count();
             let total_lines = content.lines().count();
 
-            // Format output with range info if partial
-            if start_char.is_some() || end_char.is_some() {
+            // Format output based on whether truncation occurred
+            if was_truncated {
+                // Token-aware truncation header
+                let context_pct = (ctx.context_used_tokens as f32 / ctx.context_total_tokens as f32 * 100.0) as u32;
+                Ok(format!(
+                    "⚠️ FILE TRUNCATED: Reading chars {}-{} of {} total (file exceeds 20% context window threshold, context at {}%)\n\
+                    📄 File content ({} lines of {} total):\n{}",
+                    start_boundary, end_boundary, total_file_len, context_pct,
+                    line_count, total_lines, partial_content
+                ))
+            } else if start_char.is_some() || end_char.is_some() {
                 Ok(format!(
                     "📄 File content (chars {}-{}, {} lines of {} total):\n{}",
                     start_boundary, end_boundary, line_count, total_lines, partial_content
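One subtlety in the truncation path: user_start + max_bytes is a raw byte offset and can land inside a multi-byte UTF-8 character, which is why the code snaps both boundaries with a char_indices scan before slicing. A standalone illustration of the same snapping technique (not the diff's code, just the idea):

// Snap a byte offset forward to the nearest char boundary so that
// slicing can never panic on a multi-byte character.
fn snap_to_char_boundary(s: &str, byte_pos: usize) -> usize {
    s.char_indices()
        .map(|(i, _)| i)
        .find(|&i| i >= byte_pos)
        .unwrap_or(s.len())
}

fn main() {
    let s = "aéé"; // 'a' at byte 0, first 'é' spans bytes 1..3
    assert_eq!(snap_to_char_boundary(s, 2), 3); // 2 is mid-'é', snap to 3
    assert_eq!(&s[..snap_to_char_boundary(s, 2)], "aé");
}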
New file: crates/g3-core/tests/read_file_token_limit_test.rs (143 lines)
@@ -0,0 +1,143 @@
+//! Tests for token-aware read_file limiting.
+//!
+//! These tests verify that read_file properly limits output based on
+//! context window state to prevent blowing out the context.
+
+use std::fs;
+use std::path::PathBuf;
+use tempfile::TempDir;
+
+/// Helper to create a test file with specified size
+fn create_test_file(dir: &TempDir, name: &str, size_bytes: usize) -> PathBuf {
+    let path = dir.path().join(name);
+    let content: String = "x".repeat(size_bytes);
+    fs::write(&path, &content).unwrap();
+    path
+}
+
+/// Test the helper functions directly
+mod helper_tests {
+    /// Bytes per token heuristic (must match file_ops.rs)
+    const BYTES_PER_TOKEN: f32 = 3.5;
+
+    /// Estimate token count from byte size (must match file_ops.rs)
+    fn estimate_tokens_from_bytes(bytes: usize) -> u32 {
+        ((bytes as f32 / BYTES_PER_TOKEN) * 1.1).ceil() as u32
+    }
+
+    /// Calculate the maximum bytes we should read based on context window state.
+    fn calculate_read_limit(file_bytes: usize, total_tokens: u32, used_tokens: u32) -> Option<usize> {
+        let file_tokens = estimate_tokens_from_bytes(file_bytes);
+        let max_tokens_for_file = (total_tokens as f32 * 0.20) as u32; // 20%
+
+        // Tier 1: File is small enough (< 20% of context) - no limit
+        if file_tokens < max_tokens_for_file {
+            return None;
+        }
+
+        // Calculate available context
+        let available_tokens = total_tokens.saturating_sub(used_tokens);
+        let half_available = available_tokens / 2;
+
+        // Tier 3: If 20% would exceed half of available, cap at half available
+        let effective_max_tokens = if max_tokens_for_file > half_available {
+            half_available
+        } else {
+            // Tier 2: Cap at 20% of total context
+            max_tokens_for_file
+        };
+
+        // Convert tokens back to bytes
+        let max_bytes = (effective_max_tokens as f32 * BYTES_PER_TOKEN / 1.1) as usize;
+
+        Some(max_bytes)
+    }
+
+    #[test]
+    fn test_estimate_tokens_from_bytes() {
+        // 3500 bytes at 3.5 bytes/token = 1000 tokens, +10% = 1100
+        let tokens = estimate_tokens_from_bytes(3500);
+        assert_eq!(tokens, 1100);
+
+        // Small file
+        let tokens = estimate_tokens_from_bytes(100);
+        assert!(tokens > 0 && tokens < 50);
+    }
+
+    #[test]
+    fn test_tier1_small_file_no_limit() {
+        // 100k token context, file would be ~1000 tokens (well under 20% = 20k)
+        let limit = calculate_read_limit(3500, 100_000, 0);
+        assert!(limit.is_none(), "Small file should have no limit");
+    }
+
+    #[test]
+    fn test_tier2_large_file_capped_at_20_percent() {
+        // 100k token context, file would be ~100k tokens (way over 20%)
+        // Should cap at 20% = 20k tokens worth of bytes
+        let limit = calculate_read_limit(350_000, 100_000, 0);
+        assert!(limit.is_some(), "Large file should be limited");
+
+        let max_bytes = limit.unwrap();
+        // 20% of 100k = 20k tokens, at 3.5 bytes/token / 1.1 = ~63k bytes
+        assert!(max_bytes > 50_000 && max_bytes < 70_000,
+            "Expected ~63k bytes, got {}", max_bytes);
+    }
+
+    #[test]
+    fn test_tier3_context_nearly_full() {
+        // 100k token context, already at 70% (30k available)
+        // 20% of total = 20k tokens, but half of available = 15k tokens
+        // Should cap at 15k tokens worth of bytes
+        let limit = calculate_read_limit(350_000, 100_000, 70_000);
+        assert!(limit.is_some(), "Large file should be limited");
+
+        let max_bytes = limit.unwrap();
+        // 15k tokens at 3.5 bytes/token / 1.1 = ~47k bytes
+        assert!(max_bytes > 40_000 && max_bytes < 55_000,
+            "Expected ~47k bytes when context at 70%, got {}", max_bytes);
+    }
+
+    #[test]
+    fn test_tier3_context_very_full() {
+        // 100k token context, already at 90% (10k available)
+        // 20% of total = 20k tokens, but half of available = 5k tokens
+        // Should cap at 5k tokens worth of bytes
+        let limit = calculate_read_limit(350_000, 100_000, 90_000);
+        assert!(limit.is_some(), "Large file should be limited");
+
+        let max_bytes = limit.unwrap();
+        // 5k tokens at 3.5 bytes/token / 1.1 = ~15.9k bytes
+        assert!(max_bytes > 10_000 && max_bytes < 20_000,
+            "Expected ~16k bytes when context at 90%, got {}", max_bytes);
+    }
+
+    #[test]
+    fn test_boundary_exactly_20_percent() {
+        // File that's exactly at the 20% boundary
+        // 100k context, 20% = 20k tokens
+        // 20k tokens at 3.5 bytes/token * 1.1 = ~77k bytes would trigger limit
+
+        // Just under - should not limit
+        let limit = calculate_read_limit(60_000, 100_000, 0);
+        assert!(limit.is_none(), "File just under 20% should not be limited");
+
+        // Just over - should limit
+        let limit = calculate_read_limit(80_000, 100_000, 0);
+        assert!(limit.is_some(), "File just over 20% should be limited");
+    }
+}
+
+/// Integration-style tests that would test the actual tool execution
+/// These are commented out as they require more setup (ToolContext, etc.)
+/// but document the expected behavior.
+#[cfg(test)]
+mod integration_notes {
+    // To fully test execute_read_file with token limiting:
+    // 1. Create a mock ToolContext with specific context_total_tokens and context_used_tokens
+    // 2. Create a large test file
+    // 3. Call execute_read_file and verify:
+    //    - Output contains truncation warning header
+    //    - Content is actually truncated to expected size
+    //    - Header shows correct character range and context percentage
+}
@@ -23,7 +23,6 @@ fallback_default_max_tokens = 8192
 enable_streaming = true
 timeout_seconds = 60
 auto_compact = true
-allow_multiple_tool_calls = false
 max_retry_attempts = 3
 autonomous_max_retry_attempts = 6