fix: remove allow_multiple_tool_calls config and simplify tool execution flow

This fixes a bug where the agent would stop responding abruptly without
calling final_output. The root cause was the allow_multiple_tool_calls
config option (default: false) which caused the agent to break out of
the streaming loop mid-stream after executing the first tool, losing
any subsequent content.

Changes:
- Remove allow_multiple_tool_calls config option entirely
- Always process all tool calls without breaking mid-stream
- Simplify system prompt generation (no longer needs boolean param)
- Let the stream complete fully before continuing to next iteration
- Change find_last_tool_call_start to find_first_tool_call_start
- Remove parser.reset() call on duplicate detection

Benefits:
- Simpler logic with less conditional branching
- No lost content after tool calls
- Consistent behavior for all users
- Reduced config complexity
This commit is contained in:
Dhanji R. Prasanna
2026-01-09 13:28:07 +11:00
parent a72d5a650a
commit 67be0f20c7
11 changed files with 317 additions and 116 deletions

View File

@@ -759,7 +759,8 @@ async fn run_agent_mode(
let config = g3_config::Config::load(config_path)?; let config = g3_config::Config::load(config_path)?;
// Generate the combined system prompt (agent prompt + tool instructions) // Generate the combined system prompt (agent prompt + tool instructions)
let system_prompt = get_agent_system_prompt(&agent_prompt, config.agent.allow_multiple_tool_calls); // Note: allow_multiple_tool_calls parameter is deprecated but kept for API compatibility
let system_prompt = get_agent_system_prompt(&agent_prompt, true);
// Read README if present // Read README if present
let readme_content = std::fs::read_to_string(workspace_dir.join("README.md")).ok(); let readme_content = std::fs::read_to_string(workspace_dir.join("README.md")).ok();

View File

@@ -94,7 +94,6 @@ pub struct AgentConfig {
pub max_context_length: Option<u32>, pub max_context_length: Option<u32>,
pub fallback_default_max_tokens: usize, pub fallback_default_max_tokens: usize,
pub enable_streaming: bool, pub enable_streaming: bool,
pub allow_multiple_tool_calls: bool,
pub timeout_seconds: u64, pub timeout_seconds: u64,
pub auto_compact: bool, pub auto_compact: bool,
pub max_retry_attempts: u32, pub max_retry_attempts: u32,
@@ -191,7 +190,6 @@ impl Default for Config {
max_context_length: None, max_context_length: None,
fallback_default_max_tokens: 8192, fallback_default_max_tokens: 8192,
enable_streaming: true, enable_streaming: true,
allow_multiple_tool_calls: false,
timeout_seconds: 60, timeout_seconds: 60,
auto_compact: true, auto_compact: true,
max_retry_attempts: 3, max_retry_attempts: 3,

View File

@@ -48,7 +48,6 @@ fallback_default_max_tokens = 8192
enable_streaming = true enable_streaming = true
timeout_seconds = 60 timeout_seconds = 60
auto_compact = true auto_compact = true
allow_multiple_tool_calls = false
max_retry_attempts = 3 max_retry_attempts = 3
autonomous_max_retry_attempts = 6 autonomous_max_retry_attempts = 6
{}"#, test_config_footer()); {}"#, test_config_footer());
@@ -93,7 +92,6 @@ fallback_default_max_tokens = 8192
enable_streaming = true enable_streaming = true
timeout_seconds = 60 timeout_seconds = 60
auto_compact = true auto_compact = true
allow_multiple_tool_calls = false
max_retry_attempts = 3 max_retry_attempts = 3
autonomous_max_retry_attempts = 6 autonomous_max_retry_attempts = 6
{}"#, test_config_footer()); {}"#, test_config_footer());
@@ -138,7 +136,6 @@ fallback_default_max_tokens = 8192
enable_streaming = true enable_streaming = true
timeout_seconds = 60 timeout_seconds = 60
auto_compact = true auto_compact = true
allow_multiple_tool_calls = false
max_retry_attempts = 3 max_retry_attempts = 3
autonomous_max_retry_attempts = 6 autonomous_max_retry_attempts = 6
{}"#, test_config_footer()); {}"#, test_config_footer());
@@ -176,7 +173,6 @@ fallback_default_max_tokens = 8192
enable_streaming = true enable_streaming = true
timeout_seconds = 60 timeout_seconds = 60
auto_compact = true auto_compact = true
allow_multiple_tool_calls = false
max_retry_attempts = 3 max_retry_attempts = 3
autonomous_max_retry_attempts = 6 autonomous_max_retry_attempts = 6
{}"#, test_config_footer()); {}"#, test_config_footer());
@@ -218,7 +214,6 @@ fallback_default_max_tokens = 8192
enable_streaming = true enable_streaming = true
timeout_seconds = 60 timeout_seconds = 60
auto_compact = true auto_compact = true
allow_multiple_tool_calls = false
max_retry_attempts = 3 max_retry_attempts = 3
autonomous_max_retry_attempts = 6 autonomous_max_retry_attempts = 6
{}"#, test_config_footer()); {}"#, test_config_footer());
@@ -257,7 +252,6 @@ fallback_default_max_tokens = 8192
enable_streaming = true enable_streaming = true
timeout_seconds = 60 timeout_seconds = 60
auto_compact = true auto_compact = true
allow_multiple_tool_calls = false
max_retry_attempts = 3 max_retry_attempts = 3
autonomous_max_retry_attempts = 6 autonomous_max_retry_attempts = 6
{}"#, test_config_footer()); {}"#, test_config_footer());

View File

@@ -1,40 +0,0 @@
// NOTE(review): this module is removed by this commit (diff header `-1,40 +0,0`);
// it exists solely to exercise the `allow_multiple_tool_calls` config option,
// which the commit deletes from AgentConfig.
#[cfg(test)]
mod test_multiple_tool_calls {
use g3_config::{AgentConfig, Config};
// Verifies the field exists on the default config, defaults to false, and
// can be flipped to true on a mutable copy.
#[test]
fn test_config_has_multiple_tool_calls_field() {
let config = Config::default();
// Test that the field exists and defaults to false
assert_eq!(config.agent.allow_multiple_tool_calls, false);
// Test that we can create a config with the field set to true
let mut custom_config = Config::default();
custom_config.agent.allow_multiple_tool_calls = true;
assert_eq!(custom_config.agent.allow_multiple_tool_calls, true);
}
// Round-trips AgentConfig through serde_json and checks the flag survives
// both directions with its field name intact.
#[test]
fn test_agent_config_serialization() {
let agent_config = AgentConfig {
max_context_length: Some(100000),
fallback_default_max_tokens: 8192,
enable_streaming: true,
allow_multiple_tool_calls: true,
timeout_seconds: 60,
auto_compact: true,
max_retry_attempts: 3,
autonomous_max_retry_attempts: 6,
check_todo_staleness: true,
};
// Test serialization
let json = serde_json::to_string(&agent_config).unwrap();
assert!(json.contains("\"allow_multiple_tool_calls\":true"));
// Test deserialization
let deserialized: AgentConfig = serde_json::from_str(&json).unwrap();
assert_eq!(deserialized.allow_multiple_tool_calls, true);
}
}

View File

@@ -231,7 +231,7 @@ impl<W: UiWriter> Agent<W> {
// Use default system prompt based on provider capabilities // Use default system prompt based on provider capabilities
if provider_has_native_tool_calling { if provider_has_native_tool_calling {
// For native tool calling providers, use a more explicit system prompt // For native tool calling providers, use a more explicit system prompt
get_system_prompt_for_native(config.agent.allow_multiple_tool_calls) get_system_prompt_for_native()
} else { } else {
// For non-native providers (embedded models), use JSON format instructions // For non-native providers (embedded models), use JSON format instructions
SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string() SYSTEM_PROMPT_FOR_NON_NATIVE_TOOL_USE.to_string()
@@ -1893,13 +1893,8 @@ impl<W: UiWriter> Agent<W> {
let completed_tools = parser.process_chunk(&chunk); let completed_tools = parser.process_chunk(&chunk);
// Handle completed tool calls - process all if multiple calls enabled // Handle completed tool calls - process all if multiple calls enabled
let tools_to_process: Vec<ToolCall> = // Always process all tool calls - they will be executed after stream ends
if self.config.agent.allow_multiple_tool_calls { let tools_to_process: Vec<ToolCall> = completed_tools;
completed_tools
} else {
// Original behavior - only take the first tool
completed_tools.into_iter().take(1).collect()
};
// Helper function to check if two tool calls are duplicates // Helper function to check if two tool calls are duplicates
let are_duplicates = |tc1: &ToolCall, tc2: &ToolCall| -> bool { let are_duplicates = |tc1: &ToolCall, tc2: &ToolCall| -> bool {
@@ -1953,11 +1948,10 @@ impl<W: UiWriter> Agent<W> {
modified_tool_call.tool = prefixed_tool_name; modified_tool_call.tool = prefixed_tool_name;
debug!("{}", warning_msg); debug!("{}", warning_msg);
// Reset the parser to clear any partial/polluted state. // NOTE: Do NOT call parser.reset() here!
// This prevents "example" tool calls in markdown or LLM stuttering // Resetting the parser clears the entire text buffer, which would
// from polluting subsequent parsing. // lose any subsequent (non-duplicate) tool calls that haven't been
parser.reset(); // processed yet.
continue; // Skip execution of duplicate continue; // Skip execution of duplicate
} }
@@ -2289,22 +2283,11 @@ impl<W: UiWriter> Agent<W> {
// Reset response_started flag for next iteration // Reset response_started flag for next iteration
response_started = false; response_started = false;
// For single tool mode, break immediately // Continue processing - don't break mid-stream
if !self.config.agent.allow_multiple_tool_calls {
break; // Break out of current stream to start a new one
}
} // End of for loop processing each tool call } // End of for loop processing each tool call
// If we processed any tools in multiple mode, break out to start new stream // Note: We no longer break mid-stream after tool execution.
// BUT only if there are no more unexecuted tool calls in the buffer // All tool calls are collected and executed after the stream ends.
if tool_executed && self.config.agent.allow_multiple_tool_calls {
if parser.has_unexecuted_tool_call() {
debug!("Tool executed but parser still has unexecuted tool calls, continuing to process");
// Don't break - continue processing to pick up remaining tool calls
} else {
break;
}
}
// If no tool calls were completed, continue streaming normally // If no tool calls were completed, continue streaming normally
if !tool_executed { if !tool_executed {
@@ -2782,6 +2765,8 @@ impl<W: UiWriter> Agent<W> {
pending_images: &mut self.pending_images, pending_images: &mut self.pending_images,
is_autonomous: self.is_autonomous, is_autonomous: self.is_autonomous,
requirements_sha: self.requirements_sha.as_deref(), requirements_sha: self.requirements_sha.as_deref(),
context_total_tokens: self.context_window.total_tokens,
context_used_tokens: self.context_window.used_tokens,
}; };
// Dispatch to the appropriate tool handler // Dispatch to the appropriate tool handler

View File

@@ -210,21 +210,17 @@ pub const SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE: &'static str =
concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE); concatcp!(SYSTEM_NATIVE_TOOL_CALLS, CODING_STYLE);
/// Generate system prompt based on whether multiple tool calls are allowed /// Generate system prompt based on whether multiple tool calls are allowed
pub fn get_system_prompt_for_native(allow_multiple: bool) -> String { pub fn get_system_prompt_for_native() -> String {
if allow_multiple { // Always allow multiple tool calls - they are processed sequentially after stream ends
// Replace the "ONE tool" instruction with multiple tools instruction let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string();
let base = SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string(); base.replace(
base.replace( "2. Call the appropriate tool with the required parameters",
"2. Call the appropriate tool with the required parameters", "2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate.
"2. Call the appropriate tool(s) with the required parameters - you may call multiple tools in parallel when appropriate.
<use_parallel_tool_calls> <use_parallel_tool_calls>
For maximum efficiency, whenever you perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like `ls` or `list_dir`, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially. For maximum efficiency, whenever you perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially. Prioritize calling tools in parallel whenever possible. For example, when reading 3 files, run 3 tool calls in parallel to read all 3 files into context at the same time. When running multiple read-only commands like `ls` or `list_dir`, always run all of the commands in parallel. Err on the side of maximizing parallel tool calls rather than running too many tools sequentially.
</use_parallel_tool_calls> </use_parallel_tool_calls>
" "
) )
} else {
SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
}
} }
const SYSTEM_NON_NATIVE_TOOL_USE: &'static str = const SYSTEM_NON_NATIVE_TOOL_USE: &'static str =
@@ -410,12 +406,9 @@ const G3_IDENTITY_LINE: &str = "You are G3, an AI programming agent of the same
/// The agent_prompt replaces only the G3 identity line at the start of the prompt. /// The agent_prompt replaces only the G3 identity line at the start of the prompt.
/// Everything else (tool instructions, coding guidelines, etc.) is preserved. /// Everything else (tool instructions, coding guidelines, etc.) is preserved.
pub fn get_agent_system_prompt(agent_prompt: &str, allow_multiple_tool_calls: bool) -> String { pub fn get_agent_system_prompt(agent_prompt: &str, allow_multiple_tool_calls: bool) -> String {
// Get the full system prompt (with or without parallel tool calls) // Get the full system prompt (always allows multiple tool calls now)
let full_prompt = if allow_multiple_tool_calls { let _ = allow_multiple_tool_calls; // Parameter kept for API compatibility but ignored
get_system_prompt_for_native(true) let full_prompt = get_system_prompt_for_native();
} else {
SYSTEM_PROMPT_FOR_NATIVE_TOOL_USE.to_string()
};
// Replace only the G3 identity line with the custom agent prompt // Replace only the G3 identity line with the custom agent prompt
full_prompt.replace(G3_IDENTITY_LINE, agent_prompt.trim()) full_prompt.replace(G3_IDENTITY_LINE, agent_prompt.trim())

View File

@@ -154,8 +154,14 @@ impl StreamingToolParser {
fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> { fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
// If we're not currently in a JSON tool call, look for the start // If we're not currently in a JSON tool call, look for the start
if !self.in_json_tool_call { if !self.in_json_tool_call {
if let Some(pos) = Self::find_last_tool_call_start(&self.text_buffer) { // Only search in the unconsumed portion of the buffer to avoid
debug!("Found JSON tool call pattern at position {}", pos); // re-parsing already-executed tool calls
let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
// Use find_first_tool_call_start to find the FIRST tool call, not the last.
// This ensures we process tool calls in order when multiple arrive together.
if let Some(relative_pos) = Self::find_first_tool_call_start(unchecked_buffer) {
let pos = self.last_consumed_position + relative_pos;
debug!("Found JSON tool call pattern at position {} (relative: {})", pos, relative_pos);
self.in_json_tool_call = true; self.in_json_tool_call = true;
self.json_tool_start = Some(pos); self.json_tool_start = Some(pos);
} }
@@ -413,4 +419,55 @@ mod tests {
assert!(!parser.message_stopped); assert!(!parser.message_stopped);
assert_eq!(parser.last_consumed_position, 0); assert_eq!(parser.last_consumed_position, 0);
} }
#[test]
fn test_multiple_tool_calls_processed_in_order() {
    // When several tool calls arrive inside one chunk, the parser must yield
    // them in document order — the earliest call in the text comes out first.
    let payload = r#"Some text before
{"tool": "shell", "args": {"command": "first"}}
{"tool": "shell", "args": {"command": "second"}}
Some text after"#;
    let chunk = g3_providers::CompletionChunk {
        content: payload.to_string(),
        finished: true,
        tool_calls: None,
        usage: None,
    };
    let mut parser = StreamingToolParser::new();
    let parsed = parser.process_chunk(&chunk);
    // Both calls must be recovered from the single chunk.
    assert_eq!(parsed.len(), 2, "Expected 2 tool calls, got {}", parsed.len());
    // Ordering check: "first" precedes "second".
    assert_eq!(parsed[0].tool, "shell");
    assert_eq!(parsed[0].args["command"], "first",
        "First tool call should have command 'first', got {:?}", parsed[0].args);
    assert_eq!(parsed[1].tool, "shell");
    assert_eq!(parsed[1].args["command"], "second",
        "Second tool call should have command 'second', got {:?}", parsed[1].args);
}
#[test]
fn test_find_first_vs_last_tool_call() {
    // The haystack contains two tool-call starts; the "first" finder must
    // report a strictly earlier position than the "last" finder.
    let haystack = r#"{"tool": "first"} and {"tool": "second"}"#;
    let first_pos = StreamingToolParser::find_first_tool_call_start(haystack);
    let last_pos = StreamingToolParser::find_last_tool_call_start(haystack);
    assert!(first_pos.is_some());
    assert!(last_pos.is_some());
    assert!(first_pos.unwrap() < last_pos.unwrap(),
        "First position ({:?}) should be less than last position ({:?})", first_pos, last_pos);
}
} }

View File

@@ -25,6 +25,8 @@ pub struct ToolContext<'a, W: UiWriter> {
pub pending_images: &'a mut Vec<g3_providers::ImageContent>, pub pending_images: &'a mut Vec<g3_providers::ImageContent>,
pub is_autonomous: bool, pub is_autonomous: bool,
pub requirements_sha: Option<&'a str>, pub requirements_sha: Option<&'a str>,
pub context_total_tokens: u32,
pub context_used_tokens: u32,
} }
impl<'a, W: UiWriter> ToolContext<'a, W> { impl<'a, W: UiWriter> ToolContext<'a, W> {

View File

@@ -10,10 +10,50 @@ use crate::ToolCall;
use super::executor::ToolContext; use super::executor::ToolContext;
/// Bytes per token heuristic (conservative estimate for code/text mix)
const BYTES_PER_TOKEN: f32 = 3.5;
/// Maximum percentage of context window a single file read can consume
const MAX_FILE_READ_PERCENT: f32 = 0.20; // 20%
/// Safety buffer applied to token estimates (10% headroom). Named so the
/// forward (bytes -> tokens) and inverse (tokens -> bytes) conversions use
/// the exact same factor and cannot silently drift apart.
const TOKEN_ESTIMATE_SAFETY: f32 = 1.1;

/// Estimate token count from byte size.
///
/// Applies the bytes/token heuristic plus the safety buffer so we err on the
/// side of over-counting tokens (and therefore under-reading files).
fn estimate_tokens_from_bytes(bytes: usize) -> u32 {
    ((bytes as f32 / BYTES_PER_TOKEN) * TOKEN_ESTIMATE_SAFETY).ceil() as u32
}

/// Calculate the maximum bytes we should read based on context window state.
///
/// Returns `None` if no limit is needed, `Some(max_bytes)` if limiting is
/// required. Three tiers:
/// 1. File estimated under 20% of the total context -> no limit.
/// 2. Otherwise cap at 20% of the total context.
/// 3. If that 20% would exceed half of the *remaining* context, cap at half
///    of what remains instead. Note this yields `Some(0)` when the context
///    is fully used (`used_tokens >= total_tokens`), i.e. "read nothing".
fn calculate_read_limit(file_bytes: usize, total_tokens: u32, used_tokens: u32) -> Option<usize> {
    let file_tokens = estimate_tokens_from_bytes(file_bytes);
    let max_tokens_for_file = (total_tokens as f32 * MAX_FILE_READ_PERCENT) as u32;
    // Tier 1: File is small enough (< 20% of context) - no limit
    if file_tokens < max_tokens_for_file {
        return None;
    }
    // Remaining context budget (saturating so an over-full context gives 0,
    // not an underflow panic).
    let available_tokens = total_tokens.saturating_sub(used_tokens);
    let half_available = available_tokens / 2;
    // Tier 3 caps at half the remaining budget; Tier 2 at 20% of the total.
    // `min` expresses "whichever cap is tighter".
    let effective_max_tokens = max_tokens_for_file.min(half_available);
    // Invert the token estimate (including the safety buffer) back to bytes.
    let max_bytes = (effective_max_tokens as f32 * BYTES_PER_TOKEN / TOKEN_ESTIMATE_SAFETY) as usize;
    Some(max_bytes)
}
/// Execute the `read_file` tool. /// Execute the `read_file` tool.
pub async fn execute_read_file<W: UiWriter>( pub async fn execute_read_file<W: UiWriter>(
tool_call: &ToolCall, tool_call: &ToolCall,
_ctx: &ToolContext<'_, W>, ctx: &ToolContext<'_, W>,
) -> Result<String> { ) -> Result<String> {
debug!("Processing read_file tool call"); debug!("Processing read_file tool call");
@@ -47,54 +87,83 @@ pub async fn execute_read_file<W: UiWriter>(
match std::fs::read_to_string(path_str) { match std::fs::read_to_string(path_str) {
Ok(content) => { Ok(content) => {
// Validate and apply range if specified let total_file_len = content.len();
let start = start_char.unwrap_or(0);
let end = end_char.unwrap_or(content.len());
// Validation // Calculate token-aware limit for the content we're about to read
if start > content.len() { let read_limit = calculate_read_limit(
total_file_len,
ctx.context_total_tokens,
ctx.context_used_tokens,
);
// Validate user-specified range
let user_start = start_char.unwrap_or(0);
if user_start > total_file_len {
return Ok(format!( return Ok(format!(
"❌ Start position {} exceeds file length {}", "❌ Start position {} exceeds file length {}",
start, user_start,
content.len() total_file_len
));
}
if end > content.len() {
return Ok(format!(
"❌ End position {} exceeds file length {}",
end,
content.len()
));
}
if start > end {
return Ok(format!(
"❌ Start position {} is greater than end position {}",
start, end
)); ));
} }
let user_end = end_char.unwrap_or(total_file_len);
if user_end > total_file_len {
return Ok(format!(
"❌ End position {} exceeds file length {}",
user_end,
total_file_len
));
}
if user_start > user_end {
return Ok(format!(
"❌ Start position {} is greater than end position {}",
user_start, user_end
));
}
// Calculate the range we'll actually read
let user_range_len = user_end - user_start;
// Determine if we need to apply token-aware limiting
let (effective_end, was_truncated) = match read_limit {
Some(max_bytes) if user_range_len > max_bytes => {
// Truncate to max_bytes from the start position
(user_start + max_bytes, true)
}
_ => (user_end, false),
};
// Extract the requested portion, ensuring we're at char boundaries // Extract the requested portion, ensuring we're at char boundaries
let start_boundary = if start == 0 { let start_boundary = if user_start == 0 {
0 0
} else { } else {
content content
.char_indices() .char_indices()
.find(|(i, _)| *i >= start) .find(|(i, _)| *i >= user_start)
.map(|(i, _)| i) .map(|(i, _)| i)
.unwrap_or(start) .unwrap_or(user_start)
}; };
let end_boundary = content let end_boundary = content
.char_indices() .char_indices()
.find(|(i, _)| *i >= end) .find(|(i, _)| *i >= effective_end)
.map(|(i, _)| i) .map(|(i, _)| i)
.unwrap_or(content.len()); .unwrap_or(total_file_len);
let partial_content = &content[start_boundary..end_boundary]; let partial_content = &content[start_boundary..end_boundary];
let line_count = partial_content.lines().count(); let line_count = partial_content.lines().count();
let total_lines = content.lines().count(); let total_lines = content.lines().count();
// Format output with range info if partial // Format output based on whether truncation occurred
if start_char.is_some() || end_char.is_some() { if was_truncated {
// Token-aware truncation header
let context_pct = (ctx.context_used_tokens as f32 / ctx.context_total_tokens as f32 * 100.0) as u32;
Ok(format!(
"⚠️ FILE TRUNCATED: Reading chars {}-{} of {} total (file exceeds 20% context window threshold, context at {}%)\n\
📄 File content ({} lines of {} total):\n{}",
start_boundary, end_boundary, total_file_len, context_pct,
line_count, total_lines, partial_content
))
} else if start_char.is_some() || end_char.is_some() {
Ok(format!( Ok(format!(
"📄 File content (chars {}-{}, {} lines of {} total):\n{}", "📄 File content (chars {}-{}, {} lines of {} total):\n{}",
start_boundary, end_boundary, line_count, total_lines, partial_content start_boundary, end_boundary, line_count, total_lines, partial_content

View File

@@ -0,0 +1,143 @@
//! Tests for token-aware read_file limiting.
//!
//! These tests verify that read_file properly limits output based on
//! context window state to prevent blowing out the context.
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Helper to create a test file with specified size
/// Create a file named `name` inside `dir`, filled with `size_bytes` bytes
/// of the character 'x', and return its full path.
fn create_test_file(dir: &TempDir, name: &str, size_bytes: usize) -> PathBuf {
    let path = dir.path().join(name);
    fs::write(&path, "x".repeat(size_bytes)).unwrap();
    path
}
/// Test the helper functions directly
mod helper_tests {
/// Bytes per token heuristic (must match file_ops.rs)
const BYTES_PER_TOKEN: f32 = 3.5;
/// Estimate token count from byte size (must match file_ops.rs)
fn estimate_tokens_from_bytes(bytes: usize) -> u32 {
((bytes as f32 / BYTES_PER_TOKEN) * 1.1).ceil() as u32
}
/// Calculate the maximum bytes we should read based on context window state.
fn calculate_read_limit(file_bytes: usize, total_tokens: u32, used_tokens: u32) -> Option<usize> {
let file_tokens = estimate_tokens_from_bytes(file_bytes);
let max_tokens_for_file = (total_tokens as f32 * 0.20) as u32; // 20%
// Tier 1: File is small enough (< 20% of context) - no limit
if file_tokens < max_tokens_for_file {
return None;
}
// Calculate available context
let available_tokens = total_tokens.saturating_sub(used_tokens);
let half_available = available_tokens / 2;
// Tier 3: If 20% would exceed half of available, cap at half available
let effective_max_tokens = if max_tokens_for_file > half_available {
half_available
} else {
// Tier 2: Cap at 20% of total context
max_tokens_for_file
};
// Convert tokens back to bytes
let max_bytes = (effective_max_tokens as f32 * BYTES_PER_TOKEN / 1.1) as usize;
Some(max_bytes)
}
#[test]
fn test_estimate_tokens_from_bytes() {
// 3500 bytes at 3.5 bytes/token = 1000 tokens, +10% = 1100
let tokens = estimate_tokens_from_bytes(3500);
assert_eq!(tokens, 1100);
// Small file
let tokens = estimate_tokens_from_bytes(100);
assert!(tokens > 0 && tokens < 50);
}
#[test]
fn test_tier1_small_file_no_limit() {
// 100k token context, file would be ~1000 tokens (well under 20% = 20k)
let limit = calculate_read_limit(3500, 100_000, 0);
assert!(limit.is_none(), "Small file should have no limit");
}
#[test]
fn test_tier2_large_file_capped_at_20_percent() {
// 100k token context, file would be ~100k tokens (way over 20%)
// Should cap at 20% = 20k tokens worth of bytes
let limit = calculate_read_limit(350_000, 100_000, 0);
assert!(limit.is_some(), "Large file should be limited");
let max_bytes = limit.unwrap();
// 20% of 100k = 20k tokens, at 3.5 bytes/token / 1.1 = ~63k bytes
assert!(max_bytes > 50_000 && max_bytes < 70_000,
"Expected ~63k bytes, got {}", max_bytes);
}
#[test]
fn test_tier3_context_nearly_full() {
// 100k token context, already at 70% (30k available)
// 20% of total = 20k tokens, but half of available = 15k tokens
// Should cap at 15k tokens worth of bytes
let limit = calculate_read_limit(350_000, 100_000, 70_000);
assert!(limit.is_some(), "Large file should be limited");
let max_bytes = limit.unwrap();
// 15k tokens at 3.5 bytes/token / 1.1 = ~47k bytes
assert!(max_bytes > 40_000 && max_bytes < 55_000,
"Expected ~47k bytes when context at 70%, got {}", max_bytes);
}
#[test]
fn test_tier3_context_very_full() {
// 100k token context, already at 90% (10k available)
// 20% of total = 20k tokens, but half of available = 5k tokens
// Should cap at 5k tokens worth of bytes
let limit = calculate_read_limit(350_000, 100_000, 90_000);
assert!(limit.is_some(), "Large file should be limited");
let max_bytes = limit.unwrap();
// 5k tokens at 3.5 bytes/token / 1.1 = ~15.9k bytes
assert!(max_bytes > 10_000 && max_bytes < 20_000,
"Expected ~16k bytes when context at 90%, got {}", max_bytes);
}
#[test]
fn test_boundary_exactly_20_percent() {
// File that's exactly at the 20% boundary
// 100k context, 20% = 20k tokens
// 20k tokens at 3.5 bytes/token * 1.1 = ~77k bytes would trigger limit
// Just under - should not limit
let limit = calculate_read_limit(60_000, 100_000, 0);
assert!(limit.is_none(), "File just under 20% should not be limited");
// Just over - should limit
let limit = calculate_read_limit(80_000, 100_000, 0);
assert!(limit.is_some(), "File just over 20% should be limited");
}
}
/// Integration-style tests that would test the actual tool execution.
/// These are not implemented because they require more setup (ToolContext, etc.)
/// but this module documents the expected behavior for a future test author.
#[cfg(test)]
mod integration_notes {
// To fully test execute_read_file with token limiting:
// 1. Create a mock ToolContext with specific context_total_tokens and context_used_tokens
//    (ToolContext borrows a UiWriter and several other fields — presumably the
//    main blocker; TODO confirm what scaffolding a minimal instance needs).
// 2. Create a large test file
// 3. Call execute_read_file and verify:
// - Output contains truncation warning header
// - Content is actually truncated to expected size
// - Header shows correct character range and context percentage
}

View File

@@ -23,7 +23,6 @@ fallback_default_max_tokens = 8192
enable_streaming = true enable_streaming = true
timeout_seconds = 60 timeout_seconds = 60
auto_compact = true auto_compact = true
allow_multiple_tool_calls = false
max_retry_attempts = 3 max_retry_attempts = 3
autonomous_max_retry_attempts = 6 autonomous_max_retry_attempts = 6