fix: remove allow_multiple_tool_calls config and simplify tool execution flow
This fixes a bug where the agent would stop responding abruptly without calling final_output. The root cause was the allow_multiple_tool_calls config option (default: false), which caused the agent to break out of the streaming loop mid-stream after executing the first tool, losing any subsequent content.

Changes:
- Remove allow_multiple_tool_calls config option entirely
- Always process all tool calls without breaking mid-stream
- Simplify system prompt generation (no longer needs boolean param)
- Let the stream complete fully before continuing to next iteration
- Change find_last_tool_call_start to find_first_tool_call_start
- Remove parser.reset() call on duplicate detection

Benefits:
- Simpler logic with less conditional branching
- No lost content after tool calls
- Consistent behavior for all users
- Reduced config complexity
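To make the new flow concrete, here is a minimal sketch of the simplified loop. It is not the g3-core implementation: the types and helpers (StreamEvent, ToolCall, run_turn, execute_tool) are hypothetical stand-ins. What it illustrates is that the stream is always drained to completion and every collected tool call is executed in order, with no early break after the first one.

```rust
/// Hypothetical stand-ins for the streaming types; names are illustrative only.
enum StreamEvent {
    Content(String),
    ToolCall(ToolCall),
    Done,
}

struct ToolCall {
    name: String,
    args: String,
}

fn execute_tool(call: &ToolCall) -> String {
    // Placeholder: a real agent would dispatch to the named tool here.
    format!("result of {}({})", call.name, call.args)
}

/// One agent turn: drain the stream completely, then run every tool call.
fn run_turn(mut next_event: impl FnMut() -> StreamEvent) -> (String, Vec<String>) {
    let mut content = String::new();
    let mut tool_calls: Vec<ToolCall> = Vec::new();

    // Let the stream complete fully; never break out after the first tool call.
    loop {
        match next_event() {
            StreamEvent::Content(chunk) => content.push_str(&chunk),
            StreamEvent::ToolCall(call) => tool_calls.push(call),
            StreamEvent::Done => break,
        }
    }

    // Process all tool calls that were collected, in order.
    let results = tool_calls.iter().map(execute_tool).collect();
    (content, results)
}

fn main() {
    // Simulate a stream that interleaves content and two tool calls.
    let mut events = vec![
        StreamEvent::Content("thinking...".into()),
        StreamEvent::ToolCall(ToolCall { name: "read_file".into(), args: "a.rs".into() }),
        StreamEvent::Content("more text".into()),
        StreamEvent::ToolCall(ToolCall { name: "final_output".into(), args: "done".into() }),
        StreamEvent::Done,
    ]
    .into_iter();

    let (content, results) = run_turn(move || events.next().unwrap_or(StreamEvent::Done));
    println!("content: {content}");
    println!("tool results: {results:?}");
}
```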
crates/g3-core/tests/read_file_token_limit_test.rs (new file, 143 lines added)
@@ -0,0 +1,143 @@
//! Tests for token-aware read_file limiting.
//!
//! These tests verify that read_file properly limits output based on
//! context window state to prevent blowing out the context.

use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;

/// Helper to create a test file with specified size
fn create_test_file(dir: &TempDir, name: &str, size_bytes: usize) -> PathBuf {
    let path = dir.path().join(name);
    let content: String = "x".repeat(size_bytes);
    fs::write(&path, &content).unwrap();
    path
}

/// Test the helper functions directly
mod helper_tests {
    /// Bytes per token heuristic (must match file_ops.rs)
    const BYTES_PER_TOKEN: f32 = 3.5;

    /// Estimate token count from byte size (must match file_ops.rs)
    fn estimate_tokens_from_bytes(bytes: usize) -> u32 {
        ((bytes as f32 / BYTES_PER_TOKEN) * 1.1).ceil() as u32
    }

    /// Calculate the maximum bytes we should read based on context window state.
    fn calculate_read_limit(file_bytes: usize, total_tokens: u32, used_tokens: u32) -> Option<usize> {
        let file_tokens = estimate_tokens_from_bytes(file_bytes);
        let max_tokens_for_file = (total_tokens as f32 * 0.20) as u32; // 20%

        // Tier 1: File is small enough (< 20% of context) - no limit
        if file_tokens < max_tokens_for_file {
            return None;
        }

        // Calculate available context
        let available_tokens = total_tokens.saturating_sub(used_tokens);
        let half_available = available_tokens / 2;

        // Tier 3: If 20% would exceed half of available, cap at half available
        let effective_max_tokens = if max_tokens_for_file > half_available {
            half_available
        } else {
            // Tier 2: Cap at 20% of total context
            max_tokens_for_file
        };

        // Convert tokens back to bytes
        let max_bytes = (effective_max_tokens as f32 * BYTES_PER_TOKEN / 1.1) as usize;

        Some(max_bytes)
    }

    #[test]
    fn test_estimate_tokens_from_bytes() {
        // 3500 bytes at 3.5 bytes/token = 1000 tokens, +10% = 1100
        let tokens = estimate_tokens_from_bytes(3500);
        assert_eq!(tokens, 1100);

        // Small file
        let tokens = estimate_tokens_from_bytes(100);
        assert!(tokens > 0 && tokens < 50);
    }

    #[test]
    fn test_tier1_small_file_no_limit() {
        // 100k token context, file would be ~1000 tokens (well under 20% = 20k)
        let limit = calculate_read_limit(3500, 100_000, 0);
        assert!(limit.is_none(), "Small file should have no limit");
    }

    #[test]
    fn test_tier2_large_file_capped_at_20_percent() {
        // 100k token context, file would be ~100k tokens (way over 20%)
        // Should cap at 20% = 20k tokens worth of bytes
        let limit = calculate_read_limit(350_000, 100_000, 0);
        assert!(limit.is_some(), "Large file should be limited");

        let max_bytes = limit.unwrap();
        // 20% of 100k = 20k tokens, at 3.5 bytes/token / 1.1 = ~63k bytes
        assert!(max_bytes > 50_000 && max_bytes < 70_000,
            "Expected ~63k bytes, got {}", max_bytes);
    }

    #[test]
    fn test_tier3_context_nearly_full() {
        // 100k token context, already at 70% (30k available)
        // 20% of total = 20k tokens, but half of available = 15k tokens
        // Should cap at 15k tokens worth of bytes
        let limit = calculate_read_limit(350_000, 100_000, 70_000);
        assert!(limit.is_some(), "Large file should be limited");

        let max_bytes = limit.unwrap();
        // 15k tokens at 3.5 bytes/token / 1.1 = ~47k bytes
        assert!(max_bytes > 40_000 && max_bytes < 55_000,
            "Expected ~47k bytes when context at 70%, got {}", max_bytes);
    }

    #[test]
    fn test_tier3_context_very_full() {
        // 100k token context, already at 90% (10k available)
        // 20% of total = 20k tokens, but half of available = 5k tokens
        // Should cap at 5k tokens worth of bytes
        let limit = calculate_read_limit(350_000, 100_000, 90_000);
        assert!(limit.is_some(), "Large file should be limited");

        let max_bytes = limit.unwrap();
        // 5k tokens at 3.5 bytes/token / 1.1 = ~15.9k bytes
        assert!(max_bytes > 10_000 && max_bytes < 20_000,
            "Expected ~16k bytes when context at 90%, got {}", max_bytes);
    }

    #[test]
    fn test_boundary_exactly_20_percent() {
        // File that's exactly at the 20% boundary
        // 100k context, 20% = 20k tokens
        // 20k tokens corresponds to 20k * 3.5 / 1.1 ≈ 64k bytes, so files above ~64k bytes trigger the limit

        // Just under - should not limit
        let limit = calculate_read_limit(60_000, 100_000, 0);
        assert!(limit.is_none(), "File just under 20% should not be limited");

        // Just over - should limit
        let limit = calculate_read_limit(80_000, 100_000, 0);
        assert!(limit.is_some(), "File just over 20% should be limited");
    }
}

/// Integration-style tests that would test the actual tool execution
/// These are commented out as they require more setup (ToolContext, etc.)
/// but document the expected behavior.
#[cfg(test)]
mod integration_notes {
    // To fully test execute_read_file with token limiting:
    // 1. Create a mock ToolContext with specific context_total_tokens and context_used_tokens
    // 2. Create a large test file
    // 3. Call execute_read_file and verify:
    //    - Output contains truncation warning header
    //    - Content is actually truncated to expected size
    //    - Header shows correct character range and context percentage
}
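As a rough illustration of the behaviour those notes describe, the sketch below wires the same tier logic to a real temporary file and emits a truncation header. Everything in it is a stand-in: FakeToolContext carries only the two token counters, stub_read_file is a local substitute for execute_read_file, the header wording is invented for the example rather than taken from file_ops.rs, and the limit helpers are repeated from the tests above so the snippet stands alone.

```rust
use std::fs;
use std::path::Path;
use tempfile::TempDir;

const BYTES_PER_TOKEN: f32 = 3.5;

fn estimate_tokens_from_bytes(bytes: usize) -> u32 {
    ((bytes as f32 / BYTES_PER_TOKEN) * 1.1).ceil() as u32
}

fn calculate_read_limit(file_bytes: usize, total_tokens: u32, used_tokens: u32) -> Option<usize> {
    let file_tokens = estimate_tokens_from_bytes(file_bytes);
    let max_tokens_for_file = (total_tokens as f32 * 0.20) as u32;
    if file_tokens < max_tokens_for_file {
        return None; // Tier 1: small file, no limit
    }
    let half_available = total_tokens.saturating_sub(used_tokens) / 2;
    // Tier 2/3: cap at 20% of total, or half of what is left, whichever is smaller.
    let effective = max_tokens_for_file.min(half_available);
    Some((effective as f32 * BYTES_PER_TOKEN / 1.1) as usize)
}

/// Stand-in for the real ToolContext: only the two counters the limit needs.
struct FakeToolContext {
    context_total_tokens: u32,
    context_used_tokens: u32,
}

/// Stub read that applies the limit and prepends an illustrative header.
fn stub_read_file(ctx: &FakeToolContext, path: &Path) -> String {
    let content = fs::read_to_string(path).unwrap();
    match calculate_read_limit(content.len(), ctx.context_total_tokens, ctx.context_used_tokens) {
        None => content,
        Some(max_bytes) => format!(
            "[truncated: showing chars 0..{} of {}; context {}% used]\n{}",
            max_bytes,
            content.len(),
            ctx.context_used_tokens * 100 / ctx.context_total_tokens,
            &content[..max_bytes]
        ),
    }
}

#[test]
fn stub_read_is_truncated_when_context_is_tight() {
    let dir = TempDir::new().unwrap();
    let path = dir.path().join("big.txt");
    fs::write(&path, "x".repeat(350_000)).unwrap();

    // Context is 70% used, so the read should be capped at half of what remains.
    let ctx = FakeToolContext { context_total_tokens: 100_000, context_used_tokens: 70_000 };
    let output = stub_read_file(&ctx, &path);

    assert!(output.starts_with("[truncated"));
    assert!(output.len() < 100_000, "content should be cut well below the 350k file size");
}
```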