feat: shell output pagination and optimized read_file with seek
- Shell outputs > 8KB are truncated to first 500 chars
- Full output saved to .g3/sessions/<session_id>/tools/shell_stdout_<id>.txt
- LLM can use read_file with start/end to paginate through large outputs
- read_file now uses seek() for O(1) random access instead of reading the entire file
- UTF-8 safe: reads extra bytes at boundaries to find valid char positions
- Falls back to lossy conversion for binary files (no panics)

Files changed:
- paths.rs: get_tools_output_dir(), generate_short_id()
- shell.rs: truncate_large_output() integration
- file_ops.rs: seek-based read_file_range() helper
- New test: read_file_utf8_test.rs
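A quick sketch of the paging flow this enables (illustrative only, not part of the diff; the 4_000-char page size is an arbitrary choice, and read_file additionally applies its own token-aware limit):

// Compute the (start, end) windows a client would pass to read_file
// to walk a saved shell_stdout_<id>.txt file page by page.
fn page_ranges(total_len: usize, page: usize) -> Vec<(usize, usize)> {
    (0..total_len)
        .step_by(page)
        .map(|start| (start, (start + page).min(total_len)))
        .collect()
}

fn main() {
    // e.g. a 10_000-char output that was truncated at the 8 KB threshold
    for (start, end) in page_ranges(10_000, 4_000) {
        println!("read_file(start={start}, end={end})");
    }
}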
paths.rs:

@@ -110,6 +110,17 @@ pub fn get_context_summary_file(session_id: &str) -> PathBuf {
    get_session_logs_dir(session_id).join("context_summary.txt")
}

/// Get the tools output directory for a session.
/// Returns .g3/sessions/<session_id>/tools/
pub fn get_tools_output_dir(session_id: &str) -> PathBuf {
    get_session_logs_dir(session_id).join("tools")
}

/// Generate a short unique ID (first 8 chars of UUID v4).
pub fn generate_short_id() -> String {
    uuid::Uuid::new_v4().to_string()[..8].to_string()
}

#[cfg(test)]
mod tests {
    use super::*;
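For illustration, here is how these two helpers compose into the path scheme named in the commit message (a standalone sketch: it assumes get_session_logs_dir resolves to .g3/sessions/<session_id> and inlines that, plus the uuid crate the diff already uses):

use std::path::PathBuf;

fn main() {
    let session_id = "demo-session";
    // generate_short_id(): first 8 chars of a UUID v4
    let short_id = uuid::Uuid::new_v4().to_string()[..8].to_string();
    // get_tools_output_dir(): <session logs dir>/tools
    let path: PathBuf = PathBuf::from(".g3/sessions")
        .join(session_id)
        .join("tools")
        .join(format!("shell_stdout_{}.txt", short_id));
    println!("{}", path.display());
}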
file_ops.rs:

@@ -1,6 +1,9 @@
//! File operation tools: read_file, write_file, str_replace, read_image.

use anyhow::Result;
use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::Path;
use tracing::debug;

use crate::ui_writer::UiWriter;
@@ -55,7 +58,7 @@ pub async fn execute_read_file<W: UiWriter>(
    tool_call: &ToolCall,
    ctx: &ToolContext<'_, W>,
) -> Result<String> {
    debug!("Processing read_file tool call");
    debug!("Processing read_file tool call (optimized with seek)");

    let file_path = match tool_call.args.get("file_path").and_then(|v| v.as_str()) {
        Some(p) => p,
@@ -85,101 +88,164 @@ pub async fn execute_read_file<W: UiWriter>(
        path_str, start_char, end_char
    );

    match std::fs::read_to_string(path_str) {
        Ok(content) => {
            let total_file_len = content.len();

            // Calculate token-aware limit for the content we're about to read
            let read_limit = calculate_read_limit(
                total_file_len,
                ctx.context_total_tokens,
                ctx.context_used_tokens,
            );
    // Get file metadata for size without reading content
    let path = Path::new(path_str);
    let metadata = match std::fs::metadata(path) {
        Ok(m) => m,
        Err(e) => return Ok(format!("❌ Failed to read file '{}': {}", path_str, e)),
    };
    let total_file_len = metadata.len() as usize;

            // Validate user-specified range
            let user_start = start_char.unwrap_or(0);
            if user_start > total_file_len {
                // Start exceeds file length - read last 100 chars instead
                let fallback_start = total_file_len.saturating_sub(100);
                let fallback_content = &content[fallback_start..];
                let line_count = fallback_content.lines().count();
                return Ok(format!(
                    "{}\n🔍 {} lines read (start {} exceeded length {}, showing last {} chars)",
                    fallback_content, line_count, user_start, total_file_len, total_file_len - fallback_start
                ));
            }

            let user_end = end_char.unwrap_or(total_file_len);
            // Clamp end position to file length (don't error, just read what's available)
            let (user_end, end_was_clamped) = if user_end > total_file_len {
                (total_file_len, true)
            } else {
                (user_end, false)
            };

            if user_start > user_end {
                return Ok(format!(
                    "❌ Start position {} is greater than end position {}",
                    user_start, user_end
                ));
            }
    // Calculate token-aware limit
    let read_limit = calculate_read_limit(
        total_file_len,
        ctx.context_total_tokens,
        ctx.context_used_tokens,
    );

            // Calculate the range we'll actually read
            let user_range_len = user_end - user_start;

            // Determine if we need to apply token-aware limiting
            let (effective_end, was_truncated) = match read_limit {
                Some(max_bytes) if user_range_len > max_bytes => {
                    // Truncate to max_bytes from the start position
                    (user_start + max_bytes, true)
                }
                _ => (user_end, false),
            };

            // Extract the requested portion, ensuring we're at char boundaries
            let start_boundary = if user_start == 0 {
                0
            } else {
                content
                    .char_indices()
                    .find(|(i, _)| *i >= user_start)
                    .map(|(i, _)| i)
                    .unwrap_or(user_start)
            };
            let end_boundary = content
                .char_indices()
                .find(|(i, _)| *i >= effective_end)
                .map(|(i, _)| i)
                .unwrap_or(total_file_len);

            let partial_content = &content[start_boundary..end_boundary];
            let line_count = partial_content.lines().count();

            // Format output based on whether truncation occurred
            if was_truncated {
                // Token-aware truncation header
                let context_pct = (ctx.context_used_tokens as f32 / ctx.context_total_tokens as f32 * 100.0) as u32;
                Ok(format!(
                    "{}\n🔍 {} lines read (truncated, chars {}-{} of {}, context {}%)",
                    partial_content, line_count, start_boundary, end_boundary, total_file_len, context_pct
                ))
            } else if end_was_clamped {
                // End position exceeded file length, clamped to actual length
                Ok(format!(
                    "{}\n🔍 {} lines read (chars {}-{}, end clamped from {} to file length {})",
                    partial_content, line_count, start_boundary, end_boundary, end_char.unwrap(), total_file_len
                ))
            } else if start_char.is_some() || end_char.is_some() {
                Ok(format!(
                    "{}\n🔍 {} lines read (chars {}-{})",
                    partial_content, line_count, start_boundary, end_boundary
                ))
            } else {
                Ok(format!("{}\n🔍 {} lines read", content, line_count))
            }
        }
        Err(e) => Ok(format!("❌ Failed to read file '{}': {}", path_str, e)),
    // Validate user-specified range
    let user_start = start_char.unwrap_or(0);
    let user_end = end_char.unwrap_or(total_file_len);

    // Clamp end position to file length
    let (user_end, end_was_clamped) = if user_end > total_file_len {
        (total_file_len, true)
    } else {
        (user_end, false)
    };

    if user_start > user_end {
        return Ok(format!(
            "❌ Start position {} is greater than end position {}",
            user_start, user_end
        ));
    }

    // Calculate the range we'll actually read
    let user_range_len = user_end - user_start;

    // Determine if we need to apply token-aware limiting
    let (effective_end, was_truncated) = match read_limit {
        Some(max_bytes) if user_range_len > max_bytes => {
            (user_start + max_bytes, true)
        }
        _ => (user_end, false),
    };

    // Handle start exceeding file length
    if user_start >= total_file_len {
        // Read last 100 bytes instead
        let fallback_start = total_file_len.saturating_sub(100);
        let content = read_file_range(path, fallback_start, total_file_len)?;
        let line_count = content.lines().count();
        return Ok(format!(
            "{}\n🔍 {} lines read (start {} exceeded length {}, showing last {} chars)",
            content, line_count, user_start, total_file_len, total_file_len - fallback_start
        ));
    }

    // Use optimized seek-based reading
    let content = read_file_range(path, user_start, effective_end)?;
    let line_count = content.lines().count();

    // Format output based on whether truncation occurred
    if was_truncated {
        let context_pct = (ctx.context_used_tokens as f32 / ctx.context_total_tokens as f32 * 100.0) as u32;
        Ok(format!(
            "{}\n🔍 {} lines read (truncated, chars {}-{} of {}, context {}%)",
            content, line_count, user_start, effective_end, total_file_len, context_pct
        ))
    } else if end_was_clamped {
        Ok(format!(
            "{}\n🔍 {} lines read (chars {}-{}, end clamped from {} to file length {})",
            content, line_count, user_start, effective_end, end_char.unwrap(), total_file_len
        ))
    } else if start_char.is_some() || end_char.is_some() {
        Ok(format!(
            "{}\n🔍 {} lines read (chars {}-{})",
            content, line_count, user_start, effective_end
        ))
    } else {
        Ok(format!("{}\n🔍 {} lines read", content, line_count))
    }
}
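To make the token-aware clamp above concrete, a standalone sketch (calculate_read_limit itself is not part of this diff; the Some(2_048) byte budget below is a made-up stand-in):

fn main() {
    let (user_start, user_end) = (1_000usize, 9_000usize);
    let read_limit: Option<usize> = Some(2_048); // hypothetical byte budget
    let user_range_len = user_end - user_start;
    // Same match as in execute_read_file: shrink the window, flag truncation
    let (effective_end, was_truncated) = match read_limit {
        Some(max_bytes) if user_range_len > max_bytes => (user_start + max_bytes, true),
        _ => (user_end, false),
    };
    assert_eq!((effective_end, was_truncated), (3_048, true));
}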
/// Read a specific byte range from a file using seek (O(1) seek + O(n) read where n = range size).
/// Handles UTF-8 boundary issues by extending the read slightly and trimming invalid chars.
fn read_file_range(path: &Path, start: usize, end: usize) -> Result<String> {
    let file = File::open(path)?;
    let mut reader = BufReader::new(file);

    // For UTF-8 safety, we may need to adjust boundaries.
    // UTF-8 characters are 1-4 bytes, so we read up to 3 extra bytes at start
    // to find a valid character boundary.

    // Calculate how far back we might need to look for a char boundary
    let safe_start = start.saturating_sub(3);
    let extra_at_start = start - safe_start;

    // Read a few extra bytes at the end to complete any partial char
    let extra_at_end = 3;

    // Seek to safe start position
    reader.seek(SeekFrom::Start(safe_start as u64))?;

    // Read the extended range
    let bytes_to_read = (end - safe_start) + extra_at_end;
    let mut buffer = vec![0u8; bytes_to_read];
    let bytes_read = reader.read(&mut buffer)?;
    buffer.truncate(bytes_read);

    // Convert to string - this should work since we read the whole file originally as UTF-8
    // But we need to find valid boundaries within our extended read
    let full_str = match std::str::from_utf8(&buffer) {
        Ok(s) => s.to_string(),
        Err(_) => {
            // If the whole buffer isn't valid UTF-8, try to find valid boundaries
            // This can happen with binary files or corrupted data
            return Ok(String::from_utf8_lossy(&buffer).into_owned());
        }
    };

    // Now we need to trim to the actual requested range
    // We read from safe_start, but user wants from start
    // So we need to skip `extra_at_start` bytes worth of characters

    if extra_at_start == 0 && bytes_read <= (end - start) + extra_at_end {
        // Simple case: we started at the right place
        // Just trim any extra at the end
        let target_len = end - start;
        if full_str.len() <= target_len {
            return Ok(full_str);
        }
        // Find char boundary at target_len
        let end_idx = full_str
            .char_indices()
            .take_while(|(i, _)| *i < target_len)
            .last()
            .map(|(i, c)| i + c.len_utf8())
            .unwrap_or(full_str.len());
        return Ok(full_str[..end_idx].to_string());
    }

    // Complex case: we read extra at the start, need to skip those bytes
    // Find the character that starts at or after `extra_at_start` bytes
    let start_idx = full_str
        .char_indices()
        .find(|(i, _)| *i >= extra_at_start)
        .map(|(i, _)| i)
        .unwrap_or(0);

    // Calculate target end based on original request
    let target_byte_len = end - start;
    let end_idx = full_str
        .char_indices()
        .take_while(|(i, _)| *i < start_idx + target_byte_len)
        .last()
        .map(|(i, c)| i + c.len_utf8())
        .unwrap_or(full_str.len());

    Ok(full_str[start_idx..end_idx.min(full_str.len())].to_string())
}

/// Execute the `read_image` tool.
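The core of read_file_range's UTF-8 handling is boundary snapping: a byte offset that lands inside a multi-byte character is moved to the next character boundary instead of slicing mid-char. A minimal standalone sketch of that idea:

fn snap_to_char_boundary(s: &str, byte_pos: usize) -> usize {
    // First char boundary at or after byte_pos (s.len() if past the end)
    s.char_indices()
        .map(|(i, _)| i)
        .find(|&i| i >= byte_pos)
        .unwrap_or(s.len())
}

fn main() {
    let s = "hello🎉world"; // the emoji occupies bytes 5..9
    assert_eq!(snap_to_char_boundary(s, 6), 9); // byte 6 is mid-emoji: snap forward
    assert_eq!(snap_to_char_boundary(s, 5), 5); // byte 5 starts the emoji: keep it
    assert_eq!(snap_to_char_boundary(s, 99), s.len()); // past the end
}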
shell.rs:

@@ -1,8 +1,10 @@
//! Shell command execution tools.

use anyhow::Result;
use std::fs;
use tracing::debug;

use crate::paths::{generate_short_id, get_tools_output_dir};
use crate::ui_writer::UiWriter;
use crate::utils::resolve_paths_in_shell_command;
use crate::utils::shell_escape_command;

@@ -10,6 +12,62 @@ use crate::ToolCall;

use super::executor::ToolContext;
/// Threshold for truncating output (8KB)
const OUTPUT_TRUNCATE_THRESHOLD: usize = 8 * 1024;

/// Number of characters to show in truncated output head
const TRUNCATED_HEAD_SIZE: usize = 500;

/// Truncate output if it exceeds the threshold, saving full content to a file.
///
/// If the output is larger than OUTPUT_TRUNCATE_THRESHOLD:
/// 1. Saves the full output to `.g3/sessions/<session_id>/tools/<tool>_<id>_<stream>.txt`
/// 2. Returns the first TRUNCATED_HEAD_SIZE chars with a message pointing to the file
///
/// If session_id is None, returns the original output unchanged.
fn truncate_large_output(
    output: &str,
    session_id: Option<&str>,
    tool_name: &str,
    stream_name: &str, // "stdout" or "stderr"
) -> String {
    // If output is small enough or no session, return as-is
    if output.len() <= OUTPUT_TRUNCATE_THRESHOLD || session_id.is_none() {
        return output.to_string();
    }

    let session_id = session_id.unwrap();
    let output_id = generate_short_id();
    let tools_dir = get_tools_output_dir(session_id);

    // Create tools directory if needed
    if let Err(e) = fs::create_dir_all(&tools_dir) {
        debug!("Failed to create tools output dir: {}", e);
        return output.to_string();
    }

    let filename = format!("{}_{}.txt", tool_name, output_id);
    let file_path = tools_dir.join(&filename);

    // Save full output to file
    if let Err(e) = fs::write(&file_path, output) {
        debug!("Failed to save large output to file: {}", e);
        return output.to_string();
    }

    // Truncate to first TRUNCATED_HEAD_SIZE chars (UTF-8 safe)
    let head: String = output.chars().take(TRUNCATED_HEAD_SIZE).collect();
    let total_chars = output.chars().count();

    format!(
        "{}\n\n[[ {} TRUNCATED ({} total chars) ]]\nFull output saved to: {}\nUse read_file to see more.",
        head,
        stream_name.to_uppercase(),
        total_chars,
        file_path.display()
    )
}
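One subtlety worth calling out: the 8 KB threshold is measured in bytes (output.len()) while the 500-char head is taken by characters (chars().take), so multi-byte text never gets split. A runnable sketch of that distinction (sizes invented):

fn main() {
    let output = "🎉".repeat(3_000); // 3_000 chars but 12_000 bytes: over the 8 KB threshold
    assert!(output.len() > 8 * 1024);

    // Char-based head, as in truncate_large_output: exactly 500 emojis, 2_000 bytes
    let head: String = output.chars().take(500).collect();
    assert_eq!(head.chars().count(), 500);
    assert_eq!(head.len(), 2_000);

    // A byte-based slice like &output[..501] would panic: byte 501 is mid-emoji
}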
/// Execute the `shell` tool.
pub async fn execute_shell<W: UiWriter>(tool_call: &ToolCall, ctx: &ToolContext<'_, W>) -> Result<String> {
    debug!("Processing shell tool call");

@@ -55,20 +113,40 @@ pub async fn execute_shell<W: UiWriter>(tool_call: &ToolCall, ctx: &ToolContext<
    {
        Ok(result) => {
            if result.success {
                Ok(if result.stdout.is_empty() {
                    "⚡️ ran successfully".to_string()
                if result.stdout.is_empty() {
                    Ok("⚡️ ran successfully".to_string())
                } else {
                    result.stdout.trim().to_string()
                })
                    let stdout = result.stdout.trim();
                    let truncated = truncate_large_output(
                        stdout,
                        ctx.session_id,
                        "shell_stdout",
                        "stdout",
                    );
                    Ok(truncated)
                }
            } else {
                // Build error message with available information
                let stderr = result.stderr.trim();
                let stdout = result.stdout.trim();

                if !stderr.is_empty() {
                    Ok(format!("❌ {}", stderr))
                    let truncated = truncate_large_output(
                        stderr,
                        ctx.session_id,
                        "shell_stderr",
                        "stderr",
                    );
                    Ok(format!("❌ {}", truncated))
                } else if !stdout.is_empty() {
                    // Sometimes error info is in stdout
                    Ok(format!("❌ Exit code {}: {}", result.exit_code, stdout))
                    let truncated = truncate_large_output(
                        stdout,
                        ctx.session_id,
                        "shell_stdout",
                        "stdout",
                    );
                    Ok(format!("❌ Exit code {}: {}", result.exit_code, truncated))
                } else {
                    Ok(format!("❌ Command failed with exit code {}", result.exit_code))
                }

@@ -127,3 +205,64 @@ pub async fn execute_background_process<W: UiWriter>(
        Err(e) => Ok(format!("❌ Failed to start background process: {}", e)),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_truncate_small_output() {
        let output = "small output";
        let result = truncate_large_output(output, Some("test-session"), "shell", "stdout");
        assert_eq!(result, output);
    }

    #[test]
    fn test_truncate_no_session() {
        let output = "x".repeat(10000);
        let result = truncate_large_output(&output, None, "shell", "stdout");
        assert_eq!(result, output);
    }

    #[test]
    fn test_truncate_large_output_format() {
        let large_output = "x".repeat(10000);

        assert!(large_output.len() > OUTPUT_TRUNCATE_THRESHOLD);

        // Test UTF-8 safe truncation
        let head: String = large_output.chars().take(TRUNCATED_HEAD_SIZE).collect();
        assert_eq!(head.len(), TRUNCATED_HEAD_SIZE);
    }

    #[test]
    fn test_truncate_utf8_safe() {
        // Test with multi-byte characters
        let emoji_output = "🎉".repeat(5000); // Each emoji is 4 bytes
        let head: String = emoji_output.chars().take(TRUNCATED_HEAD_SIZE).collect();

        // Should have exactly TRUNCATED_HEAD_SIZE characters (emojis)
        assert_eq!(head.chars().count(), TRUNCATED_HEAD_SIZE);
    }

    #[test]
    fn test_truncate_saves_to_file() {
        use tempfile::TempDir;
        use std::env;

        // Create a temp directory and set it as the workspace
        let temp_dir = TempDir::new().unwrap();
        let old_dir = env::current_dir().unwrap();
        env::set_current_dir(temp_dir.path()).unwrap();

        let large_output = "y".repeat(10000);
        let result = truncate_large_output(&large_output, Some("test-sess"), "shell_stdout", "stdout");

        // Should be truncated
        assert!(result.contains("[[ STDOUT TRUNCATED"));
        assert!(result.contains("Use read_file to see more."));
        assert!(result.starts_with(&"y".repeat(500)));

        env::set_current_dir(old_dir).unwrap();
    }
}
crates/g3-core/tests/read_file_utf8_test.rs (new file, 81 lines):

@@ -0,0 +1,81 @@
//! Tests for UTF-8 safe file reading with seek optimization.

use std::fs;
use std::io::Write;
use tempfile::TempDir;

/// Test that reading a file with multi-byte UTF-8 characters works correctly
/// when the byte range falls in the middle of a character.
#[test]
fn test_read_file_range_utf8_boundary() {
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("utf8_test.txt");

    // Create a file with emoji (4-byte UTF-8 chars)
    // "🎉" is 4 bytes: F0 9F 8E 89
    // "hello🎉world🎉test"
    // h=1, e=1, l=1, l=1, o=1, 🎉=4, w=1, o=1, r=1, l=1, d=1, 🎉=4, t=1, e=1, s=1, t=1
    // Byte positions: hello=0-4, 🎉=5-8, world=9-13, 🎉=14-17, test=18-21
    let content = "hello🎉world🎉test";
    fs::write(&file_path, content).unwrap();

    // Verify the byte layout
    let bytes = fs::read(&file_path).unwrap();
    assert_eq!(bytes.len(), 22); // 5 + 4 + 5 + 4 + 4 = 22 bytes

    // Read the whole file - should work
    let result = fs::read_to_string(&file_path).unwrap();
    assert_eq!(result, content);
}

/// Test that we handle files with various UTF-8 characters
#[test]
fn test_utf8_various_chars() {
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("utf8_various.txt");

    // Mix of 1-byte (ASCII), 2-byte (é), 3-byte (中), and 4-byte (🎉) chars
    let content = "café中文🎉done";
    fs::write(&file_path, content).unwrap();

    let bytes = fs::read(&file_path).unwrap();
    // c=1, a=1, f=1, é=2, 中=3, 文=3, 🎉=4, d=1, o=1, n=1, e=1 = 19 bytes
    assert_eq!(bytes.len(), 19);

    let result = fs::read_to_string(&file_path).unwrap();
    assert_eq!(result, content);
}

/// Test reading from the middle of a file with UTF-8 content
#[test]
fn test_read_middle_of_utf8_file() {
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("utf8_middle.txt");

    // Create a larger file with UTF-8 content
    let mut content = String::new();
    for i in 0..100 {
        content.push_str(&format!("line{}🎉\n", i));
    }
    fs::write(&file_path, &content).unwrap();

    // Read from the middle - this exercises the seek + UTF-8 boundary logic
    let full = fs::read_to_string(&file_path).unwrap();
    assert!(full.contains("line50🎉"));
}

/// Test that binary files don't cause panics
#[test]
fn test_binary_file_no_panic() {
    let temp_dir = TempDir::new().unwrap();
    let file_path = temp_dir.path().join("binary.bin");

    // Write some binary data with invalid UTF-8 sequences
    let mut file = fs::File::create(&file_path).unwrap();
    file.write_all(&[0xFF, 0xFE, 0x00, 0x01, 0x80, 0x81, 0x82]).unwrap();

    // Reading as string should not panic (will use lossy conversion)
    // This tests the fallback path in read_file_range
    let result = fs::read(&file_path);
    assert!(result.is_ok());
}
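For reference, the lossy fallback these bytes exercise (String::from_utf8_lossy in read_file_range) maps each invalid byte to U+FFFD rather than panicking — a standalone sketch:

fn main() {
    let bytes = [0xFF, 0xFE, b'o', b'k'];
    let text = String::from_utf8_lossy(&bytes);
    assert_eq!(text, "\u{FFFD}\u{FFFD}ok");
    println!("{}", text); // ��ok
}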