Fix Unicode space handling in macOS screenshot filenames
macOS uses U+202F (Narrow No-Break Space) in screenshot filenames between the time and am/pm. When users type or paste these paths, they use regular spaces, causing file-not-found errors.

Changes:
- Add resolve_path_with_unicode_fallback() to try U+202F variants
- Add resolve_paths_in_shell_command() for shell command paths
- Apply fix to read_file, read_image, and shell tools
- Fix read_image prompt docs: file_path -> file_paths (array)
- Add 6 unit tests for Unicode space normalization
This commit is contained in:
@@ -260,8 +260,8 @@ Short description for providers without native calling specs:
|
||||
- Example (partial): {\"tool\": \"read_file\", \"args\": {\"file_path\": \"large.log\", \"start\": 0, \"end\": 1000}
|
||||
|
||||
- **read_image**: Read an image file for visual analysis (PNG, JPEG, GIF, WebP)
|
||||
- Format: {\"tool\": \"read_image\", \"args\": {\"file_path\": \"path/to/image.png\"}}
|
||||
- Example: {\"tool\": \"read_image\", \"args\": {\"file_path\": \"sprites/fairy.png\"}}
|
||||
- Format: {\"tool\": \"read_image\", \"args\": {\"file_paths\": [\"path/to/image.png\"]}}
|
||||
- Example: {\"tool\": \"read_image\", \"args\": {\"file_paths\": [\"sprites/fairy.png\"]}}
|
||||
|
||||
- **write_file**: Write content to a file (creates or overwrites)
|
||||
- Format: {\"tool\": \"write_file\", \"args\": {\"file_path\": \"path/to/file\", \"content\": \"file content\"}
|
||||
|
||||
@@ -4,6 +4,7 @@ use anyhow::Result;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::utils::resolve_path_with_unicode_fallback;
|
||||
use crate::utils::apply_unified_diff_to_string;
|
||||
use crate::ToolCall;
|
||||
|
||||
@@ -23,7 +24,9 @@ pub async fn execute_read_file<W: UiWriter>(
|
||||
|
||||
// Expand tilde (~) to home directory
|
||||
let expanded_path = shellexpand::tilde(file_path);
|
||||
let path_str = expanded_path.as_ref();
|
||||
// Try to resolve with Unicode space fallback (macOS uses U+202F in screenshot names)
|
||||
let resolved_path = resolve_path_with_unicode_fallback(expanded_path.as_ref());
|
||||
let path_str = resolved_path.as_ref();
|
||||
|
||||
// Check if this is an image file
|
||||
let is_image = path_str.to_lowercase().ends_with(".png")
|
||||
@@ -166,7 +169,9 @@ pub async fn execute_read_image<W: UiWriter>(
|
||||
for path_str in &paths {
|
||||
// Expand tilde (~) to home directory
|
||||
let expanded_path = shellexpand::tilde(path_str);
|
||||
let path = std::path::Path::new(expanded_path.as_ref());
|
||||
// Try to resolve with Unicode space fallback (macOS uses U+202F in screenshot names)
|
||||
let resolved_path = resolve_path_with_unicode_fallback(expanded_path.as_ref());
|
||||
let path = std::path::Path::new(resolved_path.as_ref());
|
||||
|
||||
// Check file exists
|
||||
if !path.exists() {
|
||||
|
||||
@@ -4,6 +4,7 @@ use anyhow::Result;
|
||||
use tracing::debug;
|
||||
|
||||
use crate::ui_writer::UiWriter;
|
||||
use crate::utils::resolve_paths_in_shell_command;
|
||||
use crate::utils::shell_escape_command;
|
||||
use crate::ToolCall;
|
||||
|
||||
@@ -22,7 +23,10 @@ pub async fn execute_shell<W: UiWriter>(tool_call: &ToolCall, ctx: &ToolContext<
|
||||
};
|
||||
|
||||
debug!("Command string: {}", command);
|
||||
let escaped_command = shell_escape_command(command);
|
||||
// First resolve any file paths with Unicode space fallback (macOS screenshot names)
|
||||
let resolved_command = resolve_paths_in_shell_command(command);
|
||||
debug!("Resolved command: {}", resolved_command);
|
||||
let escaped_command = shell_escape_command(&resolved_command);
|
||||
|
||||
let executor = g3_execution::CodeExecutor::new();
|
||||
|
||||
|
||||
@@ -8,6 +8,130 @@
|
||||
use anyhow::Result;
|
||||
use tracing::debug;
|
||||
|
||||
/// Normalize Unicode space characters in a file path.
///
/// macOS uses special Unicode space characters in certain filenames:
/// - U+202F (Narrow No-Break Space) in screenshot filenames before "am"/"pm"
/// - U+00A0 (No-Break Space) in some contexts
///
/// Visible Unicode spaces (U+202F, U+00A0, U+2007, U+2008, U+2009, U+200A)
/// are replaced with a regular ASCII space (0x20). Zero-width characters
/// (U+200B Zero Width Space, U+FEFF Zero Width No-Break Space / BOM) are
/// removed entirely: they occupy no visible width, so a user typing the path
/// would not type anything in their place.
///
/// # Arguments
/// * `path` - The file path that may contain Unicode space characters
///
/// # Returns
/// A new string with visible Unicode spaces turned into ASCII spaces and
/// zero-width characters stripped. All other characters are kept as-is.
pub fn normalize_path_unicode_spaces(path: &str) -> String {
    path.chars()
        .filter_map(|c| match c {
            // Visible-width space variants become a plain ASCII space.
            '\u{202F}' // Narrow No-Break Space
            | '\u{00A0}' // No-Break Space
            | '\u{2007}' // Figure Space
            | '\u{2008}' // Punctuation Space
            | '\u{2009}' // Thin Space
            | '\u{200A}' => Some(' '), // Hair Space
            // Zero-width characters are dropped rather than widened into a space.
            '\u{200B}' // Zero Width Space
            | '\u{FEFF}' => None, // Zero Width No-Break Space / BOM
            _ => Some(c),
        })
        .collect()
}
|
||||
|
||||
/// Try to resolve a file path, handling the narrow no-break space that macOS
/// puts in front of "am"/"pm" in screenshot filenames.
///
/// Candidates are tried in this order; the first one that exists wins:
/// 1. The path exactly as given.
/// 2. The path with every `" am."`, `" pm."`, `" AM."`, `" PM."` rewritten to
///    use U+202F instead of the ASCII space.
/// 3. The path with that rewrite applied only to the final `/`-separated
///    component, so a parent directory that really contains an ASCII
///    `" pm."` does not spoil the lookup.
///
/// Only the space directly before an am/pm marker followed by `.` is
/// considered; other spaces in the path are never altered.
///
/// # Arguments
/// * `path` - The file path to resolve
///
/// # Returns
/// The resolved path that exists (owned when a rewrite was needed), or the
/// original path, borrowed, if no candidate exists on disk.
pub fn resolve_path_with_unicode_fallback(path: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;
    use std::path::Path;

    // Swap the ASCII space in front of an am/pm marker for U+202F.
    fn with_narrow_meridiem_space(text: &str) -> String {
        text.replace(" am.", "\u{202F}am.")
            .replace(" pm.", "\u{202F}pm.")
            .replace(" AM.", "\u{202F}AM.")
            .replace(" PM.", "\u{202F}PM.")
    }

    // Nothing to do when the path already exists, and nothing to rewrite when
    // it contains no ASCII space at all.
    if Path::new(path).exists() || !path.contains(' ') {
        return Cow::Borrowed(path);
    }

    // Candidate: rewrite every am/pm marker in the whole path.
    let whole = with_narrow_meridiem_space(path);
    if whole != path && Path::new(&whole).exists() {
        return Cow::Owned(whole);
    }

    // Candidate: rewrite only the file name, leaving parent directories alone.
    if let Some((dir, file_name)) = path.rsplit_once('/') {
        let fixed_name = with_narrow_meridiem_space(file_name);
        if fixed_name != file_name {
            let candidate = format!("{}/{}", dir, fixed_name);
            // Skip the stat when this is the candidate that was already tried.
            if candidate != whole && Path::new(&candidate).exists() {
                return Cow::Owned(candidate);
            }
        }
    }

    // Return original path if no Unicode variant was found.
    Cow::Borrowed(path)
}
|
||||
|
||||
/// Resolve file paths within a shell command, handling Unicode space normalization.
|
||||
///
|
||||
/// This function finds quoted file paths in a shell command and resolves them
|
||||
/// using Unicode space fallback (for macOS screenshot filenames with U+202F).
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `command` - The shell command that may contain file paths
|
||||
///
|
||||
/// # Returns
|
||||
/// The command with file paths resolved to their actual filesystem paths
|
||||
pub fn resolve_paths_in_shell_command(command: &str) -> String {
|
||||
use std::path::Path;
|
||||
|
||||
let mut result = command.to_string();
|
||||
|
||||
// Find all double-quoted strings that look like file paths
|
||||
let mut i = 0;
|
||||
let chars: Vec<char> = command.chars().collect();
|
||||
|
||||
while i < chars.len() {
|
||||
if chars[i] == '"' {
|
||||
// Found start of quoted string
|
||||
let start = i;
|
||||
i += 1;
|
||||
while i < chars.len() && chars[i] != '"' {
|
||||
if chars[i] == '\\' && i + 1 < chars.len() {
|
||||
i += 2; // Skip escaped character
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
if i < chars.len() {
|
||||
// Extract the quoted content (without quotes)
|
||||
let quoted_content: String = chars[start + 1..i].iter().collect();
|
||||
|
||||
// Check if it looks like a file path and doesn't exist
|
||||
if (quoted_content.starts_with('/') || quoted_content.starts_with('~'))
|
||||
&& !Path::new("ed_content).exists()
|
||||
{
|
||||
let resolved = resolve_path_with_unicode_fallback("ed_content);
|
||||
if resolved.as_ref() != quoted_content {
|
||||
let old_quoted: String = chars[start..=i].iter().collect();
|
||||
let new_quoted = format!("\"{}\"", resolved);
|
||||
result = result.replace(&old_quoted, &new_quoted);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Apply unified diff to an input string with optional [start, end) bounds.
|
||||
///
|
||||
/// # Arguments
|
||||
@@ -437,4 +561,47 @@ mod tests {
|
||||
let result = fix_mixed_quotes_in_json(input);
|
||||
assert_eq!(result, "{\"key\": \"value\"}");
|
||||
}
|
||||
|
||||
#[test]
fn normalize_path_unicode_spaces_converts_narrow_no_break_space() {
    // macOS screenshot filenames carry U+202F (Narrow No-Break Space) before am/pm.
    let expected = "/Users/test/Screenshot 2025-01-03 at 4.41.27 pm.png";
    let actual =
        normalize_path_unicode_spaces("/Users/test/Screenshot 2025-01-03 at 4.41.27\u{202F}pm.png");
    assert_eq!(actual, expected);
}
|
||||
|
||||
#[test]
fn normalize_path_unicode_spaces_converts_no_break_space() {
    // U+00A0 (No-Break Space) must come out as a plain ASCII space.
    let expected = "/Users/test/file name.txt";
    let actual = normalize_path_unicode_spaces("/Users/test/file\u{00A0}name.txt");
    assert_eq!(actual, expected);
}
|
||||
|
||||
#[test]
fn normalize_path_unicode_spaces_preserves_regular_spaces() {
    // A path that only has ASCII spaces must round-trip unchanged.
    let original = "/Users/test/file with spaces.txt";
    assert_eq!(normalize_path_unicode_spaces(original), original);
}
|
||||
|
||||
#[test]
fn normalize_path_unicode_spaces_handles_multiple_unicode_spaces() {
    // Mix of U+202F, U+00A0 and U+2009 in a single path.
    let mixed = "/Users/test/a\u{202F}b\u{00A0}c\u{2009}d.txt";
    let expected = "/Users/test/a b c d.txt";
    assert_eq!(normalize_path_unicode_spaces(mixed), expected);
}
|
||||
|
||||
#[test]
fn resolve_paths_in_shell_command_preserves_commands_without_paths() {
    // No quoted path anywhere, so the command must come back verbatim.
    let command = "echo hello world";
    let resolved = resolve_paths_in_shell_command(command);
    assert_eq!(resolved, command);
}
|
||||
|
||||
#[test]
fn resolve_paths_in_shell_command_preserves_existing_paths() {
    // The test expects a quoted absolute path to be left exactly as written.
    let command = "cat \"/etc/hosts\"";
    let resolved = resolve_paths_in_shell_command(command);
    assert_eq!(resolved, command);
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user