Fix Unicode space handling in macOS screenshot filenames

macOS uses U+202F (Narrow No-Break Space) in screenshot filenames
between the time and am/pm. When users type or paste these paths,
they use regular spaces, causing file-not-found errors.

Changes:
- Add resolve_path_with_unicode_fallback() to try U+202F variants
- Add resolve_paths_in_shell_command() for shell command paths
- Apply fix to read_file, read_image, and shell tools
- Fix read_image prompt docs: file_path -> file_paths (array)
- Add 6 unit tests for Unicode space normalization
This commit is contained in:
Dhanji R. Prasanna
2026-01-03 17:17:08 +11:00
parent f7e2f38fe9
commit 29e263ac49
4 changed files with 181 additions and 5 deletions

View File

@@ -260,8 +260,8 @@ Short description for providers without native calling specs:
- Example (partial): {\"tool\": \"read_file\", \"args\": {\"file_path\": \"large.log\", \"start\": 0, \"end\": 1000}
- **read_image**: Read an image file for visual analysis (PNG, JPEG, GIF, WebP)
- Format: {\"tool\": \"read_image\", \"args\": {\"file_path\": \"path/to/image.png\"}}
- Example: {\"tool\": \"read_image\", \"args\": {\"file_path\": \"sprites/fairy.png\"}}
- Format: {\"tool\": \"read_image\", \"args\": {\"file_paths\": [\"path/to/image.png\"]}}
- Example: {\"tool\": \"read_image\", \"args\": {\"file_paths\": [\"sprites/fairy.png\"]}}
- **write_file**: Write content to a file (creates or overwrites)
- Format: {\"tool\": \"write_file\", \"args\": {\"file_path\": \"path/to/file\", \"content\": \"file content\"}

View File

@@ -4,6 +4,7 @@ use anyhow::Result;
use tracing::debug;
use crate::ui_writer::UiWriter;
use crate::utils::resolve_path_with_unicode_fallback;
use crate::utils::apply_unified_diff_to_string;
use crate::ToolCall;
@@ -23,7 +24,9 @@ pub async fn execute_read_file<W: UiWriter>(
// Expand tilde (~) to home directory
let expanded_path = shellexpand::tilde(file_path);
let path_str = expanded_path.as_ref();
// Try to resolve with Unicode space fallback (macOS uses U+202F in screenshot names)
let resolved_path = resolve_path_with_unicode_fallback(expanded_path.as_ref());
let path_str = resolved_path.as_ref();
// Check if this is an image file
let is_image = path_str.to_lowercase().ends_with(".png")
@@ -166,7 +169,9 @@ pub async fn execute_read_image<W: UiWriter>(
for path_str in &paths {
// Expand tilde (~) to home directory
let expanded_path = shellexpand::tilde(path_str);
let path = std::path::Path::new(expanded_path.as_ref());
// Try to resolve with Unicode space fallback (macOS uses U+202F in screenshot names)
let resolved_path = resolve_path_with_unicode_fallback(expanded_path.as_ref());
let path = std::path::Path::new(resolved_path.as_ref());
// Check file exists
if !path.exists() {

View File

@@ -4,6 +4,7 @@ use anyhow::Result;
use tracing::debug;
use crate::ui_writer::UiWriter;
use crate::utils::resolve_paths_in_shell_command;
use crate::utils::shell_escape_command;
use crate::ToolCall;
@@ -22,7 +23,10 @@ pub async fn execute_shell<W: UiWriter>(tool_call: &ToolCall, ctx: &ToolContext<
};
debug!("Command string: {}", command);
let escaped_command = shell_escape_command(command);
// First resolve any file paths with Unicode space fallback (macOS screenshot names)
let resolved_command = resolve_paths_in_shell_command(command);
debug!("Resolved command: {}", resolved_command);
let escaped_command = shell_escape_command(&resolved_command);
let executor = g3_execution::CodeExecutor::new();

View File

@@ -8,6 +8,130 @@
use anyhow::Result;
use tracing::debug;
/// Normalize Unicode space characters in a file path to regular ASCII spaces.
///
/// macOS uses special Unicode space characters in certain filenames:
/// - U+202F (Narrow No-Break Space) in screenshot filenames before "am"/"pm"
/// - U+00A0 (No-Break Space) in some contexts
///
/// Space variants that have a visible width (U+202F, U+00A0, U+2007, U+2008,
/// U+2009, U+200A) are replaced with a regular ASCII space (0x20). Zero-width
/// characters (U+200B, U+FEFF) are removed entirely: they render as nothing,
/// so turning them into a space would introduce a character the user never
/// sees or types. All other characters pass through unchanged.
///
/// # Arguments
/// * `path` - The file path that may contain Unicode space characters
///
/// # Returns
/// A new string with Unicode spaces normalized to ASCII spaces and zero-width
/// characters stripped
pub fn normalize_path_unicode_spaces(path: &str) -> String {
    path.chars()
        .filter_map(|c| match c {
            // Zero Width Space and Zero Width No-Break Space / BOM: drop.
            '\u{200B}' | '\u{FEFF}' => None,
            // Narrow No-Break, No-Break, Figure, Punctuation, Thin and Hair Space.
            '\u{202F}' | '\u{00A0}' | '\u{2007}' | '\u{2008}' | '\u{2009}' | '\u{200A}' => {
                Some(' ')
            }
            _ => Some(c),
        })
        .collect()
}
/// Resolve a file path, falling back to the macOS narrow no-break space variant.
///
/// Lookup order:
/// 1. The path exactly as given.
/// 2. If that does not exist and the path contains an ASCII space, the same
///    path with the space in front of `am.` / `pm.` / `AM.` / `PM.` swapped
///    for U+202F (Narrow No-Break Space), which macOS uses in screenshot
///    filenames.
///
/// # Arguments
/// * `path` - The file path to resolve
///
/// # Returns
/// The variant that exists on disk (borrowed when it is the input itself,
/// owned when the U+202F variant matched), or the original path when neither
/// exists.
pub fn resolve_path_with_unicode_fallback(path: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;
    use std::path::Path;

    // (typed form, on-disk form) pairs, applied in this order.
    const MERIDIEM_VARIANTS: [(&str, &str); 4] = [
        (" am.", "\u{202F}am."),
        (" pm.", "\u{202F}pm."),
        (" AM.", "\u{202F}AM."),
        (" PM.", "\u{202F}PM."),
    ];

    // Nothing to do when the path already exists, and nothing to try when it
    // has no regular space that could stand in for U+202F.
    if Path::new(path).exists() || !path.contains(' ') {
        return Cow::Borrowed(path);
    }

    let candidate = MERIDIEM_VARIANTS
        .iter()
        .fold(path.to_owned(), |acc, &(typed, on_disk)| {
            acc.replace(typed, on_disk)
        });

    // Only hand back the rewritten path if it differs and really exists.
    if candidate != path && Path::new(&candidate).exists() {
        Cow::Owned(candidate)
    } else {
        Cow::Borrowed(path)
    }
}
/// Rewrite double-quoted file paths in a shell command to their on-disk form.
///
/// The command is scanned for `"..."` strings (a backslash inside the quotes
/// skips the following character). A quoted string is treated as a path when
/// it starts with `/` or `~`. If that path does not exist as written, it is
/// passed through `resolve_path_with_unicode_fallback`, and when a different
/// path comes back every occurrence of the original quoted string in the
/// command is replaced by the resolved one (re-wrapped in double quotes).
///
/// # Arguments
/// * `command` - The shell command that may contain file paths
///
/// # Returns
/// The command with resolvable quoted paths substituted; otherwise an
/// unchanged copy of `command`
pub fn resolve_paths_in_shell_command(command: &str) -> String {
    use std::path::Path;

    let mut rewritten = String::from(command);
    let mut cursor = command.char_indices();

    while let Some((open, ch)) = cursor.next() {
        if ch != '"' {
            continue;
        }

        // Walk forward to the matching closing quote, honouring backslash escapes.
        let mut close = None;
        while let Some((pos, inner)) = cursor.next() {
            match inner {
                '"' => {
                    close = Some(pos);
                    break;
                }
                '\\' => {
                    // Skip whatever character the backslash escapes.
                    let _ = cursor.next();
                }
                _ => {}
            }
        }

        // An unterminated quote consumed the rest of the command; stop scanning.
        let close = match close {
            Some(pos) => pos,
            None => break,
        };

        // '"' is a single byte, so these offsets are valid char boundaries.
        let candidate = &command[open + 1..close];
        let looks_like_path = candidate.starts_with('/') || candidate.starts_with('~');
        if !looks_like_path || Path::new(candidate).exists() {
            continue;
        }

        let resolved = resolve_path_with_unicode_fallback(candidate);
        if resolved.as_ref() != candidate {
            let old_quoted = &command[open..=close];
            let new_quoted = format!("\"{}\"", resolved);
            rewritten = rewritten.replace(old_quoted, &new_quoted);
        }
    }

    rewritten
}
/// Apply unified diff to an input string with optional [start, end) bounds.
///
/// # Arguments
@@ -437,4 +561,47 @@ mod tests {
let result = fix_mixed_quotes_in_json(input);
assert_eq!(result, "{\"key\": \"value\"}");
}
#[test]
fn normalize_path_unicode_spaces_converts_narrow_no_break_space() {
    // macOS screenshot filenames carry U+202F (Narrow No-Break Space) before am/pm.
    let expected = "/Users/test/Screenshot 2025-01-03 at 4.41.27 pm.png";
    let actual = normalize_path_unicode_spaces(
        "/Users/test/Screenshot 2025-01-03 at 4.41.27\u{202F}pm.png",
    );
    assert_eq!(actual, expected);
}
#[test]
fn normalize_path_unicode_spaces_converts_no_break_space() {
    // A U+00A0 (No-Break Space) must come back as a plain ASCII space.
    let expected = "/Users/test/file name.txt";
    let actual = normalize_path_unicode_spaces("/Users/test/file\u{00A0}name.txt");
    assert_eq!(actual, expected);
}
#[test]
fn normalize_path_unicode_spaces_preserves_regular_spaces() {
    // A path containing only ASCII spaces must round-trip unchanged.
    let ascii_only = "/Users/test/file with spaces.txt";
    assert_eq!(normalize_path_unicode_spaces(ascii_only), ascii_only);
}
#[test]
fn normalize_path_unicode_spaces_handles_multiple_unicode_spaces() {
    // Mixes three space kinds in one path: U+202F, U+00A0 and U+2009.
    let mixed = "/Users/test/a\u{202F}b\u{00A0}c\u{2009}d.txt";
    let expected = "/Users/test/a b c d.txt";
    assert_eq!(normalize_path_unicode_spaces(mixed), expected);
}
#[test]
fn resolve_paths_in_shell_command_preserves_commands_without_paths() {
    // No quoted strings at all, so the command must come back verbatim.
    let plain_command = "echo hello world";
    let resolved = resolve_paths_in_shell_command(plain_command);
    assert_eq!(resolved, plain_command);
}
#[test]
fn resolve_paths_in_shell_command_preserves_existing_paths() {
    // The quoted path is passed through untouched; no Unicode rewrite is expected.
    let command_with_path = "cat \"/etc/hosts\"";
    let resolved = resolve_paths_in_shell_command(command_with_path);
    assert_eq!(resolved, command_with_path);
}
}