From 29e263ac49704715f7f2846c7b927d666bf4b012 Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Sat, 3 Jan 2026 17:17:08 +1100 Subject: [PATCH] Fix Unicode space handling in macOS screenshot filenames macOS uses U+202F (Narrow No-Break Space) in screenshot filenames between the time and am/pm. When users type or paste these paths, they use regular spaces, causing file-not-found errors. Changes: - Add resolve_path_with_unicode_fallback() to try U+202F variants - Add resolve_paths_in_shell_command() for shell command paths - Apply fix to read_file, read_image, and shell tools - Fix read_image prompt docs: file_path -> file_paths (array) - Add 6 unit tests for Unicode space normalization --- crates/g3-core/src/prompts.rs | 4 +- crates/g3-core/src/tools/file_ops.rs | 9 +- crates/g3-core/src/tools/shell.rs | 6 +- crates/g3-core/src/utils.rs | 167 +++++++++++++++++++++++++++ 4 files changed, 181 insertions(+), 5 deletions(-) diff --git a/crates/g3-core/src/prompts.rs b/crates/g3-core/src/prompts.rs index 4736049..7e93e66 100644 --- a/crates/g3-core/src/prompts.rs +++ b/crates/g3-core/src/prompts.rs @@ -260,8 +260,8 @@ Short description for providers without native calling specs: - Example (partial): {\"tool\": \"read_file\", \"args\": {\"file_path\": \"large.log\", \"start\": 0, \"end\": 1000} - **read_image**: Read an image file for visual analysis (PNG, JPEG, GIF, WebP) - - Format: {\"tool\": \"read_image\", \"args\": {\"file_path\": \"path/to/image.png\"}} - - Example: {\"tool\": \"read_image\", \"args\": {\"file_path\": \"sprites/fairy.png\"}} + - Format: {\"tool\": \"read_image\", \"args\": {\"file_paths\": [\"path/to/image.png\"]}} + - Example: {\"tool\": \"read_image\", \"args\": {\"file_paths\": [\"sprites/fairy.png\"]}} - **write_file**: Write content to a file (creates or overwrites) - Format: {\"tool\": \"write_file\", \"args\": {\"file_path\": \"path/to/file\", \"content\": \"file content\"} diff --git a/crates/g3-core/src/tools/file_ops.rs 
b/crates/g3-core/src/tools/file_ops.rs index 328d01f..711f030 100644 --- a/crates/g3-core/src/tools/file_ops.rs +++ b/crates/g3-core/src/tools/file_ops.rs @@ -4,6 +4,7 @@ use anyhow::Result; use tracing::debug; use crate::ui_writer::UiWriter; +use crate::utils::resolve_path_with_unicode_fallback; use crate::utils::apply_unified_diff_to_string; use crate::ToolCall; @@ -23,7 +24,9 @@ pub async fn execute_read_file( // Expand tilde (~) to home directory let expanded_path = shellexpand::tilde(file_path); - let path_str = expanded_path.as_ref(); + // Try to resolve with Unicode space fallback (macOS uses U+202F in screenshot names) + let resolved_path = resolve_path_with_unicode_fallback(expanded_path.as_ref()); + let path_str = resolved_path.as_ref(); // Check if this is an image file let is_image = path_str.to_lowercase().ends_with(".png") @@ -166,7 +169,9 @@ pub async fn execute_read_image( for path_str in &paths { // Expand tilde (~) to home directory let expanded_path = shellexpand::tilde(path_str); - let path = std::path::Path::new(expanded_path.as_ref()); + // Try to resolve with Unicode space fallback (macOS uses U+202F in screenshot names) + let resolved_path = resolve_path_with_unicode_fallback(expanded_path.as_ref()); + let path = std::path::Path::new(resolved_path.as_ref()); // Check file exists if !path.exists() { diff --git a/crates/g3-core/src/tools/shell.rs b/crates/g3-core/src/tools/shell.rs index 5ec93c6..e0d06de 100644 --- a/crates/g3-core/src/tools/shell.rs +++ b/crates/g3-core/src/tools/shell.rs @@ -4,6 +4,7 @@ use anyhow::Result; use tracing::debug; use crate::ui_writer::UiWriter; +use crate::utils::resolve_paths_in_shell_command; use crate::utils::shell_escape_command; use crate::ToolCall; @@ -22,7 +23,10 @@ pub async fn execute_shell(tool_call: &ToolCall, ctx: &ToolContext< }; debug!("Command string: {}", command); - let escaped_command = shell_escape_command(command); + // First resolve any file paths with Unicode space fallback (macOS 
/// Normalize exotic Unicode space characters in a file path.
///
/// macOS uses special Unicode space characters in certain filenames:
/// - U+202F (Narrow No-Break Space) in screenshot filenames before "am"/"pm"
/// - U+00A0 (No-Break Space) in some contexts
///
/// Visible space variants are replaced with a regular ASCII space (0x20) so
/// that paths typed or copied by users can match the actual filenames.
/// Zero-width characters (U+200B, U+FEFF) occupy no visible width, so they
/// are removed rather than turned into a visible space.
///
/// # Arguments
/// * `path` - The file path that may contain Unicode space characters
///
/// # Returns
/// A new string with Unicode spaces normalized to ASCII spaces and
/// zero-width characters dropped. All other characters are kept as-is.
pub fn normalize_path_unicode_spaces(path: &str) -> String {
    path.chars()
        .filter_map(|c| match c {
            '\u{202F}' // Narrow No-Break Space
            | '\u{00A0}' // No-Break Space
            | '\u{2007}' // Figure Space
            | '\u{2008}' // Punctuation Space
            | '\u{2009}' // Thin Space
            | '\u{200A}' => Some(' '), // Hair Space
            '\u{200B}' // Zero Width Space
            | '\u{FEFF}' => None, // Zero Width No-Break Space / BOM
            _ => Some(c),
        })
        .collect()
}

/// Try to resolve a file path, handling Unicode space mismatches.
///
/// Candidates are probed on the filesystem in this order and the first one
/// that exists wins:
/// 1. The path exactly as given.
/// 2. The path with the ASCII space before `am.`/`pm.`/`AM.`/`PM.` replaced
///    by U+202F (macOS uses U+202F in screenshot filenames).
/// 3. The path with exotic Unicode spaces normalized via
///    [`normalize_path_unicode_spaces`], covering the opposite mismatch
///    (the request carries U+202F/U+00A0 but the file uses ASCII spaces).
///
/// # Arguments
/// * `path` - The file path to resolve
///
/// # Returns
/// The first candidate that exists (borrowed when it is the input itself),
/// or the original path unchanged if no candidate exists.
pub fn resolve_path_with_unicode_fallback(path: &str) -> std::borrow::Cow<'_, str> {
    use std::borrow::Cow;
    use std::path::Path;

    // Fast path: nothing to fix, no allocation.
    if Path::new(path).exists() {
        return Cow::Borrowed(path);
    }

    // Every pattern contains an ASCII space, so a path without spaces comes
    // back unchanged and is filtered out by the inequality check below.
    let narrow_nbsp_variant = path
        .replace(" am.", "\u{202F}am.")
        .replace(" pm.", "\u{202F}pm.")
        .replace(" AM.", "\u{202F}AM.")
        .replace(" PM.", "\u{202F}PM.");
    if narrow_nbsp_variant != path && Path::new(&narrow_nbsp_variant).exists() {
        return Cow::Owned(narrow_nbsp_variant);
    }

    // Reverse direction: the caller supplied exotic spaces, the file has plain ones.
    let ascii_variant = normalize_path_unicode_spaces(path);
    if ascii_variant != path && Path::new(&ascii_variant).exists() {
        return Cow::Owned(ascii_variant);
    }

    // No variant exists; hand the input back so the caller reports the usual error.
    Cow::Borrowed(path)
}

/// Resolve file paths within a shell command, handling Unicode space mismatches.
///
/// Scans the command for double-quoted strings (a backslash inside the quotes
/// escapes the following character). Quoted content that starts with `/` or
/// `~` is treated as a file path and passed through
/// [`resolve_path_with_unicode_fallback`]; when that yields a different path,
/// exactly that quoted span is rewritten. Everything else, including an
/// unterminated trailing quote, is copied through unchanged.
///
/// NOTE(review): `~` is not expanded before the existence probe, so a
/// `~`-prefixed path is checked literally — confirm whether callers expand it.
///
/// # Arguments
/// * `command` - The shell command that may contain file paths
///
/// # Returns
/// The command with resolvable quoted paths replaced by the path that exists
/// on the filesystem.
pub fn resolve_paths_in_shell_command(command: &str) -> String {
    // `"` and `\` are ASCII and never occur inside a multi-byte UTF-8
    // sequence, so scanning bytes is safe and every quote offset is a valid
    // `str` slice boundary.
    let bytes = command.as_bytes();
    let mut result = String::with_capacity(command.len());
    let mut copied_up_to = 0; // first byte of `command` not yet written to `result`
    let mut i = 0;

    while i < bytes.len() {
        if bytes[i] != b'"' {
            i += 1;
            continue;
        }

        // Found an opening quote; look for its unescaped closing partner.
        let open = i;
        i += 1;
        while i < bytes.len() && bytes[i] != b'"' {
            // Skip the character following a backslash.
            i += if bytes[i] == b'\\' && i + 1 < bytes.len() { 2 } else { 1 };
        }
        if i >= bytes.len() {
            break; // unterminated quote: leave the remainder untouched
        }
        let close = i;

        let content = &command[open + 1..close];
        if content.starts_with('/') || content.starts_with('~') {
            let resolved = resolve_path_with_unicode_fallback(content);
            if &*resolved != content {
                // Copy everything up to and including the opening quote, then
                // the resolved path; the closing quote is copied later.
                result.push_str(&command[copied_up_to..=open]);
                result.push_str(&resolved);
                copied_up_to = close;
            }
        }
        i = close + 1;
    }

    result.push_str(&command[copied_up_to..]);
    result
}
/// U+202F (used by macOS in screenshot filenames) becomes an ASCII space.
#[test]
fn normalize_path_unicode_spaces_converts_narrow_no_break_space() {
    let path_with_unicode = "/Users/test/Screenshot 2025-01-03 at 4.41.27\u{202F}pm.png";
    assert_eq!(
        normalize_path_unicode_spaces(path_with_unicode),
        "/Users/test/Screenshot 2025-01-03 at 4.41.27 pm.png"
    );
}

/// U+00A0 (No-Break Space) becomes an ASCII space.
#[test]
fn normalize_path_unicode_spaces_converts_no_break_space() {
    let path_with_unicode = "/Users/test/file\u{00A0}name.txt";
    assert_eq!(
        normalize_path_unicode_spaces(path_with_unicode),
        "/Users/test/file name.txt"
    );
}

/// Paths that only contain regular spaces come back unchanged.
#[test]
fn normalize_path_unicode_spaces_preserves_regular_spaces() {
    let path = "/Users/test/file with spaces.txt";
    assert_eq!(normalize_path_unicode_spaces(path), path);
}

/// Several different Unicode space types in one path are all normalized.
#[test]
fn normalize_path_unicode_spaces_handles_multiple_unicode_spaces() {
    let path = "/Users/test/a\u{202F}b\u{00A0}c\u{2009}d.txt";
    assert_eq!(normalize_path_unicode_spaces(path), "/Users/test/a b c d.txt");
}

/// A command with no quoted paths is returned verbatim.
#[test]
fn resolve_paths_in_shell_command_preserves_commands_without_paths() {
    let cmd = "echo hello world";
    assert_eq!(resolve_paths_in_shell_command(cmd), cmd);
}

/// A quoted path is left alone when no Unicode variant needs substituting.
#[test]
fn resolve_paths_in_shell_command_preserves_existing_paths() {
    let cmd = "cat \"/etc/hosts\"";
    assert_eq!(resolve_paths_in_shell_command(cmd), cmd);
}

/// The core fix: a path typed with a regular space before "pm" resolves to
/// the on-disk file whose name uses U+202F.
#[test]
fn resolve_path_with_unicode_fallback_finds_narrow_no_break_space_variant() {
    let dir = std::env::temp_dir().join(format!("g3_utils_fallback_{}", std::process::id()));
    std::fs::create_dir_all(&dir).unwrap();
    let on_disk = dir.join("Screenshot 2025-01-03 at 4.41.27\u{202F}pm.png");
    std::fs::write(&on_disk, b"png").unwrap();
    let typed = dir.join("Screenshot 2025-01-03 at 4.41.27 pm.png");

    let resolved = resolve_path_with_unicode_fallback(typed.to_str().unwrap());
    assert_eq!(&*resolved, on_disk.to_str().unwrap());

    std::fs::remove_dir_all(&dir).unwrap();
}

/// The quoted path inside a shell command is rewritten to the on-disk name.
/// Unix-only because the scanner recognises paths by a leading `/` or `~`.
#[cfg(unix)]
#[test]
fn resolve_paths_in_shell_command_rewrites_quoted_screenshot_path() {
    let dir = std::env::temp_dir().join(format!("g3_utils_shell_{}", std::process::id()));
    std::fs::create_dir_all(&dir).unwrap();
    let on_disk = dir.join("Screenshot 2025-01-03 at 9.05.00\u{202F}am.png");
    std::fs::write(&on_disk, b"png").unwrap();
    let typed = dir.join("Screenshot 2025-01-03 at 9.05.00 am.png");

    let cmd = format!("file \"{}\"", typed.to_str().unwrap());
    let expected = format!("file \"{}\"", on_disk.to_str().unwrap());
    assert_eq!(resolve_paths_in_shell_command(&cmd), expected);

    std::fs::remove_dir_all(&dir).unwrap();
}