From 92318ff51ce589a2308932170f37e29fe40110e2 Mon Sep 17 00:00:00 2001 From: Dhanji Prasanna Date: Tue, 30 Sep 2025 22:24:54 +1000 Subject: [PATCH] str_replace fixes --- crates/g3-core/src/lib.rs | 352 +++++++++++++++++++++++--------------- 1 file changed, 210 insertions(+), 142 deletions(-) diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index c36fe56..0bdc57e 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -915,7 +915,7 @@ The tool will execute immediately and you'll receive the result (success or erro // }, Tool { name: "str_replace".to_string(), - description: "Replace text in a file using a unified diff. The diff specifies exact text to find and replace. Character ranges are 0-indexed and end is EXCLUSIVE (like Python slicing). For example, text[0:5] gets characters 0,1,2,3,4 (not 5).".to_string(), + description: "Apply a unified diff to a file. Supports multiple hunks and context lines. Optionally constrain the search to a [start, end) character range (0-indexed; end is EXCLUSIVE). Useful to disambiguate matches or limit scope in large files.".to_string(), input_schema: json!({ "type": "object", "properties": { @@ -925,7 +925,7 @@ The tool will execute immediately and you'll receive the result (success or erro }, "diff": { "type": "string", - "description": "A unified diff showing what to replace. Use --- for old content and +++ for new content" + "description": "A unified diff showing what to replace. Supports @@ hunk headers, context lines, and multiple hunks (---/+++ headers optional for minimal diffs)." }, "start": { "type": "integer", @@ -1784,10 +1784,7 @@ The tool will execute immediately and you'll receive the result (success or erro .and_then(|v| v.as_u64()) .map(|n| n as usize); - debug!( - "str_replace: path={}, start={:?}, end={:?}", - file_path, start_char, end_char - ); + debug!("str_replace: path={}, start={:?}, end={:?}", file_path, start_char, end_char); // Read the existing file let file_content = match std::fs::read_to_string(file_path) { @@ -1795,99 +1792,16 @@ The tool will execute immediately and you'll receive the result (success or erro Err(e) => return Ok(format!("❌ Failed to read file '{}': {}", file_path, e)), }; - // Parse the diff to extract old and new content - let (old_content, new_content) = match parse_unified_diff(diff) { - Some((old, new)) => (old, new), - None => return Ok("❌ Invalid diff format. Expected unified diff with --- (old) and +++ (new) sections".to_string()), + // Apply unified diff to content + let result = match apply_unified_diff_to_string(&file_content, diff, start_char, end_char) { + Ok(r) => r, + Err(e) => return Ok(format!("❌ {}", e)), }; - debug!( - "Parsed diff: old_len={}, new_len={}", - old_content.len(), - new_content.len() - ); - - // Determine the search range - let search_start = start_char.unwrap_or(0); - let search_end = end_char.unwrap_or(file_content.len()); - - // Validate the range - if search_start > file_content.len() { - return Ok(format!( - "❌ start position {} exceeds file length {}", - search_start, - file_content.len() - )); - } - if search_end > file_content.len() { - return Ok(format!( - "❌ end position {} exceeds file length {}", - search_end, - file_content.len() - )); - } - if search_start > search_end { - return Ok(format!( - "❌ start position {} is greater than end position {}", - search_start, search_end - )); - } - - // Extract the search region - let search_region = &file_content[search_start..search_end]; - - // Find the old content within the search region - let position_in_region = match search_region.find(&old_content) { - Some(pos) => pos, - None => { - // Provide helpful context about what wasn't found - let preview_len = 100.min(old_content.len()); - let old_preview = if old_content.len() > preview_len { - format!("{}...", &old_content[..preview_len]) - } else { - old_content.clone() - }; - - return Ok(format!( - "❌ Pattern not found in file{}\nSearched for: {}", - if start_char.is_some() || end_char.is_some() { - format!(" (within character range {}:{})", search_start, search_end) - } else { - String::new() - }, - old_preview - )); - } - }; - - // Calculate the absolute position in the file - let absolute_position = search_start + position_in_region; - let replace_end = absolute_position + old_content.len(); - - debug!( - "Found pattern at position {} (absolute), replacing until {}", - absolute_position, replace_end - ); - - // Perform the replacement - let mut result = String::with_capacity(file_content.len()); - result.push_str(&file_content[..absolute_position]); - result.push_str(&new_content); - result.push_str(&file_content[replace_end..]); - // Write the result back to the file match std::fs::write(file_path, &result) { Ok(()) => { - // Count lines for reporting - let old_lines = old_content.lines().count(); - let new_lines = new_content.lines().count(); - let chars_replaced = old_content.len(); - let chars_added = new_content.len(); - - Ok(format!( - "✅ Successfully replaced text in '{}'\n Replaced {} characters ({} lines) at position {} with {} characters ({} lines)", - file_path, chars_replaced, old_lines, absolute_position, chars_added, new_lines - )) + Ok(format!("✅ Successfully applied unified diff to '{}'", file_path)) } Err(e) => Ok(format!("❌ Failed to write to file '{}': {}", file_path, e)), } @@ -1978,62 +1892,167 @@ fn filter_json_tool_calls(content: &str) -> String { } } -// Helper function to parse a unified diff into old and new content -fn parse_unified_diff(diff: &str) -> Option<(String, String)> { - // Simple approach: look for lines starting with - (old) and + (new) - // This is a simplified parser that handles basic diffs - let mut old_lines = Vec::new(); - let mut new_lines = Vec::new(); - let mut found_old = false; - let mut found_new = false; +// Apply unified diff to an input string with optional [start, end) bounds +pub fn apply_unified_diff_to_string( + file_content: &str, + diff: &str, + start_char: Option, + end_char: Option, +) -> Result { + // Parse full unified diff into hunks and apply sequentially. + let hunks = parse_unified_diff_hunks(diff); + if hunks.is_empty() { + anyhow::bail!( + "Invalid diff format. Expected unified diff with @@ hunks or +/- with context lines" + ); + } - for line in diff.lines() { - if line.starts_with("---") || line.starts_with("+++") || line.starts_with("@@") { - // Skip diff headers + // Normalize line endings to avoid CRLF/CR mismatches + let content_norm = file_content + .replace("\r\n", "\n") + .replace('\r', "\n"); + + // Determine and validate the search range + let search_start = start_char.unwrap_or(0); + let search_end = end_char.unwrap_or(content_norm.len()); + + if search_start > content_norm.len() { + anyhow::bail!( + "start position {} exceeds file length {}", + search_start, + content_norm.len() + ); + } + if search_end > content_norm.len() { + anyhow::bail!( + "end position {} exceeds file length {}", + search_end, + content_norm.len() + ); + } + if search_start > search_end { + anyhow::bail!( + "start position {} is greater than end position {}", + search_start, search_end + ); + } + + // Extract the region we're going to modify (whole file if no bounds provided) + let mut region_content = content_norm[search_start..search_end].to_string(); + + // Apply hunks in order + for (idx, (old_block, new_block)) in hunks.iter().enumerate() { + debug!( + "Applying hunk {}: old_len={}, new_len={}", + idx + 1, + old_block.len(), + new_block.len() + ); + + if let Some(pos) = region_content.find(old_block) { + let endpos = pos + old_block.len(); + region_content.replace_range(pos..endpos, new_block); + } else { + // Not found; provide helpful diagnostics with a short preview + let preview_len = old_block.len().min(200); + let mut old_preview = old_block[..preview_len].to_string(); + if old_block.len() > preview_len { + old_preview.push_str("..."); + } + + let range_note = if start_char.is_some() || end_char.is_some() { + format!(" (within character range {}:{})", search_start, search_end) + } else { + String::new() + }; + + anyhow::bail!( + "Pattern not found in file{}\nHunk {} failed. Searched for:\n{}", + range_note, + idx + 1, + old_preview + ); + } + } + + // Reconstruct the full content with the modified region + let mut result = String::with_capacity(content_norm.len() + region_content.len()); + result.push_str(&content_norm[..search_start]); + result.push_str(®ion_content); + result.push_str(&content_norm[search_end..]); + Ok(result) +} + +// Parse a unified diff into a list of hunks as (old_block, new_block) +// Each hunk contains the exact text to search for and the replacement text including context lines. +fn parse_unified_diff_hunks(diff: &str) -> Vec<(String, String)> { + let mut hunks: Vec<(String, String)> = Vec::new(); + + let mut old_lines: Vec = Vec::new(); + let mut new_lines: Vec = Vec::new(); + let mut in_hunk = false; + + for raw_line in diff.lines() { + let line = raw_line; + + // Skip common diff headers + if line.starts_with("diff ") + || line.starts_with("index ") + || line.starts_with("new file mode") + || line.starts_with("deleted file mode") + { continue; - } else if line.starts_with("-") && !line.starts_with("---") { - // Old content line - old_lines.push(&line[1..]); // Remove the leading - - found_old = true; - } else if line.starts_with("+") && !line.starts_with("+++") { - // New content line - new_lines.push(&line[1..]); // Remove the leading + - found_new = true; - } else if line.starts_with(" ") { - // Context line (unchanged) - add to both old and new - let _content = &line[1..]; - // Only add context lines if we're building a diff - if found_old || found_new { - // Context lines should be added to the side we're currently building - // This is a simplified approach + } + + if line.starts_with("--- ") || line.starts_with("+++ ") { + // File header lines — ignore + continue; + } + + if line.starts_with("@@") { + // Starting a new hunk — flush previous if present + if in_hunk && (!old_lines.is_empty() || !new_lines.is_empty()) { + hunks.push((old_lines.join("\n"), new_lines.join("\n"))); + old_lines.clear(); + new_lines.clear(); + } + in_hunk = true; + continue; + } + + if !in_hunk { + // Some minimal diffs may omit @@; start collecting once we see diff markers + if line.starts_with(' ') + || (line.starts_with('-') && !line.starts_with("---")) + || (line.starts_with('+') && !line.starts_with("+++")) + { + in_hunk = true; + } else { + continue; } } - } - // If we didn't find explicit diff markers, try a simpler format: - // Just look for "old content" followed by "new content" separated by some delimiter - if !found_old && !found_new { - // Alternative: split on common separators - if let Some(separator_pos) = diff - .find("\n===\n") - .or_else(|| diff.find("\n---\n")) - .or_else(|| diff.find("\n\n")) - { - let old_content = diff[..separator_pos].trim(); - let new_content = diff[separator_pos..] - .trim() - .trim_start_matches("===") - .trim_start_matches("---") - .trim(); - return Some((old_content.to_string(), new_content.to_string())); + if line.starts_with(' ') { + let content = &line[1..]; + old_lines.push(content.to_string()); + new_lines.push(content.to_string()); + } else if line.starts_with('+') && !line.starts_with("+++") { + new_lines.push(line[1..].to_string()); + } else if line.starts_with('-') && !line.starts_with("---") { + old_lines.push(line[1..].to_string()); + } else if line.starts_with('\\') { + // Example: "\\ No newline at end of file" — ignore + continue; + } else { + // Unknown line type — ignore } - // If no separator found, treat entire diff as old content to be replaced with empty - return Some((diff.trim().to_string(), String::new())); } - let old_content = old_lines.join("\n"); - let new_content = new_lines.join("\n"); - Some((old_content, new_content)) + if in_hunk && (!old_lines.is_empty() || !new_lines.is_empty()) { + hunks.push((old_lines.join("\n"), new_lines.join("\n"))); + } + + hunks } // Helper function to properly escape shell commands @@ -2235,3 +2254,52 @@ fn fix_mixed_quotes_in_json(json_str: &str) -> String { pub mod providers { pub mod embedded; } + +#[cfg(test)] +mod tests { + use super::parse_unified_diff_hunks; + + #[test] + fn parses_minimal_unified_diff_without_hunk_header() { + let diff = "--- old\n-old text\n+++ new\n+new text\n"; + let hunks = parse_unified_diff_hunks(diff); + assert_eq!(hunks.len(), 1); + assert_eq!(hunks[0].0, "old text"); + assert_eq!(hunks[0].1, "new text"); + } + + #[test] + fn parses_diff_with_context_and_hunk_headers() { + let diff = "@@ -1,3 +1,3 @@\n common\n-old\n+new\n common2\n"; + let hunks = parse_unified_diff_hunks(diff); + assert_eq!(hunks.len(), 1); + assert_eq!(hunks[0].0, "common\nold\ncommon2"); + assert_eq!(hunks[0].1, "common\nnew\ncommon2"); + } +} + +#[cfg(test)] +mod integration_tests { + use super::apply_unified_diff_to_string; + + #[test] + fn apply_multi_hunk_unified_diff_to_string() { + let original = "line 1\nkeep\nold A\nkeep 2\nold B\nkeep 3\n"; + let diff = "@@ -1,6 +1,6 @@\n line 1\n keep\n-old A\n+new A\n keep 2\n-old B\n+new B\n keep 3\n"; + let result = apply_unified_diff_to_string(original, diff, None, None).unwrap(); + let expected = "line 1\nkeep\nnew A\nkeep 2\nnew B\nkeep 3\n"; + assert_eq!(result, expected); + } + + #[test] + fn apply_diff_within_range_only() { + let original = "A\nold\nB\nold\nC\n"; + // Only the first 'old' should be replaced due to range + let diff = "@@ -1,3 +1,3 @@\n A\n-old\n+NEW\n B\n"; + let start = 0usize; // Start of file + let end = original.find("B\n").unwrap() + 2; // up to end of line 'B\n' + let result = apply_unified_diff_to_string(original, diff, Some(start), Some(end)).unwrap(); + let expected = "A\nNEW\nB\nold\nC\n"; + assert_eq!(result, expected); + } +}