fix: auto-resize images exceeding 1568px dimension to prevent 413 Payload Too Large

The Anthropic API was rejecting requests with multiple high-resolution images
(~2000x3000 pixels each) even though individual file sizes were under limits.

Root cause: Code only checked per-image file size (3.75MB), not dimensions.
Anthropic recommends images ≤1568px on the longest edge, and the API has a 32MB total request size limit.

Changes:
- Add MAX_IMAGE_DIMENSION (1568px) and MAX_TOTAL_IMAGE_PAYLOAD (20MB) constants
- Trigger resize when dimensions > 1568px (not just file size > 3.75MB)
- Add new resize_image_to_dimensions() for dimension-constrained resizing
- Track cumulative payload size across multiple images
- Warn if total payload exceeds recommended limit

Test results with Walking Dead comic images:
- WD_0001_0001.jpg: 800KB 1987x3057 → 321KB 1019x1568
- WD_0001_1064.png: 150KB 1988x3057 → 143KB 1020x1568
- WD_0002_0001.jpg: 1023KB 1988x3056 → 292KB 1020x1568
- Total payload: ~2.5MB → ~1MB base64
This commit is contained in:
Dhanji R. Prasanna
2026-01-18 10:05:45 +05:30
parent 3a03ed0585
commit 02655110d6

View File

@@ -19,6 +19,15 @@ const MAX_BASE64_SIZE: usize = 5 * 1024 * 1024;
/// Maximum raw image size before base64 encoding (~3.75MB to stay under 5MB after encoding).
/// Base64 emits 4 output bytes for every 3 input bytes, hence the 3/4 factor.
const MAX_IMAGE_SIZE: usize = (MAX_BASE64_SIZE * 3) / 4;
/// Maximum recommended image dimension in pixels (longest edge) for optimal API performance.
/// Images larger than this are auto-scaled by Claude anyway, wasting bandwidth.
/// Per Anthropic docs: "We recommend resizing images to no more than 1.15 megapixels"
/// NOTE(review): the quoted guidance is a megapixel budget while this constant is an edge
/// length; confirm both against the current vision documentation.
const MAX_IMAGE_DIMENSION: u32 = 1568;
/// Maximum total payload size for all images combined (leave room for context).
/// Anthropic's limit is 32MB total request size; we target 20MB for images to leave headroom.
/// This is compared against the running sum of base64-encoded image sizes; exceeding it
/// triggers a resize attempt and a warning rather than a hard failure.
const MAX_TOTAL_IMAGE_PAYLOAD: usize = 20 * 1024 * 1024;
/// Bytes per token heuristic (conservative estimate for code/text mix)
const BYTES_PER_TOKEN: f32 = 3.5;
@@ -280,6 +289,7 @@ pub async fn execute_read_image<W: UiWriter>(
let mut results: Vec<String> = Vec::new();
let mut success_count = 0;
let mut cumulative_payload_size: usize = 0;
// Print └─ and newline before images to break out of tool output box
println!("└─\n");
@@ -321,16 +331,33 @@ pub async fn execute_read_image<W: UiWriter>(
let original_size = bytes.len();
// Resize image if it exceeds MAX_IMAGE_SIZE (~3.75MB raw = ~5MB base64)
// Target slightly smaller to leave margin for base64 overhead
let (bytes, was_resized) = if original_size >= MAX_IMAGE_SIZE {
match resize_image_if_needed(&bytes, path, MAX_IMAGE_SIZE - 150 * 1024) {
// Get dimensions early to decide if we need to resize
let original_dimensions = get_image_dimensions(&bytes, media_type);
// Determine if resize is needed based on:
// 1. Dimensions exceed MAX_IMAGE_DIMENSION (1568px) - Claude auto-scales anyway
// 2. File size exceeds MAX_IMAGE_SIZE (~3.75MB)
// 3. Adding this image would exceed cumulative payload limit
let needs_resize = original_size >= MAX_IMAGE_SIZE
|| original_dimensions
.map(|(w, h)| w > MAX_IMAGE_DIMENSION || h > MAX_IMAGE_DIMENSION)
.unwrap_or(false)
|| (cumulative_payload_size + (original_size * 4 / 3)) > MAX_TOTAL_IMAGE_PAYLOAD;
let (bytes, was_resized) = if needs_resize {
// Calculate target size: either fit under per-image limit or leave room in cumulative budget
let remaining_budget = MAX_TOTAL_IMAGE_PAYLOAD.saturating_sub(cumulative_payload_size);
let target_raw_size = (remaining_budget * 3 / 4).min(MAX_IMAGE_SIZE - 150 * 1024);
match resize_image_to_dimensions(&bytes, path, MAX_IMAGE_DIMENSION, target_raw_size) {
Ok(resized) => {
let resized_size = resized.len();
if resized_size < original_size {
(resized, true)
} else {
(bytes, false)
// Resize didn't help, use original but warn if it's huge
debug!("Resize didn't reduce size, using original");
(bytes, original_dimensions.map(|(w, h)| w > MAX_IMAGE_DIMENSION || h > MAX_IMAGE_DIMENSION).unwrap_or(false))
}
}
Err(e) => {
@@ -344,8 +371,8 @@ pub async fn execute_read_image<W: UiWriter>(
let file_size = bytes.len();
// Try to get image dimensions
let dimensions = get_image_dimensions(&bytes, media_type);
// Get final dimensions (may have changed if resized)
let dimensions = if was_resized { get_image_dimensions(&bytes, "image/jpeg") } else { original_dimensions };
// Build info string
let dim_str = dimensions
@@ -385,6 +412,18 @@ pub async fn execute_read_image<W: UiWriter>(
// Store the image to be attached to the next user message
use base64::Engine;
let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes);
let encoded_size = encoded.len();
// Track cumulative payload and warn if approaching limit
cumulative_payload_size += encoded_size;
if cumulative_payload_size > MAX_TOTAL_IMAGE_PAYLOAD {
results.push(format!(
"⚠️ Warning: Total image payload ({:.1} MB) exceeds recommended limit ({:.1} MB). Request may fail.",
cumulative_payload_size as f64 / (1024.0 * 1024.0),
MAX_TOTAL_IMAGE_PAYLOAD as f64 / (1024.0 * 1024.0)
));
}
let image = g3_providers::ImageContent::new(final_media_type, encoded);
ctx.pending_images.push(image);
@@ -581,6 +620,79 @@ fn extract_path_and_content(args: &serde_json::Value) -> (Option<&str>, Option<&
}
}
/// Resize an image so its longest edge is at most `max_dimension` pixels and, when possible,
/// its encoded size is at most `target_size` bytes.
///
/// The image is re-encoded as JPEG by shelling out to ImageMagick's `convert`:
/// 1. Every attempt applies the `WxH>` geometry, which only ever shrinks the image and
///    preserves its aspect ratio.
/// 2. The JPEG quality is stepped down from 90 to 40 until the output fits `target_size`.
///
/// `path` is used for logging only; the pixels are taken from `bytes`.
///
/// # Returns
///
/// * The first re-encoded output that is at most `target_size` bytes.
/// * Otherwise the smallest re-encoded output, provided it is smaller than `bytes`. This is
///   a best-effort result: it honours the dimension constraint but exceeds `target_size`.
/// * Otherwise (ImageMagick unavailable, conversion failed, or re-encoding did not help) a
///   copy of the original `bytes`.
///
/// # Errors
///
/// Returns an error only if the temporary input file cannot be written.
pub fn resize_image_to_dimensions(
    bytes: &[u8],
    path: &std::path::Path,
    max_dimension: u32,
    target_size: usize,
) -> std::io::Result<Vec<u8>> {
    /// Deletes both temporary files when dropped, so every exit path (including the early
    /// `?` return on a failed write) cleans up after itself.
    struct TempFiles {
        input: std::path::PathBuf,
        output: std::path::PathBuf,
    }

    impl Drop for TempFiles {
        fn drop(&mut self) {
            // Best effort: the files may legitimately not exist.
            let _ = std::fs::remove_file(&self.input);
            let _ = std::fs::remove_file(&self.output);
        }
    }

    // The process id alone is not unique across calls within one process; add a per-call
    // counter so overlapping calls never read or overwrite each other's temp files.
    static NEXT_CALL_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);

    // Quality levels to try (start high for best quality).
    const QUALITY_LEVELS: [u32; 6] = [90, 80, 70, 60, 50, 40];

    debug!(
        "Resizing image {} to max {}px and under {} bytes",
        path.display(),
        max_dimension,
        target_size
    );

    let call_id = NEXT_CALL_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    let pid = std::process::id();
    let temp_dir = std::env::temp_dir();
    let temp = TempFiles {
        input: temp_dir.join(format!("g3_resize_input_{}_{}", pid, call_id)),
        output: temp_dir.join(format!("g3_resize_output_{}_{}.jpg", pid, call_id)),
    };

    std::fs::write(&temp.input, bytes)?;

    // "WxH>" means "resize only if larger, maintain aspect ratio". It does not depend on
    // the quality level, so build it once.
    let resize_spec = format!("{}x{}>", max_dimension, max_dimension);

    // Smallest successfully converted output seen so far, kept as a fallback in case no
    // quality level reaches `target_size`.
    let mut smallest: Option<Vec<u8>> = None;

    for &quality in &QUALITY_LEVELS {
        let run = std::process::Command::new("convert")
            .arg(&temp.input)
            .arg("-resize")
            .arg(&resize_spec)
            .arg("-quality")
            .arg(quality.to_string())
            .arg(&temp.output)
            .output();

        let output = match run {
            Ok(output) => output,
            Err(e) => {
                // The command could not be started at all (e.g. ImageMagick is not
                // installed); further attempts would fail the same way.
                debug!("Failed to run ImageMagick convert: {}", e);
                break;
            }
        };

        if !output.status.success() {
            continue;
        }

        let resized_bytes = match std::fs::read(&temp.output) {
            Ok(data) => data,
            Err(_) => continue,
        };

        if resized_bytes.len() <= target_size {
            debug!(
                "Resized image to {} bytes (max_dim={}, quality={})",
                resized_bytes.len(),
                max_dimension,
                quality
            );
            return Ok(resized_bytes);
        }

        let is_new_smallest = smallest
            .as_ref()
            .map_or(true, |best| resized_bytes.len() < best.len());
        if is_new_smallest {
            smallest = Some(resized_bytes);
        }
    }

    match smallest {
        // Nothing met the byte target, but a downscaled result that is still smaller than
        // the input is strictly better than handing back the oversized original.
        Some(best) if best.len() < bytes.len() => {
            debug!(
                "Could not reach target of {} bytes, using smallest resized result ({} bytes)",
                target_size,
                best.len()
            );
            Ok(best)
        }
        _ => {
            debug!("Failed to resize image to target constraints, using original");
            Ok(bytes.to_vec())
        }
    }
}
/// Resize an image to be under the target size using ImageMagick.
/// Returns the resized image bytes, or the original bytes if resizing fails or isn't needed.
///