Enhance read_image tool with magic byte detection and multi-image support

- Fix media type detection by using magic bytes instead of the file extension
  - Correctly identifies JPEG files with .png extension (and vice versa)
  - Supports PNG, JPEG, GIF, and WebP formats

- Add multi-image support with file_paths array parameter
  - Load multiple images in a single tool call
  - All images queued for LLM analysis

- Enhance CLI output:
  - Inline image preview via iTerm2 imgcat protocol (height=5)
  - Dimmed info line showing: path | dimensions | media type | file size
  - Proper │ prefix alignment with tool output boxing
  - Human-readable file sizes (bytes, KB, MB)

- Add image dimension extraction from file headers
  - PNG, JPEG, GIF, WebP dimension parsing

- Add comprehensive tests for magic byte detection and dimensions
This commit is contained in:
Dhanji R. Prasanna
2025-12-26 11:19:37 +11:00
parent 3ece02ff31
commit 3601cc0547
7 changed files with 521 additions and 9 deletions

View File

@@ -274,15 +274,29 @@ impl AnthropicProvider {
}
}
MessageRole::User => {
// Build content blocks - images first, then text
let mut content_blocks: Vec<AnthropicContent> = Vec::new();
// Add any images attached to this message
for image in &message.images {
content_blocks.push(AnthropicContent::Image {
source: AnthropicImageSource {
source_type: "base64".to_string(),
media_type: image.media_type.clone(),
data: image.data.clone(),
},
});
}
// Add text content
content_blocks.push(AnthropicContent::Text {
text: message.content.clone(),
cache_control: message.cache_control.as_ref().map(Self::convert_cache_control),
});
anthropic_messages.push(AnthropicMessage {
role: "user".to_string(),
content: vec![AnthropicContent::Text {
text: message.content.clone(),
cache_control: message
.cache_control
.as_ref()
.map(Self::convert_cache_control),
}],
content: content_blocks,
});
}
MessageRole::Assistant => {
@@ -924,6 +938,19 @@ enum AnthropicContent {
name: String,
input: serde_json::Value,
},
#[serde(rename = "image")]
Image {
source: AnthropicImageSource,
},
}
/// Image source for Anthropic API
///
/// Payload carried by the `AnthropicContent::Image` variant. It describes
/// how the image bytes are encoded and what format they are in.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct AnthropicImageSource {
/// Encoding of `data`. Always "base64". Serialized under the key `type`,
/// since `type` is a reserved word in Rust and cannot be a field name.
#[serde(rename = "type")]
source_type: String,
/// MIME type of the image, e.g., "image/png", "image/jpeg".
media_type: String,
/// Base64-encoded image data.
data: String,
}
#[derive(Debug, Deserialize)]