Enhance read_image tool with magic byte detection and multi-image support
- Fix media type detection using magic bytes instead of file extension
  - Correctly identifies JPEG files with a .png extension (and vice versa)
  - Supports PNG, JPEG, GIF, and WebP formats
- Add multi-image support with a file_paths array parameter
  - Load multiple images in a single tool call
  - All images are queued for LLM analysis
- Enhanced CLI output:
  - Inline image preview via iTerm2 imgcat protocol (height=5)
  - Dimmed info line showing: path | dimensions | media type | file size
  - Proper │ prefix alignment with tool output boxing
  - Human-readable file sizes (bytes, KB, MB)
- Add image dimension extraction from file headers
  - PNG, JPEG, GIF, WebP dimension parsing
- Add comprehensive tests for magic byte detection and dimensions
This commit is contained in:
@@ -274,15 +274,29 @@ impl AnthropicProvider {
|
||||
}
|
||||
}
|
||||
MessageRole::User => {
|
||||
// Build content blocks - images first, then text
|
||||
let mut content_blocks: Vec<AnthropicContent> = Vec::new();
|
||||
|
||||
// Add any images attached to this message
|
||||
for image in &message.images {
|
||||
content_blocks.push(AnthropicContent::Image {
|
||||
source: AnthropicImageSource {
|
||||
source_type: "base64".to_string(),
|
||||
media_type: image.media_type.clone(),
|
||||
data: image.data.clone(),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
// Add text content
|
||||
content_blocks.push(AnthropicContent::Text {
|
||||
text: message.content.clone(),
|
||||
cache_control: message.cache_control.as_ref().map(Self::convert_cache_control),
|
||||
});
|
||||
|
||||
anthropic_messages.push(AnthropicMessage {
|
||||
role: "user".to_string(),
|
||||
content: vec![AnthropicContent::Text {
|
||||
text: message.content.clone(),
|
||||
cache_control: message
|
||||
.cache_control
|
||||
.as_ref()
|
||||
.map(Self::convert_cache_control),
|
||||
}],
|
||||
content: content_blocks,
|
||||
});
|
||||
}
|
||||
MessageRole::Assistant => {
|
||||
@@ -924,6 +938,19 @@ enum AnthropicContent {
|
||||
name: String,
|
||||
input: serde_json::Value,
|
||||
},
|
||||
#[serde(rename = "image")]
|
||||
Image {
|
||||
source: AnthropicImageSource,
|
||||
},
|
||||
}
|
||||
|
||||
/// Image source for Anthropic API
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct AnthropicImageSource {
|
||||
#[serde(rename = "type")]
|
||||
source_type: String, // Always "base64"
|
||||
media_type: String, // e.g., "image/png", "image/jpeg"
|
||||
data: String, // Base64-encoded image data
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
|
||||
@@ -88,6 +88,8 @@ pub struct Message {
|
||||
pub role: MessageRole,
|
||||
pub content: String,
|
||||
#[serde(skip)]
|
||||
pub images: Vec<ImageContent>,
|
||||
#[serde(skip)]
|
||||
pub id: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub cache_control: Option<CacheControl>,
|
||||
@@ -101,6 +103,65 @@ pub enum MessageRole {
|
||||
Assistant,
|
||||
}
|
||||
|
||||
/// Image content for multimodal messages
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ImageContent {
|
||||
/// Media type (e.g., "image/png", "image/jpeg", "image/gif", "image/webp")
|
||||
pub media_type: String,
|
||||
/// Base64-encoded image data
|
||||
pub data: String,
|
||||
}
|
||||
|
||||
impl ImageContent {
|
||||
pub fn new(media_type: &str, data: String) -> Self {
|
||||
Self {
|
||||
media_type: media_type.to_string(),
|
||||
data,
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect media type from file extension
|
||||
pub fn media_type_from_extension(ext: &str) -> Option<&'static str> {
|
||||
match ext.to_lowercase().as_str() {
|
||||
"png" => Some("image/png"),
|
||||
"jpg" | "jpeg" => Some("image/jpeg"),
|
||||
"gif" => Some("image/gif"),
|
||||
"webp" => Some("image/webp"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Detect media type from image data magic bytes (file signature)
|
||||
/// This is more reliable than file extension as it checks actual content
|
||||
pub fn media_type_from_bytes(bytes: &[u8]) -> Option<&'static str> {
|
||||
if bytes.len() < 12 {
|
||||
return None;
|
||||
}
|
||||
|
||||
// PNG: 89 50 4E 47 0D 0A 1A 0A
|
||||
if bytes.starts_with(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]) {
|
||||
return Some("image/png");
|
||||
}
|
||||
|
||||
// JPEG: FF D8 FF
|
||||
if bytes.starts_with(&[0xFF, 0xD8, 0xFF]) {
|
||||
return Some("image/jpeg");
|
||||
}
|
||||
|
||||
// GIF: 47 49 46 38 (GIF8)
|
||||
if bytes.starts_with(&[0x47, 0x49, 0x46, 0x38]) {
|
||||
return Some("image/gif");
|
||||
}
|
||||
|
||||
// WebP: 52 49 46 46 ... 57 45 42 50 (RIFF....WEBP)
|
||||
if bytes.starts_with(&[0x52, 0x49, 0x46, 0x46]) && bytes.len() >= 12 && &bytes[8..12] == b"WEBP" {
|
||||
return Some("image/webp");
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CompletionResponse {
|
||||
pub content: String,
|
||||
@@ -174,6 +235,7 @@ impl Message {
|
||||
Self {
|
||||
role,
|
||||
content,
|
||||
images: Vec::new(),
|
||||
id: Self::generate_id(),
|
||||
cache_control: None,
|
||||
}
|
||||
@@ -188,6 +250,7 @@ impl Message {
|
||||
Self {
|
||||
role,
|
||||
content,
|
||||
images: Vec::new(),
|
||||
id: Self::generate_id(),
|
||||
cache_control: Some(cache_control),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user