Remove vision tools (except take_screenshot) and macax tools
Vision tools removed:
- extract_text (OCR from image files)
- extract_text_with_boxes (OCR with bounding boxes)
- vision_find_text (find text in app windows)
- vision_click_text (find and click on text)
- vision_click_near_text (click near text labels)

macax tools removed:
- macax_list_apps
- macax_get_frontmost_app
- macax_activate_app
- macax_press_key
- macax_type_text

The LLM can now read images directly via the read_image tool. take_screenshot is retained for capturing application windows.

Files deleted:
- crates/g3-core/src/tools/vision.rs
- crates/g3-core/src/tools/macax.rs
- docs/macax-tools.md

Updated tool counts: 12 core + 15 webdriver = 27 total
This commit is contained in:
@@ -13,7 +13,7 @@ use super::executor::ToolContext;
|
||||
/// Execute the `read_file` tool.
|
||||
pub async fn execute_read_file<W: UiWriter>(
|
||||
tool_call: &ToolCall,
|
||||
ctx: &ToolContext<'_, W>,
|
||||
_ctx: &ToolContext<'_, W>,
|
||||
) -> Result<String> {
|
||||
debug!("Processing read_file tool call");
|
||||
|
||||
@@ -28,35 +28,6 @@ pub async fn execute_read_file<W: UiWriter>(
|
||||
let resolved_path = resolve_path_with_unicode_fallback(expanded_path.as_ref());
|
||||
let path_str = resolved_path.as_ref();
|
||||
|
||||
// Check if this is an image file
|
||||
let is_image = path_str.to_lowercase().ends_with(".png")
|
||||
|| path_str.to_lowercase().ends_with(".jpg")
|
||||
|| path_str.to_lowercase().ends_with(".jpeg")
|
||||
|| path_str.to_lowercase().ends_with(".gif")
|
||||
|| path_str.to_lowercase().ends_with(".bmp")
|
||||
|| path_str.to_lowercase().ends_with(".tiff")
|
||||
|| path_str.to_lowercase().ends_with(".tif")
|
||||
|| path_str.to_lowercase().ends_with(".webp");
|
||||
|
||||
// If it's an image file, use OCR via extract_text
|
||||
if is_image {
|
||||
if let Some(controller) = ctx.computer_controller {
|
||||
match controller.extract_text_from_image(path_str).await {
|
||||
Ok(text) => {
|
||||
return Ok(format!("📄 Image file (OCR extracted):\n{}", text));
|
||||
}
|
||||
Err(e) => {
|
||||
return Ok(format!(
|
||||
"❌ Failed to extract text from image '{}': {}",
|
||||
path_str, e
|
||||
));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Ok("❌ Computer control not enabled. Cannot perform OCR on image files. Set computer_control.enabled = true in config.".to_string());
|
||||
}
|
||||
}
|
||||
|
||||
// Extract optional start and end positions
|
||||
let start_char = tool_call
|
||||
.args
|
||||
|
||||
Reference in New Issue
Block a user