Remove VisionBridge OCR (unused)

VisionBridge was a Swift library for Apple Vision OCR that was built on every compile but never actually used by any g3 tool. Removed: the vision-bridge/ Swift package directory; the src/ocr/ module (vision.rs, tesseract.rs, mod.rs); the OCR methods from the ComputerController trait; the OCR-related code from the platform implementations; the TextLocation type (no longer needed); and the test_vision.rs example. Simplified: build.rs (now empty, no Swift compilation); MacOSController (no longer holds an OCR engine); and LinuxController and WindowsController (stub implementations). Build time improvement: no more 'Building VisionBridge Swift package...' messages on every compile.
2026-01-21 06:42:01 +05:30
parent 38b0019ad4
commit a89cad955a
13 changed files with 22 additions and 1292 deletions
--- a/crates/g3-computer-control/build.rs
+++ b/crates/g3-computer-control/build.rs
@@ -1,100 +1,4 @@
-use std::env;
-use std::path::PathBuf;
-use std::process::Command;
-
 fn main() {
-    // Only build Vision bridge on macOS
-    if env::var("CARGO_CFG_TARGET_OS").unwrap() != "macos" {
-        return;
-    }
-
-    println!("cargo:rerun-if-changed=vision-bridge/Sources/VisionBridge/VisionOCR.swift");
-    println!("cargo:rerun-if-changed=vision-bridge/Sources/VisionBridge/VisionBridge.h");
-    println!("cargo:rerun-if-changed=vision-bridge/Package.swift");
-
-    let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
-    let vision_bridge_dir = manifest_dir.join("vision-bridge");
-
-    // Build Swift package
-    println!("cargo:warning=Building VisionBridge Swift package...");
-    let build_status = Command::new("swift")
-        .args(&["build", "-c", "release"])
-        .current_dir(&vision_bridge_dir)
-        .status()
-        .expect("Failed to build Swift package");
-
-    if !build_status.success() {
-        panic!("Swift build failed");
-    }
-
-    // Find the built library
-    let lib_path = vision_bridge_dir
-        .join(".build/release")
-        .canonicalize()
-        .expect("Failed to find .build/release directory");
-
-    // Copy the dylib to the output directory so it can be found at runtime
-    let target_dir = manifest_dir
-        .parent()
-        .unwrap()
-        .parent()
-        .unwrap()
-        .join("target");
-    let profile = env::var("PROFILE").unwrap_or_else(|_| "debug".to_string());
-
-    // Determine the actual target directory (could be llvm-cov-target or regular target)
-    let target_dir_name =
-        env::var("CARGO_TARGET_DIR").unwrap_or_else(|_| target_dir.to_string_lossy().to_string());
-    let actual_target_dir = PathBuf::from(&target_dir_name);
-    let output_dir = actual_target_dir.join(&profile);
-
-    let dylib_src = lib_path.join("libVisionBridge.dylib");
-    let dylib_dst = output_dir.join("libVisionBridge.dylib");
-
-    // Create output directory if it doesn't exist
-    std::fs::create_dir_all(&output_dir).expect(&format!(
-        "Failed to create output directory {}",
-        output_dir.display()
-    ));
-
-    std::fs::copy(&dylib_src, &dylib_dst).expect(&format!(
-        "Failed to copy dylib from {} to {}",
-        dylib_src.display(),
-        dylib_dst.display()
-    ));
-
-    println!(
-        "cargo:warning=Copied libVisionBridge.dylib to {}",
-        dylib_dst.display()
-    );
-
-    // Re-sign the dylib with ad-hoc signature to fix code signing issues on Apple Silicon
-    // This is necessary because incremental compilation can invalidate signatures
-    let codesign_status = Command::new("codesign")
-        .args(&["-f", "-s", "-", dylib_dst.to_str().unwrap()])
-        .status();
-
-    if let Ok(status) = codesign_status {
-        if !status.success() {
-            println!("cargo:warning=Failed to codesign libVisionBridge.dylib (non-fatal)");
-        }
-    }
-
-    // Add rpath so the dylib can be found at runtime
-    println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path");
-    println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
-    println!("cargo:rustc-link-search=native={}", lib_path.display());
-    println!("cargo:rustc-link-lib=dylib=VisionBridge");
-
-    // Link required frameworks
-    println!("cargo:rustc-link-lib=framework=Vision");
-    println!("cargo:rustc-link-lib=framework=AppKit");
-    println!("cargo:rustc-link-lib=framework=Foundation");
-    println!("cargo:rustc-link-lib=framework=CoreGraphics");
-    println!("cargo:rustc-link-lib=framework=CoreImage");
-
-    println!(
-        "cargo:warning=VisionBridge built successfully at {}",
-        lib_path.display()
-    );
+    // No build-time dependencies required
+    // VisionBridge OCR has been removed
 }
--- a/crates/g3-computer-control/examples/test_vision.rs
+++ b/crates/g3-computer-control/examples/test_vision.rs
@@ -1,92 +0,0 @@
-use anyhow::Result;
-use g3_computer_control::ocr::{DefaultOCR, OCREngine};
-
-#[tokio::main]
-async fn main() -> Result<()> {
-    println!("🧪 Testing Apple Vision OCR");
-    println!("===========================\n");
-
-    // Initialize OCR engine
-    println!("📦 Initializing OCR engine...");
-    let ocr = DefaultOCR::new()?;
-    println!("✅ OCR engine: {}\n", ocr.name());
-
-    // Check if test image exists
-    let test_image = "/tmp/safari_test.png";
-    if !std::path::Path::new(test_image).exists() {
-        println!("⚠️  Test image not found: {}", test_image);
-        println!("   Creating a screenshot...");
-
-        let status = std::process::Command::new("screencapture")
-            .arg("-x")
-            .arg("-R")
-            .arg("0,0,1200,800")
-            .arg(test_image)
-            .status()?;
-
-        if !status.success() {
-            anyhow::bail!("Failed to create screenshot");
-        }
-
-        println!("✅ Screenshot created\n");
-    }
-
-    // Run OCR
-    println!("🔍 Running Apple Vision OCR on {}...", test_image);
-    let start = std::time::Instant::now();
-    let locations = ocr.extract_text_with_locations(test_image).await?;
-    let duration = start.elapsed();
-
-    println!("✅ OCR completed in {:.3}s\n", duration.as_secs_f64());
-
-    // Display results
-    println!("📊 Results:");
-    println!("   Found {} text elements\n", locations.len());
-
-    if locations.is_empty() {
-        println!("⚠️  No text found in image");
-    } else {
-        println!("   Top 20 results:");
-        println!(
-            "   {:<4} {:<40} {:<15} {:<12} {:<8}",
-            "#", "Text", "Position", "Size", "Conf"
-        );
-        println!("   {}", "-".repeat(85));
-
-        for (i, loc) in locations.iter().take(20).enumerate() {
-            let text = if loc.text.len() > 37 {
-                format!("{}...", &loc.text[..37])
-            } else {
-                loc.text.clone()
-            };
-
-            println!(
-                "   {:<4} {:<40} ({:>4},{:>4})    {:>4}x{:<4}  {:.2}",
-                i + 1,
-                text,
-                loc.x,
-                loc.y,
-                loc.width,
-                loc.height,
-                loc.confidence
-            );
-        }
-
-        if locations.len() > 20 {
-            println!("\n   ... and {} more", locations.len() - 20);
-        }
-
-        // Performance comparison
-        println!("\n📈 Performance:");
-        println!("   OCR Speed: {:.3}s", duration.as_secs_f64());
-        println!("   Text elements: {}", locations.len());
-        println!(
-            "   Avg per element: {:.1}ms",
-            duration.as_millis() as f64 / locations.len() as f64
-        );
-    }
-
-    println!("\n✅ Test complete!");
-
-    Ok(())
-}
--- a/crates/g3-computer-control/src/lib.rs
+++ b/crates/g3-computer-control/src/lib.rs
@@ -2,7 +2,6 @@
 #![allow(unexpected_cfgs)]

 pub mod macax;
-pub mod ocr;
 pub mod platform;
 pub mod types;
 pub mod webdriver;
@@ -30,16 +29,6 @@ pub trait ComputerController: Send + Sync {
        window_id: Option<&str>,
    ) -> Result<()>;

-    // OCR operations
-    async fn extract_text_from_screen(&self, region: Rect, window_id: &str) -> Result<String>;
-    async fn extract_text_from_image(&self, path: &str) -> Result<String>;
-    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>>;
-    async fn find_text_in_app(
-        &self,
-        app_name: &str,
-        search_text: &str,
-    ) -> Result<Option<TextLocation>>;
-
    // Mouse operations
    fn move_mouse(&self, x: i32, y: i32) -> Result<()>;
    fn click_at(&self, x: i32, y: i32, app_name: Option<&str>) -> Result<()>;
--- a/crates/g3-computer-control/src/ocr/mod.rs
+++ b/crates/g3-computer-control/src/ocr/mod.rs
@@ -1,26 +0,0 @@
-use crate::types::TextLocation;
-use anyhow::Result;
-use async_trait::async_trait;
-
-/// OCR engine trait for text recognition with bounding boxes
-#[async_trait]
-pub trait OCREngine: Send + Sync {
-    /// Extract text with locations from an image file
-    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>>;
-
-    /// Get the name of the OCR engine
-    fn name(&self) -> &str;
-}
-
-// Platform-specific modules
-#[cfg(target_os = "macos")]
-pub mod vision;
-
-pub mod tesseract;
-
-// Re-export the default OCR engine for the platform
-#[cfg(target_os = "macos")]
-pub use vision::AppleVisionOCR as DefaultOCR;
-
-#[cfg(not(target_os = "macos"))]
-pub use tesseract::TesseractOCR as DefaultOCR;
--- a/crates/g3-computer-control/src/ocr/tesseract.rs
+++ b/crates/g3-computer-control/src/ocr/tesseract.rs
@@ -1,91 +0,0 @@
-use super::OCREngine;
-use crate::types::TextLocation;
-use anyhow::Result;
-use async_trait::async_trait;
-
-/// Tesseract OCR engine (fallback/cross-platform)
-pub struct TesseractOCR;
-
-impl TesseractOCR {
-    pub fn new() -> Result<Self> {
-        // Check if tesseract is available
-        let tesseract_check = std::process::Command::new("which")
-            .arg("tesseract")
-            .output();
-
-        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!(
-                "Tesseract OCR is not installed on your system.\n\n\
-                To install tesseract:\n  macOS:   brew install tesseract\n  \
-                Linux:   sudo apt-get install tesseract-ocr (Ubuntu/Debian)\n           \
-                sudo yum install tesseract (RHEL/CentOS)\n  \
-                Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki\n\n\
-                After installation, restart your terminal and try again."
-            );
-        }
-
-        Ok(Self)
-    }
-}
-
-#[async_trait]
-impl OCREngine for TesseractOCR {
-    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>> {
-        // Use tesseract CLI with TSV output to get bounding boxes
-        let output = std::process::Command::new("tesseract")
-            .arg(path)
-            .arg("stdout")
-            .arg("tsv")
-            .output()
-            .map_err(|e| anyhow::anyhow!("Failed to run tesseract: {}", e))?;
-
-        if !output.status.success() {
-            anyhow::bail!(
-                "Tesseract failed: {}",
-                String::from_utf8_lossy(&output.stderr)
-            );
-        }
-
-        let tsv_text = String::from_utf8_lossy(&output.stdout);
-        let mut locations = Vec::new();
-
-        // Parse TSV output (skip header line)
-        for (i, line) in tsv_text.lines().enumerate() {
-            if i == 0 {
-                continue;
-            } // Skip header
-
-            let parts: Vec<&str> = line.split('\t').collect();
-            if parts.len() >= 12 {
-                // TSV format: level, page_num, block_num, par_num, line_num, word_num,
-                //             left, top, width, height, conf, text
-                if let (Ok(x), Ok(y), Ok(w), Ok(h), Ok(conf), text) = (
-                    parts[6].parse::<i32>(),
-                    parts[7].parse::<i32>(),
-                    parts[8].parse::<i32>(),
-                    parts[9].parse::<i32>(),
-                    parts[10].parse::<f32>(),
-                    parts[11],
-                ) {
-                    let trimmed = text.trim();
-                    if !trimmed.is_empty() && conf > 0.0 {
-                        locations.push(TextLocation {
-                            text: trimmed.to_string(),
-                            x,
-                            y,
-                            width: w,
-                            height: h,
-                            confidence: conf / 100.0, // Convert from 0-100 to 0-1
-                        });
-                    }
-                }
-            }
-        }
-
-        Ok(locations)
-    }
-
-    fn name(&self) -> &str {
-        "Tesseract OCR"
-    }
-}
--- a/crates/g3-computer-control/src/ocr/vision.rs
+++ b/crates/g3-computer-control/src/ocr/vision.rs
@@ -1,100 +0,0 @@
-use super::OCREngine;
-use crate::types::TextLocation;
-use anyhow::{Context, Result};
-use async_trait::async_trait;
-use std::ffi::{CStr, CString};
-use std::os::raw::{c_char, c_float, c_uint};
-
-// FFI bindings to Swift VisionBridge
-#[repr(C)]
-struct VisionTextBox {
-    text: *const c_char,
-    text_len: c_uint,
-    x: i32,
-    y: i32,
-    width: i32,
-    height: i32,
-    confidence: c_float,
-}
-
-extern "C" {
-    fn vision_recognize_text(
-        image_path: *const c_char,
-        image_path_len: c_uint,
-        out_boxes: *mut *mut std::ffi::c_void,
-        out_count: *mut c_uint,
-    ) -> bool;
-
-    fn vision_free_boxes(boxes: *mut std::ffi::c_void, count: c_uint);
-}
-
-/// Apple Vision Framework OCR engine
-pub struct AppleVisionOCR;
-
-impl AppleVisionOCR {
-    pub fn new() -> Result<Self> {
-        Ok(Self)
-    }
-}
-
-#[async_trait]
-impl OCREngine for AppleVisionOCR {
-    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>> {
-        // Convert path to C string
-        let c_path = CString::new(path).context("Failed to convert path to C string")?;
-
-        let mut boxes_ptr: *mut std::ffi::c_void = std::ptr::null_mut();
-        let mut count: c_uint = 0;
-
-        // Call Swift Vision API
-        let success = unsafe {
-            vision_recognize_text(
-                c_path.as_ptr(),
-                path.len() as c_uint,
-                &mut boxes_ptr,
-                &mut count,
-            )
-        };
-
-        if !success || boxes_ptr.is_null() {
-            anyhow::bail!("Apple Vision OCR failed");
-        }
-
-        // Convert C array to Rust Vec
-        let mut locations = Vec::new();
-
-        unsafe {
-            let typed_boxes = boxes_ptr as *const VisionTextBox;
-            let boxes_slice = std::slice::from_raw_parts(typed_boxes, count as usize);
-
-            for box_data in boxes_slice {
-                // Convert C string to Rust String
-                let text = if !box_data.text.is_null() {
-                    CStr::from_ptr(box_data.text).to_string_lossy().into_owned()
-                } else {
-                    String::new()
-                };
-
-                if !text.is_empty() {
-                    locations.push(TextLocation {
-                        text,
-                        x: box_data.x,
-                        y: box_data.y,
-                        width: box_data.width,
-                        height: box_data.height,
-                        confidence: box_data.confidence,
-                    });
-                }
-            }
-
-            // Free the C array
-            vision_free_boxes(boxes_ptr, count);
-        }
-
-        Ok(locations)
-    }
-
-    fn name(&self) -> &str {
-        "Apple Vision Framework"
-    }
-}
--- a/crates/g3-computer-control/src/platform/linux.rs
+++ b/crates/g3-computer-control/src/platform/linux.rs
@@ -1,188 +1,32 @@
-use crate::{types::*, ComputerController};
+use crate::{types::Rect, ComputerController};
 use anyhow::Result;
 use async_trait::async_trait;
-use tesseract::Tesseract;
-use uuid::Uuid;

-pub struct LinuxController {
-    // Placeholder for X11 connection or other state
-}
+pub struct LinuxController;

 impl LinuxController {
    pub fn new() -> Result<Self> {
-        // Initialize X11 connection
        tracing::warn!("Linux computer control not fully implemented");
-        Ok(Self {})
+        Ok(Self)
    }
 }

 #[async_trait]
 impl ComputerController for LinuxController {
-    async fn move_mouse(&self, _x: i32, _y: i32) -> Result<()> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn click(&self, _button: MouseButton) -> Result<()> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn double_click(&self, _button: MouseButton) -> Result<()> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn type_text(&self, _text: &str) -> Result<()> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn press_key(&self, _key: &str) -> Result<()> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn list_windows(&self) -> Result<Vec<Window>> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn focus_window(&self, _window_id: &str) -> Result<()> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn get_window_bounds(&self, _window_id: &str) -> Result<Rect> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn find_element(&self, _selector: &ElementSelector) -> Result<Option<UIElement>> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn get_element_text(&self, _element_id: &str) -> Result<String> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
-    async fn get_element_bounds(&self, _element_id: &str) -> Result<Rect> {
-        anyhow::bail!("Linux implementation not yet available")
-    }
-
    async fn take_screenshot(
        &self,
        _path: &str,
        _region: Option<Rect>,
        _window_id: Option<&str>,
    ) -> Result<()> {
-        // Enforce that window_id must be provided
-        if _window_id.is_none() {
-            anyhow::bail!("window_id is required. You must specify which window to capture (e.g., 'Firefox', 'Terminal', 'gedit'). Use list_windows to see available windows.");
-        }
-
-        anyhow::bail!("Linux implementation not yet available")
+        anyhow::bail!("Linux screenshot implementation not yet available")
    }

-    async fn extract_text_from_screen(&self, _region: Rect, _window_id: &str) -> Result<String> {
-        anyhow::bail!("Linux implementation not yet available")
+    fn move_mouse(&self, _x: i32, _y: i32) -> Result<()> {
+        anyhow::bail!("Linux mouse control not yet available")
    }

-    async fn extract_text_from_image(&self, _path: &str) -> Result<OCRResult> {
-        // Check if tesseract is available on the system
-        let tesseract_check = std::process::Command::new("which")
-            .arg("tesseract")
-            .output();
-
-        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!(
-                "Tesseract OCR is not installed on your system.\n\n\
-                To install tesseract:\n  \
-                Ubuntu/Debian: sudo apt-get install tesseract-ocr\n  \
-                RHEL/CentOS:   sudo yum install tesseract\n  \
-                Arch Linux:    sudo pacman -S tesseract\n\n\
-                After installation, restart your terminal and try again."
-            );
-        }
-
-        // Initialize Tesseract
-        let tess = Tesseract::new(None, Some("eng")).map_err(|e| {
-            anyhow::anyhow!(
-                "Failed to initialize Tesseract: {}\n\n\
-                    This usually means:\n1. Tesseract is not properly installed\n\
-                    2. Language data files are missing\n\nTo fix:\n  \
-                    Ubuntu/Debian: sudo apt-get install tesseract-ocr-eng\n  \
-                    RHEL/CentOS:   sudo yum install tesseract-langpack-eng\n  \
-                    Arch Linux:    sudo pacman -S tesseract-data-eng",
-                e
-            )
-        })?;
-
-        let text = tess
-            .set_image(_path)
-            .map_err(|e| anyhow::anyhow!("Failed to load image '{}': {}", _path, e))?
-            .get_text()
-            .map_err(|e| anyhow::anyhow!("Failed to extract text from image: {}", e))?;
-
-        // Get confidence (simplified - would need more complex API calls for per-word confidence)
-        let confidence = 0.85; // Placeholder
-
-        Ok(OCRResult {
-            text,
-            confidence,
-            bounds: Rect {
-                x: 0,
-                y: 0,
-                width: 0,
-                height: 0,
-            }, // Would need image dimensions
-        })
-    }
-
-    async fn find_text_on_screen(&self, _text: &str) -> Result<Option<Point>> {
-        // Check if tesseract is available on the system
-        let tesseract_check = std::process::Command::new("which")
-            .arg("tesseract")
-            .output();
-
-        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!(
-                "Tesseract OCR is not installed on your system.\n\n\
-                To install tesseract:\n  \
-                Ubuntu/Debian: sudo apt-get install tesseract-ocr\n  \
-                RHEL/CentOS:   sudo yum install tesseract\n  \
-                Arch Linux:    sudo pacman -S tesseract\n\n\
-                After installation, restart your terminal and try again."
-            );
-        }
-
-        // Take full screen screenshot
-        let temp_path = format!("/tmp/g3_ocr_search_{}.png", uuid::Uuid::new_v4());
-        self.take_screenshot(&temp_path, None, None).await?;
-
-        // Use Tesseract to find text with bounding boxes
-        let tess = Tesseract::new(None, Some("eng")).map_err(|e| {
-            anyhow::anyhow!(
-                "Failed to initialize Tesseract: {}\n\n\
-                    This usually means:\n1. Tesseract is not properly installed\n\
-                    2. Language data files are missing\n\nTo fix:\n  \
-                    Ubuntu/Debian: sudo apt-get install tesseract-ocr-eng\n  \
-                    RHEL/CentOS:   sudo yum install tesseract-langpack-eng\n  \
-                    Arch Linux:    sudo pacman -S tesseract-data-eng",
-                e
-            )
-        })?;
-
-        let full_text = tess
-            .set_image(temp_path.as_str())
-            .map_err(|e| anyhow::anyhow!("Failed to load screenshot: {}", e))?
-            .get_text()
-            .map_err(|e| anyhow::anyhow!("Failed to extract text from screen: {}", e))?;
-
-        // Clean up temp file
-        let _ = std::fs::remove_file(&temp_path);
-
-        // Simple text search - full implementation would use get_component_images
-        // to get bounding boxes for each word
-        if full_text.contains(_text) {
-            tracing::warn!(
-                "Text found but precise coordinates not available in simplified implementation"
-            );
-            Ok(Some(Point { x: 0, y: 0 }))
-        } else {
-            Ok(None)
-        }
+    fn click_at(&self, _x: i32, _y: i32, _app_name: Option<&str>) -> Result<()> {
+        anyhow::bail!("Linux click control not yet available")
    }
 }
--- a/crates/g3-computer-control/src/platform/macos.rs
+++ b/crates/g3-computer-control/src/platform/macos.rs
@@ -1,7 +1,5 @@
-use crate::ocr::{DefaultOCR, OCREngine};
 use crate::{
-    types::{Rect, TextLocation},
-    ComputerController,
+    types::Rect, ComputerController,
 };
 use anyhow::{Context, Result};
 use async_trait::async_trait;
@@ -14,21 +12,12 @@ use core_graphics::window::{
 };
 use std::path::Path;

-pub struct MacOSController {
-    ocr_engine: Box<dyn OCREngine>,
-    #[allow(dead_code)]
-    ocr_name: String,
-}
+pub struct MacOSController;

 impl MacOSController {
    pub fn new() -> Result<Self> {
-        let ocr = Box::new(DefaultOCR::new()?);
-        let ocr_name = ocr.name().to_string();
-        tracing::debug!("Initialized macOS controller with OCR engine: {}", ocr_name);
-        Ok(Self {
-            ocr_engine: ocr,
-            ocr_name,
-        })
+        tracing::debug!("Initialized macOS controller");
+        Ok(Self)
    }
 }

@@ -215,78 +204,6 @@ impl ComputerController for MacOSController {
        Ok(())
    }

-    async fn extract_text_from_screen(&self, region: Rect, window_id: &str) -> Result<String> {
-        // Take screenshot of region first
-        let temp_path = format!("/tmp/g3_ocr_{}.png", uuid::Uuid::new_v4());
-        self.take_screenshot(&temp_path, Some(region), Some(window_id))
-            .await?;
-
-        // Extract text from the screenshot
-        let result = self.extract_text_from_image(&temp_path).await?;
-
-        // Clean up temp file
-        let _ = std::fs::remove_file(&temp_path);
-
-        Ok(result)
-    }
-
-    async fn extract_text_from_image(&self, path: &str) -> Result<String> {
-        // Extract all text and concatenate
-        let locations = self.ocr_engine.extract_text_with_locations(path).await?;
-        Ok(locations
-            .iter()
-            .map(|loc| loc.text.as_str())
-            .collect::<Vec<_>>()
-            .join(" "))
-    }
-
-    async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>> {
-        // Use the OCR engine
-        self.ocr_engine.extract_text_with_locations(path).await
-    }
-
-    async fn find_text_in_app(
-        &self,
-        app_name: &str,
-        search_text: &str,
-    ) -> Result<Option<TextLocation>> {
-        // Take screenshot of specific app window
-        let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
-        let temp_path = format!(
-            "{}/tmp/g3_find_text_{}_{}.png",
-            home,
-            app_name,
-            uuid::Uuid::new_v4()
-        );
-        self.take_screenshot(&temp_path, None, Some(app_name))
-            .await?;
-
-        // Get screenshot dimensions before we delete it
-        let screenshot_dims = get_image_dimensions(&temp_path)?;
-
-        // Extract all text with locations
-        let locations = self.extract_text_with_locations(&temp_path).await?;
-
-        // Get window bounds to calculate coordinate transformation
-        let window_bounds = self.get_window_bounds(app_name)?;
-
-        // Clean up temp file
-        let _ = std::fs::remove_file(&temp_path);
-
-        // Find matching text (case-insensitive)
-        let search_lower = search_text.to_lowercase();
-        for location in locations {
-            if location.text.to_lowercase().contains(&search_lower) {
-                // Transform coordinates from screenshot space to screen space
-                let transformed =
-                    transform_screenshot_to_screen_coords(location, window_bounds, screenshot_dims);
-                return Ok(Some(transformed));
-            }
-        }
-
-        Ok(None)
-    }
-
    fn move_mouse(&self, x: i32, y: i32) -> Result<()> {
        use core_graphics::event::{CGEvent, CGEventTapLocation, CGEventType, CGMouseButton};
        use core_graphics::event_source::{CGEventSource, CGEventSourceStateID};
@@ -379,246 +296,6 @@ impl ComputerController for MacOSController {
    }
 }

-impl MacOSController {
-    /// Get window bounds for an application (helper method)
-    fn get_window_bounds(&self, app_name: &str) -> Result<(i32, i32, i32, i32)> {
-        unsafe {
-            let window_list =
-                CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID);
-
-            let array = CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
-            let count = array.len();
-
-            let app_name_lower = app_name.to_lowercase();
-
-            for i in 0..count {
-                let dict = array.get(i).unwrap();
-
-                // Get owner name
-                let owner_key = CFString::from_static_string("kCGWindowOwnerName");
-                let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
-                    let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
-                    s.to_string()
-                } else {
-                    continue;
-                };
-
-                let owner_lower = owner.to_lowercase();
-
-                // Normalize by removing spaces for exact matching
-                let app_name_normalized = app_name_lower.replace(" ", "");
-                let owner_normalized = owner_lower.replace(" ", "");
-
-                // ONLY accept exact matches (case-insensitive, with or without spaces)
-                // This prevents "Goose" from matching "GooseStudio"
-                let is_match =
-                    owner_lower == app_name_lower || owner_normalized == app_name_normalized;
-
-                if is_match {
-                    // Get window layer to filter out menu bar windows
-                    let layer_key = CFString::from_static_string("kCGWindowLayer");
-                    let layer: i32 = if let Some(value) = dict.find(layer_key.to_void()) {
-                        let num: core_foundation::number::CFNumber =
-                            TCFType::wrap_under_get_rule(*value as *const _);
-                        num.to_i32().unwrap_or(0)
-                    } else {
-                        0
-                    };
-
-                    // Skip menu bar windows (layer >= 20)
-                    if layer >= 20 {
-                        tracing::debug!(
-                            "Skipping window for '{}' at layer {} (menu bar)",
-                            owner,
-                            layer
-                        );
-                        continue;
-                    }
-
-                    // Get window bounds to verify it's a real window
-                    let bounds_key = CFString::from_static_string("kCGWindowBounds");
-                    if let Some(value) = dict.find(bounds_key.to_void()) {
-                        let bounds_dict: CFDictionary =
-                            TCFType::wrap_under_get_rule(*value as *const _);
-
-                        let x_key = CFString::from_static_string("X");
-                        let y_key = CFString::from_static_string("Y");
-                        let width_key = CFString::from_static_string("Width");
-                        let height_key = CFString::from_static_string("Height");
-
-                        if let (Some(x_val), Some(y_val), Some(w_val), Some(h_val)) = (
-                            bounds_dict.find(x_key.to_void()),
-                            bounds_dict.find(y_key.to_void()),
-                            bounds_dict.find(width_key.to_void()),
-                            bounds_dict.find(height_key.to_void()),
-                        ) {
-                            let x_num: core_foundation::number::CFNumber =
-                                TCFType::wrap_under_get_rule(*x_val as *const _);
-                            let y_num: core_foundation::number::CFNumber =
-                                TCFType::wrap_under_get_rule(*y_val as *const _);
-                            let w_num: core_foundation::number::CFNumber =
-                                TCFType::wrap_under_get_rule(*w_val as *const _);
-                            let h_num: core_foundation::number::CFNumber =
-                                TCFType::wrap_under_get_rule(*h_val as *const _);
-
-                            let x: i32 = x_num.to_i64().unwrap_or(0) as i32;
-                            let y: i32 = y_num.to_i64().unwrap_or(0) as i32;
-                            let w: i32 = w_num.to_i64().unwrap_or(0) as i32;
-                            let h: i32 = h_num.to_i64().unwrap_or(0) as i32;
-
-                            // Only accept windows with real bounds (>= 100x100 pixels)
-                            if w >= 100 && h >= 100 {
-                                tracing::debug!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer);
-                                return Ok((x, y, w, h));
-                            } else {
-                                tracing::debug!(
-                                    "Skipping window for '{}': too small ({}x{})",
-                                    owner,
-                                    w,
-                                    h
-                                );
-                                continue;
-                            }
-                        } else {
-                            continue;
-                        }
-                    }
-                }
-            }
-        }
-
-        Err(anyhow::anyhow!(
-            "Could not find window bounds for '{}'",
-            app_name
-        ))
-    }
-}
-
-/// Get image dimensions from a PNG file
-fn get_image_dimensions(path: &str) -> Result<(i32, i32)> {
-    use std::fs::File;
-    use std::io::Read;
-
-    let mut file = File::open(path)?;
-    let mut buffer = vec![0u8; 24];
-    file.read_exact(&mut buffer)?;
-
-    // PNG signature check
-    if &buffer[0..8] != b"\x89PNG\r\n\x1a\n" {
-        anyhow::bail!("Not a valid PNG file");
-    }
-
-    // Read IHDR chunk (width and height are at bytes 16-23)
-    let width = u32::from_be_bytes([buffer[16], buffer[17], buffer[18], buffer[19]]) as i32;
-    let height = u32::from_be_bytes([buffer[20], buffer[21], buffer[22], buffer[23]]) as i32;
-
-    Ok((width, height))
-}
-
-/// Transform coordinates from screenshot space to screen space
-///
-/// The screenshot is taken of a window, and Vision OCR returns coordinates
-/// relative to the screenshot image. We need to transform these to actual
-/// screen coordinates for clicking.
-///
-/// On Retina displays, screenshots are taken at 2x resolution, so we need
-/// to account for this scaling factor.
-fn transform_screenshot_to_screen_coords(
-    location: TextLocation,
-    window_bounds: (i32, i32, i32, i32), // (x, y, width, height) in screen space
-    screenshot_dims: (i32, i32),         // (width, height) in pixels
-) -> TextLocation {
-    let (win_x, win_y, win_width, win_height) = window_bounds;
-    let (screenshot_width, screenshot_height) = screenshot_dims;
-
-    // Calculate scale factors
-    // On Retina displays, screenshot is typically 2x the window size
-    let scale_x = win_width as f64 / screenshot_width as f64;
-    let scale_y = win_height as f64 / screenshot_height as f64;
-
-    tracing::debug!(
-        "Transform: screenshot={}x{}, window={}x{} at ({},{}), scale=({:.2},{:.2})",
-        screenshot_width,
-        screenshot_height,
-        win_width,
-        win_height,
-        win_x,
-        win_y,
-        scale_x,
-        scale_y
-    );
-
-    // Transform coordinates from image space to screen space
-    // IMPORTANT: macOS screen coordinates have origin at BOTTOM-LEFT (Y increases upward)
-    // Image coordinates have origin at TOP-LEFT (Y increases downward)
-    // win_y is the BOTTOM of the window in screen coordinates
-    // So we need to: (win_y + win_height) to get window TOP, then subtract screenshot_y
-    let window_top_y = win_y + win_height;
-
-    tracing::debug!(
-        "[transform] Input location in image space: x={}, y={}, width={}, height={}",
-        location.x,
-        location.y,
-        location.width,
-        location.height
-    );
-    tracing::debug!(
-        "[transform] Scale factors: scale_x={:.4}, scale_y={:.4}",
-        scale_x,
-        scale_y
-    );
-
-    let transformed_x = win_x + (location.x as f64 * scale_x) as i32;
-    let transformed_y = window_top_y - (location.y as f64 * scale_y) as i32;
-    let transformed_width = (location.width as f64 * scale_x) as i32;
-    let transformed_height = (location.height as f64 * scale_y) as i32;
-
-    tracing::debug!("[transform] Calculation details:");
-    tracing::debug!(
-        "  - transformed_x = {} + ({} * {:.4}) = {} + {:.2} = {}",
-        win_x,
-        location.x,
-        scale_x,
-        win_x,
-        location.x as f64 * scale_x,
-        transformed_x
-    );
-    tracing::debug!(
-        "  - transformed_width = ({} * {:.4}) = {:.2} -> {}",
-        location.width,
-        scale_x,
-        location.width as f64 * scale_x,
-        transformed_width
-    );
-    tracing::debug!(
-        "  - transformed_height = ({} * {:.4}) = {:.2} -> {}",
-        location.height,
-        scale_y,
-        location.height as f64 * scale_y,
-        transformed_height
-    );
-
-    tracing::debug!(
-        "Transformed location: screenshot=({},{}) {}x{} -> screen=({},{}) {}x{}",
-        location.x,
-        location.y,
-        location.width,
-        location.height,
-        transformed_x,
-        transformed_y,
-        transformed_width,
-        transformed_height
-    );
-
-    TextLocation {
-        text: location.text,
-        x: transformed_x,
-        y: transformed_y,
-        width: transformed_width,
-        height: transformed_height,
-        confidence: location.confidence,
-    }
-}

 #[path = "macos_window_matching_test.rs"]
 #[cfg(test)]
--- a/crates/g3-computer-control/src/platform/windows.rs
+++ b/crates/g3-computer-control/src/platform/windows.rs
@@ -1,189 +1,32 @@
-use crate::{types::*, ComputerController};
+use crate::{types::Rect, ComputerController};
 use anyhow::Result;
 use async_trait::async_trait;
-use tesseract::Tesseract;
-use uuid::Uuid;

-pub struct WindowsController {
-    // Placeholder for Windows-specific state
-}
+pub struct WindowsController;

 impl WindowsController {
    pub fn new() -> Result<Self> {
        tracing::warn!("Windows computer control not fully implemented");
-        Ok(Self {})
+        Ok(Self)
    }
 }

 #[async_trait]
 impl ComputerController for WindowsController {
-    async fn move_mouse(&self, _x: i32, _y: i32) -> Result<()> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn click(&self, _button: MouseButton) -> Result<()> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn double_click(&self, _button: MouseButton) -> Result<()> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn type_text(&self, _text: &str) -> Result<()> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn press_key(&self, _key: &str) -> Result<()> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn list_windows(&self) -> Result<Vec<Window>> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn focus_window(&self, _window_id: &str) -> Result<()> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn get_window_bounds(&self, _window_id: &str) -> Result<Rect> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn find_element(&self, _selector: &ElementSelector) -> Result<Option<UIElement>> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn get_element_text(&self, _element_id: &str) -> Result<String> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
-    async fn get_element_bounds(&self, _element_id: &str) -> Result<Rect> {
-        anyhow::bail!("Windows implementation not yet available")
-    }
-
    async fn take_screenshot(
        &self,
        _path: &str,
        _region: Option<Rect>,
        _window_id: Option<&str>,
    ) -> Result<()> {
-        // Enforce that window_id must be provided
-        if _window_id.is_none() {
-            anyhow::bail!("window_id is required. You must specify which window to capture (e.g., 'Chrome', 'Terminal', 'Notepad'). Use list_windows to see available windows.");
-        }
-
-        anyhow::bail!("Windows implementation not yet available")
+        anyhow::bail!("Windows screenshot implementation not yet available")
    }

-    async fn extract_text_from_screen(&self, _region: Rect, _window_id: &str) -> Result<String> {
-        anyhow::bail!("Windows implementation not yet available")
+    fn move_mouse(&self, _x: i32, _y: i32) -> Result<()> {
+        anyhow::bail!("Windows mouse control not yet available")
    }

-    async fn extract_text_from_image(&self, _path: &str) -> Result<OCRResult> {
-        // Check if tesseract is available on the system
-        let tesseract_check = std::process::Command::new("where")
-            .arg("tesseract")
-            .output();
-
-        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!(
-                "Tesseract OCR is not installed on your system.\n\n\
-                To install tesseract on Windows:\n  \
-                1. Download the installer from: https://github.com/UB-Mannheim/tesseract/wiki\n  \
-                2. Run the installer and follow the instructions\n  \
-                3. Add tesseract to your PATH environment variable\n  \
-                4. Restart your terminal/command prompt\n\n\
-                After installation, restart your terminal and try again."
-            );
-        }
-
-        // Initialize Tesseract
-        let tess = Tesseract::new(None, Some("eng")).map_err(|e| {
-            anyhow::anyhow!(
-                "Failed to initialize Tesseract: {}\n\n\
-                    This usually means:\n1. Tesseract is not properly installed\n\
-                    2. Language data files are missing\n\nTo fix:\n  \
-                    1. Reinstall tesseract from https://github.com/UB-Mannheim/tesseract/wiki\n  \
-                    2. Make sure to select 'Additional language data' during installation\n  \
-                    3. Ensure tesseract is in your PATH",
-                e
-            )
-        })?;
-
-        let text = tess
-            .set_image(_path)
-            .map_err(|e| anyhow::anyhow!("Failed to load image '{}': {}", _path, e))?
-            .get_text()
-            .map_err(|e| anyhow::anyhow!("Failed to extract text from image: {}", e))?;
-
-        // Get confidence (simplified - would need more complex API calls for per-word confidence)
-        let confidence = 0.85; // Placeholder
-
-        Ok(OCRResult {
-            text,
-            confidence,
-            bounds: Rect {
-                x: 0,
-                y: 0,
-                width: 0,
-                height: 0,
-            }, // Would need image dimensions
-        })
-    }
-
-    async fn find_text_on_screen(&self, _text: &str) -> Result<Option<Point>> {
-        // Check if tesseract is available on the system
-        let tesseract_check = std::process::Command::new("where")
-            .arg("tesseract")
-            .output();
-
-        if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
-            anyhow::bail!(
-                "Tesseract OCR is not installed on your system.\n\n\
-                To install tesseract on Windows:\n  \
-                1. Download the installer from: https://github.com/UB-Mannheim/tesseract/wiki\n  \
-                2. Run the installer and follow the instructions\n  \
-                3. Add tesseract to your PATH environment variable\n  \
-                4. Restart your terminal/command prompt\n\n\
-                After installation, restart your terminal and try again."
-            );
-        }
-
-        // Take full screen screenshot
-        let temp_path = format!("C:\\\\Temp\\\\g3_ocr_search_{}.png", uuid::Uuid::new_v4());
-        self.take_screenshot(&temp_path, None, None).await?;
-
-        // Use Tesseract to find text with bounding boxes
-        let tess = Tesseract::new(None, Some("eng")).map_err(|e| {
-            anyhow::anyhow!(
-                "Failed to initialize Tesseract: {}\n\n\
-                    This usually means:\n1. Tesseract is not properly installed\n\
-                    2. Language data files are missing\n\nTo fix:\n  \
-                    1. Reinstall tesseract from https://github.com/UB-Mannheim/tesseract/wiki\n  \
-                    2. Make sure to select 'Additional language data' during installation\n  \
-                    3. Ensure tesseract is in your PATH",
-                e
-            )
-        })?;
-
-        let full_text = tess
-            .set_image(temp_path.as_str())
-            .map_err(|e| anyhow::anyhow!("Failed to load screenshot: {}", e))?
-            .get_text()
-            .map_err(|e| anyhow::anyhow!("Failed to extract text from screen: {}", e))?;
-
-        // Clean up temp file
-        let _ = std::fs::remove_file(&temp_path);
-
-        // Simple text search - full implementation would use get_component_images
-        // to get bounding boxes for each word
-        if full_text.contains(_text) {
-            tracing::warn!(
-                "Text found but precise coordinates not available in simplified implementation"
-            );
-            Ok(Some(Point { x: 0, y: 0 }))
-        } else {
-            Ok(None)
-        }
+    fn click_at(&self, _x: i32, _y: i32, _app_name: Option<&str>) -> Result<()> {
+        anyhow::bail!("Windows click control not yet available")
    }
 }
--- a/crates/g3-computer-control/src/types.rs
+++ b/crates/g3-computer-control/src/types.rs
@@ -7,13 +7,3 @@ pub struct Rect {
    pub width: i32,
    pub height: i32,
 }
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct TextLocation {
-    pub text: String,
-    pub x: i32,
-    pub y: i32,
-    pub width: i32,
-    pub height: i32,
-    pub confidence: f32,
-}
--- a/crates/g3-computer-control/vision-bridge/Package.swift
+++ b/crates/g3-computer-control/vision-bridge/Package.swift
@@ -1,24 +0,0 @@
-// swift-tools-version:5.9
-import PackageDescription
-
-let package = Package(
-    name: "VisionBridge",
-    platforms: [
-        .macOS(.v11)
-    ],
-    products: [
-        .library(
-            name: "VisionBridge",
-            type: .dynamic,
-            targets: ["VisionBridge"]
-        ),
-    ],
-    targets: [
-        .target(
-            name: "VisionBridge",
-            dependencies: [],
-            path: "Sources/VisionBridge",
-            publicHeadersPath: "."
-        ),
-    ]
-)
--- a/crates/g3-computer-control/vision-bridge/Sources/VisionBridge/VisionBridge.h
+++ b/crates/g3-computer-control/vision-bridge/Sources/VisionBridge/VisionBridge.h
@@ -1,39 +0,0 @@
-#ifndef VisionBridge_h
-#define VisionBridge_h
-
-#include <stdint.h>
-#include <stdbool.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Text box structure for FFI
-typedef struct {
-    const char* text;
-    uint32_t text_len;
-    int32_t x;
-    int32_t y;
-    int32_t width;
-    int32_t height;
-    float confidence;
-} VisionTextBox;
-
-// Recognize text in an image and return bounding boxes
-// Returns true on success, false on failure
-// Caller must free the returned boxes using vision_free_boxes
-bool vision_recognize_text(
-    const char* image_path,
-    uint32_t image_path_len,
-    VisionTextBox** out_boxes,
-    uint32_t* out_count
-);
-
-// Free memory allocated by vision_recognize_text
-void vision_free_boxes(VisionTextBox* boxes, uint32_t count);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* VisionBridge_h */
--- a/crates/g3-computer-control/vision-bridge/Sources/VisionBridge/VisionOCR.swift
+++ b/crates/g3-computer-control/vision-bridge/Sources/VisionBridge/VisionOCR.swift
@@ -1,145 +0,0 @@
-import Foundation
-import Vision
-import AppKit
-import CoreGraphics
-
-// MARK: - C Bridge Functions
-
-@_cdecl("vision_recognize_text")
-public func vision_recognize_text(
-    _ imagePath: UnsafePointer<CChar>,
-    _ imagePathLen: UInt32,
-    _ outBoxes: UnsafeMutablePointer<UnsafeMutableRawPointer?>,
-    _ outCount: UnsafeMutablePointer<UInt32>
-) -> Bool {
-    // Convert C string to Swift String
-    guard let pathData = Data(bytes: imagePath, count: Int(imagePathLen)).withUnsafeBytes({
-        String(bytes: $0, encoding: .utf8)
-    }) else {
-        return false
-    }
-    
-    let path = pathData.trimmingCharacters(in: .whitespaces)
-    
-    // Load image
-    guard let image = NSImage(contentsOfFile: path),
-          let cgImage = image.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
-        return false
-    }
-    
-    // Perform OCR
-    var textBoxes: [CTextBox] = []
-    let semaphore = DispatchSemaphore(value: 0)
-    var success = false
-    
-    let request = VNRecognizeTextRequest { request, error in
-        defer { semaphore.signal() }
-        
-        if let error = error {
-            print("Vision OCR error: \(error.localizedDescription)")
-            return
-        }
-        
-        guard let observations = request.results as? [VNRecognizedTextObservation] else {
-            return
-        }
-        
-        let imageSize = CGSize(width: cgImage.width, height: cgImage.height)
-        
-        for observation in observations {
-            guard let candidate = observation.topCandidates(1).first else { continue }
-            
-            let text = candidate.string
-            let boundingBox = observation.boundingBox
-            
-            // Convert normalized coordinates (bottom-left origin) to pixel coordinates (top-left origin)
-            let x = Int32(boundingBox.origin.x * imageSize.width)
-            let y = Int32((1.0 - boundingBox.origin.y - boundingBox.height) * imageSize.height)
-            let width = Int32(boundingBox.width * imageSize.width)
-            let height = Int32(boundingBox.height * imageSize.height)
-            
-            // Allocate C string for text
-            let cString = strdup(text)
-            
-            textBoxes.append(CTextBox(
-                text: cString,
-                text_len: UInt32(text.utf8.count),
-                x: x,
-                y: y,
-                width: width,
-                height: height,
-                confidence: observation.confidence
-            ))
-        }
-        
-        success = true
-    }
-    
-    // Configure request for best accuracy
-    request.recognitionLevel = .accurate
-    request.usesLanguageCorrection = true
-    request.recognitionLanguages = ["en-US"]
-    
-    // Perform request
-    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
-    do {
-        try handler.perform([request])
-    } catch {
-        print("Vision request failed: \(error.localizedDescription)")
-        return false
-    }
-    
-    // Wait for completion
-    semaphore.wait()
-    
-    if !success {
-        return false
-    }
-    
-    // Allocate array for results
-    let boxesPtr = UnsafeMutablePointer<CTextBox>.allocate(capacity: textBoxes.count)
-    for (index, box) in textBoxes.enumerated() {
-        boxesPtr[index] = box
-    }
-    
-    outBoxes.pointee = UnsafeMutableRawPointer(boxesPtr)
-    outCount.pointee = UInt32(textBoxes.count)
-    
-    return true
-}
-
-@_cdecl("vision_free_boxes")
-public func vision_free_boxes(
-    _ boxes: UnsafeMutableRawPointer,
-    _ count: UInt32
-) {
-    let typedBoxes = boxes.assumingMemoryBound(to: CTextBox.self)
-    for i in 0..<Int(count) {
-        if let text = typedBoxes[i].text {
-            free(UnsafeMutableRawPointer(mutating: text))
-        }
-    }
-    typedBoxes.deallocate()
-}
-
-// MARK: - C-Compatible Structure
-
-public struct CTextBox {
-    public let text: UnsafePointer<CChar>?
-    public let text_len: UInt32
-    public let x: Int32
-    public let y: Int32
-    public let width: Int32
-    public let height: Int32
-    public let confidence: Float
-    
-    public init(text: UnsafePointer<CChar>?, text_len: UInt32, x: Int32, y: Int32, width: Int32, height: Int32, confidence: Float) {
-        self.text = text
-        self.text_len = text_len
-        self.x = x
-        self.y = y
-        self.width = width
-        self.height = height
-        self.confidence = confidence
-    }
-}