add context window monitor

Writes the current context window to logs/current_context_window (uses a symlink to a session ID).

This PR was unfortunately generated by a different LLM, which did a ton of superficial reformatting. It's actually a fairly small and benign change, but I don't want to roll back everything. Hope that's OK.
This commit is contained in:
Jochen
2025-11-27 21:00:02 +11:00
parent 93dc4acf86
commit 52f78653b4
89 changed files with 4040 additions and 2576 deletions

View File

@@ -3,19 +3,19 @@ use core_graphics::display::CGDisplay;
fn main() {
let display = CGDisplay::main();
let image = display.image().expect("Failed to capture screen");
println!("CGImage properties:");
println!(" Width: {}", image.width());
println!(" Height: {}", image.height());
println!(" Bits per component: {}", image.bits_per_component());
println!(" Bits per pixel: {}", image.bits_per_pixel());
println!(" Bytes per row: {}", image.bytes_per_row());
let data = image.data();
let expected_size = image.width() * image.height() * 4;
println!(" Data length: {}", data.len());
println!(" Expected (w*h*4): {}", expected_size);
// Check if there's padding in rows
let bytes_per_row = image.bytes_per_row();
let width = image.width();
@@ -23,16 +23,25 @@ fn main() {
println!("\nRow alignment:");
println!(" Actual bytes per row: {}", bytes_per_row);
println!(" Expected (width * 4): {}", expected_bytes_per_row);
println!(" Padding per row: {}", bytes_per_row - expected_bytes_per_row);
println!(
" Padding per row: {}",
bytes_per_row - expected_bytes_per_row
);
// Sample some pixels from different locations
println!("\nFirst 3 pixels (raw bytes):");
for i in 0..3 {
let offset = i * 4;
println!(" Pixel {}: [{:3}, {:3}, {:3}, {:3}]",
i, data[offset], data[offset+1], data[offset+2], data[offset+3]);
println!(
" Pixel {}: [{:3}, {:3}, {:3}, {:3}]",
i,
data[offset],
data[offset + 1],
data[offset + 2],
data[offset + 3]
);
}
// Check a pixel from the middle
let mid_row = image.height() / 2;
let mid_col = image.width() / 2;
@@ -40,7 +49,12 @@ fn main() {
println!("\nMiddle pixel (row {}, col {}):", mid_row, mid_col);
println!(" Offset: {}", mid_offset);
if mid_offset + 3 < data.len() as usize {
println!(" Bytes: [{:3}, {:3}, {:3}, {:3}]",
data[mid_offset], data[mid_offset+1], data[mid_offset+2], data[mid_offset+3]);
println!(
" Bytes: [{:3}, {:3}, {:3}, {:3}]",
data[mid_offset],
data[mid_offset + 1],
data[mid_offset + 2],
data[mid_offset + 3]
);
}
}

View File

@@ -1,34 +1,38 @@
use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo};
use core_foundation::base::{TCFType, ToVoid};
use core_foundation::dictionary::CFDictionary;
use core_foundation::string::CFString;
use core_foundation::base::{TCFType, ToVoid};
use core_graphics::window::{
kCGNullWindowID, kCGWindowListOptionOnScreenOnly, CGWindowListCopyWindowInfo,
};
fn main() {
println!("Listing all on-screen windows...");
println!("{:<10} {:<25} {}", "Window ID", "Owner", "Title");
println!("{}", "-".repeat(80));
unsafe {
let window_list = CGWindowListCopyWindowInfo(
kCGWindowListOptionOnScreenOnly,
kCGNullWindowID
);
let count = core_foundation::array::CFArray::<CFDictionary>::wrap_under_create_rule(window_list).len();
let array = core_foundation::array::CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
let window_list =
CGWindowListCopyWindowInfo(kCGWindowListOptionOnScreenOnly, kCGNullWindowID);
let count =
core_foundation::array::CFArray::<CFDictionary>::wrap_under_create_rule(window_list)
.len();
let array =
core_foundation::array::CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
for i in 0..count {
let dict = array.get(i).unwrap();
// Get window ID
let window_id_key = CFString::from_static_string("kCGWindowNumber");
let window_id: i64 = if let Some(value) = dict.find(window_id_key.to_void()) {
let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
let num: core_foundation::number::CFNumber =
TCFType::wrap_under_get_rule(*value as *const _);
num.to_i64().unwrap_or(0)
} else {
0
};
// Get owner name
let owner_key = CFString::from_static_string("kCGWindowOwnerName");
let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
@@ -37,7 +41,7 @@ fn main() {
} else {
"Unknown".to_string()
};
// Get window name/title
let name_key = CFString::from_static_string("kCGWindowName");
let title: String = if let Some(value) = dict.find(name_key.to_void()) {
@@ -46,7 +50,7 @@ fn main() {
} else {
"".to_string()
};
// Show all windows
if !owner.is_empty() {
println!("{:<10} {:<25} {}", window_id, owner, title);

View File

@@ -11,11 +11,11 @@ use g3_computer_control::MacAxController;
async fn main() -> Result<()> {
println!("🍎 macOS Accessibility API Demo\n");
println!("This demo shows how to control macOS applications using the Accessibility API.\n");
// Create controller
let controller = MacAxController::new()?;
println!("✅ MacAxController initialized\n");
// List running applications
println!("📱 Listing running applications:");
match controller.list_applications() {
@@ -30,7 +30,7 @@ async fn main() -> Result<()> {
Err(e) => println!(" ❌ Error: {}", e),
}
println!();
// Get frontmost app
println!("🎯 Getting frontmost application:");
match controller.get_frontmost_app() {
@@ -38,16 +38,16 @@ async fn main() -> Result<()> {
Err(e) => println!(" ❌ Error: {}", e),
}
println!();
// Example: Activate Finder and get its UI tree
println!("📂 Activating Finder and inspecting UI:");
match controller.activate_app("Finder") {
Ok(_) => {
println!(" ✅ Finder activated");
// Wait a moment for activation
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
// Get UI tree
match controller.get_ui_tree("Finder", 2) {
Ok(tree) => {
@@ -62,13 +62,13 @@ async fn main() -> Result<()> {
Err(e) => println!(" ❌ Error: {}", e),
}
println!();
println!("✨ Demo complete!\n");
println!("💡 Tips:");
println!(" - Use --macax flag with g3 to enable these tools");
println!(" - Grant accessibility permissions in System Preferences");
println!(" - Add accessibility identifiers to your apps for easier automation");
println!(" - See docs/macax-tools.md for full documentation\n");
Ok(())
}

View File

@@ -1,64 +1,66 @@
use g3_computer_control::SafariDriver;
use g3_computer_control::webdriver::WebDriverController;
use anyhow::Result;
use g3_computer_control::webdriver::WebDriverController;
use g3_computer_control::SafariDriver;
#[tokio::main]
async fn main() -> Result<()> {
println!("Safari WebDriver Demo");
println!("=====================\n");
println!("Make sure to:");
println!("1. Enable 'Allow Remote Automation' in Safari's Develop menu");
println!("2. Run: /usr/bin/safaridriver --enable");
println!("3. Start safaridriver in another terminal: safaridriver --port 4444\n");
println!("Connecting to SafariDriver...");
let mut driver = SafariDriver::new().await?;
println!("✅ Connected!\n");
// Navigate to a website
println!("Navigating to example.com...");
driver.navigate("https://example.com").await?;
println!("✅ Navigated\n");
// Get page title
let title = driver.title().await?;
println!("Page title: {}\n", title);
// Get current URL
let url = driver.current_url().await?;
println!("Current URL: {}\n", url);
// Find an element
println!("Finding h1 element...");
let h1 = driver.find_element("h1").await?;
let h1_text = h1.text().await?;
println!("H1 text: {}\n", h1_text);
// Find all paragraphs
println!("Finding all paragraphs...");
let paragraphs = driver.find_elements("p").await?;
println!("Found {} paragraphs\n", paragraphs.len());
// Get page source
println!("Getting page source...");
let source = driver.page_source().await?;
println!("Page source length: {} bytes\n", source.len());
// Execute JavaScript
println!("Executing JavaScript...");
let result = driver.execute_script("return document.title", vec![]).await?;
let result = driver
.execute_script("return document.title", vec![])
.await?;
println!("JS result: {:?}\n", result);
// Take a screenshot
println!("Taking screenshot...");
driver.screenshot("/tmp/safari_demo.png").await?;
println!("✅ Screenshot saved to /tmp/safari_demo.png\n");
// Close the browser
println!("Closing browser...");
driver.quit().await?;
println!("✅ Done!");
Ok(())
}

View File

@@ -3,10 +3,13 @@ use g3_computer_control::create_controller;
#[tokio::main]
async fn main() {
println!("Testing screenshot with permission prompt...");
let controller = create_controller().expect("Failed to create controller");
match controller.take_screenshot("/tmp/test_with_prompt.png", None, None).await {
match controller
.take_screenshot("/tmp/test_with_prompt.png", None, None)
.await
{
Ok(_) => {
println!("\n✅ Screenshot saved to /tmp/test_with_prompt.png");
println!("Opening screenshot...");

View File

@@ -2,29 +2,33 @@ use std::process::Command;
fn main() {
let path = "/tmp/rust_screencapture_test.png";
println!("Testing screencapture command from Rust...");
let mut cmd = Command::new("screencapture");
cmd.arg("-x"); // No sound
cmd.arg(path);
println!("Command: {:?}", cmd);
match cmd.output() {
Ok(output) => {
println!("Exit status: {}", output.status);
println!("Stdout: {}", String::from_utf8_lossy(&output.stdout));
println!("Stderr: {}", String::from_utf8_lossy(&output.stderr));
if output.status.success() {
println!("\n✅ Screenshot saved to: {}", path);
// Check file exists and size
if let Ok(metadata) = std::fs::metadata(path) {
println!("File size: {} bytes ({:.1} MB)", metadata.len(), metadata.len() as f64 / 1_000_000.0);
println!(
"File size: {} bytes ({:.1} MB)",
metadata.len(),
metadata.len() as f64 / 1_000_000.0
);
}
// Open it
let _ = Command::new("open").arg(path).spawn();
println!("\nOpened screenshot - please verify it looks correct!");

View File

@@ -4,17 +4,23 @@ use image::{ImageBuffer, RgbaImage};
fn main() {
let display = CGDisplay::main();
let image = display.image().expect("Failed to capture screen");
let width = image.width() as u32;
let height = image.height() as u32;
let bytes_per_row = image.bytes_per_row() as usize;
let data = image.data();
println!("Testing screenshot fix...");
println!("Image: {}x{}, bytes_per_row: {}", width, height, bytes_per_row);
println!(
"Image: {}x{}, bytes_per_row: {}",
width, height, bytes_per_row
);
println!("Expected bytes per row: {}", width * 4);
println!("Padding per row: {} bytes", bytes_per_row - (width as usize * 4));
println!(
"Padding per row: {} bytes",
bytes_per_row - (width as usize * 4)
);
// OLD METHOD (broken) - treating data as continuous
println!("\n=== OLD METHOD (BROKEN) ===");
let mut old_rgba = Vec::with_capacity(data.len() as usize);
@@ -26,14 +32,14 @@ fn main() {
}
println!("Converted {} pixels", old_rgba.len() / 4);
println!("Expected {} pixels", width * height);
// NEW METHOD (fixed) - handling row padding
println!("\n=== NEW METHOD (FIXED) ===");
let mut new_rgba = Vec::with_capacity((width * height * 4) as usize);
for row in 0..height as usize {
let row_start = row * bytes_per_row;
let row_end = row_start + (width as usize * 4);
for chunk in data[row_start..row_end].chunks_exact(4) {
new_rgba.push(chunk[2]); // R
new_rgba.push(chunk[1]); // G
@@ -43,26 +49,34 @@ fn main() {
}
println!("Converted {} pixels", new_rgba.len() / 4);
println!("Expected {} pixels", width * height);
// Save a small crop from both methods
let crop_size = 200;
// Old method crop
let old_crop: Vec<u8> = old_rgba.iter().take((crop_size * crop_size * 4) as usize).copied().collect();
let old_crop: Vec<u8> = old_rgba
.iter()
.take((crop_size * crop_size * 4) as usize)
.copied()
.collect();
if let Some(old_img) = ImageBuffer::from_raw(crop_size, crop_size, old_crop) {
let old_img: RgbaImage = old_img;
old_img.save("/tmp/screenshot_old_method.png").unwrap();
println!("\nSaved OLD method crop to: /tmp/screenshot_old_method.png");
}
// New method crop
let new_crop: Vec<u8> = new_rgba.iter().take((crop_size * crop_size * 4) as usize).copied().collect();
let new_crop: Vec<u8> = new_rgba
.iter()
.take((crop_size * crop_size * 4) as usize)
.copied()
.collect();
if let Some(new_img) = ImageBuffer::from_raw(crop_size, crop_size, new_crop) {
let new_img: RgbaImage = new_img;
new_img.save("/tmp/screenshot_new_method.png").unwrap();
println!("Saved NEW method crop to: /tmp/screenshot_new_method.png");
}
println!("\nOpen both images to compare:");
println!(" open /tmp/screenshot_old_method.png /tmp/screenshot_new_method.png");
}

View File

@@ -6,43 +6,43 @@ use g3_computer_control::MacAxController;
#[tokio::main]
/// Manual smoke test for `MacAxController::type_text`.
///
/// Types three strings into TextEdit in sequence (plain ASCII, unicode with
/// emojis, and punctuation/special characters), printing a success or failure
/// line after each attempt. A failed `type_text` call is reported and the run
/// continues with the next test; only a failure to construct the controller
/// aborts the program with an error.
async fn main() -> Result<()> {
    println!("🧪 Testing macax type_text functionality\n");

    let controller = MacAxController::new()?;
    println!("✅ Controller initialized\n");

    // Test 1: Type simple text
    println!("Test 1: Typing simple text into TextEdit");
    println!(" Please open TextEdit and create a new document...");
    // Give the operator time to bring up a document. The async sleep is used
    // instead of `std::thread::sleep` so the runtime thread is not blocked,
    // matching the other examples in this crate.
    tokio::time::sleep(tokio::time::Duration::from_secs(3)).await;

    match controller.type_text("TextEdit", "Hello, World!") {
        Ok(_) => println!(" ✅ Successfully typed simple text\n"),
        Err(e) => println!(" ❌ Failed: {}\n", e),
    }

    // Short pause between tests.
    tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;

    // Test 2: Type unicode and emojis
    println!("Test 2: Typing unicode and emojis");
    match controller.type_text("TextEdit", "\n🌟 Unicode test: café, naïve, 日本語 🎉") {
        Ok(_) => println!(" ✅ Successfully typed unicode text\n"),
        Err(e) => println!(" ❌ Failed: {}\n", e),
    }

    tokio::time::sleep(tokio::time::Duration::from_secs(1)).await;

    // Test 3: Type special characters
    println!("Test 3: Typing special characters");
    match controller.type_text("TextEdit", "\nSpecial: @#$%^&*()_+-=[]{}|;':,.<>?/") {
        Ok(_) => println!(" ✅ Successfully typed special characters\n"),
        Err(e) => println!(" ❌ Failed: {}\n", e),
    }

    println!("\n✨ Tests complete!");
    println!("\n💡 Now try with Things3:");
    println!(" 1. Open Things3");
    println!(" 2. Press Cmd+N to create a new task");
    println!(" 3. Run: g3 --macax 'type \"🌟 My awesome task\" into Things'");

    Ok(())
}

View File

@@ -1,63 +1,67 @@
use g3_computer_control::ocr::{OCREngine, DefaultOCR};
use anyhow::Result;
use g3_computer_control::ocr::{DefaultOCR, OCREngine};
#[tokio::main]
async fn main() -> Result<()> {
println!("🧪 Testing Apple Vision OCR");
println!("===========================\n");
// Initialize OCR engine
println!("📦 Initializing OCR engine...");
let ocr = DefaultOCR::new()?;
println!("✅ OCR engine: {}\n", ocr.name());
// Check if test image exists
let test_image = "/tmp/safari_test.png";
if !std::path::Path::new(test_image).exists() {
println!("⚠️ Test image not found: {}", test_image);
println!(" Creating a screenshot...");
let status = std::process::Command::new("screencapture")
.arg("-x")
.arg("-R")
.arg("0,0,1200,800")
.arg(test_image)
.status()?;
if !status.success() {
anyhow::bail!("Failed to create screenshot");
}
println!("✅ Screenshot created\n");
}
// Run OCR
println!("🔍 Running Apple Vision OCR on {}...", test_image);
let start = std::time::Instant::now();
let locations = ocr.extract_text_with_locations(test_image).await?;
let duration = start.elapsed();
println!("✅ OCR completed in {:.3}s\n", duration.as_secs_f64());
// Display results
println!("📊 Results:");
println!(" Found {} text elements\n", locations.len());
if locations.is_empty() {
println!("⚠️ No text found in image");
} else {
println!(" Top 20 results:");
println!(" {:<4} {:<40} {:<15} {:<12} {:<8}", "#", "Text", "Position", "Size", "Conf");
println!(
" {:<4} {:<40} {:<15} {:<12} {:<8}",
"#", "Text", "Position", "Size", "Conf"
);
println!(" {}", "-".repeat(85));
for (i, loc) in locations.iter().take(20).enumerate() {
let text = if loc.text.len() > 37 {
format!("{}...", &loc.text[..37])
} else {
loc.text.clone()
};
println!(" {:<4} {:<40} ({:>4},{:>4}) {:>4}x{:<4} {:.2}",
println!(
" {:<4} {:<40} ({:>4},{:>4}) {:>4}x{:<4} {:.2}",
i + 1,
text,
loc.x,
@@ -67,19 +71,22 @@ async fn main() -> Result<()> {
loc.confidence
);
}
if locations.len() > 20 {
println!("\n ... and {} more", locations.len() - 20);
}
// Performance comparison
println!("\n📈 Performance:");
println!(" OCR Speed: {:.3}s", duration.as_secs_f64());
println!(" Text elements: {}", locations.len());
println!(" Avg per element: {:.1}ms", duration.as_millis() as f64 / locations.len() as f64);
println!(
" Avg per element: {:.1}ms",
duration.as_millis() as f64 / locations.len() as f64
);
}
println!("\n✅ Test complete!");
Ok(())
}

View File

@@ -3,36 +3,46 @@ use g3_computer_control::create_controller;
#[tokio::main]
async fn main() {
println!("Testing window-specific screenshot capture...");
let controller = create_controller().expect("Failed to create controller");
// Test 1: Capture iTerm2 window
println!("\n1. Capturing iTerm2 window...");
match controller.take_screenshot("/tmp/iterm_window.png", None, Some("iTerm2")).await {
match controller
.take_screenshot("/tmp/iterm_window.png", None, Some("iTerm2"))
.await
{
Ok(_) => {
println!(" ✅ iTerm2 window captured to /tmp/iterm_window.png");
let _ = std::process::Command::new("open").arg("/tmp/iterm_window.png").spawn();
let _ = std::process::Command::new("open")
.arg("/tmp/iterm_window.png")
.spawn();
}
Err(e) => println!(" ❌ Failed: {}", e),
}
// Wait a moment for the image to open
tokio::time::sleep(tokio::time::Duration::from_secs(2)).await;
// Test 2: Full screen capture for comparison
println!("\n2. Capturing full screen for comparison...");
match controller.take_screenshot("/tmp/fullscreen.png", None, None).await {
match controller
.take_screenshot("/tmp/fullscreen.png", None, None)
.await
{
Ok(_) => {
println!(" ✅ Full screen captured to /tmp/fullscreen.png");
let _ = std::process::Command::new("open").arg("/tmp/fullscreen.png").spawn();
let _ = std::process::Command::new("open")
.arg("/tmp/fullscreen.png")
.spawn();
}
Err(e) => println!(" ❌ Failed: {}", e),
}
println!("\n=== Comparison ===");
println!("iTerm window: /tmp/iterm_window.png (should show ONLY iTerm window)");
println!("Full screen: /tmp/fullscreen.png (should show entire desktop)");
// Show file sizes
if let Ok(meta1) = std::fs::metadata("/tmp/iterm_window.png") {
if let Ok(meta2) = std::fs::metadata("/tmp/fullscreen.png") {