Merge branch 'main' into micn/fix-anthropic-1p
* main: control commands for machine mode Fix duplicate dump at end minor --machine mode flag for verbose CLI output fixed x,y detection in vision click screenshotting bug fix test Native api for screen capture replace tesseract with apple vision more macax tooling coach rigor +++ thinning message highlighted warnings fix macax tools control commands Add --interactive-requirements flag for AI-enhanced requirements mode
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,6 +2,7 @@
|
||||
# will have compiled files and executables
|
||||
debug
|
||||
target
|
||||
.build
|
||||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
325
Cargo.lock
generated
325
Cargo.lock
generated
@@ -2,6 +2,28 @@
|
||||
# It is not intended for manual editing.
|
||||
version = 4
|
||||
|
||||
[[package]]
|
||||
name = "accessibility"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ac9f33ffc1ef16eddb2451c03c983e56a5182ac760c3f2733da55ba8f48eac4"
|
||||
dependencies = [
|
||||
"accessibility-sys",
|
||||
"cocoa 0.26.1",
|
||||
"core-foundation 0.10.1",
|
||||
"objc",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "accessibility-sys"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "46a6a8e90a1d8b96a48249e7c8f5b4058447bea8847280db7bfccb6dcab6b8e1"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "adler2"
|
||||
version = "2.0.1"
|
||||
@@ -114,7 +136,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -196,28 +218,6 @@ version = "0.22.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.64.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c4243e6031260db77ede97ad86c27e501d646a27ab57b59a574f725d98ab1fb4"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"cexpr",
|
||||
"clang-sys",
|
||||
"lazy_static",
|
||||
"lazycell",
|
||||
"log",
|
||||
"peeking_take_while",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 1.0.109",
|
||||
"which",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bindgen"
|
||||
version = "0.69.5"
|
||||
@@ -237,7 +237,7 @@ dependencies = [
|
||||
"regex",
|
||||
"rustc-hash",
|
||||
"shlex",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
"which",
|
||||
]
|
||||
|
||||
@@ -318,9 +318,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.2.41"
|
||||
version = "1.2.43"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7"
|
||||
checksum = "739eb0f94557554b3ca9a86d2d37bebd49c5e6d0c1d2bda35ba5bdac830befc2"
|
||||
dependencies = [
|
||||
"find-msvc-tools",
|
||||
"jobserver",
|
||||
@@ -411,7 +411,7 @@ dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -437,9 +437,25 @@ checksum = "f6140449f97a6e97f9511815c5632d84c8aacf8ac271ad77c559218161a1373c"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"block",
|
||||
"cocoa-foundation",
|
||||
"cocoa-foundation 0.1.2",
|
||||
"core-foundation 0.9.4",
|
||||
"core-graphics",
|
||||
"core-graphics 0.23.2",
|
||||
"foreign-types 0.5.0",
|
||||
"libc",
|
||||
"objc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cocoa"
|
||||
version = "0.26.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad36507aeb7e16159dfe68db81ccc27571c3ccd4b76fb2fb72fc59e7a4b1b64c"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"block",
|
||||
"cocoa-foundation 0.2.1",
|
||||
"core-foundation 0.10.1",
|
||||
"core-graphics 0.24.0",
|
||||
"foreign-types 0.5.0",
|
||||
"libc",
|
||||
"objc",
|
||||
@@ -454,11 +470,24 @@ dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"block",
|
||||
"core-foundation 0.9.4",
|
||||
"core-graphics-types",
|
||||
"core-graphics-types 0.1.3",
|
||||
"libc",
|
||||
"objc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cocoa-foundation"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81411967c50ee9a1fc11365f8c585f863a22a9697c89239c452292c40ba79b0d"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"block",
|
||||
"core-foundation 0.10.1",
|
||||
"core-graphics-types 0.2.0",
|
||||
"objc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "color_quant"
|
||||
version = "1.1.0"
|
||||
@@ -635,7 +664,20 @@ checksum = "c07782be35f9e1140080c6b96f0d44b739e2278479f64e02fdab4e32dfd8b081"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"core-foundation 0.9.4",
|
||||
"core-graphics-types",
|
||||
"core-graphics-types 0.1.3",
|
||||
"foreign-types 0.5.0",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-graphics"
|
||||
version = "0.24.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa95a34622365fa5bbf40b20b75dba8dfa8c94c734aea8ac9a5ca38af14316f1"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"core-foundation 0.10.1",
|
||||
"core-graphics-types 0.2.0",
|
||||
"foreign-types 0.5.0",
|
||||
"libc",
|
||||
]
|
||||
@@ -651,6 +693,17 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-graphics-types"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3d44a101f213f6c4cdc1853d4b78aef6db6bdfa3468798cc1d9912f4735013eb"
|
||||
dependencies = [
|
||||
"bitflags 2.10.0",
|
||||
"core-foundation 0.10.1",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.17"
|
||||
@@ -692,7 +745,7 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strict",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -831,7 +884,7 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"strsim",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -842,14 +895,14 @@ checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
|
||||
dependencies = [
|
||||
"darling_core",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deranged"
|
||||
version = "0.5.4"
|
||||
version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071"
|
||||
checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587"
|
||||
dependencies = [
|
||||
"powerfmt",
|
||||
]
|
||||
@@ -864,7 +917,7 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustc_version",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -885,7 +938,7 @@ dependencies = [
|
||||
"convert_case 0.7.1",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -937,7 +990,7 @@ dependencies = [
|
||||
"libc",
|
||||
"option-ext",
|
||||
"redox_users 0.5.2",
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -948,7 +1001,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -962,9 +1015,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "document-features"
|
||||
version = "0.2.11"
|
||||
version = "0.2.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "95249b50c6c185bee49034bcb378a49dc2b5dff0be90ff6616d31d64febab05d"
|
||||
checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
|
||||
dependencies = [
|
||||
"litrs",
|
||||
]
|
||||
@@ -1009,7 +1062,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1091,9 +1144,9 @@ checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.1.4"
|
||||
version = "1.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9"
|
||||
checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
@@ -1138,7 +1191,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1218,7 +1271,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1287,18 +1340,18 @@ dependencies = [
|
||||
name = "g3-computer-control"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"accessibility",
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"cocoa",
|
||||
"core-foundation 0.9.4",
|
||||
"core-graphics",
|
||||
"cocoa 0.25.0",
|
||||
"core-foundation 0.10.1",
|
||||
"core-graphics 0.23.2",
|
||||
"fantoccini",
|
||||
"image",
|
||||
"objc",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"shellexpand",
|
||||
"tesseract",
|
||||
"thiserror 1.0.69",
|
||||
"tokio",
|
||||
"tracing",
|
||||
@@ -1518,11 +1571,11 @@ checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||
|
||||
[[package]]
|
||||
name = "home"
|
||||
version = "0.5.11"
|
||||
version = "0.5.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf"
|
||||
checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
|
||||
dependencies = [
|
||||
"windows-sys 0.59.0",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1869,9 +1922,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "indoc"
|
||||
version = "2.0.6"
|
||||
version = "2.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd"
|
||||
checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
|
||||
dependencies = [
|
||||
"rustversion",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "instability"
|
||||
@@ -1883,7 +1939,7 @@ dependencies = [
|
||||
"indoc",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1894,9 +1950,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
|
||||
|
||||
[[package]]
|
||||
name = "is_terminal_polyfill"
|
||||
version = "1.70.1"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
|
||||
|
||||
[[package]]
|
||||
name = "itertools"
|
||||
@@ -2004,7 +2060,7 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"regex",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2025,28 +2081,6 @@ version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
|
||||
|
||||
[[package]]
|
||||
name = "leptonica-plumbing"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cc7a74c43d6f090d39158d233f326f47cd8bba545217595c93662b4e31156f42"
|
||||
dependencies = [
|
||||
"leptonica-sys",
|
||||
"libc",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "leptonica-sys"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da627c72b2499a8106f4dd33143843015e4a631f445d561f3481f7fba35b6151"
|
||||
dependencies = [
|
||||
"bindgen 0.64.0",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.177"
|
||||
@@ -2102,9 +2136,9 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
|
||||
|
||||
[[package]]
|
||||
name = "litrs"
|
||||
version = "0.4.2"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f5e54036fe321fd421e10d732f155734c4e4afd610dd556d9a82833ab3ee0bed"
|
||||
checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
|
||||
|
||||
[[package]]
|
||||
name = "llama_cpp"
|
||||
@@ -2127,7 +2161,7 @@ version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "037a1881ada3592c6a922224d5177b4b4f452e6b2979eb97393b71989e48357f"
|
||||
dependencies = [
|
||||
"bindgen 0.69.5",
|
||||
"bindgen",
|
||||
"cc",
|
||||
"link-cplusplus",
|
||||
"once_cell",
|
||||
@@ -2220,14 +2254,14 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "mio"
|
||||
version = "1.0.4"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
|
||||
checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
"wasi",
|
||||
"windows-sys 0.59.0",
|
||||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2299,7 +2333,7 @@ version = "0.50.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
|
||||
dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2375,9 +2409,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
|
||||
|
||||
[[package]]
|
||||
name = "once_cell_polyfill"
|
||||
version = "1.70.1"
|
||||
version = "1.70.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
|
||||
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
|
||||
|
||||
[[package]]
|
||||
name = "openssl"
|
||||
@@ -2402,7 +2436,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2474,12 +2508,6 @@ version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
|
||||
|
||||
[[package]]
|
||||
name = "peeking_take_while"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
|
||||
|
||||
[[package]]
|
||||
name = "percent-encoding"
|
||||
version = "2.3.2"
|
||||
@@ -2516,7 +2544,7 @@ dependencies = [
|
||||
"pest_meta",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2597,14 +2625,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.101"
|
||||
version = "1.0.103"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de"
|
||||
checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
@@ -2876,7 +2904,7 @@ dependencies = [
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.11.0",
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3002,7 +3030,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3097,9 +3125,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-mio"
|
||||
version = "0.2.4"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd"
|
||||
checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"mio",
|
||||
@@ -3196,25 +3224,14 @@ dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.109"
|
||||
version = "2.0.108"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.107"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b"
|
||||
checksum = "da58917d35242480a05c2897064da0a80589a2a0476c9a3f2fdc83b53502e917"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -3241,7 +3258,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3275,7 +3292,7 @@ dependencies = [
|
||||
"getrandom 0.3.4",
|
||||
"once_cell",
|
||||
"rustix 1.1.2",
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3294,40 +3311,6 @@ dependencies = [
|
||||
"unicode-width 0.1.14",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tesseract"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ee0c2c608b63817b095f7fded5c50add36a29e2be2b2fc4901357163329290a"
|
||||
dependencies = [
|
||||
"tesseract-plumbing",
|
||||
"tesseract-sys",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tesseract-plumbing"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e496d3e29eba540a276975394b85dccb5fd344b3eefb743d9286c8150f766d5"
|
||||
dependencies = [
|
||||
"leptonica-plumbing",
|
||||
"tesseract-sys",
|
||||
"thiserror 1.0.69",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tesseract-sys"
|
||||
version = "0.5.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bd33f6f216124cfaf0fa86c2c0cdf04da39b6257bd78c5e44fa4fa98c3a5857b"
|
||||
dependencies = [
|
||||
"bindgen 0.64.0",
|
||||
"leptonica-sys",
|
||||
"pkg-config",
|
||||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thiserror"
|
||||
version = "1.0.69"
|
||||
@@ -3354,7 +3337,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3365,7 +3348,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3463,7 +3446,7 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3589,7 +3572,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3651,9 +3634,9 @@ checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.19"
|
||||
version = "1.0.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d"
|
||||
checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
@@ -3794,7 +3777,7 @@ dependencies = [
|
||||
"log",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
|
||||
@@ -3829,7 +3812,7 @@ checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
"wasm-bindgen-backend",
|
||||
"wasm-bindgen-shared",
|
||||
]
|
||||
@@ -3952,7 +3935,7 @@ version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
|
||||
dependencies = [
|
||||
"windows-sys 0.61.2",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4001,7 +3984,7 @@ checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4012,7 +3995,7 @@ checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4408,7 +4391,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
@@ -4429,7 +4412,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -4449,7 +4432,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
@@ -4483,7 +4466,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.107",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
24
README.md
24
README.md
@@ -72,6 +72,16 @@ G3 includes robust error handling with automatic retry logic:
|
||||
- Conversation history preservation through summaries
|
||||
- Dynamic token allocation for different providers (4k to 200k+ tokens)
|
||||
|
||||
### Interactive Control Commands
|
||||
G3's interactive CLI includes control commands for manual context management:
|
||||
- **`/compact`**: Manually trigger summarization to compact conversation history
|
||||
- **`/thinnify`**: Manually trigger context thinning to replace large tool results with file references
|
||||
- **`/readme`**: Reload README.md and AGENTS.md from disk without restarting
|
||||
- **`/stats`**: Show detailed context and performance statistics
|
||||
- **`/help`**: Display all available control commands
|
||||
|
||||
These commands give you fine-grained control over context management, allowing you to proactively optimize token usage and refresh project documentation. See [Control Commands Documentation](docs/CONTROL_COMMANDS.md) for detailed usage.
|
||||
|
||||
### Tool Ecosystem
|
||||
- **File Operations**: Read, write, and edit files with line-range precision
|
||||
- **Shell Integration**: Execute system commands with output capture
|
||||
@@ -79,6 +89,7 @@ G3 includes robust error handling with automatic retry logic:
|
||||
- **TODO Management**: Read and write TODO lists with markdown checkbox format
|
||||
- **Computer Control** (Experimental): Automate desktop applications
|
||||
- Mouse and keyboard control
|
||||
- macOS Accessibility API for native app automation (via `--macax` flag)
|
||||
- UI element inspection
|
||||
- Screenshot capture and window management
|
||||
- OCR text extraction from images and screen regions
|
||||
@@ -156,6 +167,19 @@ safaridriver --enable # Requires password
|
||||
|
||||
**Usage**: Run G3 with the `--webdriver` flag to enable browser automation tools.
|
||||
|
||||
## macOS Accessibility API Tools
|
||||
|
||||
G3 includes support for controlling macOS applications via the Accessibility API, allowing you to automate native macOS apps.
|
||||
|
||||
**Available Tools**: `macax_list_apps`, `macax_get_frontmost_app`, `macax_activate_app`, `macax_get_ui_tree`, `macax_find_elements`, `macax_click`, `macax_set_value`, `macax_get_value`, `macax_press_key`
|
||||
|
||||
**Setup**: Enable with the `--macax` flag or in config with `macax.enabled = true`. Grant accessibility permissions:
|
||||
- **macOS**: System Preferences → Security & Privacy → Privacy → Accessibility → Add your terminal app
|
||||
|
||||
**For detailed documentation**, see [macOS Accessibility Tools Guide](docs/macax-tools.md).
|
||||
|
||||
**Note**: This is particularly useful for testing and automating apps you're building with G3, as you can add accessibility identifiers to your UI elements.
|
||||
|
||||
## Computer Control (Experimental)
|
||||
|
||||
G3 can interact with your computer's GUI for automation tasks:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
94
crates/g3-cli/src/machine_ui_writer.rs
Normal file
94
crates/g3-cli/src/machine_ui_writer.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
use g3_core::ui_writer::UiWriter;
|
||||
use std::io::{self, Write};
|
||||
|
||||
/// Machine-mode implementation of UiWriter that prints plain, unformatted output
|
||||
/// This is designed for programmatic consumption and outputs everything verbatim
|
||||
pub struct MachineUiWriter;
|
||||
|
||||
impl MachineUiWriter {
|
||||
pub fn new() -> Self {
|
||||
Self
|
||||
}
|
||||
}
|
||||
|
||||
impl UiWriter for MachineUiWriter {
|
||||
fn print(&self, message: &str) {
|
||||
print!("{}", message);
|
||||
}
|
||||
|
||||
fn println(&self, message: &str) {
|
||||
println!("{}", message);
|
||||
}
|
||||
|
||||
fn print_inline(&self, message: &str) {
|
||||
print!("{}", message);
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
|
||||
fn print_system_prompt(&self, prompt: &str) {
|
||||
println!("SYSTEM_PROMPT:");
|
||||
println!("{}", prompt);
|
||||
println!("END_SYSTEM_PROMPT");
|
||||
println!();
|
||||
}
|
||||
|
||||
fn print_context_status(&self, message: &str) {
|
||||
println!("CONTEXT_STATUS: {}", message);
|
||||
}
|
||||
|
||||
fn print_context_thinning(&self, message: &str) {
|
||||
println!("CONTEXT_THINNING: {}", message);
|
||||
}
|
||||
|
||||
fn print_tool_header(&self, tool_name: &str) {
|
||||
println!("TOOL_CALL: {}", tool_name);
|
||||
}
|
||||
|
||||
fn print_tool_arg(&self, key: &str, value: &str) {
|
||||
println!("TOOL_ARG: {} = {}", key, value);
|
||||
}
|
||||
|
||||
fn print_tool_output_header(&self) {
|
||||
println!("TOOL_OUTPUT:");
|
||||
}
|
||||
|
||||
fn update_tool_output_line(&self, line: &str) {
|
||||
println!("{}", line);
|
||||
}
|
||||
|
||||
fn print_tool_output_line(&self, line: &str) {
|
||||
println!("{}", line);
|
||||
}
|
||||
|
||||
fn print_tool_output_summary(&self, count: usize) {
|
||||
println!("TOOL_OUTPUT_LINES: {}", count);
|
||||
}
|
||||
|
||||
fn print_tool_timing(&self, duration_str: &str) {
|
||||
println!("TOOL_DURATION: {}", duration_str);
|
||||
println!("END_TOOL_OUTPUT");
|
||||
println!();
|
||||
}
|
||||
|
||||
fn print_agent_prompt(&self) {
|
||||
println!("AGENT_RESPONSE:");
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
|
||||
fn print_agent_response(&self, content: &str) {
|
||||
print!("{}", content);
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
|
||||
fn notify_sse_received(&self) {
|
||||
// No-op for machine mode
|
||||
}
|
||||
|
||||
fn flush(&self) {
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
|
||||
fn wants_full_output(&self) -> bool {
|
||||
true // Machine mode wants complete, untruncated output
|
||||
}
|
||||
}
|
||||
@@ -267,23 +267,23 @@ impl TerminalState {
|
||||
let mut current_text = String::new();
|
||||
|
||||
// Check for headers first
|
||||
if line.starts_with("### ") {
|
||||
if let Some(stripped) = line.strip_prefix("### ") {
|
||||
return Line::from(Span::styled(
|
||||
format!(" {}", &line[4..]),
|
||||
format!(" {}", stripped),
|
||||
Style::default()
|
||||
.fg(self.theme.terminal_cyan.to_color())
|
||||
.add_modifier(Modifier::BOLD | Modifier::UNDERLINED),
|
||||
));
|
||||
} else if line.starts_with("## ") {
|
||||
} else if let Some(stripped) = line.strip_prefix("## ") {
|
||||
return Line::from(Span::styled(
|
||||
format!(" {}", &line[3..]),
|
||||
format!(" {}", stripped),
|
||||
Style::default()
|
||||
.fg(self.theme.terminal_amber.to_color())
|
||||
.add_modifier(Modifier::BOLD),
|
||||
));
|
||||
} else if line.starts_with("# ") {
|
||||
} else if let Some(stripped) = line.strip_prefix("# ") {
|
||||
return Line::from(Span::styled(
|
||||
format!(" {}", &line[2..]),
|
||||
format!(" {}", stripped),
|
||||
Style::default()
|
||||
.fg(self.theme.terminal_green.to_color())
|
||||
.add_modifier(Modifier::BOLD),
|
||||
@@ -343,7 +343,7 @@ impl TerminalState {
|
||||
}
|
||||
// Find closing *
|
||||
let mut italic_text = String::new();
|
||||
while let Some(ch) = chars.next() {
|
||||
for ch in chars.by_ref() {
|
||||
if ch == '*' {
|
||||
break;
|
||||
}
|
||||
@@ -367,7 +367,7 @@ impl TerminalState {
|
||||
}
|
||||
// Find closing `
|
||||
let mut code_text = String::new();
|
||||
while let Some(ch) = chars.next() {
|
||||
for ch in chars.by_ref() {
|
||||
if ch == '`' {
|
||||
break;
|
||||
}
|
||||
@@ -612,12 +612,10 @@ impl RetroTui {
|
||||
}
|
||||
|
||||
// Update status blink only if status is "PROCESSING"
|
||||
if state.status_line == "PROCESSING" {
|
||||
if state.last_status_blink.elapsed() > Duration::from_millis(500) {
|
||||
if state.status_line == "PROCESSING" && state.last_status_blink.elapsed() > Duration::from_millis(500) {
|
||||
state.status_blink = !state.status_blink;
|
||||
state.last_status_blink = Instant::now();
|
||||
}
|
||||
}
|
||||
|
||||
// Update activity area animation
|
||||
let animation_speed = 0.15; // Adjust for faster/slower animation
|
||||
@@ -771,12 +769,7 @@ impl RetroTui {
|
||||
let total_cursor_pos = cursor_position;
|
||||
|
||||
// Determine the window into the buffer we should show
|
||||
let window_start = if total_cursor_pos > available_width - 1 {
|
||||
// Cursor is beyond the visible area, scroll the view
|
||||
total_cursor_pos - (available_width - 1)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let window_start = total_cursor_pos.saturating_sub(available_width - 1);
|
||||
|
||||
// Get the visible portion of the buffer
|
||||
let visible_buffer: String = input_buffer
|
||||
@@ -1013,9 +1006,9 @@ impl RetroTui {
|
||||
let fade_color = |color: Color| -> Color {
|
||||
match color {
|
||||
Color::Rgb(r, g, b) => {
|
||||
let faded_r = ((r as f32 * opacity) as u8).max(0);
|
||||
let faded_g = ((g as f32 * opacity) as u8).max(0);
|
||||
let faded_b = ((b as f32 * opacity) as u8).max(0);
|
||||
let faded_r = (r as f32 * opacity) as u8;
|
||||
let faded_g = (g as f32 * opacity) as u8;
|
||||
let faded_b = (b as f32 * opacity) as u8;
|
||||
Color::Rgb(faded_r, faded_g, faded_b)
|
||||
}
|
||||
_ => color,
|
||||
@@ -1098,9 +1091,9 @@ impl RetroTui {
|
||||
let fade_color = |color: Color| -> Color {
|
||||
match color {
|
||||
Color::Rgb(r, g, b) => {
|
||||
let faded_r = ((r as f32 * opacity) as u8).max(0);
|
||||
let faded_g = ((g as f32 * opacity) as u8).max(0);
|
||||
let faded_b = ((b as f32 * opacity) as u8).max(0);
|
||||
let faded_r = (r as f32 * opacity) as u8;
|
||||
let faded_g = (g as f32 * opacity) as u8;
|
||||
let faded_b = (b as f32 * opacity) as u8;
|
||||
Color::Rgb(faded_r, faded_g, faded_b)
|
||||
}
|
||||
_ => color,
|
||||
@@ -1176,7 +1169,7 @@ impl RetroTui {
|
||||
}
|
||||
|
||||
// Wave characters for smooth animation
|
||||
let wave_chars = vec!['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'];
|
||||
let wave_chars = ['▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'];
|
||||
|
||||
// Build the wave line
|
||||
let mut wave_line = String::new();
|
||||
@@ -1190,7 +1183,7 @@ impl RetroTui {
|
||||
let idx = wave_data.len().saturating_sub(display_width) + i;
|
||||
|
||||
if idx < wave_data.len() {
|
||||
let value = wave_data[idx].min(1.0).max(0.0);
|
||||
let value = wave_data[idx].clamp(0.0, 1.0);
|
||||
let char_idx = ((value * 7.0) as usize).min(7);
|
||||
wave_line.push(wave_chars[char_idx]);
|
||||
} else {
|
||||
@@ -1206,8 +1199,6 @@ impl RetroTui {
|
||||
f.render_widget(wave_paragraph, area);
|
||||
}
|
||||
|
||||
/// Draw the status bar
|
||||
|
||||
/// Draw the status bar
|
||||
fn draw_status_bar(
|
||||
f: &mut Frame,
|
||||
|
||||
32
crates/g3-cli/src/simple_output.rs
Normal file
32
crates/g3-cli/src/simple_output.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
/// Minimal stdout printer that stays silent while machine mode is enabled.
pub struct SimpleOutput {
    /// When `true`, `print` and `print_smart` write nothing.
    machine_mode: bool,
}

impl SimpleOutput {
    /// Builds a printer with machine mode switched off (messages are shown).
    pub fn new() -> Self {
        Self::new_with_mode(false)
    }

    /// Builds a printer with machine mode set to the given value.
    pub fn new_with_mode(machine_mode: bool) -> Self {
        Self { machine_mode }
    }

    /// Writes `message` followed by a newline to stdout, unless machine mode is on.
    pub fn print(&self, message: &str) {
        if self.machine_mode {
            return;
        }
        println!("{}", message);
    }

    /// Currently produces exactly the same output as `print`.
    pub fn print_smart(&self, message: &str) {
        self.print(message);
    }
}

impl Default for SimpleOutput {
    /// Equivalent to `SimpleOutput::new()`: machine mode off.
    fn default() -> Self {
        Self::new_with_mode(false)
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
use crossterm::style::Color;
|
||||
use crossterm::style::{SetForegroundColor, ResetColor};
|
||||
use std::io::{self, Write};
|
||||
use termimad::MadSkin;
|
||||
|
||||
/// Simple output handler with markdown support
|
||||
@@ -40,7 +41,7 @@ impl SimpleOutput {
|
||||
trimmed.starts_with("* ") ||
|
||||
trimmed.starts_with("+ ") ||
|
||||
(trimmed.len() > 2 &&
|
||||
trimmed.chars().next().map_or(false, |c| c.is_ascii_digit()) &&
|
||||
trimmed.chars().next().is_some_and(|c| c.is_ascii_digit()) &&
|
||||
trimmed.chars().nth(1) == Some('.') &&
|
||||
trimmed.chars().nth(2) == Some(' ')) ||
|
||||
(trimmed.contains('[') && trimmed.contains("]("))
|
||||
@@ -93,6 +94,37 @@ impl SimpleOutput {
|
||||
print!("{}", ResetColor);
|
||||
println!(" {:.1}% | {}/{} tokens", percentage, used, total);
|
||||
}
|
||||
|
||||
pub fn print_context_thinning(&self, message: &str) {
|
||||
// Animated highlight for context thinning
|
||||
// Use bright cyan/green with a quick flash animation
|
||||
|
||||
// Flash animation: print with bright background, then normal
|
||||
let frames = vec![
|
||||
"\x1b[1;97;46m", // Frame 1: Bold white on cyan background
|
||||
"\x1b[1;97;42m", // Frame 2: Bold white on green background
|
||||
"\x1b[1;96;40m", // Frame 3: Bold cyan on black background
|
||||
];
|
||||
|
||||
println!();
|
||||
|
||||
// Quick flash animation
|
||||
for frame in &frames {
|
||||
print!("\r{} ✨ {} ✨\x1b[0m", frame, message);
|
||||
let _ = io::stdout().flush();
|
||||
std::thread::sleep(std::time::Duration::from_millis(80));
|
||||
}
|
||||
|
||||
// Final display with bright cyan and sparkle emojis
|
||||
print!("\r\x1b[1;96m✨ {} ✨\x1b[0m", message);
|
||||
println!();
|
||||
|
||||
// Add a subtle "success" indicator line
|
||||
println!("\x1b[2;36m └─ Context optimized successfully\x1b[0m");
|
||||
println!();
|
||||
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
use crate::retro_tui::RetroTui;
|
||||
use g3_core::ui_writer::UiWriter;
|
||||
use std::io::{self, Write};
|
||||
use std::sync::Mutex;
|
||||
use std::time::Instant;
|
||||
|
||||
/// Console implementation of UiWriter that prints to stdout
|
||||
pub struct ConsoleUiWriter {
|
||||
@@ -104,6 +102,37 @@ impl UiWriter for ConsoleUiWriter {
|
||||
println!("{}", message);
|
||||
}
|
||||
|
||||
fn print_context_thinning(&self, message: &str) {
|
||||
// Animated highlight for context thinning
|
||||
// Use bright cyan/green with a quick flash animation
|
||||
|
||||
// Flash animation: print with bright background, then normal
|
||||
let frames = vec![
|
||||
"\x1b[1;97;46m", // Frame 1: Bold white on cyan background
|
||||
"\x1b[1;97;42m", // Frame 2: Bold white on green background
|
||||
"\x1b[1;96;40m", // Frame 3: Bold cyan on black background
|
||||
];
|
||||
|
||||
println!();
|
||||
|
||||
// Quick flash animation
|
||||
for frame in &frames {
|
||||
print!("\r{} ✨ {} ✨\x1b[0m", frame, message);
|
||||
let _ = io::stdout().flush();
|
||||
std::thread::sleep(std::time::Duration::from_millis(80));
|
||||
}
|
||||
|
||||
// Final display with bright cyan and sparkle emojis
|
||||
print!("\r\x1b[1;96m✨ {} ✨\x1b[0m", message);
|
||||
println!();
|
||||
|
||||
// Add a subtle "success" indicator line
|
||||
println!("\x1b[2;36m └─ Context optimized successfully\x1b[0m");
|
||||
println!();
|
||||
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
|
||||
fn print_tool_header(&self, tool_name: &str) {
|
||||
// Store the tool name and clear args for collection
|
||||
*self.current_tool_name.lock().unwrap() = Some(tool_name.to_string());
|
||||
@@ -115,7 +144,6 @@ impl UiWriter for ConsoleUiWriter {
|
||||
|
||||
// For todo tools, we'll skip the normal header and print a custom one later
|
||||
if is_todo {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,7 +191,12 @@ impl UiWriter for ConsoleUiWriter {
|
||||
|
||||
// Truncate long values for display
|
||||
let display_value = if first_line.len() > 80 {
|
||||
format!("{}...", &first_line[..77])
|
||||
// Use char_indices to safely truncate at character boundary
|
||||
let truncate_at = first_line.char_indices()
|
||||
.nth(77)
|
||||
.map(|(i, _)| i)
|
||||
.unwrap_or(first_line.len());
|
||||
format!("{}...", &first_line[..truncate_at])
|
||||
} else {
|
||||
first_line.to_string()
|
||||
};
|
||||
@@ -312,223 +345,3 @@ impl UiWriter for ConsoleUiWriter {
|
||||
}
|
||||
}
|
||||
|
||||
/// RetroTui implementation of UiWriter that sends output to the TUI
|
||||
pub struct RetroTuiWriter {
|
||||
tui: RetroTui,
|
||||
current_tool_name: Mutex<Option<String>>,
|
||||
current_tool_output: Mutex<Vec<String>>,
|
||||
current_tool_start: Mutex<Option<Instant>>,
|
||||
current_tool_caption: Mutex<String>,
|
||||
}
|
||||
|
||||
impl RetroTuiWriter {
|
||||
pub fn new(tui: RetroTui) -> Self {
|
||||
Self {
|
||||
tui,
|
||||
current_tool_name: Mutex::new(None),
|
||||
current_tool_output: Mutex::new(Vec::new()),
|
||||
current_tool_start: Mutex::new(None),
|
||||
current_tool_caption: Mutex::new(String::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl UiWriter for RetroTuiWriter {
|
||||
fn print(&self, message: &str) {
|
||||
self.tui.output(message);
|
||||
}
|
||||
|
||||
fn println(&self, message: &str) {
|
||||
self.tui.output(message);
|
||||
}
|
||||
|
||||
fn print_inline(&self, message: &str) {
|
||||
// For inline printing, we'll just append to the output
|
||||
self.tui.output(message);
|
||||
}
|
||||
|
||||
fn print_system_prompt(&self, prompt: &str) {
|
||||
self.tui.output("🔍 System Prompt:");
|
||||
self.tui.output("================");
|
||||
for line in prompt.lines() {
|
||||
self.tui.output(line);
|
||||
}
|
||||
self.tui.output("================");
|
||||
self.tui.output("");
|
||||
}
|
||||
|
||||
fn print_context_status(&self, message: &str) {
|
||||
self.tui.output(message);
|
||||
}
|
||||
|
||||
fn print_tool_header(&self, tool_name: &str) {
|
||||
// Start collecting tool output
|
||||
*self.current_tool_start.lock().unwrap() = Some(Instant::now());
|
||||
*self.current_tool_name.lock().unwrap() = Some(tool_name.to_string());
|
||||
self.current_tool_output.lock().unwrap().clear();
|
||||
self.current_tool_output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(format!("Tool: {}", tool_name));
|
||||
|
||||
// Initialize caption
|
||||
*self.current_tool_caption.lock().unwrap() = String::new();
|
||||
}
|
||||
|
||||
fn print_tool_arg(&self, key: &str, value: &str) {
|
||||
// Filter out any keys that look like they might be agent message content
|
||||
// (e.g., keys that are suspiciously long or contain message-like content)
|
||||
let is_valid_arg_key = key.len() < 50
|
||||
&& !key.contains('\n')
|
||||
&& !key.contains("I'll")
|
||||
&& !key.contains("Let me")
|
||||
&& !key.contains("Here's")
|
||||
&& !key.contains("I can");
|
||||
|
||||
if is_valid_arg_key {
|
||||
self.current_tool_output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(format!("{}: {}", key, value));
|
||||
}
|
||||
|
||||
// Build caption from first argument (usually the most important one)
|
||||
let mut caption = self.current_tool_caption.lock().unwrap();
|
||||
if caption.is_empty() && (key == "file_path" || key == "command" || key == "path") {
|
||||
// Truncate long values for the caption
|
||||
let truncated = if value.len() > 50 {
|
||||
format!("{}...", &value[..47])
|
||||
} else {
|
||||
value.to_string()
|
||||
};
|
||||
|
||||
// Add range information for read_file tool calls
|
||||
let tool_name = self.current_tool_name.lock().unwrap();
|
||||
let range_suffix = if tool_name.as_ref().map_or(false, |name| name == "read_file") {
|
||||
// We need to check if start/end args will be provided - for now just check if this is a partial read
|
||||
// This is a simplified approach since we're building the caption incrementally
|
||||
String::new() // We'll handle this in print_tool_output_header instead
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
*caption = format!("{}{}", truncated, range_suffix);
|
||||
}
|
||||
}
|
||||
|
||||
fn print_tool_output_header(&self) {
|
||||
// This is called right before tool execution starts
|
||||
// Send the initial tool header to the TUI now
|
||||
if let Some(tool_name) = self.current_tool_name.lock().unwrap().as_ref() {
|
||||
let mut caption = self.current_tool_caption.lock().unwrap().clone();
|
||||
|
||||
// Add range information for read_file tool calls
|
||||
if tool_name == "read_file" {
|
||||
// Check the tool output for start/end parameters
|
||||
let output = self.current_tool_output.lock().unwrap();
|
||||
let has_start = output.iter().any(|line| line.starts_with("start:"));
|
||||
let has_end = output.iter().any(|line| line.starts_with("end:"));
|
||||
|
||||
if has_start || has_end {
|
||||
let start_val = output.iter().find(|line| line.starts_with("start:")).map(|line| line.split(':').nth(1).unwrap_or("0").trim()).unwrap_or("0");
|
||||
let end_val = output.iter().find(|line| line.starts_with("end:")).map(|line| line.split(':').nth(1).unwrap_or("end").trim()).unwrap_or("end");
|
||||
caption = format!("{} [{}..{}]", caption, start_val, end_val);
|
||||
}
|
||||
}
|
||||
|
||||
// Send the tool output with initial header
|
||||
self.tui.tool_output(tool_name, &caption, "");
|
||||
}
|
||||
|
||||
self.current_tool_output.lock().unwrap().push(String::new());
|
||||
self.current_tool_output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push("Output:".to_string());
|
||||
}
|
||||
|
||||
fn update_tool_output_line(&self, line: &str) {
|
||||
// For retro mode, we'll just add to the output buffer
|
||||
self.current_tool_output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(line.to_string());
|
||||
}
|
||||
|
||||
fn print_tool_output_line(&self, line: &str) {
|
||||
self.current_tool_output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(line.to_string());
|
||||
}
|
||||
|
||||
fn print_tool_output_summary(&self, hidden_count: usize) {
|
||||
self.current_tool_output.lock().unwrap().push(format!(
|
||||
"... ({} more line{})",
|
||||
hidden_count,
|
||||
if hidden_count == 1 { "" } else { "s" }
|
||||
));
|
||||
}
|
||||
|
||||
fn print_tool_timing(&self, duration_str: &str) {
|
||||
self.current_tool_output
|
||||
.lock()
|
||||
.unwrap()
|
||||
.push(format!("⚡️ {}", duration_str));
|
||||
|
||||
// Calculate the actual duration
|
||||
let duration_ms = if let Some(start) = *self.current_tool_start.lock().unwrap() {
|
||||
start.elapsed().as_millis()
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
// Get the tool name and caption
|
||||
if let Some(tool_name) = self.current_tool_name.lock().unwrap().as_ref() {
|
||||
let content = self.current_tool_output.lock().unwrap().join("\n");
|
||||
let caption = self.current_tool_caption.lock().unwrap().clone();
|
||||
let caption = if caption.is_empty() {
|
||||
"Completed".to_string()
|
||||
} else {
|
||||
caption
|
||||
};
|
||||
|
||||
// Update the tool detail panel with the complete output without adding a new header
|
||||
// This keeps the original header in place to be updated by tool_complete
|
||||
self.tui.update_tool_detail(tool_name, &content);
|
||||
|
||||
// Determine success based on whether there's an error in the output
|
||||
// This is a simple heuristic - you might want to make this more sophisticated
|
||||
let success = !content.contains("error")
|
||||
&& !content.contains("Error")
|
||||
&& !content.contains("ERROR");
|
||||
|
||||
// Send the completion status to update the header
|
||||
self.tui
|
||||
.tool_complete(tool_name, success, duration_ms, &caption);
|
||||
}
|
||||
|
||||
// Clear the buffers
|
||||
*self.current_tool_name.lock().unwrap() = None;
|
||||
self.current_tool_output.lock().unwrap().clear();
|
||||
*self.current_tool_start.lock().unwrap() = None;
|
||||
*self.current_tool_caption.lock().unwrap() = String::new();
|
||||
}
|
||||
|
||||
fn print_agent_prompt(&self) {
|
||||
self.tui.output("\n💬 ");
|
||||
}
|
||||
|
||||
fn print_agent_response(&self, content: &str) {
|
||||
self.tui.output(content);
|
||||
}
|
||||
|
||||
fn notify_sse_received(&self) {
|
||||
// Notify the TUI that an SSE was received
|
||||
self.tui.sse_received();
|
||||
}
|
||||
|
||||
fn flush(&self) {
|
||||
// No-op for TUI since it handles its own rendering
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,9 @@ name = "g3-computer-control"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[build-dependencies]
|
||||
# Only needed for building Swift bridge on macOS
|
||||
|
||||
[dependencies]
|
||||
# Workspace dependencies
|
||||
tokio = { workspace = true }
|
||||
@@ -20,15 +23,13 @@ async-trait = "0.1"
|
||||
# WebDriver support
|
||||
fantoccini = "0.21"
|
||||
|
||||
# OCR dependencies
|
||||
tesseract = "0.14"
|
||||
|
||||
# macOS dependencies
|
||||
[target.'cfg(target_os = "macos")'.dependencies]
|
||||
core-graphics = "0.23"
|
||||
core-foundation = "0.9"
|
||||
core-foundation = "0.10"
|
||||
cocoa = "0.25"
|
||||
objc = "0.2"
|
||||
accessibility = "0.2"
|
||||
image = "0.24"
|
||||
|
||||
# Linux dependencies
|
||||
|
||||
63
crates/g3-computer-control/build.rs
Normal file
63
crates/g3-computer-control/build.rs
Normal file
@@ -0,0 +1,63 @@
|
||||
use std::env;
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
/// Build script entry point.
///
/// On macOS targets it compiles the Swift `vision-bridge` package in release
/// mode, copies `libVisionBridge.dylib` next to the Cargo build artifacts and
/// emits the link directives (library, rpaths, frameworks). On every other
/// target it does nothing. Any failure aborts the build with a panic.
fn main() {
    // Only build Vision bridge on macOS
    if env::var("CARGO_CFG_TARGET_OS").unwrap() != "macos" {
        return;
    }

    println!("cargo:rerun-if-changed=vision-bridge/Sources/VisionBridge/VisionOCR.swift");
    println!("cargo:rerun-if-changed=vision-bridge/Sources/VisionBridge/VisionBridge.h");
    println!("cargo:rerun-if-changed=vision-bridge/Package.swift");

    let manifest_dir = PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap());
    let vision_bridge_dir = manifest_dir.join("vision-bridge");

    // Build Swift package
    println!("cargo:warning=Building VisionBridge Swift package...");
    let build_status = Command::new("swift")
        .args(["build", "-c", "release"])
        .current_dir(&vision_bridge_dir)
        .status()
        .expect("Failed to build Swift package");

    if !build_status.success() {
        panic!("Swift build failed");
    }

    // Find the built library
    let lib_path = vision_bridge_dir
        .join(".build/release")
        .canonicalize()
        .expect("Failed to find .build/release directory");

    // Copy the dylib to the output directory so it can be found at runtime.
    //
    // Derive that directory from OUT_DIR, which Cargo conventionally lays out as
    // `<target-dir>[/<triple>]/<profile>/build/<pkg>-<hash>/out`. Unlike
    // `<workspace>/target/$PROFILE`, this follows CARGO_TARGET_DIR, `--target`
    // and custom profiles (PROFILE only ever reports "debug" or "release").
    // NOTE(review): the OUT_DIR layout is a Cargo convention, not a documented
    // guarantee; the previous derivation is kept as a fallback.
    let output_dir = env::var("OUT_DIR")
        .ok()
        .map(PathBuf::from)
        .and_then(|out_dir| out_dir.ancestors().nth(3).map(|dir| dir.to_path_buf()))
        .unwrap_or_else(|| {
            let target_dir = manifest_dir.parent().unwrap().parent().unwrap().join("target");
            let profile = env::var("PROFILE").unwrap_or_else(|_| "debug".to_string());
            target_dir.join(profile)
        });
    std::fs::create_dir_all(&output_dir).unwrap_or_else(|err| {
        panic!("Failed to create output directory {}: {}", output_dir.display(), err)
    });

    let dylib_src = lib_path.join("libVisionBridge.dylib");
    let dylib_dst = output_dir.join("libVisionBridge.dylib");

    // Build the panic message lazily and include the underlying I/O error.
    std::fs::copy(&dylib_src, &dylib_dst).unwrap_or_else(|err| {
        panic!(
            "Failed to copy dylib from {} to {}: {}",
            dylib_src.display(),
            dylib_dst.display(),
            err
        )
    });

    println!("cargo:warning=Copied libVisionBridge.dylib to {}", dylib_dst.display());

    // Add rpath so the dylib can be found at runtime
    println!("cargo:rustc-link-arg=-Wl,-rpath,@executable_path");
    println!("cargo:rustc-link-arg=-Wl,-rpath,@loader_path");
    println!("cargo:rustc-link-search=native={}", lib_path.display());
    println!("cargo:rustc-link-lib=dylib=VisionBridge");

    // Link required frameworks
    println!("cargo:rustc-link-lib=framework=Vision");
    println!("cargo:rustc-link-lib=framework=AppKit");
    println!("cargo:rustc-link-lib=framework=Foundation");
    println!("cargo:rustc-link-lib=framework=CoreGraphics");
    println!("cargo:rustc-link-lib=framework=CoreImage");

    println!("cargo:warning=VisionBridge built successfully at {}", lib_path.display());
}
|
||||
@@ -1,7 +1,7 @@
|
||||
use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo};
|
||||
use core_foundation::dictionary::CFDictionary;
|
||||
use core_foundation::string::CFString;
|
||||
use core_foundation::base::TCFType;
|
||||
use core_foundation::base::{TCFType, ToVoid};
|
||||
|
||||
fn main() {
|
||||
println!("Listing all on-screen windows...");
|
||||
@@ -22,7 +22,7 @@ fn main() {
|
||||
|
||||
// Get window ID
|
||||
let window_id_key = CFString::from_static_string("kCGWindowNumber");
|
||||
let window_id: i64 = if let Some(value) = dict.find(window_id_key.as_concrete_TypeRef()) {
|
||||
let window_id: i64 = if let Some(value) = dict.find(window_id_key.to_void()) {
|
||||
let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
num.to_i64().unwrap_or(0)
|
||||
} else {
|
||||
@@ -31,7 +31,7 @@ fn main() {
|
||||
|
||||
// Get owner name
|
||||
let owner_key = CFString::from_static_string("kCGWindowOwnerName");
|
||||
let owner: String = if let Some(value) = dict.find(owner_key.as_concrete_TypeRef()) {
|
||||
let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
|
||||
let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
s.to_string()
|
||||
} else {
|
||||
@@ -40,15 +40,15 @@ fn main() {
|
||||
|
||||
// Get window name/title
|
||||
let name_key = CFString::from_static_string("kCGWindowName");
|
||||
let title: String = if let Some(value) = dict.find(name_key.as_concrete_TypeRef()) {
|
||||
let title: String = if let Some(value) = dict.find(name_key.to_void()) {
|
||||
let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
s.to_string()
|
||||
} else {
|
||||
"".to_string()
|
||||
};
|
||||
|
||||
// Filter for iTerm or show all
|
||||
if owner.contains("iTerm") || owner.contains("Terminal") {
|
||||
// Show all windows
|
||||
if !owner.is_empty() {
|
||||
println!("{:<10} {:<25} {}", window_id, owner, title);
|
||||
}
|
||||
}
|
||||
|
||||
74
crates/g3-computer-control/examples/macax_demo.rs
Normal file
74
crates/g3-computer-control/examples/macax_demo.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
//! Example demonstrating macOS Accessibility API tools
|
||||
//!
|
||||
//! This example shows how to use the macax tools to control macOS applications.
|
||||
//!
|
||||
//! Run with: cargo run --example macax_demo
|
||||
|
||||
use anyhow::Result;
|
||||
use g3_computer_control::MacAxController;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
println!("🍎 macOS Accessibility API Demo\n");
|
||||
println!("This demo shows how to control macOS applications using the Accessibility API.\n");
|
||||
|
||||
// Create controller
|
||||
let controller = MacAxController::new()?;
|
||||
println!("✅ MacAxController initialized\n");
|
||||
|
||||
// List running applications
|
||||
println!("📱 Listing running applications:");
|
||||
match controller.list_applications() {
|
||||
Ok(apps) => {
|
||||
for app in apps.iter().take(10) {
|
||||
println!(" - {}", app.name);
|
||||
}
|
||||
if apps.len() > 10 {
|
||||
println!(" ... and {} more", apps.len() - 10);
|
||||
}
|
||||
}
|
||||
Err(e) => println!(" ❌ Error: {}", e),
|
||||
}
|
||||
println!();
|
||||
|
||||
// Get frontmost app
|
||||
println!("🎯 Getting frontmost application:");
|
||||
match controller.get_frontmost_app() {
|
||||
Ok(app) => println!(" Current: {}", app.name),
|
||||
Err(e) => println!(" ❌ Error: {}", e),
|
||||
}
|
||||
println!();
|
||||
|
||||
// Example: Activate Finder and get its UI tree
|
||||
println!("📂 Activating Finder and inspecting UI:");
|
||||
match controller.activate_app("Finder") {
|
||||
Ok(_) => {
|
||||
println!(" ✅ Finder activated");
|
||||
|
||||
// Wait a moment for activation
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(500)).await;
|
||||
|
||||
// Get UI tree
|
||||
match controller.get_ui_tree("Finder", 2) {
|
||||
Ok(tree) => {
|
||||
println!("\n UI Tree:");
|
||||
for line in tree.lines().take(10) {
|
||||
println!(" {}", line);
|
||||
}
|
||||
}
|
||||
Err(e) => println!(" ❌ Error getting UI tree: {}", e),
|
||||
}
|
||||
}
|
||||
Err(e) => println!(" ❌ Error: {}", e),
|
||||
}
|
||||
println!();
|
||||
|
||||
println!("✨ Demo complete!\n");
|
||||
println!("💡 Tips:");
|
||||
println!(" - Use --macax flag with g3 to enable these tools");
|
||||
println!(" - Grant accessibility permissions in System Preferences");
|
||||
println!(" - Add accessibility identifiers to your apps for easier automation");
|
||||
println!(" - See docs/macax-tools.md for full documentation\n");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -31,7 +31,7 @@ async fn main() -> Result<()> {
|
||||
|
||||
// Find an element
|
||||
println!("Finding h1 element...");
|
||||
let mut h1 = driver.find_element("h1").await?;
|
||||
let h1 = driver.find_element("h1").await?;
|
||||
let h1_text = h1.text().await?;
|
||||
println!("H1 text: {}\n", h1_text);
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use g3_computer_control::{create_controller, ComputerController};
|
||||
use g3_computer_control::create_controller;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use core_graphics::display::CGDisplay;
|
||||
use image::{ImageBuffer, RgbaImage};
|
||||
use std::path::Path;
|
||||
|
||||
fn main() {
|
||||
let display = CGDisplay::main();
|
||||
|
||||
48
crates/g3-computer-control/examples/test_type_text.rs
Normal file
48
crates/g3-computer-control/examples/test_type_text.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
//! Test the new type_text functionality
|
||||
|
||||
use anyhow::Result;
|
||||
use g3_computer_control::MacAxController;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
println!("🧪 Testing macax type_text functionality\n");
|
||||
|
||||
let controller = MacAxController::new()?;
|
||||
println!("✅ Controller initialized\n");
|
||||
|
||||
// Test 1: Type simple text
|
||||
println!("Test 1: Typing simple text into TextEdit");
|
||||
println!(" Please open TextEdit and create a new document...");
|
||||
std::thread::sleep(std::time::Duration::from_secs(3));
|
||||
|
||||
match controller.type_text("TextEdit", "Hello, World!") {
|
||||
Ok(_) => println!(" ✅ Successfully typed simple text\n"),
|
||||
Err(e) => println!(" ❌ Failed: {}\n", e),
|
||||
}
|
||||
|
||||
std::thread::sleep(std::time::Duration::from_secs(1));
|
||||
|
||||
// Test 2: Type unicode and emojis
|
||||
println!("Test 2: Typing unicode and emojis");
|
||||
match controller.type_text("TextEdit", "\n🌟 Unicode test: café, naïve, 日本語 🎉") {
|
||||
Ok(_) => println!(" ✅ Successfully typed unicode text\n"),
|
||||
Err(e) => println!(" ❌ Failed: {}\n", e),
|
||||
}
|
||||
|
||||
std::thread::sleep(std::time::Duration::from_secs(1));
|
||||
|
||||
// Test 3: Type special characters
|
||||
println!("Test 3: Typing special characters");
|
||||
match controller.type_text("TextEdit", "\nSpecial: @#$%^&*()_+-=[]{}|;':,.<>?/") {
|
||||
Ok(_) => println!(" ✅ Successfully typed special characters\n"),
|
||||
Err(e) => println!(" ❌ Failed: {}\n", e),
|
||||
}
|
||||
|
||||
println!("\n✨ Tests complete!");
|
||||
println!("\n💡 Now try with Things3:");
|
||||
println!(" 1. Open Things3");
|
||||
println!(" 2. Press Cmd+N to create a new task");
|
||||
println!(" 3. Run: g3 --macax 'type \"🌟 My awesome task\" into Things'");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
85
crates/g3-computer-control/examples/test_vision.rs
Normal file
85
crates/g3-computer-control/examples/test_vision.rs
Normal file
@@ -0,0 +1,85 @@
|
||||
use g3_computer_control::ocr::{OCREngine, DefaultOCR};
|
||||
use anyhow::Result;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
println!("🧪 Testing Apple Vision OCR");
|
||||
println!("===========================\n");
|
||||
|
||||
// Initialize OCR engine
|
||||
println!("📦 Initializing OCR engine...");
|
||||
let ocr = DefaultOCR::new()?;
|
||||
println!("✅ OCR engine: {}\n", ocr.name());
|
||||
|
||||
// Check if test image exists
|
||||
let test_image = "/tmp/safari_test.png";
|
||||
if !std::path::Path::new(test_image).exists() {
|
||||
println!("⚠️ Test image not found: {}", test_image);
|
||||
println!(" Creating a screenshot...");
|
||||
|
||||
let status = std::process::Command::new("screencapture")
|
||||
.arg("-x")
|
||||
.arg("-R")
|
||||
.arg("0,0,1200,800")
|
||||
.arg(test_image)
|
||||
.status()?;
|
||||
|
||||
if !status.success() {
|
||||
anyhow::bail!("Failed to create screenshot");
|
||||
}
|
||||
|
||||
println!("✅ Screenshot created\n");
|
||||
}
|
||||
|
||||
// Run OCR
|
||||
println!("🔍 Running Apple Vision OCR on {}...", test_image);
|
||||
let start = std::time::Instant::now();
|
||||
let locations = ocr.extract_text_with_locations(test_image).await?;
|
||||
let duration = start.elapsed();
|
||||
|
||||
println!("✅ OCR completed in {:.3}s\n", duration.as_secs_f64());
|
||||
|
||||
// Display results
|
||||
println!("📊 Results:");
|
||||
println!(" Found {} text elements\n", locations.len());
|
||||
|
||||
if locations.is_empty() {
|
||||
println!("⚠️ No text found in image");
|
||||
} else {
|
||||
println!(" Top 20 results:");
|
||||
println!(" {:<4} {:<40} {:<15} {:<12} {:<8}", "#", "Text", "Position", "Size", "Conf");
|
||||
println!(" {}", "-".repeat(85));
|
||||
|
||||
for (i, loc) in locations.iter().take(20).enumerate() {
|
||||
let text = if loc.text.len() > 37 {
|
||||
format!("{}...", &loc.text[..37])
|
||||
} else {
|
||||
loc.text.clone()
|
||||
};
|
||||
|
||||
println!(" {:<4} {:<40} ({:>4},{:>4}) {:>4}x{:<4} {:.2}",
|
||||
i + 1,
|
||||
text,
|
||||
loc.x,
|
||||
loc.y,
|
||||
loc.width,
|
||||
loc.height,
|
||||
loc.confidence
|
||||
);
|
||||
}
|
||||
|
||||
if locations.len() > 20 {
|
||||
println!("\n ... and {} more", locations.len() - 20);
|
||||
}
|
||||
|
||||
// Performance comparison
|
||||
println!("\n📈 Performance:");
|
||||
println!(" OCR Speed: {:.3}s", duration.as_secs_f64());
|
||||
println!(" Text elements: {}", locations.len());
|
||||
println!(" Avg per element: {:.1}ms", duration.as_millis() as f64 / locations.len() as f64);
|
||||
}
|
||||
|
||||
println!("\n✅ Test complete!");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -1,10 +1,18 @@
|
||||
// Suppress warnings from objc crate macros
|
||||
#![allow(unexpected_cfgs)]
|
||||
|
||||
pub mod types;
|
||||
pub mod platform;
|
||||
pub mod ocr;
|
||||
pub mod webdriver;
|
||||
pub mod macax;
|
||||
|
||||
// Re-export webdriver types for convenience
|
||||
pub use webdriver::{WebDriverController, WebElement, safari::SafariDriver};
|
||||
|
||||
// Re-export macax types for convenience
|
||||
pub use macax::{MacAxController, AXElement, AXApplication};
|
||||
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use types::*;
|
||||
@@ -15,8 +23,14 @@ pub trait ComputerController: Send + Sync {
|
||||
async fn take_screenshot(&self, path: &str, region: Option<Rect>, window_id: Option<&str>) -> Result<()>;
|
||||
|
||||
// OCR operations
|
||||
async fn extract_text_from_screen(&self, region: Rect) -> Result<String>;
|
||||
async fn extract_text_from_screen(&self, region: Rect, window_id: &str) -> Result<String>;
|
||||
async fn extract_text_from_image(&self, path: &str) -> Result<String>;
|
||||
async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>>;
|
||||
async fn find_text_in_app(&self, app_name: &str, search_text: &str) -> Result<Option<TextLocation>>;
|
||||
|
||||
// Mouse operations
|
||||
fn move_mouse(&self, x: i32, y: i32) -> Result<()>;
|
||||
fn click_at(&self, x: i32, y: i32, app_name: Option<&str>) -> Result<()>;
|
||||
}
|
||||
|
||||
// Platform-specific constructor
|
||||
|
||||
822
crates/g3-computer-control/src/macax/controller.rs
Normal file
822
crates/g3-computer-control/src/macax/controller.rs
Normal file
@@ -0,0 +1,822 @@
|
||||
use super::{AXApplication, AXElement};
|
||||
use anyhow::{Context, Result};
|
||||
use std::collections::HashMap;
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
use accessibility::{AXUIElement, AXUIElementAttributes, ElementFinder, TreeVisitor, TreeWalker, TreeWalkerFlow};
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
use core_foundation::base::TCFType;
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
use core_foundation::string::CFString;
|
||||
|
||||
/// Controller that drives applications through the native macOS
/// Accessibility (AX) API.
///
/// On non-macOS targets the type still exists so that callers compile, but
/// `new()` always fails and every method is a stub returning an error.
pub struct MacAxController {
    /// Cache of application-level accessibility elements, keyed by the
    /// application name they were looked up with.
    ///
    /// `AXUIElement` is only imported on macOS, so the field has to be
    /// compiled out everywhere else; otherwise the struct itself would not
    /// build on other targets.
    #[cfg(target_os = "macos")]
    app_cache: std::sync::Mutex<HashMap<String, AXUIElement>>,
}
|
||||
|
||||
impl MacAxController {
|
||||
pub fn new() -> Result<Self> {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
// Check if we have accessibility permissions by trying to get system-wide element
|
||||
let _system = AXUIElement::system_wide();
|
||||
|
||||
Ok(Self {
|
||||
app_cache: std::sync::Mutex::new(HashMap::new()),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
{
|
||||
anyhow::bail!("macOS Accessibility API is only available on macOS")
|
||||
}
|
||||
}
|
||||
|
||||
/// List all running applications
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn list_applications(&self) -> Result<Vec<AXApplication>> {
|
||||
let apps = Self::get_running_applications()?;
|
||||
Ok(apps)
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn list_applications(&self) -> Result<Vec<AXApplication>> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Enumerate `NSWorkspace.runningApplications` and describe every
/// application whose activation policy is "regular".
///
/// Applications without a readable localized name are skipped. A missing
/// bundle identifier is reported as `None`.
#[cfg(target_os = "macos")]
fn get_running_applications() -> Result<Vec<AXApplication>> {
    use cocoa::appkit::NSApplicationActivationPolicy;
    use cocoa::base::{id, nil};
    use objc::{class, msg_send, sel, sel_impl};

    unsafe {
        // Copy an NSString into an owned Rust string; `None` when the object
        // is nil or exposes no UTF-8 buffer.
        let copy_nsstring = |ns_string: id| -> Option<String> {
            if ns_string == nil {
                return None;
            }
            let utf8: *const i8 = msg_send![ns_string, UTF8String];
            if utf8.is_null() {
                return None;
            }
            Some(std::ffi::CStr::from_ptr(utf8).to_string_lossy().to_string())
        };

        let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace];
        let running: id = msg_send![workspace, runningApplications];
        let total: usize = msg_send![running, count];

        let mut collected = Vec::new();

        for index in 0..total {
            let entry: id = msg_send![running, objectAtIndex: index];

            // A usable display name is mandatory.
            let localized: id = msg_send![entry, localizedName];
            let name = match copy_nsstring(localized) {
                Some(text) => text,
                None => continue,
            };

            // The bundle identifier is optional.
            let bundle_obj: id = msg_send![entry, bundleIdentifier];
            let bundle_id = copy_nsstring(bundle_obj);

            let pid: i32 = msg_send![entry, processIdentifier];

            // Keep only regular applications; background-only ones are skipped.
            let policy: i64 = msg_send![entry, activationPolicy];
            let is_regular = policy
                == NSApplicationActivationPolicy::NSApplicationActivationPolicyRegular as i64;
            if !is_regular {
                continue;
            }

            collected.push(AXApplication { name, bundle_id, pid });
        }

        Ok(collected)
    }
}
|
||||
|
||||
/// Get the frontmost (active) application
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn get_frontmost_app(&self) -> Result<AXApplication> {
|
||||
use cocoa::base::{id, nil};
|
||||
use objc::{class, msg_send, sel, sel_impl};
|
||||
|
||||
unsafe {
|
||||
let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace];
|
||||
let frontmost_app: id = msg_send![workspace, frontmostApplication];
|
||||
|
||||
if frontmost_app == nil {
|
||||
anyhow::bail!("No frontmost application");
|
||||
}
|
||||
|
||||
// Get app name
|
||||
let localized_name: id = msg_send![frontmost_app, localizedName];
|
||||
let name_ptr: *const i8 = msg_send![localized_name, UTF8String];
|
||||
let name = std::ffi::CStr::from_ptr(name_ptr)
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
|
||||
// Get bundle ID
|
||||
let bundle_id_obj: id = msg_send![frontmost_app, bundleIdentifier];
|
||||
let bundle_id = if bundle_id_obj != nil {
|
||||
let bundle_id_ptr: *const i8 = msg_send![bundle_id_obj, UTF8String];
|
||||
if !bundle_id_ptr.is_null() {
|
||||
Some(
|
||||
std::ffi::CStr::from_ptr(bundle_id_ptr)
|
||||
.to_string_lossy()
|
||||
.to_string(),
|
||||
)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Get PID
|
||||
let pid: i32 = msg_send![frontmost_app, processIdentifier];
|
||||
|
||||
Ok(AXApplication {
|
||||
name,
|
||||
bundle_id,
|
||||
pid,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn get_frontmost_app(&self) -> Result<AXApplication> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Get the application-level `AXUIElement` for the application named
/// `app_name`, consulting the per-controller cache first.
///
/// A cached element is reused only if it still answers a role query. An
/// element created for a process that has since exited (for example, the
/// app was quit and relaunched under a new PID) fails that query and is
/// replaced by a fresh lookup instead of being returned forever.
///
/// # Errors
/// Fails when no running application has exactly this name.
#[cfg(target_os = "macos")]
fn get_app_element(&self, app_name: &str) -> Result<AXUIElement> {
    // Clone the cached entry out so the lock is not held during the
    // liveness probe.
    let cached = {
        let cache = self.app_cache.lock().unwrap();
        cache.get(app_name).cloned()
    };

    if let Some(element) = cached {
        if element.role().is_ok() {
            return Ok(element);
        }
        // Stale entry: fall through and resolve the application again.
    }

    // Find the app by name
    let apps = Self::get_running_applications()?;
    let app = apps
        .iter()
        .find(|a| a.name == app_name)
        .ok_or_else(|| anyhow::anyhow!("Application '{}' not found", app_name))?;

    // Create AXUIElement for the app
    let element = AXUIElement::application(app.pid);

    // Cache it (this also overwrites any stale entry for the same name)
    {
        let mut cache = self.app_cache.lock().unwrap();
        cache.insert(app_name.to_string(), element.clone());
    }

    Ok(element)
}
|
||||
|
||||
/// Activate (bring to front) an application
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn activate_app(&self, app_name: &str) -> Result<()> {
|
||||
use cocoa::base::id;
|
||||
use objc::{class, msg_send, sel, sel_impl};
|
||||
|
||||
// Find the app
|
||||
let apps = Self::get_running_applications()?;
|
||||
let app = apps
|
||||
.iter()
|
||||
.find(|a| a.name == app_name)
|
||||
.ok_or_else(|| anyhow::anyhow!("Application '{}' not found", app_name))?;
|
||||
|
||||
unsafe {
|
||||
let workspace: id = msg_send![class!(NSWorkspace), sharedWorkspace];
|
||||
let running_apps: id = msg_send![workspace, runningApplications];
|
||||
let count: usize = msg_send![running_apps, count];
|
||||
|
||||
for i in 0..count {
|
||||
let running_app: id = msg_send![running_apps, objectAtIndex: i];
|
||||
let pid: i32 = msg_send![running_app, processIdentifier];
|
||||
|
||||
if pid == app.pid {
|
||||
let _: bool = msg_send![running_app, activateWithOptions: 0];
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
anyhow::bail!("Failed to activate application")
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn activate_app(&self, _app_name: &str) -> Result<()> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Get the UI hierarchy of an application
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn get_ui_tree(&self, app_name: &str, max_depth: usize) -> Result<String> {
|
||||
let app_element = self.get_app_element(app_name)?;
|
||||
let mut output = format!("Application: {}\n", app_name);
|
||||
|
||||
Self::build_ui_tree(&app_element, &mut output, 0, max_depth)?;
|
||||
|
||||
Ok(output)
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn get_ui_tree(&self, _app_name: &str, _max_depth: usize) -> Result<String> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Append one line describing `element` to `output`, then recurse into its
/// children.
///
/// Each line is indented by `depth` spaces and has the form
/// `Role: <role>[, Title: <title>][, ID: <identifier>]`. Elements at
/// `depth >= max_depth` are not written. An unreadable role is printed as
/// `Unknown`; unreadable titles and identifiers are omitted.
#[cfg(target_os = "macos")]
fn build_ui_tree(
    element: &AXUIElement,
    output: &mut String,
    depth: usize,
    max_depth: usize,
) -> Result<()> {
    if depth >= max_depth {
        return Ok(());
    }

    // Query the three attributes in the same order as before: role, title,
    // identifier.
    let role = match element.role() {
        Ok(value) => value.to_string(),
        Err(_) => "Unknown".to_string(),
    };
    let title = element.title().ok().map(|s| s.to_string());
    let identifier = element.identifier().ok().map(|s| s.to_string());

    // Assemble the whole line first, then append it in one go.
    let mut line = format!("{}Role: {}", " ".repeat(depth), role);
    if let Some(text) = title {
        line.push_str(&format!(", Title: {}", text));
    }
    if let Some(text) = identifier {
        line.push_str(&format!(", ID: {}", text));
    }
    line.push('\n');
    output.push_str(&line);

    // Descend into the children, if the attribute is readable. Failures in
    // a subtree are ignored so the remaining siblings are still rendered.
    if let Ok(children) = element.children() {
        for child in children.iter() {
            let _ = Self::build_ui_tree(&child, output, depth + 1, max_depth);
        }
    }

    Ok(())
}
|
||||
|
||||
/// Find UI elements in an application
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn find_elements(
|
||||
&self,
|
||||
app_name: &str,
|
||||
role: Option<&str>,
|
||||
title: Option<&str>,
|
||||
identifier: Option<&str>,
|
||||
) -> Result<Vec<AXElement>> {
|
||||
let app_element = self.get_app_element(app_name)?;
|
||||
let mut found_elements = Vec::new();
|
||||
|
||||
let visitor = ElementCollector {
|
||||
role_filter: role.map(|s| s.to_string()),
|
||||
title_filter: title.map(|s| s.to_string()),
|
||||
identifier_filter: identifier.map(|s| s.to_string()),
|
||||
results: std::cell::RefCell::new(&mut found_elements),
|
||||
depth: std::cell::Cell::new(0),
|
||||
};
|
||||
|
||||
let walker = TreeWalker::new();
|
||||
walker.walk(&app_element, &visitor);
|
||||
|
||||
Ok(found_elements)
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn find_elements(
|
||||
&self,
|
||||
_app_name: &str,
|
||||
_role: Option<&str>,
|
||||
_title: Option<&str>,
|
||||
_identifier: Option<&str>,
|
||||
) -> Result<Vec<AXElement>> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Locate a single element of `app_name` (helper for click, set_value, ...).
///
/// An element matches when its role contains `role` and, for each of
/// `title` / `identifier` that is given, the corresponding attribute is
/// readable and contains the filter text. The search is delegated to an
/// `ElementFinder` constructed with a two-second duration.
///
/// # Errors
/// Fails when the application cannot be resolved or the finder returns no
/// element ("Element not found").
#[cfg(target_os = "macos")]
fn find_element(
    &self,
    app_name: &str,
    role: &str,
    title: Option<&str>,
    identifier: Option<&str>,
) -> Result<AXUIElement> {
    let root = self.get_app_element(app_name)?;

    // The predicate must own its filter strings.
    let wanted_role = String::from(role);
    let wanted_title = title.map(String::from);
    let wanted_id = identifier.map(String::from);

    let finder = ElementFinder::new(
        &root,
        move |candidate| {
            // Role is mandatory: an unreadable role never matches.
            let role_ok = match candidate.role() {
                Ok(value) => value.to_string().contains(&wanted_role),
                Err(_) => false,
            };
            if !role_ok {
                return false;
            }

            // Title, only when a filter was supplied.
            if let Some(ref needle) = wanted_title {
                let title_ok = match candidate.title() {
                    Ok(value) => value.to_string().contains(needle),
                    Err(_) => false,
                };
                if !title_ok {
                    return false;
                }
            }

            // Identifier, only when a filter was supplied.
            if let Some(ref needle) = wanted_id {
                let id_ok = match candidate.identifier() {
                    Ok(value) => value.to_string().contains(needle),
                    Err(_) => false,
                };
                if !id_ok {
                    return false;
                }
            }

            true
        },
        Some(std::time::Duration::from_secs(2)),
    );

    finder.find().context("Element not found")
}
|
||||
|
||||
/// Click on a UI element
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn click_element(
|
||||
&self,
|
||||
app_name: &str,
|
||||
role: &str,
|
||||
title: Option<&str>,
|
||||
identifier: Option<&str>,
|
||||
) -> Result<()> {
|
||||
let element = self.find_element(app_name, role, title, identifier)?;
|
||||
|
||||
// Perform the press action
|
||||
let action_name = CFString::new("AXPress");
|
||||
element
|
||||
.perform_action(&action_name)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to perform press action: {:?}", e))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn click_element(
|
||||
&self,
|
||||
_app_name: &str,
|
||||
_role: &str,
|
||||
_title: Option<&str>,
|
||||
_identifier: Option<&str>,
|
||||
) -> Result<()> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Set the value of a UI element
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn set_value(
|
||||
&self,
|
||||
app_name: &str,
|
||||
role: &str,
|
||||
value: &str,
|
||||
title: Option<&str>,
|
||||
identifier: Option<&str>,
|
||||
) -> Result<()> {
|
||||
let element = self.find_element(app_name, role, title, identifier)?;
|
||||
|
||||
// Set the value - convert CFString to CFType
|
||||
let cf_value = CFString::new(value);
|
||||
|
||||
element.set_value(cf_value.as_CFType())
|
||||
.map_err(|e| anyhow::anyhow!("Failed to set value: {:?}", e))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn set_value(
|
||||
&self,
|
||||
_app_name: &str,
|
||||
_role: &str,
|
||||
_value: &str,
|
||||
_title: Option<&str>,
|
||||
_identifier: Option<&str>,
|
||||
) -> Result<()> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Get the value of a UI element
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn get_value(
|
||||
&self,
|
||||
app_name: &str,
|
||||
role: &str,
|
||||
title: Option<&str>,
|
||||
identifier: Option<&str>,
|
||||
) -> Result<String> {
|
||||
let element = self.find_element(app_name, role, title, identifier)?;
|
||||
|
||||
// Get the value
|
||||
let value_type = element.value()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get value: {:?}", e))?;
|
||||
|
||||
// Try to downcast to CFString
|
||||
if let Some(cf_string) = value_type.downcast::<CFString>() {
|
||||
Ok(cf_string.to_string())
|
||||
} else {
|
||||
// For non-string values, try to get a description
|
||||
Ok(format!("<non-string value>"))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn get_value(
|
||||
&self,
|
||||
_app_name: &str,
|
||||
_role: &str,
|
||||
_title: Option<&str>,
|
||||
_identifier: Option<&str>,
|
||||
) -> Result<String> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Type text into the currently focused element (uses system text input)
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn type_text(&self, app_name: &str, text: &str) -> Result<()> {
|
||||
use cocoa::base::{id, nil};
|
||||
use cocoa::foundation::NSString;
|
||||
use objc::{class, msg_send, sel, sel_impl};
|
||||
|
||||
// First, make sure the app is active
|
||||
self.activate_app(app_name)?;
|
||||
|
||||
// Wait for app to fully activate
|
||||
std::thread::sleep(std::time::Duration::from_millis(500));
|
||||
|
||||
// Send a Tab key to try to focus on a text field
|
||||
// This helps ensure something is focused before we paste
|
||||
let _ = self.press_key(app_name, "tab", vec![]);
|
||||
std::thread::sleep(std::time::Duration::from_millis(800));
|
||||
|
||||
// Save old clipboard, set new content, paste, then restore
|
||||
let old_content: id;
|
||||
unsafe {
|
||||
// Get the general pasteboard
|
||||
let pasteboard: id = msg_send![class!(NSPasteboard), generalPasteboard];
|
||||
|
||||
// Save current clipboard content
|
||||
let ns_string_type = NSString::alloc(nil).init_str("public.utf8-plain-text");
|
||||
old_content = msg_send![pasteboard, stringForType: ns_string_type];
|
||||
|
||||
// Clear and set new content
|
||||
let _: () = msg_send![pasteboard, clearContents];
|
||||
|
||||
let ns_string = NSString::alloc(nil).init_str(text);
|
||||
let ns_type = NSString::alloc(nil).init_str("public.utf8-plain-text");
|
||||
let _: bool = msg_send![pasteboard, setString:ns_string forType:ns_type];
|
||||
}
|
||||
|
||||
// Wait a moment for clipboard to update
|
||||
std::thread::sleep(std::time::Duration::from_millis(200));
|
||||
|
||||
// Paste using Cmd+V (outside unsafe block)
|
||||
self.press_key(app_name, "v", vec!["command"])?;
|
||||
|
||||
// Wait for paste to complete
|
||||
std::thread::sleep(std::time::Duration::from_millis(300));
|
||||
|
||||
// Restore old clipboard content if it existed
|
||||
unsafe {
|
||||
if old_content != nil {
|
||||
let pasteboard: id = msg_send![class!(NSPasteboard), generalPasteboard];
|
||||
let _: () = msg_send![pasteboard, clearContents];
|
||||
let ns_type = NSString::alloc(nil).init_str("public.utf8-plain-text");
|
||||
let _: bool = msg_send![pasteboard, setString:old_content forType:ns_type];
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn type_text(&self, _app_name: &str, _text: &str) -> Result<()> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
/// Focus on a text field or text area element
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn focus_element(
|
||||
&self,
|
||||
app_name: &str,
|
||||
role: &str,
|
||||
title: Option<&str>,
|
||||
identifier: Option<&str>,
|
||||
) -> Result<()> {
|
||||
let element = self.find_element(app_name, role, title, identifier)?;
|
||||
|
||||
// Set focused attribute to true
|
||||
use core_foundation::boolean::CFBoolean;
|
||||
let cf_true = CFBoolean::true_value();
|
||||
|
||||
element.set_attribute(&accessibility::AXAttribute::focused(), cf_true)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to focus element: {:?}", e))?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Press a keyboard shortcut
|
||||
#[cfg(target_os = "macos")]
|
||||
pub fn press_key(
|
||||
&self,
|
||||
app_name: &str,
|
||||
key: &str,
|
||||
modifiers: Vec<&str>,
|
||||
) -> Result<()> {
|
||||
use core_graphics::event::{
|
||||
CGEvent, CGEventFlags, CGEventTapLocation,
|
||||
};
|
||||
use core_graphics::event_source::{CGEventSource, CGEventSourceStateID};
|
||||
|
||||
// First, make sure the app is active
|
||||
self.activate_app(app_name)?;
|
||||
|
||||
// Wait a bit for activation
|
||||
std::thread::sleep(std::time::Duration::from_millis(100));
|
||||
|
||||
// Map key string to key code
|
||||
let key_code = Self::key_to_keycode(key)
|
||||
.ok_or_else(|| anyhow::anyhow!("Unknown key: {}", key))?;
|
||||
|
||||
// Map modifiers to flags
|
||||
let mut flags = CGEventFlags::CGEventFlagNull;
|
||||
for modifier in modifiers {
|
||||
match modifier.to_lowercase().as_str() {
|
||||
"command" | "cmd" => flags |= CGEventFlags::CGEventFlagCommand,
|
||||
"option" | "alt" => flags |= CGEventFlags::CGEventFlagAlternate,
|
||||
"control" | "ctrl" => flags |= CGEventFlags::CGEventFlagControl,
|
||||
"shift" => flags |= CGEventFlags::CGEventFlagShift,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Create event source
|
||||
let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState)
|
||||
.ok().context("Failed to create event source")?;
|
||||
|
||||
// Create key down event
|
||||
let key_down = CGEvent::new_keyboard_event(source.clone(), key_code, true)
|
||||
.ok().context("Failed to create key down event")?;
|
||||
key_down.set_flags(flags);
|
||||
|
||||
// Create key up event
|
||||
let key_up = CGEvent::new_keyboard_event(source, key_code, false)
|
||||
.ok().context("Failed to create key up event")?;
|
||||
key_up.set_flags(flags);
|
||||
|
||||
// Post events
|
||||
key_down.post(CGEventTapLocation::HID);
|
||||
std::thread::sleep(std::time::Duration::from_millis(50));
|
||||
key_up.post(CGEventTapLocation::HID);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub fn press_key(
|
||||
&self,
|
||||
_app_name: &str,
|
||||
_key: &str,
|
||||
_modifiers: Vec<&str>,
|
||||
) -> Result<()> {
|
||||
anyhow::bail!("Not supported on this platform")
|
||||
}
|
||||
|
||||
#[cfg(target_os = "macos")]
|
||||
fn key_to_keycode(key: &str) -> Option<u16> {
|
||||
// Map common keys to keycodes
|
||||
// See: https://eastmanreference.com/complete-list-of-applescript-key-codes
|
||||
match key.to_lowercase().as_str() {
|
||||
"a" => Some(0x00),
|
||||
"s" => Some(0x01),
|
||||
"d" => Some(0x02),
|
||||
"f" => Some(0x03),
|
||||
"h" => Some(0x04),
|
||||
"g" => Some(0x05),
|
||||
"z" => Some(0x06),
|
||||
"x" => Some(0x07),
|
||||
"c" => Some(0x08),
|
||||
"v" => Some(0x09),
|
||||
"b" => Some(0x0B),
|
||||
"q" => Some(0x0C),
|
||||
"w" => Some(0x0D),
|
||||
"e" => Some(0x0E),
|
||||
"r" => Some(0x0F),
|
||||
"y" => Some(0x10),
|
||||
"t" => Some(0x11),
|
||||
"1" => Some(0x12),
|
||||
"2" => Some(0x13),
|
||||
"3" => Some(0x14),
|
||||
"4" => Some(0x15),
|
||||
"6" => Some(0x16),
|
||||
"5" => Some(0x17),
|
||||
"=" => Some(0x18),
|
||||
"9" => Some(0x19),
|
||||
"7" => Some(0x1A),
|
||||
"-" => Some(0x1B),
|
||||
"8" => Some(0x1C),
|
||||
"0" => Some(0x1D),
|
||||
"]" => Some(0x1E),
|
||||
"o" => Some(0x1F),
|
||||
"u" => Some(0x20),
|
||||
"[" => Some(0x21),
|
||||
"i" => Some(0x22),
|
||||
"p" => Some(0x23),
|
||||
"return" | "enter" => Some(0x24),
|
||||
"l" => Some(0x25),
|
||||
"j" => Some(0x26),
|
||||
"'" => Some(0x27),
|
||||
"k" => Some(0x28),
|
||||
";" => Some(0x29),
|
||||
"\\" => Some(0x2A),
|
||||
"," => Some(0x2B),
|
||||
"/" => Some(0x2C),
|
||||
"n" => Some(0x2D),
|
||||
"m" => Some(0x2E),
|
||||
"." => Some(0x2F),
|
||||
"tab" => Some(0x30),
|
||||
"space" => Some(0x31),
|
||||
"`" => Some(0x32),
|
||||
"delete" | "backspace" => Some(0x33),
|
||||
"escape" | "esc" => Some(0x35),
|
||||
"f1" => Some(0x7A),
|
||||
"f2" => Some(0x78),
|
||||
"f3" => Some(0x63),
|
||||
"f4" => Some(0x76),
|
||||
"f5" => Some(0x60),
|
||||
"f6" => Some(0x61),
|
||||
"f7" => Some(0x62),
|
||||
"f8" => Some(0x64),
|
||||
"f9" => Some(0x65),
|
||||
"f10" => Some(0x6D),
|
||||
"f11" => Some(0x67),
|
||||
"f12" => Some(0x6F),
|
||||
"left" => Some(0x7B),
|
||||
"right" => Some(0x7C),
|
||||
"down" => Some(0x7D),
|
||||
"up" => Some(0x7E),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Tree visitor that records every visited element matching a set of
/// optional substring filters.
#[cfg(target_os = "macos")]
struct ElementCollector<'a> {
    /// When set, the element's role must contain this text.
    role_filter: Option<String>,
    /// When set, the element must have a title containing this text.
    title_filter: Option<String>,
    /// When set, the element must have an identifier containing this text.
    identifier_filter: Option<String>,
    /// Destination for matching elements; borrowed from the caller.
    results: std::cell::RefCell<&'a mut Vec<AXElement>>,
    /// Current nesting level, maintained by `enter_element` / `exit_element`.
    depth: std::cell::Cell<usize>,
}

#[cfg(target_os = "macos")]
impl<'a> TreeVisitor for ElementCollector<'a> {
    /// Record `element` if it passes all filters.
    ///
    /// Subtrees nested deeper than 20 levels are skipped.
    fn enter_element(&self, element: &AXUIElement) -> TreeWalkerFlow {
        let level = self.depth.get() + 1;
        self.depth.set(level);
        if level > 20 {
            return TreeWalkerFlow::SkipSubtree;
        }

        // Attributes needed for filtering.
        let role = match element.role() {
            Ok(value) => value.to_string(),
            Err(_) => "Unknown".to_string(),
        };
        let title = element.title().ok().map(|s| s.to_string());
        let identifier = element.identifier().ok().map(|s| s.to_string());

        // An absent filter always passes. A present title / identifier
        // filter requires the attribute to exist and contain the text.
        let role_ok = match &self.role_filter {
            Some(wanted) => role.contains(wanted.as_str()),
            None => true,
        };
        let title_ok = match (&self.title_filter, &title) {
            (None, _) => true,
            (Some(wanted), Some(actual)) => actual.contains(wanted.as_str()),
            (Some(_), None) => false,
        };
        let identifier_ok = match (&self.identifier_filter, &identifier) {
            (None, _) => true,
            (Some(wanted), Some(actual)) => actual.contains(wanted.as_str()),
            (Some(_), None) => false,
        };

        if !(role_ok && title_ok && identifier_ok) {
            return TreeWalkerFlow::Continue;
        }

        // Remaining attributes are only queried for matching elements.
        let value = match element.value() {
            Ok(raw) => raw.downcast::<CFString>().map(|s| s.to_string()),
            Err(_) => None,
        };
        let label = element.description().ok().map(|s| s.to_string());
        let enabled: bool = match element.enabled() {
            Ok(flag) => flag.into(),
            Err(_) => false,
        };
        let focused: bool = match element.focused() {
            Ok(flag) => flag.into(),
            Err(_) => false,
        };
        let children_count = match element.children() {
            Ok(children) => children.len() as usize,
            Err(_) => 0,
        };

        self.results.borrow_mut().push(AXElement {
            role,
            title,
            value,
            label,
            identifier,
            enabled,
            focused,
            // Geometry is not queried by this collector.
            position: None,
            size: None,
            children_count,
        });

        TreeWalkerFlow::Continue
    }

    /// Undo the depth increment made by `enter_element`.
    fn exit_element(&self, _element: &AXUIElement) {
        let level = self.depth.get();
        self.depth.set(level - 1);
    }
}
|
||||
65
crates/g3-computer-control/src/macax/mod.rs
Normal file
65
crates/g3-computer-control/src/macax/mod.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
pub mod controller;
|
||||
|
||||
pub use controller::MacAxController;
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
/// Represents an accessibility element in the UI hierarchy
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AXElement {
|
||||
pub role: String,
|
||||
pub title: Option<String>,
|
||||
pub value: Option<String>,
|
||||
pub label: Option<String>,
|
||||
pub identifier: Option<String>,
|
||||
pub enabled: bool,
|
||||
pub focused: bool,
|
||||
pub position: Option<(f64, f64)>,
|
||||
pub size: Option<(f64, f64)>,
|
||||
pub children_count: usize,
|
||||
}
|
||||
|
||||
/// Represents a macOS application
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AXApplication {
|
||||
pub name: String,
|
||||
pub bundle_id: Option<String>,
|
||||
pub pid: i32,
|
||||
}
|
||||
|
||||
impl AXElement {
|
||||
/// Convert to a human-readable string representation
|
||||
pub fn to_string(&self) -> String {
|
||||
let mut parts = vec![format!("Role: {}", self.role)];
|
||||
|
||||
if let Some(ref title) = self.title {
|
||||
parts.push(format!("Title: {}", title));
|
||||
}
|
||||
if let Some(ref value) = self.value {
|
||||
parts.push(format!("Value: {}", value));
|
||||
}
|
||||
if let Some(ref label) = self.label {
|
||||
parts.push(format!("Label: {}", label));
|
||||
}
|
||||
if let Some(ref id) = self.identifier {
|
||||
parts.push(format!("ID: {}", id));
|
||||
}
|
||||
|
||||
parts.push(format!("Enabled: {}", self.enabled));
|
||||
parts.push(format!("Focused: {}", self.focused));
|
||||
|
||||
if let Some((x, y)) = self.position {
|
||||
parts.push(format!("Position: ({:.0}, {:.0})", x, y));
|
||||
}
|
||||
if let Some((w, h)) = self.size {
|
||||
parts.push(format!("Size: ({:.0}, {:.0})", w, h));
|
||||
}
|
||||
|
||||
parts.push(format!("Children: {}", self.children_count));
|
||||
|
||||
parts.join(", ")
|
||||
}
|
||||
}
|
||||
37
crates/g3-computer-control/src/macax/tests.rs
Normal file
37
crates/g3-computer-control/src/macax/tests.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
#[cfg(test)]
mod tests {
    use crate::{AXElement, MacAxController};

    /// The string form must contain every populated field in `Key: value`
    /// form, with coordinates printed without decimals.
    #[test]
    fn test_ax_element_to_string() {
        let element = AXElement {
            role: "button".to_string(),
            title: Some("Click Me".to_string()),
            value: None,
            label: Some("Submit Button".to_string()),
            identifier: Some("submitBtn".to_string()),
            enabled: true,
            focused: false,
            position: Some((100.0, 200.0)),
            size: Some((80.0, 30.0)),
            children_count: 0,
        };

        let string_repr = element.to_string();
        assert!(string_repr.contains("Role: button"));
        assert!(string_repr.contains("Title: Click Me"));
        assert!(string_repr.contains("Label: Submit Button"));
        assert!(string_repr.contains("ID: submitBtn"));
        assert!(string_repr.contains("Enabled: true"));
        assert!(string_repr.contains("Position: (100, 200)"));
        assert!(string_repr.contains("Size: (80, 30)"));
    }

    /// Construction succeeds on macOS and is rejected everywhere else.
    #[test]
    fn test_controller_creation() {
        // `MacAxController::new()` bails on every target except macOS, so
        // the expected outcome depends on the platform the test runs on.
        let result = MacAxController::new();
        assert_eq!(result.is_ok(), cfg!(target_os = "macos"));
    }
}
|
||||
26
crates/g3-computer-control/src/ocr/mod.rs
Normal file
26
crates/g3-computer-control/src/ocr/mod.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
use crate::types::TextLocation;
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
/// OCR engine trait for text recognition with bounding boxes.
///
/// Implementors must be `Send + Sync`. The platform modules declared in this
/// file supply the implementations and export one of them as `DefaultOCR`.
|
||||
#[async_trait]
|
||||
pub trait OCREngine: Send + Sync {
|
||||
/// Extract text with locations from an image file.
///
/// `path` is the path of the image to analyze. On success, returns the
/// recognized pieces of text, each as a `TextLocation`.
|
||||
async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>>;
|
||||
|
||||
/// Get the name of the OCR engine.
|
||||
fn name(&self) -> &str;
|
||||
}
|
||||
|
||||
// Platform-specific modules
|
||||
#[cfg(target_os = "macos")]
|
||||
pub mod vision;
|
||||
|
||||
pub mod tesseract;
|
||||
|
||||
// Re-export the default OCR engine for the platform
|
||||
#[cfg(target_os = "macos")]
|
||||
pub use vision::AppleVisionOCR as DefaultOCR;
|
||||
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
pub use tesseract::TesseractOCR as DefaultOCR;
|
||||
84
crates/g3-computer-control/src/ocr/tesseract.rs
Normal file
84
crates/g3-computer-control/src/ocr/tesseract.rs
Normal file
@@ -0,0 +1,84 @@
|
||||
use super::OCREngine;
|
||||
use crate::types::TextLocation;
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
|
||||
/// Tesseract OCR engine (fallback/cross-platform)
|
||||
pub struct TesseractOCR;
|
||||
|
||||
impl TesseractOCR {
|
||||
pub fn new() -> Result<Self> {
|
||||
// Check if tesseract is available
|
||||
let tesseract_check = std::process::Command::new("which")
|
||||
.arg("tesseract")
|
||||
.output();
|
||||
|
||||
if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
|
||||
anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
|
||||
To install tesseract:\n macOS: brew install tesseract\n \
|
||||
Linux: sudo apt-get install tesseract-ocr (Ubuntu/Debian)\n \
|
||||
sudo yum install tesseract (RHEL/CentOS)\n \
|
||||
Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki\n\n\
|
||||
After installation, restart your terminal and try again.");
|
||||
}
|
||||
|
||||
Ok(Self)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl OCREngine for TesseractOCR {
|
||||
async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>> {
|
||||
// Use tesseract CLI with TSV output to get bounding boxes
|
||||
let output = std::process::Command::new("tesseract")
|
||||
.arg(path)
|
||||
.arg("stdout")
|
||||
.arg("tsv")
|
||||
.output()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to run tesseract: {}", e))?;
|
||||
|
||||
if !output.status.success() {
|
||||
anyhow::bail!("Tesseract failed: {}", String::from_utf8_lossy(&output.stderr));
|
||||
}
|
||||
|
||||
let tsv_text = String::from_utf8_lossy(&output.stdout);
|
||||
let mut locations = Vec::new();
|
||||
|
||||
// Parse TSV output (skip header line)
|
||||
for (i, line) in tsv_text.lines().enumerate() {
|
||||
if i == 0 { continue; } // Skip header
|
||||
|
||||
let parts: Vec<&str> = line.split('\t').collect();
|
||||
if parts.len() >= 12 {
|
||||
// TSV format: level, page_num, block_num, par_num, line_num, word_num,
|
||||
// left, top, width, height, conf, text
|
||||
if let (Ok(x), Ok(y), Ok(w), Ok(h), Ok(conf), text) = (
|
||||
parts[6].parse::<i32>(),
|
||||
parts[7].parse::<i32>(),
|
||||
parts[8].parse::<i32>(),
|
||||
parts[9].parse::<i32>(),
|
||||
parts[10].parse::<f32>(),
|
||||
parts[11],
|
||||
) {
|
||||
let trimmed = text.trim();
|
||||
if !trimmed.is_empty() && conf > 0.0 {
|
||||
locations.push(TextLocation {
|
||||
text: trimmed.to_string(),
|
||||
x,
|
||||
y,
|
||||
width: w,
|
||||
height: h,
|
||||
confidence: conf / 100.0, // Convert from 0-100 to 0-1
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(locations)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"Tesseract OCR"
|
||||
}
|
||||
}
|
||||
103
crates/g3-computer-control/src/ocr/vision.rs
Normal file
103
crates/g3-computer-control/src/ocr/vision.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use super::OCREngine;
|
||||
use crate::types::TextLocation;
|
||||
use anyhow::{Result, Context};
|
||||
use async_trait::async_trait;
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::os::raw::{c_char, c_float, c_uint};
|
||||
|
||||
// FFI bindings to Swift VisionBridge
|
||||
// C-layout (`repr(C)`) record describing one recognized text box.
// In this file the buffer handed back by `vision_recognize_text` is cast to
// a slice of these records and each one is converted into a `TextLocation`.
#[repr(C)]
|
||||
struct VisionTextBox {
|
||||
// Pointer to the recognized text. It is read with `CStr::from_ptr` when
// non-null, so the Rust side relies on it being NUL-terminated.
text: *const c_char,
|
||||
// Not read by the Rust code in this file; presumably the byte length of
// `text` - TODO confirm against the Swift bridge.
text_len: c_uint,
|
||||
// Position and size of the box; copied unchanged into `TextLocation`.
x: i32,
|
||||
y: i32,
|
||||
width: i32,
|
||||
height: i32,
|
||||
// Copied unchanged into `TextLocation::confidence` (no rescaling here).
confidence: c_float,
|
||||
}
|
||||
|
||||
// Foreign functions implemented outside this file (the preceding comment
// names a Swift "VisionBridge").
extern "C" {
|
||||
// Called below with a NUL-terminated path pointer, the path's byte length,
// and two out-parameters. The caller treats a `false` return or a null
// `out_boxes` result as failure, and otherwise reads `out_count` records of
// `VisionTextBox` from the returned buffer.
fn vision_recognize_text(
|
||||
image_path: *const c_char,
|
||||
image_path_len: c_uint,
|
||||
out_boxes: *mut *mut std::ffi::c_void,
|
||||
out_count: *mut c_uint,
|
||||
) -> bool;
|
||||
|
||||
// Called below with the buffer and count obtained from
// `vision_recognize_text`, after the records have been copied out.
fn vision_free_boxes(boxes: *mut std::ffi::c_void, count: c_uint);
|
||||
}
|
||||
|
||||
/// Apple Vision Framework OCR engine
|
||||
pub struct AppleVisionOCR;
|
||||
|
||||
impl AppleVisionOCR {
|
||||
pub fn new() -> Result<Self> {
|
||||
Ok(Self)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl OCREngine for AppleVisionOCR {
|
||||
async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>> {
|
||||
// Convert path to C string
|
||||
let c_path = CString::new(path)
|
||||
.context("Failed to convert path to C string")?;
|
||||
|
||||
let mut boxes_ptr: *mut std::ffi::c_void = std::ptr::null_mut();
|
||||
let mut count: c_uint = 0;
|
||||
|
||||
// Call Swift Vision API
|
||||
let success = unsafe {
|
||||
vision_recognize_text(
|
||||
c_path.as_ptr(),
|
||||
path.len() as c_uint,
|
||||
&mut boxes_ptr,
|
||||
&mut count,
|
||||
)
|
||||
};
|
||||
|
||||
if !success || boxes_ptr.is_null() {
|
||||
anyhow::bail!("Apple Vision OCR failed");
|
||||
}
|
||||
|
||||
// Convert C array to Rust Vec
|
||||
let mut locations = Vec::new();
|
||||
|
||||
unsafe {
|
||||
let typed_boxes = boxes_ptr as *const VisionTextBox;
|
||||
let boxes_slice = std::slice::from_raw_parts(typed_boxes, count as usize);
|
||||
|
||||
for box_data in boxes_slice {
|
||||
// Convert C string to Rust String
|
||||
let text = if !box_data.text.is_null() {
|
||||
CStr::from_ptr(box_data.text)
|
||||
.to_string_lossy()
|
||||
.into_owned()
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
if !text.is_empty() {
|
||||
locations.push(TextLocation {
|
||||
text,
|
||||
x: box_data.x,
|
||||
y: box_data.y,
|
||||
width: box_data.width,
|
||||
height: box_data.height,
|
||||
confidence: box_data.confidence,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Free the C array
|
||||
vision_free_boxes(boxes_ptr, count);
|
||||
}
|
||||
|
||||
Ok(locations)
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
"Apple Vision Framework"
|
||||
}
|
||||
}
|
||||
@@ -63,10 +63,15 @@ impl ComputerController for LinuxController {
|
||||
}
|
||||
|
||||
async fn take_screenshot(&self, _path: &str, _region: Option<Rect>, _window_id: Option<&str>) -> Result<()> {
|
||||
// Enforce that window_id must be provided
|
||||
if _window_id.is_none() {
|
||||
anyhow::bail!("window_id is required. You must specify which window to capture (e.g., 'Firefox', 'Terminal', 'gedit'). Use list_windows to see available windows.");
|
||||
}
|
||||
|
||||
anyhow::bail!("Linux implementation not yet available")
|
||||
}
|
||||
|
||||
async fn extract_text_from_screen(&self, _region: Rect) -> Result<OCRResult> {
|
||||
async fn extract_text_from_screen(&self, _region: Rect, _window_id: &str) -> Result<String> {
|
||||
anyhow::bail!("Linux implementation not yet available")
|
||||
}
|
||||
|
||||
|
||||
@@ -1,22 +1,37 @@
|
||||
use crate::{ComputerController, types::Rect};
|
||||
use anyhow::Result;
|
||||
use crate::{ComputerController, types::{Rect, TextLocation}};
|
||||
use crate::ocr::{OCREngine, DefaultOCR};
|
||||
use anyhow::{Result, Context};
|
||||
use async_trait::async_trait;
|
||||
use std::path::Path;
|
||||
use tesseract::Tesseract;
|
||||
use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo};
|
||||
use core_foundation::dictionary::CFDictionary;
|
||||
use core_foundation::string::CFString;
|
||||
use core_foundation::base::{TCFType, ToVoid};
|
||||
use core_foundation::array::CFArray;
|
||||
|
||||
pub struct MacOSController {
|
||||
// Empty struct for now
|
||||
ocr_engine: Box<dyn OCREngine>,
|
||||
#[allow(dead_code)]
|
||||
ocr_name: String,
|
||||
}
|
||||
|
||||
impl MacOSController {
|
||||
pub fn new() -> Result<Self> {
|
||||
Ok(Self {})
|
||||
let ocr = Box::new(DefaultOCR::new()?);
|
||||
let ocr_name = ocr.name().to_string();
|
||||
tracing::info!("Initialized macOS controller with OCR engine: {}", ocr_name);
|
||||
Ok(Self { ocr_engine: ocr, ocr_name })
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ComputerController for MacOSController {
|
||||
async fn take_screenshot(&self, path: &str, region: Option<Rect>, window_id: Option<&str>) -> Result<()> {
|
||||
// Enforce that window_id must be provided
|
||||
if window_id.is_none() {
|
||||
return Err(anyhow::anyhow!("window_id is required. You must specify which window to capture (e.g., 'Safari', 'Terminal', 'Google Chrome'). Use list_windows to see available windows."));
|
||||
}
|
||||
|
||||
// Determine the temporary directory for screenshots
|
||||
let temp_dir = std::env::var("TMPDIR")
|
||||
.or_else(|_| std::env::var("HOME").map(|h| format!("{}/tmp", h)))
|
||||
@@ -37,48 +52,134 @@ impl ComputerController for MacOSController {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
|
||||
let mut cmd = std::process::Command::new("screencapture");
|
||||
let app_name = window_id.unwrap(); // Safe because we checked is_none() above
|
||||
|
||||
// Add flags
|
||||
// Get the window ID for the specified application
|
||||
let cg_window_id = unsafe {
|
||||
let window_list = CGWindowListCopyWindowInfo(
|
||||
kCGWindowListOptionOnScreenOnly,
|
||||
kCGNullWindowID
|
||||
);
|
||||
|
||||
let array = CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
|
||||
let count = array.len();
|
||||
|
||||
let mut found_window_id: Option<(u32, String)> = None; // (id, owner)
|
||||
let app_name_lower = app_name.to_lowercase();
|
||||
|
||||
for i in 0..count {
|
||||
let dict = array.get(i).unwrap();
|
||||
|
||||
// Get owner name
|
||||
let owner_key = CFString::from_static_string("kCGWindowOwnerName");
|
||||
let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
|
||||
let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
s.to_string()
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
tracing::debug!("Checking window: owner='{}', looking for '{}'", owner, app_name);
|
||||
let owner_lower = owner.to_lowercase();
|
||||
|
||||
// Normalize by removing spaces for exact matching
|
||||
let app_name_normalized = app_name_lower.replace(" ", "");
|
||||
let owner_normalized = owner_lower.replace(" ", "");
|
||||
|
||||
// ONLY accept exact matches (case-insensitive, with or without spaces)
|
||||
// This prevents "Goose" from matching "GooseStudio"
|
||||
let is_match = owner_lower == app_name_lower || owner_normalized == app_name_normalized;
|
||||
|
||||
if is_match {
|
||||
// Get window ID
|
||||
let window_id_key = CFString::from_static_string("kCGWindowNumber");
|
||||
if let Some(value) = dict.find(window_id_key.to_void()) {
|
||||
let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
if let Some(id) = num.to_i64() {
|
||||
// Get window layer to filter out menu bar windows
|
||||
let layer_key = CFString::from_static_string("kCGWindowLayer");
|
||||
let layer: i32 = if let Some(value) = dict.find(layer_key.to_void()) {
|
||||
let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
num.to_i32().unwrap_or(0)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
// Get window bounds to verify it's a real window
|
||||
let bounds_key = CFString::from_static_string("kCGWindowBounds");
|
||||
let has_real_bounds = if let Some(value) = dict.find(bounds_key.to_void()) {
|
||||
let bounds_dict: CFDictionary = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
let width_key = CFString::from_static_string("Width");
|
||||
let height_key = CFString::from_static_string("Height");
|
||||
|
||||
if let (Some(w_val), Some(h_val)) = (
|
||||
bounds_dict.find(width_key.to_void()),
|
||||
bounds_dict.find(height_key.to_void()),
|
||||
) {
|
||||
let w_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*w_val as *const _);
|
||||
let h_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*h_val as *const _);
|
||||
let width = w_num.to_f64().unwrap_or(0.0);
|
||||
let height = h_num.to_f64().unwrap_or(0.0);
|
||||
// Real windows should be at least 100x100 pixels
|
||||
width >= 100.0 && height >= 100.0
|
||||
} else {
|
||||
false
|
||||
}
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
// Only accept windows that are:
|
||||
// 1. At layer 0 (normal windows, not menu bar)
|
||||
// 2. Have real bounds (width and height >= 100)
|
||||
if layer == 0 && has_real_bounds {
|
||||
tracing::info!("Found valid window: ID {} for app '{}' (layer={}, bounds valid)", id, owner, layer);
|
||||
found_window_id = Some((id as u32, owner.clone()));
|
||||
break;
|
||||
} else {
|
||||
tracing::debug!("Skipping window ID {} for '{}': layer={}, has_real_bounds={}", id, owner, layer, has_real_bounds);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
found_window_id
|
||||
};
|
||||
|
||||
let (cg_window_id, matched_owner) = cg_window_id.ok_or_else(|| {
|
||||
anyhow::anyhow!("Could not find window for application '{}'. Use list_windows to see available windows.", app_name)
|
||||
})?;
|
||||
tracing::info!("Taking screenshot of window ID {} for app '{}'", cg_window_id, matched_owner);
|
||||
|
||||
// Use screencapture with the window ID for now
|
||||
// TODO: Implement direct CGWindowListCreateImage approach with proper image saving
|
||||
let mut cmd = std::process::Command::new("screencapture");
|
||||
cmd.arg("-x"); // No sound
|
||||
cmd.arg("-l");
|
||||
cmd.arg(cg_window_id.to_string());
|
||||
|
||||
if let Some(region) = region {
|
||||
// Capture specific region: -R x,y,width,height
|
||||
cmd.arg("-R");
|
||||
cmd.arg(format!("{},{},{},{}", region.x, region.y, region.width, region.height));
|
||||
}
|
||||
|
||||
if let Some(app_name) = window_id {
|
||||
// Capture specific window by app name
|
||||
// Use AppleScript to get window ID
|
||||
let script = format!(r#"tell application "{}" to id of window 1"#, app_name);
|
||||
let output = std::process::Command::new("osascript")
|
||||
.arg("-e")
|
||||
.arg(&script)
|
||||
.output()?;
|
||||
|
||||
if output.status.success() {
|
||||
let window_id_str = String::from_utf8_lossy(&output.stdout).trim().to_string();
|
||||
cmd.arg(format!("-l{}", window_id_str));
|
||||
}
|
||||
}
|
||||
|
||||
cmd.arg(&final_path);
|
||||
|
||||
let screenshot_result = cmd.output()?;
|
||||
|
||||
if !screenshot_result.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&screenshot_result.stderr);
|
||||
return Err(anyhow::anyhow!("screencapture failed: {}", stderr));
|
||||
return Err(anyhow::anyhow!("screencapture failed for window {}: {}", cg_window_id, stderr));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn extract_text_from_screen(&self, region: Rect) -> Result<String> {
|
||||
async fn extract_text_from_screen(&self, region: Rect, window_id: &str) -> Result<String> {
|
||||
// Take screenshot of region first
|
||||
let temp_path = format!("/tmp/g3_ocr_{}.png", uuid::Uuid::new_v4());
|
||||
self.take_screenshot(&temp_path, Some(region), None).await?;
|
||||
self.take_screenshot(&temp_path, Some(region), Some(window_id)).await?;
|
||||
|
||||
// Extract text from the screenshot
|
||||
let result = self.extract_text_from_image(&temp_path).await?;
|
||||
@@ -90,36 +191,317 @@ impl ComputerController for MacOSController {
|
||||
}
|
||||
|
||||
async fn extract_text_from_image(&self, path: &str) -> Result<String> {
|
||||
// Check if tesseract is available on the system
|
||||
let tesseract_check = std::process::Command::new("which")
|
||||
.arg("tesseract")
|
||||
.output();
|
||||
|
||||
if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
|
||||
anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
|
||||
To install tesseract:\n macOS: brew install tesseract\n \
|
||||
Linux: sudo apt-get install tesseract-ocr (Ubuntu/Debian)\n \
|
||||
sudo yum install tesseract (RHEL/CentOS)\n \
|
||||
Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki\n\n\
|
||||
After installation, restart your terminal and try again.");
|
||||
// Extract all text and concatenate
|
||||
let locations = self.ocr_engine.extract_text_with_locations(path).await?;
|
||||
Ok(locations.iter().map(|loc| loc.text.as_str()).collect::<Vec<_>>().join(" "))
|
||||
}
|
||||
|
||||
// Initialize Tesseract
|
||||
let tess = Tesseract::new(None, Some("eng"))
|
||||
.map_err(|e| {
|
||||
anyhow::anyhow!("Failed to initialize Tesseract: {}\n\n\
|
||||
This usually means:\n1. Tesseract is not properly installed\n\
|
||||
2. Language data files are missing\n\nTo fix:\n \
|
||||
macOS: brew reinstall tesseract\n \
|
||||
Linux: sudo apt-get install tesseract-ocr-eng\n \
|
||||
Windows: Reinstall tesseract and ensure language files are included", e)
|
||||
})?;
|
||||
async fn extract_text_with_locations(&self, path: &str) -> Result<Vec<TextLocation>> {
|
||||
// Use the OCR engine
|
||||
self.ocr_engine.extract_text_with_locations(path).await
|
||||
}
|
||||
|
||||
let text = tess.set_image(path)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to load image '{}': {}", path, e))?
|
||||
.get_text()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to extract text from image: {}", e))?;
|
||||
async fn find_text_in_app(&self, app_name: &str, search_text: &str) -> Result<Option<TextLocation>> {
|
||||
// Take screenshot of specific app window
|
||||
let home = std::env::var("HOME").unwrap_or_else(|_| "/tmp".to_string());
|
||||
let temp_path = format!("{}/tmp/g3_find_text_{}_{}.png", home, app_name, uuid::Uuid::new_v4());
|
||||
self.take_screenshot(&temp_path, None, Some(app_name)).await?;
|
||||
|
||||
Ok(text)
|
||||
// Get screenshot dimensions before we delete it
|
||||
let screenshot_dims = get_image_dimensions(&temp_path)?;
|
||||
|
||||
// Extract all text with locations
|
||||
let locations = self.extract_text_with_locations(&temp_path).await?;
|
||||
|
||||
// Get window bounds to calculate coordinate transformation
|
||||
let window_bounds = self.get_window_bounds(app_name)?;
|
||||
|
||||
// Clean up temp file
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
|
||||
// Find matching text (case-insensitive)
|
||||
let search_lower = search_text.to_lowercase();
|
||||
for location in locations {
|
||||
if location.text.to_lowercase().contains(&search_lower) {
|
||||
// Transform coordinates from screenshot space to screen space
|
||||
let transformed = transform_screenshot_to_screen_coords(
|
||||
location,
|
||||
window_bounds,
|
||||
screenshot_dims,
|
||||
);
|
||||
return Ok(Some(transformed));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn move_mouse(&self, x: i32, y: i32) -> Result<()> {
|
||||
use core_graphics::event::{
|
||||
CGEvent, CGEventTapLocation, CGEventType, CGMouseButton,
|
||||
};
|
||||
use core_graphics::event_source::{
|
||||
CGEventSource, CGEventSourceStateID,
|
||||
};
|
||||
use core_graphics::geometry::CGPoint;
|
||||
|
||||
let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState)
|
||||
.ok().context("Failed to create event source")?;
|
||||
|
||||
let event = CGEvent::new_mouse_event(
|
||||
source,
|
||||
CGEventType::MouseMoved,
|
||||
CGPoint::new(x as f64, y as f64),
|
||||
CGMouseButton::Left,
|
||||
).ok().context("Failed to create mouse event")?;
|
||||
|
||||
event.post(CGEventTapLocation::HID);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn click_at(&self, x: i32, y: i32, _app_name: Option<&str>) -> Result<()> {
|
||||
use core_graphics::event::{
|
||||
CGEvent, CGEventTapLocation, CGEventType, CGMouseButton,
|
||||
};
|
||||
use core_graphics::event_source::{
|
||||
CGEventSource, CGEventSourceStateID,
|
||||
};
|
||||
use core_graphics::geometry::CGPoint;
|
||||
use core_graphics::display::CGDisplay;
|
||||
|
||||
// IMPORTANT: Coordinates passed here are in NSScreen/CGWindowListCopyWindowInfo space
|
||||
// (Y=0 at BOTTOM, increases UPWARD)
|
||||
// But CGEvent uses a different coordinate system (Y=0 at TOP, increases DOWNWARD)
|
||||
// We need to convert: CGEvent.y = screenHeight - NSScreen.y
|
||||
|
||||
let screen_height = CGDisplay::main().pixels_high() as i32;
|
||||
let cgevent_x = x;
|
||||
let cgevent_y = screen_height - y;
|
||||
|
||||
tracing::debug!("click_at: NSScreen coords ({}, {}) -> CGEvent coords ({}, {}) [screen_height={}]",
|
||||
x, y, cgevent_x, cgevent_y, screen_height);
|
||||
|
||||
let (global_x, global_y) = (cgevent_x, cgevent_y);
|
||||
|
||||
let point = CGPoint::new(global_x as f64, global_y as f64);
|
||||
|
||||
let source = CGEventSource::new(CGEventSourceStateID::HIDSystemState)
|
||||
.ok().context("Failed to create event source")?;
|
||||
|
||||
// Move mouse to position first
|
||||
let move_event = CGEvent::new_mouse_event(
|
||||
source.clone(),
|
||||
CGEventType::MouseMoved,
|
||||
point,
|
||||
CGMouseButton::Left,
|
||||
).ok().context("Failed to create mouse move event")?;
|
||||
move_event.post(CGEventTapLocation::HID);
|
||||
|
||||
std::thread::sleep(std::time::Duration::from_millis(100));
|
||||
|
||||
// Mouse down
|
||||
let mouse_down = CGEvent::new_mouse_event(
|
||||
source.clone(),
|
||||
CGEventType::LeftMouseDown,
|
||||
point,
|
||||
CGMouseButton::Left,
|
||||
).ok().context("Failed to create mouse down event")?;
|
||||
mouse_down.post(CGEventTapLocation::HID);
|
||||
|
||||
std::thread::sleep(std::time::Duration::from_millis(50));
|
||||
|
||||
// Mouse up
|
||||
let mouse_up = CGEvent::new_mouse_event(
|
||||
source,
|
||||
CGEventType::LeftMouseUp,
|
||||
point,
|
||||
CGMouseButton::Left,
|
||||
).ok().context("Failed to create mouse up event")?;
|
||||
mouse_up.post(CGEventTapLocation::HID);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl MacOSController {
|
||||
/// Get window bounds for an application (helper method)
|
||||
fn get_window_bounds(&self, app_name: &str) -> Result<(i32, i32, i32, i32)> {
|
||||
unsafe {
|
||||
let window_list = CGWindowListCopyWindowInfo(
|
||||
kCGWindowListOptionOnScreenOnly,
|
||||
kCGNullWindowID
|
||||
);
|
||||
|
||||
let array = CFArray::<CFDictionary>::wrap_under_create_rule(window_list);
|
||||
let count = array.len();
|
||||
|
||||
let app_name_lower = app_name.to_lowercase();
|
||||
|
||||
for i in 0..count {
|
||||
let dict = array.get(i).unwrap();
|
||||
|
||||
// Get owner name
|
||||
let owner_key = CFString::from_static_string("kCGWindowOwnerName");
|
||||
let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
|
||||
let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
s.to_string()
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let owner_lower = owner.to_lowercase();
|
||||
|
||||
// Normalize by removing spaces for exact matching
|
||||
let app_name_normalized = app_name_lower.replace(" ", "");
|
||||
let owner_normalized = owner_lower.replace(" ", "");
|
||||
|
||||
// ONLY accept exact matches (case-insensitive, with or without spaces)
|
||||
// This prevents "Goose" from matching "GooseStudio"
|
||||
let is_match = owner_lower == app_name_lower || owner_normalized == app_name_normalized;
|
||||
|
||||
if is_match {
|
||||
// Get window layer to filter out menu bar windows
|
||||
let layer_key = CFString::from_static_string("kCGWindowLayer");
|
||||
let layer: i32 = if let Some(value) = dict.find(layer_key.to_void()) {
|
||||
let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
num.to_i32().unwrap_or(0)
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
// Skip menu bar windows (layer >= 20)
|
||||
if layer >= 20 {
|
||||
tracing::debug!("Skipping window for '{}' at layer {} (menu bar)", owner, layer);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Get window bounds to verify it's a real window
|
||||
let bounds_key = CFString::from_static_string("kCGWindowBounds");
|
||||
if let Some(value) = dict.find(bounds_key.to_void()) {
|
||||
let bounds_dict: CFDictionary = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
|
||||
let x_key = CFString::from_static_string("X");
|
||||
let y_key = CFString::from_static_string("Y");
|
||||
let width_key = CFString::from_static_string("Width");
|
||||
let height_key = CFString::from_static_string("Height");
|
||||
|
||||
if let (Some(x_val), Some(y_val), Some(w_val), Some(h_val)) = (
|
||||
bounds_dict.find(x_key.to_void()),
|
||||
bounds_dict.find(y_key.to_void()),
|
||||
bounds_dict.find(width_key.to_void()),
|
||||
bounds_dict.find(height_key.to_void()),
|
||||
) {
|
||||
let x_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*x_val as *const _);
|
||||
let y_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*y_val as *const _);
|
||||
let w_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*w_val as *const _);
|
||||
let h_num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*h_val as *const _);
|
||||
|
||||
let x: i32 = x_num.to_i64().unwrap_or(0) as i32;
|
||||
let y: i32 = y_num.to_i64().unwrap_or(0) as i32;
|
||||
let w: i32 = w_num.to_i64().unwrap_or(0) as i32;
|
||||
let h: i32 = h_num.to_i64().unwrap_or(0) as i32;
|
||||
|
||||
// Only accept windows with real bounds (>= 100x100 pixels)
|
||||
if w >= 100 && h >= 100 {
|
||||
tracing::info!("Found valid window bounds for '{}': x={}, y={}, w={}, h={} (layer={})", owner, x, y, w, h, layer);
|
||||
return Ok((x, y, w, h));
|
||||
} else {
|
||||
tracing::debug!("Skipping window for '{}': too small ({}x{})", owner, w, h);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow::anyhow!("Could not find window bounds for '{}'", app_name))
|
||||
}
|
||||
}
|
||||
|
||||
/// Get image dimensions from a PNG file
|
||||
fn get_image_dimensions(path: &str) -> Result<(i32, i32)> {
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
let mut file = File::open(path)?;
|
||||
let mut buffer = vec![0u8; 24];
|
||||
file.read_exact(&mut buffer)?;
|
||||
|
||||
// PNG signature check
|
||||
if &buffer[0..8] != b"\x89PNG\r\n\x1a\n" {
|
||||
anyhow::bail!("Not a valid PNG file");
|
||||
}
|
||||
|
||||
// Read IHDR chunk (width and height are at bytes 16-23)
|
||||
let width = u32::from_be_bytes([buffer[16], buffer[17], buffer[18], buffer[19]]) as i32;
|
||||
let height = u32::from_be_bytes([buffer[20], buffer[21], buffer[22], buffer[23]]) as i32;
|
||||
|
||||
Ok((width, height))
|
||||
}
|
||||
|
||||
/// Transform coordinates from screenshot space to screen space
|
||||
///
|
||||
/// The screenshot is taken of a window, and Vision OCR returns coordinates
|
||||
/// relative to the screenshot image. We need to transform these to actual
|
||||
/// screen coordinates for clicking.
|
||||
///
|
||||
/// On Retina displays, screenshots are taken at 2x resolution, so we need
|
||||
/// to account for this scaling factor.
|
||||
fn transform_screenshot_to_screen_coords(
|
||||
location: TextLocation,
|
||||
window_bounds: (i32, i32, i32, i32), // (x, y, width, height) in screen space
|
||||
screenshot_dims: (i32, i32), // (width, height) in pixels
|
||||
) -> TextLocation {
|
||||
let (win_x, win_y, win_width, win_height) = window_bounds;
|
||||
let (screenshot_width, screenshot_height) = screenshot_dims;
|
||||
|
||||
// Calculate scale factors
|
||||
// On Retina displays, screenshot is typically 2x the window size
|
||||
let scale_x = win_width as f64 / screenshot_width as f64;
|
||||
let scale_y = win_height as f64 / screenshot_height as f64;
|
||||
|
||||
tracing::debug!("Transform: screenshot={}x{}, window={}x{} at ({},{}), scale=({:.2},{:.2})",
|
||||
screenshot_width, screenshot_height, win_width, win_height, win_x, win_y, scale_x, scale_y);
|
||||
|
||||
// Transform coordinates from image space to screen space
|
||||
// IMPORTANT: macOS screen coordinates have origin at BOTTOM-LEFT (Y increases upward)
|
||||
// Image coordinates have origin at TOP-LEFT (Y increases downward)
|
||||
// win_y is the BOTTOM of the window in screen coordinates
|
||||
// So we need to: (win_y + win_height) to get window TOP, then subtract screenshot_y
|
||||
let window_top_y = win_y + win_height;
|
||||
|
||||
tracing::debug!("[transform] Input location in image space: x={}, y={}, width={}, height={}",
|
||||
location.x, location.y, location.width, location.height);
|
||||
tracing::debug!("[transform] Scale factors: scale_x={:.4}, scale_y={:.4}", scale_x, scale_y);
|
||||
|
||||
let transformed_x = win_x + (location.x as f64 * scale_x) as i32;
|
||||
let transformed_y = window_top_y - (location.y as f64 * scale_y) as i32;
|
||||
let transformed_width = (location.width as f64 * scale_x) as i32;
|
||||
let transformed_height = (location.height as f64 * scale_y) as i32;
|
||||
|
||||
tracing::debug!("[transform] Calculation details:");
|
||||
tracing::debug!(" - transformed_x = {} + ({} * {:.4}) = {} + {:.2} = {}", win_x, location.x, scale_x, win_x, location.x as f64 * scale_x, transformed_x);
|
||||
tracing::debug!(" - transformed_width = ({} * {:.4}) = {:.2} -> {}", location.width, scale_x, location.width as f64 * scale_x, transformed_width);
|
||||
tracing::debug!(" - transformed_height = ({} * {:.4}) = {:.2} -> {}", location.height, scale_y, location.height as f64 * scale_y, transformed_height);
|
||||
|
||||
tracing::debug!("Transformed location: screenshot=({},{}) {}x{} -> screen=({},{}) {}x{}",
|
||||
location.x, location.y, location.width, location.height,
|
||||
transformed_x, transformed_y, transformed_width, transformed_height);
|
||||
|
||||
TextLocation {
|
||||
text: location.text,
|
||||
x: transformed_x,
|
||||
y: transformed_y,
|
||||
width: transformed_width,
|
||||
height: transformed_height,
|
||||
confidence: location.confidence,
|
||||
}
|
||||
}
|
||||
|
||||
#[path = "macos_window_matching_test.rs"]
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
@@ -1,425 +0,0 @@
|
||||
use crate::{ComputerController, types::*};
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use core_graphics::display::CGPoint;
|
||||
use core_graphics::event::{CGEvent, CGEventType, CGMouseButton, CGEventTapLocation};
|
||||
use core_graphics::event_source::{CGEventSource, CGEventSourceStateID};
|
||||
use std::path::Path;
|
||||
use tesseract::Tesseract;
|
||||
|
||||
/// Stateless controller for macOS input events.
///
/// No `CGEventSource` is stored here (the original note cites Send/Sync
/// issues); each operation creates its own event source instead.
pub struct MacOSController {}
|
||||
|
||||
impl MacOSController {
|
||||
pub fn new() -> Result<Self> {
|
||||
// Test that we can create an event source
|
||||
let _event_source = CGEventSource::new(CGEventSourceStateID::CombinedSessionState)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to create event source. Make sure Accessibility permissions are granted."))?;
|
||||
Ok(Self {})
|
||||
}
|
||||
|
||||
fn key_to_keycode(&self, key: &str) -> Result<u16> {
|
||||
// Map key names to macOS keycodes
|
||||
let keycode = match key.to_lowercase().as_str() {
|
||||
"return" | "enter" => 36,
|
||||
"tab" => 48,
|
||||
"space" => 49,
|
||||
"delete" | "backspace" => 51,
|
||||
"escape" | "esc" => 53,
|
||||
"command" | "cmd" => 55,
|
||||
"shift" => 56,
|
||||
"capslock" => 57,
|
||||
"option" | "alt" => 58,
|
||||
"control" | "ctrl" => 59,
|
||||
"left" => 123,
|
||||
"right" => 124,
|
||||
"down" => 125,
|
||||
"up" => 126,
|
||||
_ => anyhow::bail!("Unknown key: {}", key),
|
||||
};
|
||||
Ok(keycode)
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ComputerController for MacOSController {
|
||||
async fn move_mouse(&self, x: i32, y: i32) -> Result<()> {
|
||||
let event_source = CGEventSource::new(CGEventSourceStateID::CombinedSessionState)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to create event source"))?;
|
||||
let point = CGPoint::new(x as f64, y as f64);
|
||||
let event = CGEvent::new_mouse_event(
|
||||
event_source,
|
||||
CGEventType::MouseMoved,
|
||||
point,
|
||||
CGMouseButton::Left,
|
||||
).map_err(|_| anyhow::anyhow!("Failed to create mouse move event"))?;
|
||||
|
||||
event.post(CGEventTapLocation::HID);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn click(&self, button: MouseButton) -> Result<()> {
|
||||
let (cg_button, down_type, up_type) = match button {
|
||||
MouseButton::Left => (CGMouseButton::Left, CGEventType::LeftMouseDown, CGEventType::LeftMouseUp),
|
||||
MouseButton::Right => (CGMouseButton::Right, CGEventType::RightMouseDown, CGEventType::RightMouseUp),
|
||||
MouseButton::Middle => (CGMouseButton::Center, CGEventType::OtherMouseDown, CGEventType::OtherMouseUp),
|
||||
};
|
||||
|
||||
let point = {
|
||||
// Get current mouse position
|
||||
let temp_source = CGEventSource::new(CGEventSourceStateID::CombinedSessionState)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to create event source"))?;
|
||||
let event = CGEvent::new(temp_source)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to get mouse position"))?;
|
||||
let p = event.location();
|
||||
p
|
||||
};
|
||||
|
||||
{
|
||||
let event_source = CGEventSource::new(CGEventSourceStateID::CombinedSessionState)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to create event source"))?;
|
||||
|
||||
// Mouse down
|
||||
let down_event = CGEvent::new_mouse_event(
|
||||
event_source,
|
||||
down_type,
|
||||
point,
|
||||
cg_button,
|
||||
).map_err(|_| anyhow::anyhow!("Failed to create mouse down event"))?;
|
||||
down_event.post(CGEventTapLocation::HID);
|
||||
} // event_source and down_event dropped here
|
||||
|
||||
// Small delay
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
|
||||
|
||||
{
|
||||
let event_source = CGEventSource::new(CGEventSourceStateID::CombinedSessionState)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to create event source"))?;
|
||||
|
||||
let up_event = CGEvent::new_mouse_event(
|
||||
event_source,
|
||||
up_type,
|
||||
point,
|
||||
cg_button,
|
||||
).map_err(|_| anyhow::anyhow!("Failed to create mouse up event"))?;
|
||||
up_event.post(CGEventTapLocation::HID);
|
||||
} // event_source and up_event dropped here
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn double_click(&self, button: MouseButton) -> Result<()> {
|
||||
self.click(button).await?;
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
|
||||
self.click(button).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn type_text(&self, text: &str) -> Result<()> {
|
||||
for ch in text.chars() {
|
||||
{
|
||||
let event_source = CGEventSource::new(CGEventSourceStateID::CombinedSessionState)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to create event source"))?;
|
||||
|
||||
// Create keyboard event for character
|
||||
let event = CGEvent::new_keyboard_event(
|
||||
event_source,
|
||||
0, // keycode (0 for unicode)
|
||||
true,
|
||||
).map_err(|_| anyhow::anyhow!("Failed to create keyboard event"))?;
|
||||
|
||||
// Set unicode string
|
||||
let mut utf16_buf = [0u16; 2];
|
||||
let utf16_slice = ch.encode_utf16(&mut utf16_buf);
|
||||
let utf16_chars: Vec<u16> = utf16_slice.iter().copied().collect();
|
||||
|
||||
event.set_string_from_utf16_unchecked(utf16_chars.as_slice());
|
||||
event.post(CGEventTapLocation::HID);
|
||||
} // event_source and event dropped here
|
||||
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(10)).await;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn press_key(&self, key: &str) -> Result<()> {
|
||||
let keycode = self.key_to_keycode(key)?;
|
||||
|
||||
{
|
||||
let event_source = CGEventSource::new(CGEventSourceStateID::CombinedSessionState)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to create event source"))?;
|
||||
|
||||
// Key down
|
||||
let down_event = CGEvent::new_keyboard_event(
|
||||
event_source,
|
||||
keycode,
|
||||
true,
|
||||
).map_err(|_| anyhow::anyhow!("Failed to create key down event"))?;
|
||||
down_event.post(CGEventTapLocation::HID);
|
||||
} // event_source and down_event dropped here
|
||||
|
||||
tokio::time::sleep(tokio::time::Duration::from_millis(50)).await;
|
||||
|
||||
{
|
||||
let event_source = CGEventSource::new(CGEventSourceStateID::CombinedSessionState)
|
||||
.map_err(|_| anyhow::anyhow!("Failed to create event source"))?;
|
||||
|
||||
// Key up
|
||||
let up_event = CGEvent::new_keyboard_event(
|
||||
event_source,
|
||||
keycode,
|
||||
false,
|
||||
).map_err(|_| anyhow::anyhow!("Failed to create key up event"))?;
|
||||
up_event.post(CGEventTapLocation::HID);
|
||||
} // event_source and up_event dropped here
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_windows(&self) -> Result<Vec<Window>> {
|
||||
// Note: Full implementation would use CGWindowListCopyWindowInfo
|
||||
// For now, return empty list as this requires more complex FFI
|
||||
tracing::warn!("list_windows not fully implemented on macOS");
|
||||
Ok(vec![])
|
||||
}
|
||||
|
||||
/// Placeholder: logs a warning and succeeds without focusing anything.
///
/// A complete implementation would activate the application through
/// `NSWorkspace`. The window id is currently ignored.
async fn focus_window(&self, _window_id: &str) -> Result<()> {
    tracing::warn!("focus_window not fully implemented on macOS");
    Ok(())
}
|
||||
|
||||
async fn get_window_bounds(&self, _window_id: &str) -> Result<Rect> {
|
||||
// Note: Full implementation would use Accessibility API
|
||||
tracing::warn!("get_window_bounds not fully implemented on macOS");
|
||||
Ok(Rect { x: 0, y: 0, width: 800, height: 600 })
|
||||
}
|
||||
|
||||
/// Placeholder: logs a warning and always reports that no element matched.
///
/// A complete implementation would use the macOS Accessibility API. The
/// selector is currently ignored.
async fn find_element(&self, _selector: &ElementSelector) -> Result<Option<UIElement>> {
    tracing::warn!("find_element not fully implemented on macOS");
    let not_found: Option<UIElement> = None;
    Ok(not_found)
}
|
||||
|
||||
/// Placeholder: logs a warning and returns an empty string for every element.
///
/// A complete implementation would use the Accessibility API.
async fn get_element_text(&self, _element_id: &str) -> Result<String> {
    tracing::warn!("get_element_text not fully implemented on macOS");
    let empty_text = String::new();
    Ok(empty_text)
}
|
||||
|
||||
async fn get_element_bounds(&self, _element_id: &str) -> Result<Rect> {
|
||||
// Note: Full implementation would use Accessibility API
|
||||
tracing::warn!("get_element_bounds not fully implemented on macOS");
|
||||
Ok(Rect { x: 0, y: 0, width: 100, height: 30 })
|
||||
}
|
||||
|
||||
async fn take_screenshot(&self, path: &str, _region: Option<Rect>, window_id: Option<&str>) -> Result<()> {
|
||||
// Use native macOS screencapture command which handles all the format complexities
|
||||
|
||||
// Check if we have Screen Recording permission by attempting a test capture
|
||||
// If we only get wallpaper/menubar but no windows, we need permission
|
||||
let needs_permission_check = std::env::var("G3_SKIP_PERMISSION_CHECK").is_err();
|
||||
|
||||
if needs_permission_check {
|
||||
// Try to open Screen Recording settings if this is the first screenshot
|
||||
static PERMISSION_PROMPTED: std::sync::atomic::AtomicBool = std::sync::atomic::AtomicBool::new(false);
|
||||
|
||||
if !PERMISSION_PROMPTED.swap(true, std::sync::atomic::Ordering::Relaxed) {
|
||||
tracing::warn!("\n=== Screen Recording Permission Required ===\n\
|
||||
macOS requires explicit permission to capture window content.\n\
|
||||
If screenshots only show wallpaper/menubar (no windows):\n\n\
|
||||
1. Open System Settings > Privacy & Security > Screen Recording\n\
|
||||
2. Enable permission for your terminal (iTerm/Terminal) or g3\n\
|
||||
3. Restart your terminal if needed\n\n\
|
||||
Opening Screen Recording settings now...\n");
|
||||
|
||||
// Try to open the settings (non-blocking)
|
||||
let _ = std::process::Command::new("open")
|
||||
.arg("x-apple.systempreferences:com.apple.preference.security?Privacy_ScreenCapture")
|
||||
.spawn();
|
||||
}
|
||||
}
|
||||
|
||||
let path_obj = Path::new(path);
|
||||
if let Some(parent) = path_obj.parent() {
|
||||
std::fs::create_dir_all(parent)?;
|
||||
}
|
||||
|
||||
let mut cmd = std::process::Command::new("screencapture");
|
||||
|
||||
// Add flags
|
||||
cmd.arg("-x"); // No sound
|
||||
|
||||
if let Some(window_id) = window_id {
|
||||
// Capture specific window by getting its bounds and using region capture
|
||||
// window_id format: "AppName" or "AppName:WindowTitle"
|
||||
let app_name = window_id.split(':').next().unwrap_or(window_id);
|
||||
|
||||
// Use AppleScript to get window bounds
|
||||
let script = format!(
|
||||
r#"tell application "{}"
|
||||
tell current window
|
||||
get bounds
|
||||
end tell
|
||||
end tell"#,
|
||||
app_name
|
||||
);
|
||||
|
||||
let output = std::process::Command::new("osascript")
|
||||
.arg("-e")
|
||||
.arg(&script)
|
||||
.output()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to get window bounds: {}", e))?;
|
||||
|
||||
if output.status.success() {
|
||||
let bounds_str = String::from_utf8_lossy(&output.stdout);
|
||||
let bounds: Vec<i32> = bounds_str
|
||||
.trim()
|
||||
.split(',')
|
||||
.filter_map(|s| s.trim().parse().ok())
|
||||
.collect();
|
||||
|
||||
if bounds.len() == 4 {
|
||||
let (left, top, right, bottom) = (bounds[0], bounds[1], bounds[2], bounds[3]);
|
||||
let width = right - left;
|
||||
let height = bottom - top;
|
||||
|
||||
cmd.arg("-R");
|
||||
cmd.arg(format!("{},{},{},{}", left, top, width, height));
|
||||
|
||||
tracing::debug!("Capturing window '{}' at region: {},{} {}x{}", app_name, left, top, width, height);
|
||||
} else {
|
||||
tracing::warn!("Failed to parse window bounds, capturing full screen");
|
||||
}
|
||||
} else {
|
||||
tracing::warn!("Failed to get window bounds for '{}', capturing full screen", app_name);
|
||||
}
|
||||
} else if let Some(region) = _region {
|
||||
// Capture specific region: -R x,y,width,height
|
||||
cmd.arg("-R");
|
||||
cmd.arg(format!("{},{},{},{}", region.x, region.y, region.width, region.height));
|
||||
}
|
||||
|
||||
cmd.arg(path);
|
||||
|
||||
let output = cmd.output()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to execute screencapture: {}", e))?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
anyhow::bail!("screencapture failed: {}", stderr);
|
||||
}
|
||||
|
||||
tracing::debug!("Screenshot saved using screencapture: {}", path);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// Runs OCR over `region` of the screen.
///
/// The region is first captured to a uniquely named temporary PNG under
/// `/tmp`, which is then handed to `extract_text_from_image`. The temporary
/// file is removed whether or not OCR succeeds.
///
/// Returns the OCR result, or the error from the screenshot or OCR step.
async fn extract_text_from_screen(&self, region: Rect) -> Result<OCRResult> {
    // Take screenshot of region first
    let temp_path = format!("/tmp/g3_ocr_{}.png", uuid::Uuid::new_v4());
    self.take_screenshot(&temp_path, Some(region), None).await?;

    // Extract text from the screenshot. The error is not propagated yet so
    // the temp file is cleaned up on the failure path as well.
    let result = self.extract_text_from_image(&temp_path).await;

    // Clean up temp file (best effort)
    let _ = std::fs::remove_file(&temp_path);

    result
}
|
||||
|
||||
async fn extract_text_from_image(&self, _path: &str) -> Result<OCRResult> {
|
||||
// Check if tesseract is available on the system
|
||||
let tesseract_check = std::process::Command::new("which")
|
||||
.arg("tesseract")
|
||||
.output();
|
||||
|
||||
if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
|
||||
anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
|
||||
To install tesseract:\n macOS: brew install tesseract\n \
|
||||
Linux: sudo apt-get install tesseract-ocr (Ubuntu/Debian)\n \
|
||||
sudo yum install tesseract (RHEL/CentOS)\n \
|
||||
Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki\n\n\
|
||||
After installation, restart your terminal and try again.");
|
||||
}
|
||||
|
||||
// Initialize Tesseract
|
||||
let tess = Tesseract::new(None, Some("eng"))
|
||||
.map_err(|e| {
|
||||
anyhow::anyhow!("Failed to initialize Tesseract: {}\n\n\
|
||||
This usually means:\n1. Tesseract is not properly installed\n\
|
||||
2. Language data files are missing\n\nTo fix:\n \
|
||||
macOS: brew reinstall tesseract\n \
|
||||
Linux: sudo apt-get install tesseract-ocr-eng\n \
|
||||
Windows: Reinstall tesseract and ensure language files are included", e)
|
||||
})?;
|
||||
|
||||
let text = tess.set_image(_path)
|
||||
.map_err(|e| anyhow::anyhow!("Failed to load image '{}': {}", _path, e))?
|
||||
.get_text()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to extract text from image: {}", e))?;
|
||||
|
||||
// Get confidence (simplified - would need more complex API calls for per-word confidence)
|
||||
let confidence = 0.85; // Placeholder
|
||||
|
||||
Ok(OCRResult {
|
||||
text,
|
||||
confidence,
|
||||
bounds: Rect { x: 0, y: 0, width: 0, height: 0 }, // Would need image dimensions
|
||||
})
|
||||
}
|
||||
|
||||
async fn find_text_on_screen(&self, _text: &str) -> Result<Option<Point>> {
|
||||
// Check if tesseract is available on the system
|
||||
let tesseract_check = std::process::Command::new("which")
|
||||
.arg("tesseract")
|
||||
.output();
|
||||
|
||||
if tesseract_check.is_err() || !tesseract_check.as_ref().unwrap().status.success() {
|
||||
anyhow::bail!("Tesseract OCR is not installed on your system.\n\n\
|
||||
To install tesseract:\n macOS: brew install tesseract\n \
|
||||
Linux: sudo apt-get install tesseract-ocr (Ubuntu/Debian)\n \
|
||||
sudo yum install tesseract (RHEL/CentOS)\n \
|
||||
Windows: Download from https://github.com/UB-Mannheim/tesseract/wiki\n\n\
|
||||
After installation, restart your terminal and try again.");
|
||||
}
|
||||
|
||||
// Take full screen screenshot
|
||||
let temp_path = format!("/tmp/g3_ocr_search_{}.png", uuid::Uuid::new_v4());
|
||||
self.take_screenshot(&temp_path, None, None).await?;
|
||||
|
||||
// Use Tesseract to find text with bounding boxes
|
||||
let tess = Tesseract::new(None, Some("eng"))
|
||||
.map_err(|e| {
|
||||
anyhow::anyhow!("Failed to initialize Tesseract: {}\n\n\
|
||||
This usually means:\n1. Tesseract is not properly installed\n\
|
||||
2. Language data files are missing\n\nTo fix:\n \
|
||||
macOS: brew reinstall tesseract\n \
|
||||
Linux: sudo apt-get install tesseract-ocr-eng\n \
|
||||
Windows: Reinstall tesseract and ensure language files are included", e)
|
||||
})?;
|
||||
|
||||
let full_text = tess.set_image(temp_path.as_str())
|
||||
.map_err(|e| anyhow::anyhow!("Failed to load screenshot: {}", e))?
|
||||
.get_text()
|
||||
.map_err(|e| anyhow::anyhow!("Failed to extract text from screen: {}", e))?;
|
||||
|
||||
// Clean up temp file
|
||||
let _ = std::fs::remove_file(&temp_path);
|
||||
|
||||
// Simple text search - full implementation would use get_component_images
|
||||
// to get bounding boxes for each word
|
||||
if full_text.contains(_text) {
|
||||
tracing::warn!("Text found but precise coordinates not available in simplified implementation");
|
||||
Ok(Some(Point { x: 0, y: 0 }))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
#[cfg(test)]
mod window_matching_tests {
    /// Checks the space-insensitive window-name matching rules.
    ///
    /// Scenario: a user asks for a screenshot of "Goose Studio" while the
    /// application is actually called "GooseStudio" (no space). Matching must
    /// still succeed, which is achieved by also comparing the names with all
    /// spaces removed.
    ///
    /// Note that the rules are re-implemented inline here; this test does not
    /// call into the production matcher.
    #[test]
    fn test_space_normalization() {
        // (user_input, actual_app_name, should_match)
        let test_cases = [
            ("Goose Studio", "GooseStudio", true),
            ("GooseStudio", "Goose Studio", true),
            ("Visual Studio Code", "VisualStudioCode", true),
            ("Google Chrome", "Google Chrome", true),
            ("Safari", "Safari", true),
            ("iTerm", "iTerm2", true), // fuzzy match
            ("Code", "Visual Studio Code", true), // fuzzy match
        ];

        // True when either string contains the other.
        let either_contains = |a: &str, b: &str| a.contains(b) || b.contains(a);

        for &(user_input, app_name, should_match) in test_cases.iter() {
            let user_lower = user_input.to_lowercase();
            let app_lower = app_name.to_lowercase();

            let user_normalized = user_lower.replace(' ', "");
            let app_normalized = app_lower.replace(' ', "");

            let is_exact = app_lower == user_lower || app_normalized == user_normalized;
            let is_fuzzy = either_contains(&app_lower, &user_lower)
                || either_contains(&app_normalized, &user_normalized);

            let matches = is_exact || is_fuzzy;

            assert_eq!(
                matches, should_match,
                "Expected '{}' vs '{}' to match={}, but got match={}",
                user_input, app_name, should_match, matches
            );
        }
    }
}
|
||||
@@ -62,10 +62,15 @@ impl ComputerController for WindowsController {
|
||||
}
|
||||
|
||||
async fn take_screenshot(&self, _path: &str, _region: Option<Rect>, _window_id: Option<&str>) -> Result<()> {
|
||||
// Enforce that window_id must be provided
|
||||
if _window_id.is_none() {
|
||||
anyhow::bail!("window_id is required. You must specify which window to capture (e.g., 'Chrome', 'Terminal', 'Notepad'). Use list_windows to see available windows.");
|
||||
}
|
||||
|
||||
anyhow::bail!("Windows implementation not yet available")
|
||||
}
|
||||
|
||||
async fn extract_text_from_screen(&self, _region: Rect) -> Result<OCRResult> {
|
||||
async fn extract_text_from_screen(&self, _region: Rect, _window_id: &str) -> Result<String> {
|
||||
anyhow::bail!("Windows implementation not yet available")
|
||||
}
|
||||
|
||||
|
||||
@@ -7,3 +7,13 @@ pub struct Rect {
|
||||
pub width: i32,
|
||||
pub height: i32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct TextLocation {
|
||||
pub text: String,
|
||||
pub x: i32,
|
||||
pub y: i32,
|
||||
pub width: i32,
|
||||
pub height: i32,
|
||||
pub confidence: f32,
|
||||
}
|
||||
|
||||
@@ -1,23 +1,5 @@
|
||||
use g3_computer_control::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_mouse_movement() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Move mouse to center of screen (assuming 1920x1080)
|
||||
let result = controller.move_mouse(960, 540).await;
|
||||
assert!(result.is_ok(), "Failed to move mouse: {:?}", result.err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_typing() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Type some text
|
||||
let result = controller.type_text("Hello, World!").await;
|
||||
assert!(result.is_ok(), "Failed to type text: {:?}", result.err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_screenshot() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
@@ -33,30 +15,3 @@ async fn test_screenshot() {
|
||||
// Clean up
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_click() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Click at a safe location
|
||||
let result = controller.click(types::MouseButton::Left).await;
|
||||
assert!(result.is_ok(), "Failed to click: {:?}", result.err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_double_click() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Double click
|
||||
let result = controller.double_click(types::MouseButton::Left).await;
|
||||
assert!(result.is_ok(), "Failed to double click: {:?}", result.err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_press_key() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Press escape key
|
||||
let result = controller.press_key("escape").await;
|
||||
assert!(result.is_ok(), "Failed to press key: {:?}", result.err());
|
||||
}
|
||||
|
||||
24
crates/g3-computer-control/vision-bridge/Package.swift
Normal file
24
crates/g3-computer-control/vision-bridge/Package.swift
Normal file
@@ -0,0 +1,24 @@
|
||||
// swift-tools-version:5.9
import PackageDescription

// The package, its library product and its single target all share this name.
let bridgeName = "VisionBridge"

// Builds the bridge as a dynamic library for macOS 11 and later.
let package = Package(
    name: bridgeName,
    platforms: [.macOS(.v11)],
    products: [
        .library(name: bridgeName, type: .dynamic, targets: [bridgeName]),
    ],
    targets: [
        .target(
            name: bridgeName,
            dependencies: [],
            path: "Sources/VisionBridge",
            publicHeadersPath: "."
        ),
    ]
)
|
||||
@@ -0,0 +1,39 @@
|
||||
#ifndef VisionBridge_h
#define VisionBridge_h

#include <stdint.h>
#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * One recognised piece of text and its bounding box, as passed across the FFI
 * boundary.
 *
 * The Swift implementation in this bridge fills x/y/width/height with pixel
 * coordinates relative to the top-left corner of the image, allocates `text`
 * with strdup(), and sets `text_len` to the UTF-8 byte count of the text.
 */
typedef struct {
    const char* text;     /* recognised text; owned by the box array */
    uint32_t text_len;    /* length of `text` in bytes */
    int32_t x;            /* left edge of the bounding box */
    int32_t y;            /* top edge of the bounding box */
    int32_t width;        /* bounding box width */
    int32_t height;       /* bounding box height */
    float confidence;     /* recognition confidence for this box */
} VisionTextBox;

/*
 * Recognises text in the image at `image_path` (`image_path_len` bytes) and
 * returns its bounding boxes.
 *
 * On success returns true and stores a newly allocated array in `*out_boxes`
 * and its element count in `*out_count`. Returns false on failure.
 *
 * The caller must release the returned array with vision_free_boxes().
 */
bool vision_recognize_text(
    const char* image_path,
    uint32_t image_path_len,
    VisionTextBox** out_boxes,
    uint32_t* out_count
);

/*
 * Frees an array returned by vision_recognize_text(), including the text of
 * each of its `count` boxes.
 */
void vision_free_boxes(VisionTextBox* boxes, uint32_t count);

#ifdef __cplusplus
}
#endif

#endif /* VisionBridge_h */
|
||||
@@ -0,0 +1,145 @@
|
||||
import Foundation
|
||||
import Vision
|
||||
import AppKit
|
||||
import CoreGraphics
|
||||
|
||||
// MARK: - C Bridge Functions
|
||||
|
||||
/// Recognises text in an image file using the Vision framework and hands the
/// results back to C callers.
///
/// - Parameters:
///   - imagePath: UTF-8 bytes of the image path.
///   - imagePathLen: number of bytes to read from `imagePath`.
///   - outBoxes: receives a pointer to a newly allocated array of `CTextBox`.
///   - outCount: receives the number of elements in that array.
/// - Returns: `true` on success. `false` if the path is not valid UTF-8, the
///   image cannot be loaded, or the Vision request fails; the out-parameters
///   are left untouched in that case.
///
/// The returned array, and the `strdup`-allocated text of each box, must be
/// released with `vision_free_boxes`.
@_cdecl("vision_recognize_text")
public func vision_recognize_text(
    _ imagePath: UnsafePointer<CChar>,
    _ imagePathLen: UInt32,
    _ outBoxes: UnsafeMutablePointer<UnsafeMutableRawPointer?>,
    _ outCount: UnsafeMutablePointer<UInt32>
) -> Bool {
    // Decode the (pointer, length) pair into a Swift string
    let pathBytes = Data(bytes: imagePath, count: Int(imagePathLen))
    guard let decodedPath = String(bytes: pathBytes, encoding: .utf8) else {
        return false
    }
    let path = decodedPath.trimmingCharacters(in: .whitespaces)

    // Load image
    guard let image = NSImage(contentsOfFile: path) else {
        return false
    }
    guard let cgImage = image.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
        return false
    }

    // State shared with the completion handler below
    var textBoxes: [CTextBox] = []
    var success = false
    let finished = DispatchSemaphore(value: 0)

    let request = VNRecognizeTextRequest { completed, failure in
        defer { finished.signal() }

        if let failure = failure {
            print("Vision OCR error: \(failure.localizedDescription)")
            return
        }

        guard let observations = completed.results as? [VNRecognizedTextObservation] else {
            return
        }

        let imageSize = CGSize(width: cgImage.width, height: cgImage.height)

        for observation in observations {
            // Skip observations that produced no candidate string
            guard let best = observation.topCandidates(1).first else { continue }

            let recognized = best.string
            let box = observation.boundingBox

            // Convert normalized coordinates (bottom-left origin) to pixel coordinates (top-left origin)
            let pixelX = Int32(box.origin.x * imageSize.width)
            let pixelY = Int32((1.0 - box.origin.y - box.height) * imageSize.height)
            let pixelWidth = Int32(box.width * imageSize.width)
            let pixelHeight = Int32(box.height * imageSize.height)

            // The C string is heap-allocated here and released in vision_free_boxes
            let entry = CTextBox(
                text: strdup(recognized),
                text_len: UInt32(recognized.utf8.count),
                x: pixelX,
                y: pixelY,
                width: pixelWidth,
                height: pixelHeight,
                confidence: observation.confidence
            )
            textBoxes.append(entry)
        }

        success = true
    }

    // Configure request for best accuracy
    request.recognitionLevel = .accurate
    request.usesLanguageCorrection = true
    request.recognitionLanguages = ["en-US"]

    // Perform request
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    do {
        try handler.perform([request])
    } catch {
        print("Vision request failed: \(error.localizedDescription)")
        return false
    }

    // Wait for the completion handler to signal
    finished.wait()

    guard success else {
        return false
    }

    // Copy the results into a heap array the caller takes ownership of
    let resultCount = textBoxes.count
    let boxesPtr = UnsafeMutablePointer<CTextBox>.allocate(capacity: resultCount)
    for slot in 0..<resultCount {
        boxesPtr[slot] = textBoxes[slot]
    }

    outBoxes.pointee = UnsafeMutableRawPointer(boxesPtr)
    outCount.pointee = UInt32(resultCount)

    return true
}
|
||||
|
||||
/// Releases an array of boxes produced by `vision_recognize_text`.
///
/// Frees the text of each of the first `count` boxes and then the array
/// itself.
///
/// - Parameters:
///   - boxes: pointer returned through `out_boxes`. Passing NULL is allowed
///     and does nothing, matching the convention of C `free`.
///   - count: number of boxes in the array, as returned through `out_count`.
@_cdecl("vision_free_boxes")
public func vision_free_boxes(
    _ boxes: UnsafeMutableRawPointer?,
    _ count: UInt32
) {
    // A NULL pointer coming from C would otherwise trap when unwrapped
    guard let boxes = boxes else {
        return
    }

    let typedBoxes = boxes.assumingMemoryBound(to: CTextBox.self)
    for i in 0..<Int(count) {
        // Each text was allocated with strdup, so it is released with free
        if let text = typedBoxes[i].text {
            free(UnsafeMutableRawPointer(mutating: text))
        }
    }
    typedBoxes.deallocate()
}
|
||||
|
||||
// MARK: - C-Compatible Structure
|
||||
|
||||
/// Swift-side counterpart of the C `VisionTextBox` struct: one recognised
/// piece of text with its bounding box and confidence.
// NOTE(review): field order and types mirror VisionTextBox in VisionBridge.h;
// the two definitions presumably need to stay in sync - confirm.
public struct CTextBox {
    /// C string holding the recognised text, or nil.
    public let text: UnsafePointer<CChar>?
    /// Length of `text`.
    public let text_len: UInt32
    /// Horizontal position of the bounding box.
    public let x: Int32
    /// Vertical position of the bounding box.
    public let y: Int32
    /// Width of the bounding box.
    public let width: Int32
    /// Height of the bounding box.
    public let height: Int32
    /// Confidence attached to this box.
    public let confidence: Float

    /// Memberwise initializer; stores every argument unchanged.
    public init(
        text: UnsafePointer<CChar>?,
        text_len: UInt32,
        x: Int32,
        y: Int32,
        width: Int32,
        height: Int32,
        confidence: Float
    ) {
        (self.text, self.text_len) = (text, text_len)
        (self.x, self.y) = (x, y)
        (self.width, self.height) = (width, height)
        self.confidence = confidence
    }
}
|
||||
@@ -8,6 +8,7 @@ pub struct Config {
|
||||
pub agent: AgentConfig,
|
||||
pub computer_control: ComputerControlConfig,
|
||||
pub webdriver: WebDriverConfig,
|
||||
pub macax: MacAxConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -79,6 +80,19 @@ pub struct WebDriverConfig {
|
||||
pub safari_port: u16,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MacAxConfig {
|
||||
pub enabled: bool,
|
||||
}
|
||||
|
||||
impl Default for MacAxConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for WebDriverConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
@@ -124,6 +138,7 @@ impl Default for Config {
|
||||
},
|
||||
computer_control: ComputerControlConfig::default(),
|
||||
webdriver: WebDriverConfig::default(),
|
||||
macax: MacAxConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -238,6 +253,7 @@ impl Config {
|
||||
},
|
||||
computer_control: ComputerControlConfig::default(),
|
||||
webdriver: WebDriverConfig::default(),
|
||||
macax: MacAxConfig::default(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -156,15 +156,15 @@ pub fn fixed_filter_json_tool_calls(content: &str) -> String {
|
||||
}
|
||||
|
||||
// No JSON tool call detected, return only the new content we haven't returned yet
|
||||
let new_content = if state.buffer.len() > state.content_returned_up_to {
|
||||
|
||||
|
||||
if state.buffer.len() > state.content_returned_up_to {
|
||||
let result = state.buffer[state.content_returned_up_to..].to_string();
|
||||
state.content_returned_up_to = state.buffer.len();
|
||||
result
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
|
||||
new_content
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -104,6 +104,7 @@ impl Project {
|
||||
}
|
||||
|
||||
/// Recursively check a directory for implementation files
|
||||
#[allow(clippy::only_used_in_recursion)]
|
||||
fn check_dir_for_implementation_files(&self, dir: &Path) -> bool {
|
||||
// Common source file extensions
|
||||
let extensions = vec![
|
||||
|
||||
37
crates/g3-core/src/take_screenshot_test.rs
Normal file
37
crates/g3-core/src/take_screenshot_test.rs
Normal file
@@ -0,0 +1,37 @@
|
||||
// Test to verify take_screenshot requires window_id
|
||||
|
||||
#[cfg(test)]
mod take_screenshot_tests {
    use super::*;
    use serde_json::json;

    /// A `take_screenshot` tool call built without `window_id` exposes no
    /// such argument.
    #[test]
    fn test_take_screenshot_requires_window_id() {
        // Arguments deliberately omit window_id
        let args = json!({ "path": "test.png" });
        let tool_call = ToolCall {
            tool: String::from("take_screenshot"),
            args,
        };

        let window_id = tool_call.args.get("window_id");
        assert!(window_id.is_none());
    }

    /// A `take_screenshot` tool call built with `window_id` exposes it as a
    /// string with the given value.
    #[test]
    fn test_take_screenshot_with_window_id() {
        // Arguments include window_id
        let args = json!({
            "path": "test.png",
            "window_id": "Safari"
        });
        let tool_call = ToolCall {
            tool: String::from("take_screenshot"),
            args,
        };

        let window_id = tool_call.args.get("window_id");
        assert!(window_id.is_some());
        assert_eq!(window_id.unwrap().as_str().unwrap(), "Safari");
    }
}
|
||||
@@ -17,6 +17,9 @@ pub trait UiWriter: Send + Sync {
|
||||
/// Print a context window status message
|
||||
fn print_context_status(&self, message: &str);
|
||||
|
||||
/// Print a context thinning success message with highlight and animation
|
||||
fn print_context_thinning(&self, message: &str);
|
||||
|
||||
/// Print a tool execution header
|
||||
fn print_tool_header(&self, tool_name: &str);
|
||||
|
||||
@@ -49,6 +52,10 @@ pub trait UiWriter: Send + Sync {
|
||||
|
||||
/// Flush any buffered output
|
||||
fn flush(&self);
|
||||
|
||||
/// Returns true if this UI writer wants full, untruncated output
|
||||
/// Default is false (truncate for human readability)
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
}
|
||||
|
||||
/// A no-op implementation for when UI output is not needed
|
||||
@@ -60,6 +67,7 @@ impl UiWriter for NullUiWriter {
|
||||
fn print_inline(&self, _message: &str) {}
|
||||
fn print_system_prompt(&self, _prompt: &str) {}
|
||||
fn print_context_status(&self, _message: &str) {}
|
||||
fn print_context_thinning(&self, _message: &str) {}
|
||||
fn print_tool_header(&self, _tool_name: &str) {}
|
||||
fn print_tool_arg(&self, _key: &str, _value: &str) {}
|
||||
fn print_tool_output_header(&self) {}
|
||||
@@ -71,4 +79,5 @@ impl UiWriter for NullUiWriter {
|
||||
fn print_agent_response(&self, _content: &str) {}
|
||||
fn notify_sse_received(&self) {}
|
||||
fn flush(&self) {}
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
}
|
||||
@@ -72,7 +72,7 @@ fn test_thin_context_basic() {
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let summary = context.thin_context();
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
@@ -93,6 +93,119 @@ fn test_thin_context_basic() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Thinning must replace the large `content` argument of an early
/// `write_file` tool call with a reference to an external file.
#[test]
fn test_thin_write_file_tool_calls() {
    let mut context = ContextWindow::new(10000);

    // An assistant message carrying a write_file tool call with large content
    let large_content = "x".repeat(1500);
    let tool_call_json = format!(
        r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
        large_content
    );

    // Opening exchange: request, tool call, tool result
    let opening = vec![
        (MessageRole::User, "Please create a large file".to_string()),
        (
            MessageRole::Assistant,
            format!("I'll create that file.\n\n{}", tool_call_json),
        ),
        (
            MessageRole::User,
            "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
        ),
    ];
    for (role, content) in opening {
        context.add_message(Message { role, content });
    }

    // Pad the history so the tool call falls inside the "first third"
    for n in 0..6 {
        let content = format!("Response {}", n);
        context.add_message(Message {
            role: MessageRole::Assistant,
            content,
        });
    }

    // Trigger thinning at 50%
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();

    println!("Thinning summary: {}", summary);

    // Should have thinned the write_file tool call
    assert!(summary.contains("tool call") || summary.contains("chars saved"));

    // Every write_file tool call in the first third must now point at an
    // external file instead of carrying the large content inline
    let first_third_end = context.conversation_history.len() / 3;
    let thinned_candidates = context
        .conversation_history
        .iter()
        .take(first_third_end)
        .filter(|msg| matches!(msg.role, MessageRole::Assistant) && msg.content.contains("write_file"));

    for msg in thinned_candidates {
        assert!(msg.content.contains("<content saved to"));
        assert!(!msg.content.contains(&large_content));
    }
}
|
||||
|
||||
/// Thinning must replace the large `diff` argument of an early `str_replace`
/// tool call with a reference to an external file.
#[test]
fn test_thin_str_replace_tool_calls() {
    let mut context = ContextWindow::new(10000);

    // An assistant message carrying a str_replace tool call with a large diff
    let removed_lines = "-old line\n".repeat(100);
    let added_lines = "+new line\n".repeat(100);
    let large_diff = format!("--- old\n{}\n+++ new\n{}", removed_lines, added_lines);
    let tool_call_json = format!(
        r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
        large_diff.replace('\n', "\\n")
    );

    // Opening exchange: request, tool call, tool result
    let opening = vec![
        (MessageRole::User, "Please update the file".to_string()),
        (
            MessageRole::Assistant,
            format!("I'll update that file.\n\n{}", tool_call_json),
        ),
        (
            MessageRole::User,
            "Tool result: ✅ applied unified diff".to_string(),
        ),
    ];
    for (role, content) in opening {
        context.add_message(Message { role, content });
    }

    // Pad the history so the tool call falls inside the "first third"
    for n in 0..6 {
        let content = format!("Response {}", n);
        context.add_message(Message {
            role: MessageRole::Assistant,
            content,
        });
    }

    // Trigger thinning at 50%
    context.used_tokens = 5000;
    let (summary, _chars_saved) = context.thin_context();

    println!("Thinning summary: {}", summary);

    // Should have thinned the str_replace tool call
    assert!(summary.contains("tool call") || summary.contains("chars saved"));

    // Every str_replace tool call in the first third must now point at an
    // external file and no longer carry the diff body inline
    let first_third_end = context.conversation_history.len() / 3;
    let thinned_candidates = context
        .conversation_history
        .iter()
        .take(first_third_end)
        .filter(|msg| matches!(msg.role, MessageRole::Assistant) && msg.content.contains("str_replace"));

    for msg in thinned_candidates {
        assert!(msg.content.contains("<diff saved to"));
        assert!(!msg.content.contains("old line"));
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_thin_context_no_large_results() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
@@ -106,10 +219,10 @@ fn test_thin_context_no_large_results() {
|
||||
}
|
||||
|
||||
context.used_tokens = 5000;
|
||||
let summary = context.thin_context();
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
// Should report no large results found
|
||||
assert!(summary.contains("no large tool results found"));
|
||||
assert!(summary.contains("no large tool results or tool calls found"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -135,7 +248,7 @@ fn test_thin_context_only_affects_first_third() {
|
||||
}
|
||||
|
||||
context.used_tokens = 5000;
|
||||
let summary = context.thin_context();
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
// First third is 4 messages (indices 0-3), so only indices 1 and 3 should be thinned
|
||||
// That's 2 tool results
|
||||
|
||||
@@ -166,6 +166,31 @@ impl CodeExecutor {
|
||||
|
||||
/// Execute Bash code
|
||||
async fn execute_bash(&self, code: &str) -> Result<ExecutionResult> {
|
||||
// Check if this is a detached/daemon command that should run independently
|
||||
let is_detached = code.trim_start().starts_with("setsid ")
|
||||
|| code.trim_start().starts_with("nohup ")
|
||||
|| code.contains(" disown")
|
||||
|| (code.contains(" &") && (code.contains("nohup") || code.contains("setsid")));
|
||||
|
||||
if is_detached {
|
||||
// For detached commands, just spawn and return immediately
|
||||
use std::process::Stdio;
|
||||
Command::new("bash")
|
||||
.arg("-c")
|
||||
.arg(code)
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::null())
|
||||
.stderr(Stdio::null())
|
||||
.spawn()?;
|
||||
|
||||
return Ok(ExecutionResult {
|
||||
stdout: "✅ Command launched in background (detached process)".to_string(),
|
||||
stderr: String::new(),
|
||||
exit_code: 0,
|
||||
success: true,
|
||||
});
|
||||
}
|
||||
|
||||
let output = Command::new("bash")
|
||||
.arg("-c")
|
||||
.arg(code)
|
||||
@@ -221,6 +246,29 @@ impl CodeExecutor {
|
||||
use tokio::io::{AsyncBufReadExt, BufReader};
|
||||
use tokio::process::Command as TokioCommand;
|
||||
|
||||
// Check if this is a detached/daemon command that should run independently
|
||||
// Look for patterns like: setsid, nohup with &, or explicit backgrounding with disown
|
||||
let is_detached = code.trim_start().starts_with("setsid ")
|
||||
|| code.trim_start().starts_with("nohup ")
|
||||
|| code.contains(" disown")
|
||||
|| (code.contains(" &") && (code.contains("nohup") || code.contains("setsid")));
|
||||
|
||||
if is_detached {
|
||||
// For detached commands, just spawn and return immediately
|
||||
TokioCommand::new("bash")
|
||||
.arg("-c")
|
||||
.arg(code)
|
||||
.spawn()?;
|
||||
|
||||
// Don't wait for the process - it's meant to run independently
|
||||
return Ok(ExecutionResult {
|
||||
stdout: "✅ Command launched in background (detached process)".to_string(),
|
||||
stderr: String::new(),
|
||||
exit_code: 0,
|
||||
success: true,
|
||||
});
|
||||
}
|
||||
|
||||
let mut child = TokioCommand::new("bash")
|
||||
.arg("-c")
|
||||
.arg(code)
|
||||
@@ -259,7 +307,7 @@ impl CodeExecutor {
|
||||
line = stderr_lines.next_line() => {
|
||||
match line {
|
||||
Ok(Some(line)) => {
|
||||
receiver.on_output_line(&format!("{}", line));
|
||||
receiver.on_output_line(&line.to_string());
|
||||
stderr_output.push(line);
|
||||
}
|
||||
Ok(None) => {}, // stderr EOF, continue
|
||||
|
||||
@@ -213,7 +213,7 @@ impl DatabricksProvider {
|
||||
|
||||
let mut builder = self
|
||||
.client
|
||||
.post(&format!(
|
||||
.post(format!(
|
||||
"{}/serving-endpoints/{}/invocations",
|
||||
self.host, self.model
|
||||
))
|
||||
@@ -882,6 +882,14 @@ impl LLMProvider for DatabricksProvider {
|
||||
request.messages.len()
|
||||
);
|
||||
|
||||
// Debug: Log tool count
|
||||
if let Some(ref tools) = request.tools {
|
||||
debug!("Request has {} tools", tools.len());
|
||||
for tool in tools.iter().take(5) {
|
||||
debug!(" Tool: {}", tool.name);
|
||||
}
|
||||
}
|
||||
|
||||
let max_tokens = request.max_tokens.unwrap_or(self.max_tokens);
|
||||
let temperature = request.temperature.unwrap_or(self.temperature);
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ async fn get_workspace_endpoints(host: &str) -> Result<OidcEndpoints> {
|
||||
if !resp.status().is_success() {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Failed to get OIDC configuration from {}",
|
||||
oidc_url.to_string()
|
||||
oidc_url
|
||||
));
|
||||
}
|
||||
|
||||
|
||||
@@ -259,7 +259,7 @@ impl LLMProvider for OpenAIProvider {
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&format!("{}/chat/completions", self.base_url))
|
||||
.post(format!("{}/chat/completions", self.base_url))
|
||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
||||
.json(&body)
|
||||
.send()
|
||||
@@ -318,7 +318,7 @@ impl LLMProvider for OpenAIProvider {
|
||||
|
||||
let response = self
|
||||
.client
|
||||
.post(&format!("{}/chat/completions", self.base_url))
|
||||
.post(format!("{}/chat/completions", self.base_url))
|
||||
.header("Authorization", format!("Bearer {}", self.api_key))
|
||||
.json(&body)
|
||||
.send()
|
||||
|
||||
@@ -1,75 +0,0 @@
|
||||
# Coach-Player Provider Configuration
|
||||
|
||||
G3 now supports specifying different LLM providers for the coach and player agents when running in autonomous mode. This allows you to optimize for different requirements:
|
||||
|
||||
- **Player**: The agent that implements code - might benefit from a faster, more cost-effective model
|
||||
- **Coach**: The agent that reviews code - might benefit from a more powerful, analytical model
|
||||
|
||||
## Configuration
|
||||
|
||||
In your `config.toml` file, under the `[providers]` section, you can specify:
|
||||
|
||||
```toml
|
||||
[providers]
|
||||
default_provider = "databricks" # Used for normal operations
|
||||
coach = "databricks" # Provider for coach (code reviewer)
|
||||
player = "anthropic" # Provider for player (code implementer)
|
||||
```
|
||||
|
||||
If either `coach` or `player` is not specified, it defaults to the `default_provider`.
|
||||
|
||||
## Example Use Cases
|
||||
|
||||
### Cost Optimization
|
||||
Use a cheaper, faster model for initial implementations (player) and a more powerful model for review (coach):
|
||||
|
||||
```toml
|
||||
coach = "anthropic" # Claude Sonnet for thorough review
|
||||
player = "anthropic" # Claude Haiku for quick implementation
|
||||
```
|
||||
|
||||
### Speed vs Quality Trade-off
|
||||
Use a local embedded model for fast iterations (player) and a cloud model for quality review (coach):
|
||||
|
||||
```toml
|
||||
coach = "databricks" # Cloud model for quality review
|
||||
player = "embedded" # Local model for fast implementation
|
||||
```
|
||||
|
||||
### Specialized Models
|
||||
Use different models optimized for different tasks:
|
||||
|
||||
```toml
|
||||
coach = "databricks" # Model fine-tuned for code review
|
||||
player = "openai" # Model optimized for code generation
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
- Both providers must be properly configured in your config file
|
||||
- Each provider must have valid credentials
|
||||
- The models specified for each provider must be accessible
|
||||
|
||||
## How It Works
|
||||
|
||||
When running in autonomous mode (`g3 --autonomous`), the system will:
|
||||
|
||||
1. Use the `player` provider (or default) for the initial implementation
|
||||
2. Switch to the `coach` provider (or default) for code review
|
||||
3. Return to the `player` provider for implementing feedback
|
||||
4. Continue this cycle for the specified number of turns
|
||||
|
||||
The providers are logged at startup so you can verify which models are being used:
|
||||
|
||||
```
|
||||
🎮 Player provider: anthropic
|
||||
👨‍🏫 Coach provider: databricks
|
||||
ℹ️ Using different providers for player and coach
|
||||
```
|
||||
|
||||
## Benefits
|
||||
|
||||
- **Cost Efficiency**: Use expensive models only where they add the most value
|
||||
- **Speed Optimization**: Use faster models for iterative development
|
||||
- **Specialization**: Leverage models that excel at specific tasks
|
||||
- **Flexibility**: Easy to experiment with different provider combinations
|
||||
39
test-ai-requirements.sh
Executable file
39
test-ai-requirements.sh
Executable file
@@ -0,0 +1,39 @@
|
||||
#!/bin/bash
#
# Test script for AI-enhanced interactive requirements mode.
#
# Creates a fresh scratch workspace and prints the manual steps for
# exercising `--interactive-requirements`. It does not run cargo itself.
#
# Usage: ./test-ai-requirements.sh

set -euo pipefail

echo "Testing AI-enhanced interactive requirements mode..."
echo ""

# Create a test workspace. mktemp -d picks an unpredictable name and creates
# the directory exclusively (owner-only permissions), so an existing or
# attacker-planted path under /tmp is never silently reused.
TEST_WORKSPACE="$(mktemp -d /tmp/g3-test-interactive-XXXXXX)" || {
  echo "error: failed to create test workspace under /tmp" >&2
  exit 1
}
readonly TEST_WORKSPACE

echo "Test workspace: $TEST_WORKSPACE"
echo ""

# Sample brief input the tester is expected to type into the prompt.
readonly BRIEF_INPUT="build a calculator cli in rust with basic operations"

echo "Brief input:"
echo "---"
printf '%s\n' "$BRIEF_INPUT"
echo "---"
echo ""

echo "This will:"
echo "1. Send brief input to AI"
echo "2. AI generates structured requirements.md"
echo "3. Show enhanced requirements"
echo "4. Prompt for confirmation (y/e/n)"
echo ""

echo "To test manually, run:"
# %q shell-quotes the path so the printed command can be pasted verbatim
# even if the workspace path contains special characters.
printf 'cargo run -- --autonomous --interactive-requirements --workspace %q\n' \
  "$TEST_WORKSPACE"
echo ""
echo "Then type: $BRIEF_INPUT"
echo "Press Ctrl+D"
echo "Review the AI-generated requirements"
echo "Choose 'y' to proceed, 'e' to edit, or 'n' to cancel"
echo ""

echo "Test workspace will be at: $TEST_WORKSPACE"
|
||||
Reference in New Issue
Block a user