coach rigor +++
This commit is contained in:
@@ -243,6 +243,10 @@ pub struct Cli {
|
||||
/// Enable macOS Accessibility API tools for native app automation
|
||||
#[arg(long)]
|
||||
pub macax: bool,
|
||||
|
||||
/// Enable WebDriver browser automation tools
|
||||
#[arg(long)]
|
||||
pub webdriver: bool,
|
||||
}
|
||||
|
||||
pub async fn run() -> Result<()> {
|
||||
@@ -451,6 +455,11 @@ Output ONLY the markdown content, no explanations or meta-commentary."#,
|
||||
}
|
||||
}
|
||||
|
||||
// Apply webdriver flag override
|
||||
if cli.webdriver {
|
||||
config.webdriver.enabled = true;
|
||||
}
|
||||
|
||||
// Validate provider if specified
|
||||
if let Some(ref provider) = cli.provider {
|
||||
let valid_providers = ["anthropic", "databricks", "embedded", "openai"];
|
||||
@@ -1630,6 +1639,7 @@ Review the current state of the project and provide a concise critique focusing
|
||||
2. Whether the project compiles successfully
|
||||
3. What requirements are missing or incorrect
|
||||
4. Specific improvements needed to satisfy requirements
|
||||
5. Use UI tools such as webdriver to test functionality thoroughly
|
||||
|
||||
CRITICAL INSTRUCTIONS:
|
||||
1. You MUST use the final_output tool to provide your feedback
|
||||
@@ -1637,13 +1647,13 @@ CRITICAL INSTRUCTIONS:
|
||||
3. Focus ONLY on what needs to be fixed or improved
|
||||
4. Do NOT include your analysis process, file contents, or compilation output in the summary
|
||||
|
||||
If the implementation generally meets all requirements and compiles without errors:
|
||||
If the implementation thoroughly meets all requirements, compiles and is fully tested (especially UI flows) *WITHOUT* gaps or errors:
|
||||
- Call final_output with summary: 'IMPLEMENTATION_APPROVED'
|
||||
|
||||
If improvements are needed:
|
||||
- Call final_output with a brief summary listing ONLY the specific issues to fix
|
||||
|
||||
Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and generally fits the requirements. Don't be picky.",
|
||||
Remember: Be clear in your review and concise in your feedback. APPROVE if the implementation works and thoroughly fits the requirements (implementation > 95% complete). Be rigorous, especially by testing that all UI features work.",
|
||||
requirements
|
||||
);
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use core_graphics::window::{kCGWindowListOptionOnScreenOnly, kCGNullWindowID, CGWindowListCopyWindowInfo};
|
||||
use core_foundation::dictionary::CFDictionary;
|
||||
use core_foundation::string::CFString;
|
||||
use core_foundation::base::TCFType;
|
||||
use core_foundation::base::{TCFType, ToVoid};
|
||||
|
||||
fn main() {
|
||||
println!("Listing all on-screen windows...");
|
||||
@@ -22,7 +22,7 @@ fn main() {
|
||||
|
||||
// Get window ID
|
||||
let window_id_key = CFString::from_static_string("kCGWindowNumber");
|
||||
let window_id: i64 = if let Some(value) = dict.find(window_id_key.as_concrete_TypeRef()) {
|
||||
let window_id: i64 = if let Some(value) = dict.find(window_id_key.to_void()) {
|
||||
let num: core_foundation::number::CFNumber = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
num.to_i64().unwrap_or(0)
|
||||
} else {
|
||||
@@ -31,7 +31,7 @@ fn main() {
|
||||
|
||||
// Get owner name
|
||||
let owner_key = CFString::from_static_string("kCGWindowOwnerName");
|
||||
let owner: String = if let Some(value) = dict.find(owner_key.as_concrete_TypeRef()) {
|
||||
let owner: String = if let Some(value) = dict.find(owner_key.to_void()) {
|
||||
let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
s.to_string()
|
||||
} else {
|
||||
@@ -40,7 +40,7 @@ fn main() {
|
||||
|
||||
// Get window name/title
|
||||
let name_key = CFString::from_static_string("kCGWindowName");
|
||||
let title: String = if let Some(value) = dict.find(name_key.as_concrete_TypeRef()) {
|
||||
let title: String = if let Some(value) = dict.find(name_key.to_void()) {
|
||||
let s: CFString = TCFType::wrap_under_get_rule(*value as *const _);
|
||||
s.to_string()
|
||||
} else {
|
||||
|
||||
@@ -1,23 +1,5 @@
|
||||
use g3_computer_control::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_mouse_movement() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Move mouse to center of screen (assuming 1920x1080)
|
||||
let result = controller.move_mouse(960, 540).await;
|
||||
assert!(result.is_ok(), "Failed to move mouse: {:?}", result.err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_typing() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Type some text
|
||||
let result = controller.type_text("Hello, World!").await;
|
||||
assert!(result.is_ok(), "Failed to type text: {:?}", result.err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_screenshot() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
@@ -33,30 +15,3 @@ async fn test_screenshot() {
|
||||
// Clean up
|
||||
let _ = std::fs::remove_file(path);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_click() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Click at a safe location
|
||||
let result = controller.click(types::MouseButton::Left).await;
|
||||
assert!(result.is_ok(), "Failed to click: {:?}", result.err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_double_click() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Double click
|
||||
let result = controller.double_click(types::MouseButton::Left).await;
|
||||
assert!(result.is_ok(), "Failed to double click: {:?}", result.err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_press_key() {
|
||||
let controller = create_controller().expect("Failed to create controller");
|
||||
|
||||
// Press escape key
|
||||
let result = controller.press_key("escape").await;
|
||||
assert!(result.is_ok(), "Failed to press key: {:?}", result.err());
|
||||
}
|
||||
|
||||
@@ -466,6 +466,7 @@ Format this as a detailed but concise summary that can be used to resume the con
|
||||
let first_third_end = (total_messages / 3).max(1);
|
||||
|
||||
let mut leaned_count = 0;
|
||||
let mut tool_call_leaned_count = 0;
|
||||
let mut chars_saved = 0;
|
||||
|
||||
// Create ~/tmp directory if it doesn't exist
|
||||
@@ -478,7 +479,7 @@ Format this as a detailed but concise summary that can be used to resume the con
|
||||
// Scan the first third of messages
|
||||
for i in 0..first_third_end {
|
||||
if let Some(message) = self.conversation_history.get_mut(i) {
|
||||
// Only process User messages that look like tool results
|
||||
// Process User messages that look like tool results
|
||||
if matches!(message.role, MessageRole::User) && message.content.starts_with("Tool result:") {
|
||||
let content_len = message.content.len();
|
||||
|
||||
@@ -508,6 +509,109 @@ Format this as a detailed but concise summary that can be used to resume the con
|
||||
debug!("Thinned tool result {} ({} chars) to {}", i, original_len, file_path);
|
||||
}
|
||||
}
|
||||
|
||||
// Process Assistant messages that contain tool calls with large arguments
|
||||
if matches!(message.role, MessageRole::Assistant) {
|
||||
// Try to parse the message content as JSON to find tool calls
|
||||
let content = &message.content;
|
||||
|
||||
// Look for JSON tool call patterns
|
||||
if let Some(tool_call_start) = content.find(r#"{"tool":"#)
|
||||
.or_else(|| content.find(r#"{ "tool":"#))
|
||||
.or_else(|| content.find(r#"{"tool" :"#))
|
||||
.or_else(|| content.find(r#"{ "tool" :"#))
|
||||
{
|
||||
// Try to extract and parse the JSON tool call
|
||||
let json_portion = &content[tool_call_start..];
|
||||
|
||||
// Find the end of the JSON object
|
||||
if let Some(json_end) = Self::find_json_end(json_portion) {
|
||||
let json_str = &json_portion[..=json_end];
|
||||
|
||||
// Try to parse as ToolCall
|
||||
if let Ok(mut tool_call) = serde_json::from_str::<ToolCall>(json_str) {
|
||||
let mut modified = false;
|
||||
|
||||
// Handle write_file tool calls
|
||||
if tool_call.tool == "write_file" {
|
||||
if let Some(args_obj) = tool_call.args.as_object_mut() {
|
||||
// Extract content to avoid borrow issues
|
||||
let content_info = args_obj.get("content")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| (s.to_string(), s.len()));
|
||||
|
||||
if let Some((content_str, content_len)) = content_info {
|
||||
// Only thin if content is greater than 1000 chars
|
||||
if content_len > 1000 {
|
||||
let timestamp = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
let filename = format!("leaned_write_file_content_{}_{}.txt", timestamp, i);
|
||||
let file_path = format!("{}/{}", tmp_dir, filename);
|
||||
|
||||
if std::fs::write(&file_path, &content_str).is_ok() {
|
||||
args_obj.insert(
|
||||
"content".to_string(),
|
||||
serde_json::Value::String(format!("<content saved to {}>", file_path))
|
||||
);
|
||||
modified = true;
|
||||
chars_saved += content_len;
|
||||
tool_call_leaned_count += 1;
|
||||
debug!("Thinned write_file content {} ({} chars) to {}", i, content_len, file_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Handle str_replace tool calls
|
||||
if tool_call.tool == "str_replace" {
|
||||
if let Some(args_obj) = tool_call.args.as_object_mut() {
|
||||
// Extract diff to avoid borrow issues
|
||||
let diff_info = args_obj.get("diff")
|
||||
.and_then(|v| v.as_str())
|
||||
.map(|s| (s.to_string(), s.len()));
|
||||
|
||||
if let Some((diff_str, diff_len)) = diff_info {
|
||||
// Only thin if diff is greater than 1000 chars
|
||||
if diff_len > 1000 {
|
||||
let timestamp = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap_or_default()
|
||||
.as_secs();
|
||||
let filename = format!("leaned_str_replace_diff_{}_{}.txt", timestamp, i);
|
||||
let file_path = format!("{}/{}", tmp_dir, filename);
|
||||
|
||||
if std::fs::write(&file_path, &diff_str).is_ok() {
|
||||
args_obj.insert(
|
||||
"diff".to_string(),
|
||||
serde_json::Value::String(format!("<diff saved to {}>", file_path))
|
||||
);
|
||||
modified = true;
|
||||
chars_saved += diff_len;
|
||||
tool_call_leaned_count += 1;
|
||||
debug!("Thinned str_replace diff {} ({} chars) to {}", i, diff_len, file_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we modified the tool call, reconstruct the message
|
||||
if modified {
|
||||
let prefix = &content[..tool_call_start];
|
||||
let suffix = &content[tool_call_start + json_str.len()..];
|
||||
|
||||
// Serialize the modified tool call
|
||||
if let Ok(new_json) = serde_json::to_string(&tool_call) {
|
||||
message.content = format!("{}{}{}", prefix, new_json, suffix);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -515,10 +619,18 @@ Format this as a detailed but concise summary that can be used to resume the con
|
||||
self.recalculate_tokens();
|
||||
|
||||
if leaned_count > 0 {
|
||||
if tool_call_leaned_count > 0 {
|
||||
(format!("🥒 Context thinned at {}%: {} tool results + {} tool calls, ~{} chars saved",
|
||||
current_threshold, leaned_count, tool_call_leaned_count, chars_saved), chars_saved)
|
||||
} else {
|
||||
(format!("🥒 Context thinned at {}%: {} tool results, ~{} chars saved",
|
||||
current_threshold, leaned_count, chars_saved), chars_saved)
|
||||
}
|
||||
} else if tool_call_leaned_count > 0 {
|
||||
(format!("🥒 Context thinned at {}%: {} tool calls, ~{} chars saved",
|
||||
current_threshold, tool_call_leaned_count, chars_saved), chars_saved)
|
||||
} else {
|
||||
(format!("ℹ Context thinning triggered at {}% but no large tool results found in first third",
|
||||
(format!("ℹ Context thinning triggered at {}% but no large tool results or tool calls found in first third",
|
||||
current_threshold), 0)
|
||||
}
|
||||
}
|
||||
@@ -533,6 +645,35 @@ Format this as a detailed but concise summary that can be used to resume the con
|
||||
|
||||
debug!("Recalculated tokens after thinning: {} tokens", total);
|
||||
}
|
||||
|
||||
/// Helper function to find the end of a JSON object
|
||||
fn find_json_end(json_str: &str) -> Option<usize> {
|
||||
let mut brace_count = 0;
|
||||
let mut in_string = false;
|
||||
let mut escape_next = false;
|
||||
|
||||
for (i, ch) in json_str.char_indices() {
|
||||
if escape_next {
|
||||
escape_next = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
match ch {
|
||||
'\\' => escape_next = true,
|
||||
'"' if !escape_next => in_string = !in_string,
|
||||
'{' if !in_string => brace_count += 1,
|
||||
'}' if !in_string => {
|
||||
brace_count -= 1;
|
||||
if brace_count == 0 {
|
||||
return Some(i);
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Agent<W: UiWriter> {
|
||||
|
||||
@@ -72,7 +72,7 @@ fn test_thin_context_basic() {
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let summary = context.thin_context();
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
@@ -93,6 +93,119 @@ fn test_thin_context_basic() {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_thin_write_file_tool_calls() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
// Add some messages including a write_file tool call with large content
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Please create a large file".to_string(),
|
||||
});
|
||||
|
||||
// Add an assistant message with a write_file tool call containing large content
|
||||
let large_content = "x".repeat(1500);
|
||||
let tool_call_json = format!(
|
||||
r#"{{"tool": "write_file", "args": {{"file_path": "test.txt", "content": "{}"}}}}"#,
|
||||
large_content
|
||||
);
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("I'll create that file.\n\n{}", tool_call_json),
|
||||
});
|
||||
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Tool result: ✅ Successfully wrote 1500 lines".to_string(),
|
||||
});
|
||||
|
||||
// Add more messages to ensure we have enough for "first third" logic
|
||||
for i in 0..6 {
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("Response {}", i),
|
||||
});
|
||||
}
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
// Should have thinned the write_file tool call
|
||||
assert!(summary.contains("tool call") || summary.contains("chars saved"));
|
||||
|
||||
// Check that the large content was replaced with a file reference
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
if let Some(msg) = context.conversation_history.get(i) {
|
||||
if matches!(msg.role, MessageRole::Assistant) && msg.content.contains("write_file") {
|
||||
// The content should now reference an external file
|
||||
assert!(msg.content.contains("<content saved to"));
|
||||
assert!(!msg.content.contains(&large_content));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_thin_str_replace_tool_calls() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
|
||||
// Add some messages including a str_replace tool call with large diff
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Please update the file".to_string(),
|
||||
});
|
||||
|
||||
// Add an assistant message with a str_replace tool call containing large diff
|
||||
let large_diff = format!("--- old\n{}\n+++ new\n{}", "-old line\n".repeat(100), "+new line\n".repeat(100));
|
||||
let tool_call_json = format!(
|
||||
r#"{{"tool": "str_replace", "args": {{"file_path": "test.txt", "diff": "{}"}}}}"#,
|
||||
large_diff.replace('\n', "\\n")
|
||||
);
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("I'll update that file.\n\n{}", tool_call_json),
|
||||
});
|
||||
|
||||
context.add_message(Message {
|
||||
role: MessageRole::User,
|
||||
content: "Tool result: ✅ applied unified diff".to_string(),
|
||||
});
|
||||
|
||||
// Add more messages to ensure we have enough for "first third" logic
|
||||
for i in 0..6 {
|
||||
context.add_message(Message {
|
||||
role: MessageRole::Assistant,
|
||||
content: format!("Response {}", i),
|
||||
});
|
||||
}
|
||||
|
||||
// Trigger thinning at 50%
|
||||
context.used_tokens = 5000;
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
println!("Thinning summary: {}", summary);
|
||||
|
||||
// Should have thinned the str_replace tool call
|
||||
assert!(summary.contains("tool call") || summary.contains("chars saved"));
|
||||
|
||||
// Check that the large diff was replaced with a file reference
|
||||
let first_third_end = context.conversation_history.len() / 3;
|
||||
for i in 0..first_third_end {
|
||||
if let Some(msg) = context.conversation_history.get(i) {
|
||||
if matches!(msg.role, MessageRole::Assistant) && msg.content.contains("str_replace") {
|
||||
// The diff should now reference an external file
|
||||
assert!(msg.content.contains("<diff saved to"));
|
||||
// Should not contain the large diff content
|
||||
assert!(!msg.content.contains("old line"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_thin_context_no_large_results() {
|
||||
let mut context = ContextWindow::new(10000);
|
||||
@@ -106,10 +219,10 @@ fn test_thin_context_no_large_results() {
|
||||
}
|
||||
|
||||
context.used_tokens = 5000;
|
||||
let summary = context.thin_context();
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
// Should report no large results found
|
||||
assert!(summary.contains("no large tool results found"));
|
||||
assert!(summary.contains("no large tool results or tool calls found"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -135,7 +248,7 @@ fn test_thin_context_only_affects_first_third() {
|
||||
}
|
||||
|
||||
context.used_tokens = 5000;
|
||||
let summary = context.thin_context();
|
||||
let (summary, _chars_saved) = context.thin_context();
|
||||
|
||||
// First third is 4 messages (indices 0-3), so only indices 1 and 3 should be thinned
|
||||
// That's 2 tool results
|
||||
|
||||
Reference in New Issue
Block a user