From 28a83d2dcfb9bafe8bb4bde4c852e9521022abc6 Mon Sep 17 00:00:00 2001 From: Jochen Date: Fri, 21 Nov 2025 12:09:01 +1100 Subject: [PATCH] check for stale TODOs on by default, can be disabled --- Cargo.lock | 8 + crates/g3-cli/Cargo.toml | 2 + crates/g3-cli/src/lib.rs | 16 +- crates/g3-cli/src/machine_ui_writer.rs | 7 + crates/g3-cli/src/ui_writer_impl.rs | 13 ++ crates/g3-config/src/lib.rs | 8 + crates/g3-core/src/lib.rs | 46 +++++ crates/g3-core/src/prompts.rs | 10 +- crates/g3-core/src/ui_writer.rs | 4 + crates/g3-core/tests/todo_staleness_test.rs | 184 ++++++++++++++++++++ 10 files changed, 295 insertions(+), 3 deletions(-) create mode 100644 crates/g3-core/tests/todo_staleness_test.rs diff --git a/Cargo.lock b/Cargo.lock index acb650b..99ad254 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1365,11 +1365,13 @@ dependencies = [ "dirs 5.0.1", "g3-config", "g3-core", + "hex", "indicatif", "ratatui", "rustyline", "serde", "serde_json", + "sha2", "termimad", "tokio", "tokio-util", @@ -1653,6 +1655,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "home" version = "0.5.9" diff --git a/crates/g3-cli/Cargo.toml b/crates/g3-cli/Cargo.toml index 7c43308..9b2ba7c 100644 --- a/crates/g3-cli/Cargo.toml +++ b/crates/g3-cli/Cargo.toml @@ -17,6 +17,8 @@ serde_json = { workspace = true } rustyline = "17.0.1" dirs = "5.0" tokio-util = "0.7" +sha2 = "0.10" +hex = "0.4" indicatif = "0.17" chrono = { version = "0.4", features = ["serde"] } crossterm = "0.29.0" diff --git a/crates/g3-cli/src/lib.rs b/crates/g3-cli/src/lib.rs index 920d4a0..1565796 100644 --- a/crates/g3-cli/src/lib.rs +++ b/crates/g3-cli/src/lib.rs @@ -164,6 +164,7 @@ use rustyline::error::ReadlineError; use rustyline::DefaultEditor; use std::path::Path; use std::path::PathBuf; +use sha2::{Digest, Sha256}; use tokio_util::sync::CancellationToken; use tracing::{error, info}; @@ -1660,6 +1661,17 @@ async fn run_autonomous( } else { output.print("📋 Requirements loaded from requirements.md"); } + + // Calculate SHA256 of requirements + let mut hasher = Sha256::new(); + hasher.update(requirements.as_bytes()); + let requirements_sha = hex::encode(hasher.finalize()); + + output.print(&format!("🔒 Requirements SHA256: {}", requirements_sha)); + + // Pass SHA to agent for staleness checking + agent.set_requirements_sha(requirements_sha.clone()); + output.print("🔄 Starting coach-player feedback loop..."); // Check if implementation files already exist @@ -1692,8 +1704,8 @@ async fn run_autonomous( // Player mode: implement requirements (with coach feedback if available) let player_prompt = if coach_feedback.is_empty() { format!( - "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.", - requirements + "You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nRequirements SHA256: {}\n\nImplement this step by step, creating all necessary files and code.", + requirements, requirements_sha ) } else { format!( diff --git a/crates/g3-cli/src/machine_ui_writer.rs b/crates/g3-cli/src/machine_ui_writer.rs index 0d97292..5d236b4 100644 --- a/crates/g3-cli/src/machine_ui_writer.rs +++ b/crates/g3-cli/src/machine_ui_writer.rs @@ -91,4 +91,11 @@ impl UiWriter for MachineUiWriter { fn wants_full_output(&self) -> bool { true // Machine mode wants complete, untruncated output } + + fn prompt_user_yes_no(&self, message: &str) -> bool { + // In machine mode, we can't interactively prompt, so we log the request and return true + // to allow automation to proceed. + println!("PROMPT_USER_YES_NO: {}", message); + true + } } diff --git a/crates/g3-cli/src/ui_writer_impl.rs b/crates/g3-cli/src/ui_writer_impl.rs index 2f336fd..8dfa145 100644 --- a/crates/g3-cli/src/ui_writer_impl.rs +++ b/crates/g3-cli/src/ui_writer_impl.rs @@ -343,5 +343,18 @@ impl UiWriter for ConsoleUiWriter { fn flush(&self) { let _ = io::stdout().flush(); } + + fn prompt_user_yes_no(&self, message: &str) -> bool { + print!("{} [y/N] ", message); + let _ = io::stdout().flush(); + + let mut input = String::new(); + if io::stdin().read_line(&mut input).is_ok() { + let trimmed = input.trim().to_lowercase(); + trimmed == "y" || trimmed == "yes" + } else { + false + } + } } diff --git a/crates/g3-config/src/lib.rs b/crates/g3-config/src/lib.rs index e8f567f..0c034de 100644 --- a/crates/g3-config/src/lib.rs +++ b/crates/g3-config/src/lib.rs @@ -75,6 +75,12 @@ pub struct AgentConfig { pub auto_compact: bool, pub max_retry_attempts: u32, pub autonomous_max_retry_attempts: u32, + #[serde(default = "default_check_todo_staleness")] + pub check_todo_staleness: bool, +} + +fn default_check_todo_staleness() -> bool { + true } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -151,6 +157,7 @@ impl Default for Config { auto_compact: true, max_retry_attempts: 3, autonomous_max_retry_attempts: 6, + check_todo_staleness: true, }, computer_control: ComputerControlConfig::default(), webdriver: WebDriverConfig::default(), @@ -272,6 +279,7 @@ impl Config { auto_compact: true, max_retry_attempts: 3, autonomous_max_retry_attempts: 6, + check_todo_staleness: true, }, computer_control: ComputerControlConfig::default(), webdriver: WebDriverConfig::default(), diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 5af0fd9..37e3a56 100644 --- a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -759,6 +759,7 @@ pub struct Agent { macax_controller: std::sync::Arc>>, tool_call_count: usize, + requirements_sha: Option, } impl Agent { @@ -1030,6 +1031,7 @@ impl Agent { })) }, tool_call_count: 0, + requirements_sha: None, }) } @@ -1979,6 +1981,10 @@ impl Agent { &self.config } + pub fn set_requirements_sha(&mut self, sha: String) { + self.requirements_sha = Some(sha); + } + async fn stream_completion( &mut self, request: CompletionRequest, @@ -4250,6 +4256,46 @@ impl Agent { let mut todo = self.todo_content.write().await; *todo = content.clone(); + // Check for staleness if enabled and we have a requirements SHA + if self.config.agent.check_todo_staleness { + if let Some(req_sha) = &self.requirements_sha { + // Parse the first line for the SHA header + if let Some(first_line) = content.lines().next() { + if first_line.starts_with("{{Based on the requirements file with SHA256:") { + let parts: Vec<&str> = first_line.split("SHA256:").collect(); + if parts.len() > 1 { + let todo_sha = parts[1].trim().trim_end_matches("}}").trim(); + if todo_sha != req_sha { + let warning = format!( + "⚠️ TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA: {}", + req_sha, todo_sha + ); + self.ui_writer.print_context_status(&warning); + + // Beep 6 times + print!("\x07\x07\x07\x07\x07\x07"); + let _ = std::io::stdout().flush(); + + if !self.ui_writer.prompt_user_yes_no("Requirements have changed! Continue?") { + return Ok("❌ User aborted due to stale TODO list.".to_string()); + } + } + } + } else { + // Header missing, but we have a SHA. Warn the user? + // For now, maybe just proceed or warn. + // Let's just warn but not block unless strictly required. + // Or maybe we should treat missing header as mismatch? + // The plan said: "If the SHA256 doesn't match..." + // Missing header implies it doesn't match. + // But existing TODOs might not have it. + // Let's be safe and only warn if we see a DIFFERENT SHA. + // If no header, it might be an old TODO or manual one. + } + } + } + } + if content.trim().is_empty() { Ok("📝 TODO list is empty".to_string()) } else { diff --git a/crates/g3-core/src/prompts.rs b/crates/g3-core/src/prompts.rs index 3acabf0..ddf8c98 100644 --- a/crates/g3-core/src/prompts.rs +++ b/crates/g3-core/src/prompts.rs @@ -71,9 +71,13 @@ Every multi-step task follows this pattern: 1. **Start**: Call todo_read, then todo_write to create your plan 2. **During**: Execute steps, then todo_read and todo_write to mark progress 3. **End**: Call todo_read to verify all items complete - + Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists persist across g3 sessions in the workspace directory. +IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format: +`{{Based on the requirements file with SHA256: }}` +This ensures the TODO list is tracked against the specific version of requirements it was generated from. + ## Examples **Example 1: Feature Implementation** @@ -303,6 +307,10 @@ Every multi-step task follows this pattern: Note: todo_write replaces the entire list, so always read first to preserve content. +IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format: +`{{Based on the requirements file with SHA256: }}` +This ensures the TODO list is tracked against the specific version of requirements it was generated from. + ## Examples **Example 1: Feature Implementation** diff --git a/crates/g3-core/src/ui_writer.rs b/crates/g3-core/src/ui_writer.rs index 49e29b9..8fc9959 100644 --- a/crates/g3-core/src/ui_writer.rs +++ b/crates/g3-core/src/ui_writer.rs @@ -56,6 +56,9 @@ pub trait UiWriter: Send + Sync { /// Returns true if this UI writer wants full, untruncated output /// Default is false (truncate for human readability) fn wants_full_output(&self) -> bool { false } + + /// Prompt the user for a yes/no confirmation + fn prompt_user_yes_no(&self, message: &str) -> bool; } /// A no-op implementation for when UI output is not needed @@ -80,4 +83,5 @@ impl UiWriter for NullUiWriter { fn notify_sse_received(&self) {} fn flush(&self) {} fn wants_full_output(&self) -> bool { false } + fn prompt_user_yes_no(&self, _message: &str) -> bool { true } } \ No newline at end of file diff --git a/crates/g3-core/tests/todo_staleness_test.rs b/crates/g3-core/tests/todo_staleness_test.rs new file mode 100644 index 0000000..ae2244d --- /dev/null +++ b/crates/g3-core/tests/todo_staleness_test.rs @@ -0,0 +1,184 @@ +use g3_core::{Agent, ToolCall}; +use g3_core::ui_writer::UiWriter; +use g3_config::Config; +use std::sync::{Arc, Mutex}; +use tempfile::TempDir; +use serial_test::serial; + +// Mock UI Writer for testing +#[derive(Clone)] +struct MockUiWriter { + output: Arc>>, + prompt_responses: Arc>>, +} + +impl MockUiWriter { + fn new() -> Self { + Self { + output: Arc::new(Mutex::new(Vec::new())), + prompt_responses: Arc::new(Mutex::new(Vec::new())), + } + } + + fn set_prompt_response(&self, response: bool) { + self.prompt_responses.lock().unwrap().push(response); + } + + fn get_output(&self) -> Vec { + self.output.lock().unwrap().clone() + } +} + +impl UiWriter for MockUiWriter { + fn print(&self, message: &str) { + self.output.lock().unwrap().push(message.to_string()); + } + fn println(&self, message: &str) { + self.output.lock().unwrap().push(message.to_string()); + } + fn print_inline(&self, message: &str) { + self.output.lock().unwrap().push(message.to_string()); + } + fn print_system_prompt(&self, _prompt: &str) {} + fn print_context_status(&self, message: &str) { + self.output.lock().unwrap().push(format!("STATUS: {}", message)); + } + fn print_context_thinning(&self, _message: &str) {} + fn print_tool_header(&self, _tool_name: &str) {} + fn print_tool_arg(&self, _key: &str, _value: &str) {} + fn print_tool_output_header(&self) {} + fn update_tool_output_line(&self, _line: &str) {} + fn print_tool_output_line(&self, _line: &str) {} + fn print_tool_output_summary(&self, _hidden_count: usize) {} + fn print_tool_timing(&self, _duration_str: &str) {} + fn print_agent_prompt(&self) {} + fn print_agent_response(&self, _content: &str) {} + fn notify_sse_received(&self) {} + fn flush(&self) {} + fn wants_full_output(&self) -> bool { false } + fn prompt_user_yes_no(&self, message: &str) -> bool { + self.output.lock().unwrap().push(format!("PROMPT: {}", message)); + self.prompt_responses.lock().unwrap().pop().unwrap_or(true) + } +} + +#[tokio::test] +#[serial] +async fn test_todo_staleness_check_matching_sha() { + let temp_dir = TempDir::new().unwrap(); + let todo_path = temp_dir.path().join("todo.g3.md"); + std::env::set_current_dir(&temp_dir).unwrap(); + + let sha = "abc123hash"; + let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha); + std::fs::write(&todo_path, content).unwrap(); + + let mut config = Config::default(); + config.agent.check_todo_staleness = true; + + let ui_writer = MockUiWriter::new(); + let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap(); + agent.set_requirements_sha(sha.to_string()); + + let tool_call = ToolCall { + tool: "todo_read".to_string(), + args: serde_json::json!({}), + }; + let result = agent.execute_tool(&tool_call).await.unwrap(); + + assert!(result.contains("📝 TODO list:")); + assert!(!result.contains("⚠️ TODO list is stale")); +} + +#[tokio::test] +#[serial] +async fn test_todo_staleness_check_mismatch_sha_abort() { + let temp_dir = TempDir::new().unwrap(); + let todo_path = temp_dir.path().join("todo.g3.md"); + std::env::set_current_dir(&temp_dir).unwrap(); + + let sha_file = "old_sha"; + let sha_req = "new_sha"; + let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file); + std::fs::write(&todo_path, content).unwrap(); + + let mut config = Config::default(); + config.agent.check_todo_staleness = true; + + let ui_writer = MockUiWriter::new(); + ui_writer.set_prompt_response(false); // Abort + + let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap(); + agent.set_requirements_sha(sha_req.to_string()); + + let tool_call = ToolCall { + tool: "todo_read".to_string(), + args: serde_json::json!({}), + }; + let result = agent.execute_tool(&tool_call).await.unwrap(); + + assert!(result.contains("❌ User aborted due to stale TODO list.")); +} + +#[tokio::test] +#[serial] +async fn test_todo_staleness_check_mismatch_sha_continue() { + let temp_dir = TempDir::new().unwrap(); + let todo_path = temp_dir.path().join("todo.g3.md"); + std::env::set_current_dir(&temp_dir).unwrap(); + + let sha_file = "old_sha"; + let sha_req = "new_sha"; + let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file); + std::fs::write(&todo_path, content).unwrap(); + + let mut config = Config::default(); + config.agent.check_todo_staleness = true; + + let ui_writer = MockUiWriter::new(); + ui_writer.set_prompt_response(true); // Continue + let output_handle = ui_writer.clone(); // Clone to keep handle + + let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap(); + agent.set_requirements_sha(sha_req.to_string()); + + let tool_call = ToolCall { + tool: "todo_read".to_string(), + args: serde_json::json!({}), + }; + let result = agent.execute_tool(&tool_call).await.unwrap(); + + assert!(result.contains("📝 TODO list:")); + + let output = output_handle.get_output(); + let has_warning = output.iter().any(|s| s.contains("⚠️ TODO list is stale")); + assert!(has_warning, "Should have printed warning to UI"); +} + +#[tokio::test] +#[serial] +async fn test_todo_staleness_check_disabled() { + let temp_dir = TempDir::new().unwrap(); + let todo_path = temp_dir.path().join("todo.g3.md"); + std::env::set_current_dir(&temp_dir).unwrap(); + + let sha_file = "old_sha"; + let sha_req = "new_sha"; + let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file); + std::fs::write(&todo_path, content).unwrap(); + + let mut config = Config::default(); + config.agent.check_todo_staleness = false; + + let ui_writer = MockUiWriter::new(); + let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap(); + agent.set_requirements_sha(sha_req.to_string()); + + let tool_call = ToolCall { + tool: "todo_read".to_string(), + args: serde_json::json!({}), + }; + let result = agent.execute_tool(&tool_call).await.unwrap(); + + assert!(result.contains("📝 TODO list:")); +}