check for stale TODOs
on by default, can be disabled
This commit is contained in:
@@ -17,6 +17,8 @@ serde_json = { workspace = true }
|
||||
rustyline = "17.0.1"
|
||||
dirs = "5.0"
|
||||
tokio-util = "0.7"
|
||||
sha2 = "0.10"
|
||||
hex = "0.4"
|
||||
indicatif = "0.17"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
crossterm = "0.29.0"
|
||||
|
||||
@@ -164,6 +164,7 @@ use rustyline::error::ReadlineError;
|
||||
use rustyline::DefaultEditor;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use sha2::{Digest, Sha256};
|
||||
use tokio_util::sync::CancellationToken;
|
||||
use tracing::{error, info};
|
||||
|
||||
@@ -1660,6 +1661,17 @@ async fn run_autonomous(
|
||||
} else {
|
||||
output.print("📋 Requirements loaded from requirements.md");
|
||||
}
|
||||
|
||||
// Calculate SHA256 of requirements
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(requirements.as_bytes());
|
||||
let requirements_sha = hex::encode(hasher.finalize());
|
||||
|
||||
output.print(&format!("🔒 Requirements SHA256: {}", requirements_sha));
|
||||
|
||||
// Pass SHA to agent for staleness checking
|
||||
agent.set_requirements_sha(requirements_sha.clone());
|
||||
|
||||
output.print("🔄 Starting coach-player feedback loop...");
|
||||
|
||||
// Check if implementation files already exist
|
||||
@@ -1692,8 +1704,8 @@ async fn run_autonomous(
|
||||
// Player mode: implement requirements (with coach feedback if available)
|
||||
let player_prompt = if coach_feedback.is_empty() {
|
||||
format!(
|
||||
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nImplement this step by step, creating all necessary files and code.",
|
||||
requirements
|
||||
"You are G3 in implementation mode. Read and implement the following requirements:\n\n{}\n\nRequirements SHA256: {}\n\nImplement this step by step, creating all necessary files and code.",
|
||||
requirements, requirements_sha
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
|
||||
@@ -91,4 +91,11 @@ impl UiWriter for MachineUiWriter {
|
||||
fn wants_full_output(&self) -> bool {
|
||||
true // Machine mode wants complete, untruncated output
|
||||
}
|
||||
|
||||
fn prompt_user_yes_no(&self, message: &str) -> bool {
|
||||
// In machine mode, we can't interactively prompt, so we log the request and return true
|
||||
// to allow automation to proceed.
|
||||
println!("PROMPT_USER_YES_NO: {}", message);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -343,5 +343,18 @@ impl UiWriter for ConsoleUiWriter {
|
||||
fn flush(&self) {
|
||||
let _ = io::stdout().flush();
|
||||
}
|
||||
|
||||
fn prompt_user_yes_no(&self, message: &str) -> bool {
|
||||
print!("{} [y/N] ", message);
|
||||
let _ = io::stdout().flush();
|
||||
|
||||
let mut input = String::new();
|
||||
if io::stdin().read_line(&mut input).is_ok() {
|
||||
let trimmed = input.trim().to_lowercase();
|
||||
trimmed == "y" || trimmed == "yes"
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -75,6 +75,12 @@ pub struct AgentConfig {
|
||||
pub auto_compact: bool,
|
||||
pub max_retry_attempts: u32,
|
||||
pub autonomous_max_retry_attempts: u32,
|
||||
#[serde(default = "default_check_todo_staleness")]
|
||||
pub check_todo_staleness: bool,
|
||||
}
|
||||
|
||||
fn default_check_todo_staleness() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
@@ -151,6 +157,7 @@ impl Default for Config {
|
||||
auto_compact: true,
|
||||
max_retry_attempts: 3,
|
||||
autonomous_max_retry_attempts: 6,
|
||||
check_todo_staleness: true,
|
||||
},
|
||||
computer_control: ComputerControlConfig::default(),
|
||||
webdriver: WebDriverConfig::default(),
|
||||
@@ -272,6 +279,7 @@ impl Config {
|
||||
auto_compact: true,
|
||||
max_retry_attempts: 3,
|
||||
autonomous_max_retry_attempts: 6,
|
||||
check_todo_staleness: true,
|
||||
},
|
||||
computer_control: ComputerControlConfig::default(),
|
||||
webdriver: WebDriverConfig::default(),
|
||||
|
||||
@@ -759,6 +759,7 @@ pub struct Agent<W: UiWriter> {
|
||||
macax_controller:
|
||||
std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
|
||||
tool_call_count: usize,
|
||||
requirements_sha: Option<String>,
|
||||
}
|
||||
|
||||
impl<W: UiWriter> Agent<W> {
|
||||
@@ -1030,6 +1031,7 @@ impl<W: UiWriter> Agent<W> {
|
||||
}))
|
||||
},
|
||||
tool_call_count: 0,
|
||||
requirements_sha: None,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1979,6 +1981,10 @@ impl<W: UiWriter> Agent<W> {
|
||||
&self.config
|
||||
}
|
||||
|
||||
pub fn set_requirements_sha(&mut self, sha: String) {
|
||||
self.requirements_sha = Some(sha);
|
||||
}
|
||||
|
||||
async fn stream_completion(
|
||||
&mut self,
|
||||
request: CompletionRequest,
|
||||
@@ -4250,6 +4256,46 @@ impl<W: UiWriter> Agent<W> {
|
||||
let mut todo = self.todo_content.write().await;
|
||||
*todo = content.clone();
|
||||
|
||||
// Check for staleness if enabled and we have a requirements SHA
|
||||
if self.config.agent.check_todo_staleness {
|
||||
if let Some(req_sha) = &self.requirements_sha {
|
||||
// Parse the first line for the SHA header
|
||||
if let Some(first_line) = content.lines().next() {
|
||||
if first_line.starts_with("{{Based on the requirements file with SHA256:") {
|
||||
let parts: Vec<&str> = first_line.split("SHA256:").collect();
|
||||
if parts.len() > 1 {
|
||||
let todo_sha = parts[1].trim().trim_end_matches("}}").trim();
|
||||
if todo_sha != req_sha {
|
||||
let warning = format!(
|
||||
"⚠️ TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA: {}",
|
||||
req_sha, todo_sha
|
||||
);
|
||||
self.ui_writer.print_context_status(&warning);
|
||||
|
||||
// Beep 6 times
|
||||
print!("\x07\x07\x07\x07\x07\x07");
|
||||
let _ = std::io::stdout().flush();
|
||||
|
||||
if !self.ui_writer.prompt_user_yes_no("Requirements have changed! Continue?") {
|
||||
return Ok("❌ User aborted due to stale TODO list.".to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Header missing, but we have a SHA. Warn the user?
|
||||
// For now, maybe just proceed or warn.
|
||||
// Let's just warn but not block unless strictly required.
|
||||
// Or maybe we should treat missing header as mismatch?
|
||||
// The plan said: "If the SHA256 doesn't match..."
|
||||
// Missing header implies it doesn't match.
|
||||
// But existing TODOs might not have it.
|
||||
// Let's be safe and only warn if we see a DIFFERENT SHA.
|
||||
// If no header, it might be an old TODO or manual one.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if content.trim().is_empty() {
|
||||
Ok("📝 TODO list is empty".to_string())
|
||||
} else {
|
||||
|
||||
@@ -71,9 +71,13 @@ Every multi-step task follows this pattern:
|
||||
1. **Start**: Call todo_read, then todo_write to create your plan
|
||||
2. **During**: Execute steps, then todo_read and todo_write to mark progress
|
||||
3. **End**: Call todo_read to verify all items complete
|
||||
|
||||
|
||||
Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists persist across g3 sessions in the workspace directory.
|
||||
|
||||
IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
|
||||
`{{Based on the requirements file with SHA256: <SHA>}}`
|
||||
This ensures the TODO list is tracked against the specific version of requirements it was generated from.
|
||||
|
||||
## Examples
|
||||
|
||||
**Example 1: Feature Implementation**
|
||||
@@ -303,6 +307,10 @@ Every multi-step task follows this pattern:
|
||||
|
||||
Note: todo_write replaces the entire list, so always read first to preserve content.
|
||||
|
||||
IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
|
||||
`{{Based on the requirements file with SHA256: <SHA>}}`
|
||||
This ensures the TODO list is tracked against the specific version of requirements it was generated from.
|
||||
|
||||
## Examples
|
||||
|
||||
**Example 1: Feature Implementation**
|
||||
|
||||
@@ -56,6 +56,9 @@ pub trait UiWriter: Send + Sync {
|
||||
/// Returns true if this UI writer wants full, untruncated output
|
||||
/// Default is false (truncate for human readability)
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
|
||||
/// Prompt the user for a yes/no confirmation
|
||||
fn prompt_user_yes_no(&self, message: &str) -> bool;
|
||||
}
|
||||
|
||||
/// A no-op implementation for when UI output is not needed
|
||||
@@ -80,4 +83,5 @@ impl UiWriter for NullUiWriter {
|
||||
fn notify_sse_received(&self) {}
|
||||
fn flush(&self) {}
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
fn prompt_user_yes_no(&self, _message: &str) -> bool { true }
|
||||
}
|
||||
184
crates/g3-core/tests/todo_staleness_test.rs
Normal file
184
crates/g3-core/tests/todo_staleness_test.rs
Normal file
@@ -0,0 +1,184 @@
|
||||
use g3_core::{Agent, ToolCall};
|
||||
use g3_core::ui_writer::UiWriter;
|
||||
use g3_config::Config;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tempfile::TempDir;
|
||||
use serial_test::serial;
|
||||
|
||||
// Mock UI Writer for testing
|
||||
#[derive(Clone)]
|
||||
struct MockUiWriter {
|
||||
output: Arc<Mutex<Vec<String>>>,
|
||||
prompt_responses: Arc<Mutex<Vec<bool>>>,
|
||||
}
|
||||
|
||||
impl MockUiWriter {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
output: Arc::new(Mutex::new(Vec::new())),
|
||||
prompt_responses: Arc::new(Mutex::new(Vec::new())),
|
||||
}
|
||||
}
|
||||
|
||||
fn set_prompt_response(&self, response: bool) {
|
||||
self.prompt_responses.lock().unwrap().push(response);
|
||||
}
|
||||
|
||||
fn get_output(&self) -> Vec<String> {
|
||||
self.output.lock().unwrap().clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl UiWriter for MockUiWriter {
|
||||
fn print(&self, message: &str) {
|
||||
self.output.lock().unwrap().push(message.to_string());
|
||||
}
|
||||
fn println(&self, message: &str) {
|
||||
self.output.lock().unwrap().push(message.to_string());
|
||||
}
|
||||
fn print_inline(&self, message: &str) {
|
||||
self.output.lock().unwrap().push(message.to_string());
|
||||
}
|
||||
fn print_system_prompt(&self, _prompt: &str) {}
|
||||
fn print_context_status(&self, message: &str) {
|
||||
self.output.lock().unwrap().push(format!("STATUS: {}", message));
|
||||
}
|
||||
fn print_context_thinning(&self, _message: &str) {}
|
||||
fn print_tool_header(&self, _tool_name: &str) {}
|
||||
fn print_tool_arg(&self, _key: &str, _value: &str) {}
|
||||
fn print_tool_output_header(&self) {}
|
||||
fn update_tool_output_line(&self, _line: &str) {}
|
||||
fn print_tool_output_line(&self, _line: &str) {}
|
||||
fn print_tool_output_summary(&self, _hidden_count: usize) {}
|
||||
fn print_tool_timing(&self, _duration_str: &str) {}
|
||||
fn print_agent_prompt(&self) {}
|
||||
fn print_agent_response(&self, _content: &str) {}
|
||||
fn notify_sse_received(&self) {}
|
||||
fn flush(&self) {}
|
||||
fn wants_full_output(&self) -> bool { false }
|
||||
fn prompt_user_yes_no(&self, message: &str) -> bool {
|
||||
self.output.lock().unwrap().push(format!("PROMPT: {}", message));
|
||||
self.prompt_responses.lock().unwrap().pop().unwrap_or(true)
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn test_todo_staleness_check_matching_sha() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let todo_path = temp_dir.path().join("todo.g3.md");
|
||||
std::env::set_current_dir(&temp_dir).unwrap();
|
||||
|
||||
let sha = "abc123hash";
|
||||
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha);
|
||||
std::fs::write(&todo_path, content).unwrap();
|
||||
|
||||
let mut config = Config::default();
|
||||
config.agent.check_todo_staleness = true;
|
||||
|
||||
let ui_writer = MockUiWriter::new();
|
||||
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
|
||||
agent.set_requirements_sha(sha.to_string());
|
||||
|
||||
let tool_call = ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
assert!(result.contains("📝 TODO list:"));
|
||||
assert!(!result.contains("⚠️ TODO list is stale"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn test_todo_staleness_check_mismatch_sha_abort() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let todo_path = temp_dir.path().join("todo.g3.md");
|
||||
std::env::set_current_dir(&temp_dir).unwrap();
|
||||
|
||||
let sha_file = "old_sha";
|
||||
let sha_req = "new_sha";
|
||||
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
|
||||
std::fs::write(&todo_path, content).unwrap();
|
||||
|
||||
let mut config = Config::default();
|
||||
config.agent.check_todo_staleness = true;
|
||||
|
||||
let ui_writer = MockUiWriter::new();
|
||||
ui_writer.set_prompt_response(false); // Abort
|
||||
|
||||
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
|
||||
agent.set_requirements_sha(sha_req.to_string());
|
||||
|
||||
let tool_call = ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
assert!(result.contains("❌ User aborted due to stale TODO list."));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn test_todo_staleness_check_mismatch_sha_continue() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let todo_path = temp_dir.path().join("todo.g3.md");
|
||||
std::env::set_current_dir(&temp_dir).unwrap();
|
||||
|
||||
let sha_file = "old_sha";
|
||||
let sha_req = "new_sha";
|
||||
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
|
||||
std::fs::write(&todo_path, content).unwrap();
|
||||
|
||||
let mut config = Config::default();
|
||||
config.agent.check_todo_staleness = true;
|
||||
|
||||
let ui_writer = MockUiWriter::new();
|
||||
ui_writer.set_prompt_response(true); // Continue
|
||||
let output_handle = ui_writer.clone(); // Clone to keep handle
|
||||
|
||||
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
|
||||
agent.set_requirements_sha(sha_req.to_string());
|
||||
|
||||
let tool_call = ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
assert!(result.contains("📝 TODO list:"));
|
||||
|
||||
let output = output_handle.get_output();
|
||||
let has_warning = output.iter().any(|s| s.contains("⚠️ TODO list is stale"));
|
||||
assert!(has_warning, "Should have printed warning to UI");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
#[serial]
|
||||
async fn test_todo_staleness_check_disabled() {
|
||||
let temp_dir = TempDir::new().unwrap();
|
||||
let todo_path = temp_dir.path().join("todo.g3.md");
|
||||
std::env::set_current_dir(&temp_dir).unwrap();
|
||||
|
||||
let sha_file = "old_sha";
|
||||
let sha_req = "new_sha";
|
||||
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
|
||||
std::fs::write(&todo_path, content).unwrap();
|
||||
|
||||
let mut config = Config::default();
|
||||
config.agent.check_todo_staleness = false;
|
||||
|
||||
let ui_writer = MockUiWriter::new();
|
||||
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
|
||||
agent.set_requirements_sha(sha_req.to_string());
|
||||
|
||||
let tool_call = ToolCall {
|
||||
tool: "todo_read".to_string(),
|
||||
args: serde_json::json!({}),
|
||||
};
|
||||
let result = agent.execute_tool(&tool_call).await.unwrap();
|
||||
|
||||
assert!(result.contains("📝 TODO list:"));
|
||||
}
|
||||
Reference in New Issue
Block a user