check for stale TODOs

on by default, can be disabled
This commit is contained in:
Jochen
2025-11-21 12:09:01 +11:00
parent 0ce905dc74
commit 28a83d2dcf
10 changed files with 295 additions and 3 deletions

View File

@@ -759,6 +759,7 @@ pub struct Agent<W: UiWriter> {
macax_controller:
std::sync::Arc<tokio::sync::RwLock<Option<g3_computer_control::MacAxController>>>,
tool_call_count: usize,
requirements_sha: Option<String>,
}
impl<W: UiWriter> Agent<W> {
@@ -1030,6 +1031,7 @@ impl<W: UiWriter> Agent<W> {
}))
},
tool_call_count: 0,
requirements_sha: None,
})
}
@@ -1979,6 +1981,10 @@ impl<W: UiWriter> Agent<W> {
&self.config
}
pub fn set_requirements_sha(&mut self, sha: String) {
self.requirements_sha = Some(sha);
}
async fn stream_completion(
&mut self,
request: CompletionRequest,
@@ -4250,6 +4256,46 @@ impl<W: UiWriter> Agent<W> {
let mut todo = self.todo_content.write().await;
*todo = content.clone();
// Check for staleness if enabled and we have a requirements SHA
if self.config.agent.check_todo_staleness {
if let Some(req_sha) = &self.requirements_sha {
// Parse the first line for the SHA header
if let Some(first_line) = content.lines().next() {
if first_line.starts_with("{{Based on the requirements file with SHA256:") {
let parts: Vec<&str> = first_line.split("SHA256:").collect();
if parts.len() > 1 {
let todo_sha = parts[1].trim().trim_end_matches("}}").trim();
if todo_sha != req_sha {
let warning = format!(
"⚠️ TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA: {}",
req_sha, todo_sha
);
self.ui_writer.print_context_status(&warning);
// Beep 6 times
print!("\x07\x07\x07\x07\x07\x07");
let _ = std::io::stdout().flush();
if !self.ui_writer.prompt_user_yes_no("Requirements have changed! Continue?") {
return Ok("❌ User aborted due to stale TODO list.".to_string());
}
}
}
} else {
// Header missing, but we have a SHA. Warn the user?
// For now, maybe just proceed or warn.
// Let's just warn but not block unless strictly required.
// Or maybe we should treat missing header as mismatch?
// The plan said: "If the SHA256 doesn't match..."
// Missing header implies it doesn't match.
// But existing TODOs might not have it.
// Let's be safe and only warn if we see a DIFFERENT SHA.
// If no header, it might be an old TODO or manual one.
}
}
}
}
if content.trim().is_empty() {
Ok("📝 TODO list is empty".to_string())
} else {

View File

@@ -71,9 +71,13 @@ Every multi-step task follows this pattern:
1. **Start**: Call todo_read, then todo_write to create your plan
2. **During**: Execute steps, then todo_read and todo_write to mark progress
3. **End**: Call todo_read to verify all items complete
Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists persist across g3 sessions in the workspace directory.
IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
`{{Based on the requirements file with SHA256: <SHA>}}`
This ensures the TODO list is tracked against the specific version of requirements it was generated from.
## Examples
**Example 1: Feature Implementation**
@@ -303,6 +307,10 @@ Every multi-step task follows this pattern:
Note: todo_write replaces the entire list, so always read first to preserve content.
IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
`{{Based on the requirements file with SHA256: <SHA>}}`
This ensures the TODO list is tracked against the specific version of requirements it was generated from.
## Examples
**Example 1: Feature Implementation**

View File

@@ -56,6 +56,9 @@ pub trait UiWriter: Send + Sync {
/// Returns true if this UI writer wants full, untruncated output
/// Default is false (truncate for human readability)
fn wants_full_output(&self) -> bool { false }
/// Prompt the user for a yes/no confirmation
fn prompt_user_yes_no(&self, message: &str) -> bool;
}
/// A no-op implementation for when UI output is not needed
@@ -80,4 +83,5 @@ impl UiWriter for NullUiWriter {
fn notify_sse_received(&self) {}
fn flush(&self) {}
fn wants_full_output(&self) -> bool { false }
fn prompt_user_yes_no(&self, _message: &str) -> bool { true }
}

View File

@@ -0,0 +1,184 @@
use g3_core::{Agent, ToolCall};
use g3_core::ui_writer::UiWriter;
use g3_config::Config;
use std::sync::{Arc, Mutex};
use tempfile::TempDir;
use serial_test::serial;
// Mock UI Writer for testing
#[derive(Clone)]
struct MockUiWriter {
output: Arc<Mutex<Vec<String>>>,
prompt_responses: Arc<Mutex<Vec<bool>>>,
}
impl MockUiWriter {
fn new() -> Self {
Self {
output: Arc::new(Mutex::new(Vec::new())),
prompt_responses: Arc::new(Mutex::new(Vec::new())),
}
}
fn set_prompt_response(&self, response: bool) {
self.prompt_responses.lock().unwrap().push(response);
}
fn get_output(&self) -> Vec<String> {
self.output.lock().unwrap().clone()
}
}
impl UiWriter for MockUiWriter {
fn print(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn println(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn print_inline(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn print_system_prompt(&self, _prompt: &str) {}
fn print_context_status(&self, message: &str) {
self.output.lock().unwrap().push(format!("STATUS: {}", message));
}
fn print_context_thinning(&self, _message: &str) {}
fn print_tool_header(&self, _tool_name: &str) {}
fn print_tool_arg(&self, _key: &str, _value: &str) {}
fn print_tool_output_header(&self) {}
fn update_tool_output_line(&self, _line: &str) {}
fn print_tool_output_line(&self, _line: &str) {}
fn print_tool_output_summary(&self, _hidden_count: usize) {}
fn print_tool_timing(&self, _duration_str: &str) {}
fn print_agent_prompt(&self) {}
fn print_agent_response(&self, _content: &str) {}
fn notify_sse_received(&self) {}
fn flush(&self) {}
fn wants_full_output(&self) -> bool { false }
fn prompt_user_yes_no(&self, message: &str) -> bool {
self.output.lock().unwrap().push(format!("PROMPT: {}", message));
self.prompt_responses.lock().unwrap().pop().unwrap_or(true)
}
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_matching_sha() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha = "abc123hash";
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
assert!(!result.contains("⚠️ TODO list is stale"));
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_abort() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
ui_writer.set_prompt_response(false); // Abort
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("❌ User aborted due to stale TODO list."));
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_continue() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
ui_writer.set_prompt_response(true); // Continue
let output_handle = ui_writer.clone(); // Clone to keep handle
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
let output = output_handle.get_output();
let has_warning = output.iter().any(|s| s.contains("⚠️ TODO list is stale"));
assert!(has_warning, "Should have printed warning to UI");
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_disabled() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!("{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", sha_file);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = false;
let ui_writer = MockUiWriter::new();
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
}