Add Plan Mode to replace TODO system

Plan Mode is a cognitive forcing system that requires reasoning about:
- Happy path
- Negative case
- Boundary condition

New tools:
- plan_read: Read current plan for session
- plan_write: Create/update plan with YAML content (validates structure)
- plan_approve: Mark current revision as approved

New command:
- /feature <description>: Start Plan Mode for a new feature

Plan schema requires:
- plan_id, revision, approved_revision
- items with id, description, state, touches, checks (happy/negative/boundary)
- evidence and notes required when marking items done

Verification:
- plan_verify() called automatically when all items are done/blocked

Removed:
- todo_read, todo_write tools
- todo.rs module and related tests
This commit is contained in:
Dhanji R. Prasanna
2026-02-02 14:38:25 +11:00
parent 7fc9eb0778
commit a63950d8f5
12 changed files with 997 additions and 942 deletions

View File

@@ -589,36 +589,56 @@ mod tool_execution_integration {
);
}
/// CHARACTERIZATION: TODO tools work through agent
/// CHARACTERIZATION: Plan tools work through agent
#[tokio::test]
#[serial]
async fn todo_tools_work() {
async fn plan_tools_work() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent(&temp_dir).await;
// Write TODO
// Initialize session ID for plan tools (they are session-scoped)
agent.init_session_id_for_test("plan-tools-test");
// Write Plan
let write_call = ToolCall {
tool: "todo_write".to_string(),
tool: "plan_write".to_string(),
args: serde_json::json!({
"content": "- [ ] Test task\n- [x] Done task"
"plan": r#"plan_id: test-plan
revision: 1
items:
- id: I1
description: Test task
state: todo
touches:
- src/test.rs
checks:
happy:
desc: Works correctly
target: test::module
negative:
desc: Handles errors
target: test::module
boundary:
desc: Edge cases
target: test::module"#
}),
};
let write_result = agent.execute_tool(&write_call).await.unwrap();
assert!(
write_result.contains(""),
"Write should succeed: {}",
"Plan write should succeed: {}",
write_result
);
// Read TODO
// Read Plan
let read_call = ToolCall {
tool: "todo_read".to_string(),
tool: "plan_read".to_string(),
args: serde_json::json!({}),
};
let read_result = agent.execute_tool(&read_call).await.unwrap();
assert!(
read_result.contains("Test task"),
"Should read back TODO: {}",
read_result.contains("test-plan"),
"Should read back plan: {}",
read_result
);
}

View File

@@ -1,388 +0,0 @@
use g3_core::ui_writer::NullUiWriter;
use g3_core::Agent;
use serial_test::serial;
use std::fs;
use std::path::PathBuf;
use tempfile::TempDir;
/// Helper to create a test agent in a temporary directory
async fn create_test_agent_in_dir(temp_dir: &TempDir) -> Agent<NullUiWriter> {
// Change to temp directory
std::env::set_current_dir(temp_dir.path()).unwrap();
// Create a minimal config
let config = g3_config::Config::default();
let ui_writer = NullUiWriter;
Agent::new(config, ui_writer).await.unwrap()
}
/// Helper to get todo.g3.md path in temp directory
fn get_todo_path(temp_dir: &TempDir) -> PathBuf {
temp_dir.path().join("todo.g3.md")
}
#[tokio::test]
#[serial]
async fn test_todo_write_creates_file() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Initially, todo.g3.md should not exist
assert!(!todo_path.exists(), "todo.g3.md should not exist initially");
// Create a tool call to write TODO
let tool_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3"
}),
};
// Execute the tool
let result = agent.execute_tool(&tool_call).await.unwrap();
// Should report success
assert!(result.contains(""), "Should report success: {}", result);
assert!(
result.contains("todo.g3.md"),
"Should mention todo.g3.md: {}",
result
);
// File should now exist
assert!(todo_path.exists(), "todo.g3.md should exist after write");
// File should contain the correct content
let content = fs::read_to_string(&todo_path).unwrap();
assert_eq!(content, "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3");
}
#[tokio::test]
#[serial]
async fn test_todo_read_from_file() {
let temp_dir = TempDir::new().unwrap();
let todo_path = get_todo_path(&temp_dir);
// Pre-create a todo.g3.md file
let test_content = "# My TODO\n\n- [ ] First task\n- [x] Completed task";
fs::write(&todo_path, test_content).unwrap();
// Create agent (should load from file)
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Create a tool call to read TODO
let tool_call = g3_core::ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
// Execute the tool
let result = agent.execute_tool(&tool_call).await.unwrap();
// Should contain the TODO content
assert!(
result.contains("📝 TODO list:"),
"Should have TODO list header: {}",
result
);
assert!(
result.contains("First task"),
"Should contain first task: {}",
result
);
assert!(
result.contains("Completed task"),
"Should contain completed task: {}",
result
);
}
#[tokio::test]
#[serial]
async fn test_todo_read_empty_file() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Create a tool call to read TODO (file doesn't exist)
let tool_call = g3_core::ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
// Execute the tool
let result = agent.execute_tool(&tool_call).await.unwrap();
// Should report empty
assert!(result.contains("empty"), "Should report empty: {}", result);
}
#[tokio::test]
#[serial]
async fn test_todo_persistence_across_agents() {
let temp_dir = TempDir::new().unwrap();
let todo_path = get_todo_path(&temp_dir);
// Agent 1: Write TODO
{
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let tool_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": "- [ ] Persistent task\n- [x] Done task"
}),
};
agent.execute_tool(&tool_call).await.unwrap();
}
// Verify file exists
assert!(
todo_path.exists(),
"todo.g3.md should persist after agent drops"
);
// Agent 2: Read TODO (new agent instance)
{
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let tool_call = g3_core::ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
// Should read the persisted content
assert!(
result.contains("Persistent task"),
"Should read persisted task: {}",
result
);
assert!(
result.contains("Done task"),
"Should read done task: {}",
result
);
}
}
#[tokio::test]
#[serial]
async fn test_todo_update_preserves_file() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Write initial TODO
let write_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": "- [ ] Task 1\n- [ ] Task 2"
}),
};
agent.execute_tool(&write_call).await.unwrap();
// Update TODO
let update_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3"
}),
};
agent.execute_tool(&update_call).await.unwrap();
// Verify file has updated content
let content = fs::read_to_string(&todo_path).unwrap();
assert_eq!(content, "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3");
}
#[tokio::test]
#[serial]
async fn test_todo_handles_large_content() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Create a large TODO (but under the 50k limit)
let mut large_content = String::from("# Large TODO\n\n");
for i in 0..100 {
large_content.push_str(&format!(
"- [ ] Task {} with a long description that exceeds normal line lengths\n",
i
));
}
let tool_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": large_content
}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(
result.contains(""),
"Should handle large content: {}",
result
);
// Verify file contains all content
let file_content = fs::read_to_string(&todo_path).unwrap();
assert_eq!(file_content, large_content);
assert!(file_content.contains("Task 99"), "Should contain all tasks");
}
#[tokio::test]
#[serial]
async fn test_todo_respects_size_limit() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Create content that exceeds the default 50k limit
let huge_content = "x".repeat(60_000);
let tool_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": huge_content
}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
// Should reject content that's too large
assert!(
result.contains(""),
"Should reject oversized content: {}",
result
);
assert!(
result.contains("too large"),
"Should mention size limit: {}",
result
);
}
#[tokio::test]
#[serial]
async fn test_todo_agent_initialization_loads_file() {
let temp_dir = TempDir::new().unwrap();
let todo_path = get_todo_path(&temp_dir);
// Pre-create todo.g3.md before agent initialization
let initial_content = "- [ ] Pre-existing task";
fs::write(&todo_path, initial_content).unwrap();
// Create agent - should load the file during initialization
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Read TODO - should return the pre-existing content
let tool_call = g3_core::ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(
result.contains("Pre-existing task"),
"Should load file on init: {}",
result
);
}
#[tokio::test]
#[serial]
async fn test_todo_handles_unicode_content() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Create TODO with unicode characters
let unicode_content = "- [ ] 日本語タスク\n- [ ] Émoji task 🚀\n- [x] Ελληνικά task";
let tool_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": unicode_content
}),
};
agent.execute_tool(&tool_call).await.unwrap();
// Verify file preserves unicode
let file_content = fs::read_to_string(&todo_path).unwrap();
assert_eq!(file_content, unicode_content);
// Verify reading back works
let read_call = g3_core::ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&read_call).await.unwrap();
assert!(
result.contains("日本語"),
"Should preserve Japanese: {}",
result
);
assert!(result.contains("🚀"), "Should preserve emoji: {}", result);
assert!(
result.contains("Ελληνικά"),
"Should preserve Greek: {}",
result
);
}
#[tokio::test]
#[serial]
async fn test_todo_empty_content_creates_empty_file() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent_in_dir(&temp_dir).await;
let todo_path = get_todo_path(&temp_dir);
// Write empty TODO
let tool_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": ""
}),
};
agent.execute_tool(&tool_call).await.unwrap();
// File should exist but be empty
assert!(todo_path.exists(), "Empty todo.g3.md should create file");
let content = fs::read_to_string(&todo_path).unwrap();
assert_eq!(content, "");
}
#[tokio::test]
#[serial]
async fn test_todo_whitespace_only_content() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent_in_dir(&temp_dir).await;
// Write whitespace-only TODO
let tool_call = g3_core::ToolCall {
tool: "todo_write".to_string(),
args: serde_json::json!({
"content": " \n\n \t \n"
}),
};
agent.execute_tool(&tool_call).await.unwrap();
// Read it back
let read_call = g3_core::ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&read_call).await.unwrap();
// Should report as empty (whitespace is trimmed)
assert!(
result.contains("empty"),
"Whitespace-only should be empty: {}",
result
);
}

View File

@@ -1,223 +0,0 @@
use g3_config::Config;
use g3_core::ui_writer::UiWriter;
use g3_core::{Agent, ToolCall};
use serial_test::serial;
use std::sync::{Arc, Mutex};
use tempfile::TempDir;
// Mock UI Writer for testing
#[derive(Clone)]
struct MockUiWriter {
output: Arc<Mutex<Vec<String>>>,
prompt_responses: Arc<Mutex<Vec<bool>>>,
choice_responses: Arc<Mutex<Vec<usize>>>,
}
impl MockUiWriter {
fn new() -> Self {
Self {
output: Arc::new(Mutex::new(Vec::new())),
prompt_responses: Arc::new(Mutex::new(Vec::new())),
choice_responses: Arc::new(Mutex::new(Vec::new())),
}
}
#[allow(dead_code)]
fn set_prompt_response(&self, response: bool) {
self.prompt_responses.lock().unwrap().push(response);
}
#[allow(dead_code)]
fn set_choice_response(&self, response: usize) {
self.choice_responses.lock().unwrap().push(response);
}
#[allow(dead_code)]
fn get_output(&self) -> Vec<String> {
self.output.lock().unwrap().clone()
}
}
impl UiWriter for MockUiWriter {
fn print(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn println(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn print_inline(&self, message: &str) {
self.output.lock().unwrap().push(message.to_string());
}
fn print_system_prompt(&self, _prompt: &str) {}
fn print_context_status(&self, message: &str) {
self.output
.lock()
.unwrap()
.push(format!("STATUS: {}", message));
}
fn print_g3_progress(&self, _message: &str) {}
fn print_g3_status(&self, _message: &str, _status: &str) {}
fn print_thin_result(&self, _result: &g3_core::ThinResult) {}
fn print_tool_header(&self, _tool_name: &str, _tool_args: Option<&serde_json::Value>) {}
fn print_tool_arg(&self, _key: &str, _value: &str) {}
fn print_tool_output_header(&self) {}
fn update_tool_output_line(&self, _line: &str) {}
fn print_tool_output_line(&self, _line: &str) {}
fn print_tool_output_summary(&self, _hidden_count: usize) {}
fn print_tool_timing(&self, _duration_str: &str, _tokens_delta: u32, _context_percentage: f32) {}
fn print_agent_prompt(&self) {}
fn print_agent_response(&self, _content: &str) {}
fn notify_sse_received(&self) {}
fn flush(&self) {}
fn wants_full_output(&self) -> bool {
false
}
fn prompt_user_yes_no(&self, message: &str) -> bool {
self.output
.lock()
.unwrap()
.push(format!("PROMPT: {}", message));
self.prompt_responses.lock().unwrap().pop().unwrap_or(true)
}
fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
self.output
.lock()
.unwrap()
.push(format!("CHOICE: {} Options: {:?}", message, options));
self.choice_responses.lock().unwrap().pop().unwrap_or(0)
}
fn print_tool_streaming_hint(&self, _tool_name: &str) {}
fn print_tool_streaming_active(&self) {}
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_matching_sha() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha = "abc123hash";
let content = format!(
"{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
sha
);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
assert!(!result.contains("⚠️ TODO list is stale"));
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_ignore() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!(
"{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
sha_file
);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
ui_writer.set_choice_response(0); // Ignore
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
}
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_mismatch_sha_mark_stale() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!(
"{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
sha_file
);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = true;
let ui_writer = MockUiWriter::new();
ui_writer.set_choice_response(1); // Mark as Stale
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("⚠️ TODO list is stale"));
assert!(result.contains("Please regenerate"));
}
// Note: We cannot easily test "Quit" (index 2) because it calls std::process::exit(0)
// which would kill the test runner. We skip that test case here.
#[tokio::test]
#[serial]
async fn test_todo_staleness_check_disabled() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
std::env::set_current_dir(&temp_dir).unwrap();
let sha_file = "old_sha";
let sha_req = "new_sha";
let content = format!(
"{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
sha_file
);
std::fs::write(&todo_path, content).unwrap();
let mut config = Config::default();
config.agent.check_todo_staleness = false;
let ui_writer = MockUiWriter::new();
let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
agent.set_requirements_sha(sha_req.to_string());
let tool_call = ToolCall {
tool: "todo_read".to_string(),
args: serde_json::json!({}),
};
let result = agent.execute_tool(&tool_call).await.unwrap();
assert!(result.contains("📝 TODO list:"));
}

View File

@@ -393,21 +393,38 @@ mod str_replace_execution {
// Test: TODO tool execution
// =============================================================================
mod todo_execution {
mod plan_execution {
use super::*;
/// Test writing and reading TODO
/// Test writing and reading Plan
#[tokio::test]
#[serial]
async fn test_todo_write_and_read() {
async fn test_plan_write_and_read() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent(&temp_dir).await;
agent.init_session_id_for_test("plan-test");
// Write TODO
// Write Plan
let write_call = make_tool_call(
"todo_write",
"plan_write",
serde_json::json!({
"content": "- [ ] Task 1\n- [x] Task 2\n- [ ] Task 3"
"plan": r#"plan_id: test-plan
revision: 1
items:
- id: I1
description: Task 1
state: todo
touches: ["src/test.rs"]
checks:
happy:
desc: Works
target: test
negative:
desc: Errors
target: test
boundary:
desc: Edge
target: test"#
}),
);
@@ -415,52 +432,61 @@ mod todo_execution {
assert!(write_result.contains("") || write_result.to_lowercase().contains("success"),
"Write should succeed: {}", write_result);
// Read TODO
let read_call = make_tool_call("todo_read", serde_json::json!({}));
// Read Plan
let read_call = make_tool_call("plan_read", serde_json::json!({}));
let read_result = agent.execute_tool(&read_call).await.unwrap();
assert!(read_result.contains("test-plan"), "Should contain plan id: {}", read_result);
assert!(read_result.contains("Task 1"), "Should contain Task 1: {}", read_result);
assert!(read_result.contains("Task 2"), "Should contain Task 2: {}", read_result);
assert!(read_result.contains("Task 3"), "Should contain Task 3: {}", read_result);
}
/// Test reading empty TODO
/// Test reading empty Plan
#[tokio::test]
#[serial]
async fn test_todo_read_empty() {
async fn test_plan_read_empty() {
let temp_dir = TempDir::new().unwrap();
let mut agent = create_test_agent(&temp_dir).await;
agent.init_session_id_for_test("plan-empty-test");
let read_call = make_tool_call("todo_read", serde_json::json!({}));
let read_call = make_tool_call("plan_read", serde_json::json!({}));
let result = agent.execute_tool(&read_call).await.unwrap();
assert!(result.to_lowercase().contains("empty") || result.contains("no todo"),
assert!(result.contains("No plan") || result.to_lowercase().contains("no plan"),
"Should indicate empty: {}", result);
}
/// Test TODO persists to file
/// Test Plan approval
#[tokio::test]
#[serial]
async fn test_todo_persists_to_file() {
async fn test_plan_approve() {
let temp_dir = TempDir::new().unwrap();
let todo_path = temp_dir.path().join("todo.g3.md");
let mut agent = create_test_agent(&temp_dir).await;
agent.init_session_id_for_test("plan-approve-test");
{
let mut agent = create_test_agent(&temp_dir).await;
let write_call = make_tool_call(
"todo_write",
serde_json::json!({
"content": "- [ ] Persistent task"
}),
);
agent.execute_tool(&write_call).await.unwrap();
}
// First write a plan
let write_call = make_tool_call(
"plan_write",
serde_json::json!({
"plan": r#"plan_id: approve-test
revision: 1
items:
- id: I1
description: Test task
state: todo
touches: ["src/test.rs"]
checks:
happy: {desc: Works, target: test}
negative: {desc: Errors, target: test}
boundary: {desc: Edge, target: test}"#
}),
);
agent.execute_tool(&write_call).await.unwrap();
// File should exist after agent is dropped
assert!(todo_path.exists(), "TODO file should persist");
let content = fs::read_to_string(&todo_path).unwrap();
assert!(content.contains("Persistent task"), "Content should persist: {}", content);
// Approve the plan
let approve_call = make_tool_call("plan_approve", serde_json::json!({}));
let result = agent.execute_tool(&approve_call).await.unwrap();
assert!(result.contains("") && result.contains("approved"),
"Should approve plan: {}", result);
}
}