From a63950d8f5cb3765fc1d3703faca677542a6911b Mon Sep 17 00:00:00 2001 From: "Dhanji R. Prasanna" Date: Mon, 2 Feb 2026 14:38:25 +1100 Subject: [PATCH] Add Plan Mode to replace TODO system Plan Mode is a cognitive forcing system that requires reasoning about: - Happy path - Negative case - Boundary condition New tools: - plan_read: Read current plan for session - plan_write: Create/update plan with YAML content (validates structure) - plan_approve: Mark current revision as approved New command: - /feature : Start Plan Mode for a new feature Plan schema requires: - plan_id, revision, approved_revision - items with id, description, state, touches, checks (happy/negative/boundary) - evidence and notes required when marking items done Verification: - plan_verify() called automatically when all items are done/blocked Removed: - todo_read, todo_write tools - todo.rs module and related tests --- analysis/memory.md | 53 +- crates/g3-cli/src/commands.rs | 30 + crates/g3-core/src/prompts.rs | 140 ++-- crates/g3-core/src/tool_definitions.rs | 99 ++- crates/g3-core/src/tool_dispatch.rs | 9 +- crates/g3-core/src/tools/mod.rs | 4 +- crates/g3-core/src/tools/plan.rs | 674 ++++++++++++++++++ crates/g3-core/src/tools/todo.rs | 187 ----- ...stream_completion_characterization_test.rs | 40 +- crates/g3-core/tests/test_todo_persistence.rs | 388 ---------- crates/g3-core/tests/todo_staleness_test.rs | 223 ------ .../tests/tool_execution_roundtrip_test.rs | 92 ++- 12 files changed, 997 insertions(+), 942 deletions(-) create mode 100644 crates/g3-core/src/tools/plan.rs delete mode 100644 crates/g3-core/src/tools/todo.rs delete mode 100644 crates/g3-core/tests/test_todo_persistence.rs delete mode 100644 crates/g3-core/tests/todo_staleness_test.rs diff --git a/analysis/memory.md b/analysis/memory.md index f477225..6763227 100644 --- a/analysis/memory.md +++ b/analysis/memory.md @@ -1,5 +1,5 @@ # Workspace Memory -> Updated: 2026-01-30T01:10:54Z | Size: 13.2k chars +> Updated: 
2026-02-02T03:16:47Z | Size: 15.3k chars ### Remember Tool Wiring - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()` @@ -240,4 +240,53 @@ Research tool is asynchronous - spawns scout agent in background, returns immedi 2. Scout agent runs in background tokio task 3. On completion, `PendingResearchManager.complete()` stores result 4. At next iteration start or user prompt, `inject_completed_research()` adds to context -5. Agent can check status with `research_status` tool or user with `/research` command \ No newline at end of file +5. Agent can check status with `research_status` tool or user with `/research` command + +### Plan Mode (replaces TODO system) +Structured task planning with cognitive forcing - requires happy/negative/boundary checks. + +- `crates/g3-core/src/tools/plan.rs` + - `Plan` [200..240] - plan_id, revision, approved_revision, items[] + - `PlanItem` [110..145] - id, description, state, touches, checks, evidence, notes + - `PlanState` [25..45] - enum: Todo, Doing, Done, Blocked + - `Check` [60..85] - desc, target fields + - `Checks` [90..105] - happy, negative, boundary + - `get_plan_path()` [280..285] - returns `.g3/sessions//plan.g3.md` + - `read_plan()` [290..310] - loads plan from YAML in markdown + - `write_plan()` [315..335] - validates and saves plan + - `plan_verify()` [355..390] - placeholder called when all items done/blocked + - `execute_plan_read()` [395..420] - plan.read tool + - `execute_plan_write()` [425..490] - plan.write tool with validation + - `execute_plan_approve()` [495..530] - plan.approve tool + +- `crates/g3-core/src/tool_definitions.rs` [263..330] - plan.read, plan.write, plan.approve definitions +- `crates/g3-core/src/tool_dispatch.rs` [36..38] - dispatch cases for plan tools +- `crates/g3-cli/src/commands.rs` [460..490] - `/feature` command starts Plan Mode +- `crates/g3-core/src/prompts.rs` [21..130] - SHARED_PLAN_SECTION replaces TODO section + +**Plan 
Schema (YAML)**: +```yaml +plan_id: feature-name +revision: 1 +approved_revision: 1 # set by plan.approve +items: + - id: I1 + description: What to do + state: todo|doing|done|blocked + touches: [paths/modules] + checks: + happy: {desc, target} + negative: {desc, target} + boundary: {desc, target} + evidence: [file:line, test names] # required when done + notes: Implementation explanation # required when done +``` + +**Workflow**: `/feature ` → agent drafts plan → user approves → agent implements → plan_verify() called when complete + +### Plan Mode Tool Names (IMPORTANT) +Tool names must use underscores, not dots (Anthropic API restriction: `^[a-zA-Z0-9_-]{1,128}$`). + +- `plan_read` - Read current plan +- `plan_write` - Create/update plan +- `plan_approve` - Approve plan revision \ No newline at end of file diff --git a/crates/g3-cli/src/commands.rs b/crates/g3-cli/src/commands.rs index 4d39eea..5f5e738 100644 --- a/crates/g3-cli/src/commands.rs +++ b/crates/g3-cli/src/commands.rs @@ -74,6 +74,7 @@ pub async fn handle_command( output.print(" /readme - Reload README.md and AGENTS.md from disk"); output.print(" /stats - Show detailed context and performance statistics"); output.print(" /run - Read file and execute as prompt"); + output.print(" /feature - Start Plan Mode for a new feature"); output.print(" /help - Show this help message"); output.print(" exit/quit - Exit the interactive session"); output.print(""); @@ -452,6 +453,35 @@ pub async fn handle_command( } Ok(true) } + cmd if cmd.starts_with("/feature") => { + let parts: Vec<&str> = cmd.splitn(2, ' ').collect(); + if parts.len() < 2 || parts[1].trim().is_empty() { + output.print("Usage: /feature "); + output.print("Starts Plan Mode for a new feature. The agent will:"); + output.print(" 1. Research and draft a Plan with checks (happy/negative/boundary)"); + output.print(" 2. Ask clarifying questions if needed"); + output.print(" 3. 
Request approval before coding"); + output.print(""); + output.print("Example: /feature Add CSV import for comic book metadata"); + } else { + let feature_description = parts[1].trim(); + + // Construct the feature prompt that instructs the agent to use Plan Mode + let prompt = format!( + "I want to implement a new feature: {}\n\n\ + Please use Plan Mode to help me implement this:\n\ + 1. First, research the codebase to understand where this feature should live\n\ + 2. Draft a Plan using `plan_write` with items that have all three checks (happy, negative, boundary)\n\ + 3. Ask me any clarifying questions if needed\n\ + 4. Then ask me to approve the plan before you start coding\n\n\ + Do NOT start coding until I approve the plan.", + feature_description + ); + + execute_task_with_retry(agent, &prompt, show_prompt, show_code, output).await; + } + Ok(true) + } "/unproject" => { if active_project.is_some() { use crate::g3_status::G3Status; diff --git a/crates/g3-core/src/prompts.rs b/crates/g3-core/src/prompts.rs index db63cc6..0acd235 100644 --- a/crates/g3-core/src/prompts.rs +++ b/crates/g3-core/src/prompts.rs @@ -18,70 +18,92 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request: For shell commands: Use the shell tool with the exact command needed. Always use `rg` (ripgrep) instead of `grep` - it's faster, has better defaults, and respects .gitignore. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\". If you create temporary files for verification, place these in a subdir named 'tmp'. 
Do NOT pollute the current dir."; -const SHARED_TODO_SECTION: &str = "\ -# Task Management with TODO Tools +const SHARED_PLAN_SECTION: &str = "\ +# Task Management with Plan Mode -**REQUIRED for multi-step tasks.** Use TODO tools when your task involves ANY of: +**REQUIRED for multi-step tasks.** Use Plan Mode when your task involves ANY of: - Multiple files to create/modify (2+) - Multiple distinct steps (3+) - Dependencies between steps - Testing or verification needed - Uncertainty about approach +Plan Mode is a cognitive forcing system that prevents: +- Attention collapse +- False claims of completeness +- Happy-path-only implementations +- Duplication/contradiction with existing code + ## Workflow -Every multi-step task follows this pattern: -1. **Start**: Call todo_read, then todo_write to create your plan -2. **During**: Execute steps, then todo_read and todo_write to mark progress -3. **End**: Call todo_read to verify all items complete -4. **Finally**, call `remember` to save info on new features created or discovered +1. **Draft**: Call `plan_read` to check for existing plan, then `plan_write` to create/update +2. **Approval**: Ask user to approve before coding (\"'approve', or edit plan?\") +3. **Execute**: Implement items, updating plan with `plan_write` to mark progress +4. **Complete**: When all items are done/blocked, verification runs automatically +5. **Remember**: Call `remember` to save discovered code locations -Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists are scoped to the current session and stored in the session directory. 
+## Plan Schema -## Examples +Each plan item MUST have: +- `id`: Stable identifier (e.g., \"I1\", \"I2\") +- `description`: What will be done +- `state`: todo | doing | done | blocked +- `touches`: Paths/modules this affects (forces \"where does this live?\") +- `checks`: Three required perspectives: + - `happy`: {desc, target} - Normal successful operation + - `negative`: {desc, target} - Error handling, invalid input + - `boundary`: {desc, target} - Edge cases, limits +- `evidence`: (required when done) File:line refs, test names +- `notes`: (required when done) Short implementation explanation -**Example 1: Feature Implementation** -User asks: \"Add user authentication with tests\" +## Rules -First action: -{\"tool\": \"todo_read\", \"args\": {}} +When drafting a plan, you MUST: +- Keep items ≤ 7 by default +- Commit to where the work will live (touches) +- Provide all three checks (happy, negative, boundary) -Then create plan: -{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n - [ ] Create User struct\\n - [ ] Add login endpoint\\n - [ ] Add password hashing\\n - [ ] Write unit tests\\n - [ ] Write integration tests\"}} +When updating a plan: +- Cannot remove items from an approved plan (mark as blocked instead) +- Must provide evidence and notes when marking item as done -After completing User struct: -{\"tool\": \"todo_read\", \"args\": {}} -{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n - [x] Create User struct\\n - [ ] Add login endpoint\\n - [ ] Add password hashing\\n - [ ] Write unit tests\\n - [ ] Write integration tests\"}} +## Example Plan Item -**Example 2: Bug Fix** -User asks: \"Fix the memory leak in cache module\" +```yaml +- id: I1 + description: \"Add CSV import for comic book metadata\" + state: todo + touches: [\"src/import\", \"src/library\"] + checks: + happy: + desc: \"Valid CSV imports 3 comics\" + target: \"import::csv\" + negative: + desc: \"Missing column errors 
with MissingColumn\" + target: \"import::csv\" + boundary: + desc: \"Empty file yields empty import without error\" + target: \"import::csv\" +``` -{\"tool\": \"todo_read\", \"args\": {}} -{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Fix memory leak\\n - [ ] Review cache.rs\\n - [ ] Check for unclosed resources\\n - [ ] Add drop implementation\\n - [ ] Write test to verify fix\"}} - -**Example 3: Refactoring** -User asks: \"Refactor database layer to use async/await\" - -{\"tool\": \"todo_read\", \"args\": {}} -{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Refactor to async\\n - [ ] Update function signatures\\n - [ ] Replace blocking calls\\n - [ ] Update all callers\\n - [ ] Update tests\"}} - -## Format - -Use markdown checkboxes: -- \"- [ ]\" for incomplete tasks -- \"- [x]\" for completed tasks -- Indent with 2 spaces for subtasks - -Keep items short, specific, and action-oriented. +When done, add evidence and notes: +```yaml + state: done + evidence: + - \"src/import/csv.rs:42-118\" + - \"tests/import_csv.rs::test_valid_csv\" + notes: \"Extended existing parser instead of creating duplicate\" +``` ## Benefits ✓ Prevents missed steps ✓ Makes progress visible ✓ Helps recover from interruptions -✓ Creates better summaries +✓ Forces consideration of edge cases +✓ Provides audit trail with evidence -If you can complete it with 1-2 tool calls, skip TODO."; +If you can complete it with 1-2 tool calls, skip Plan Mode."; const SHARED_TEMPORARY_FILES: &str = "\ # Temporary files @@ -153,7 +175,7 @@ Do NOT save duplicates - check the Workspace Memory section (loaded at startup) After discovering how session continuation works: -{\"tool\": \"remember\", \"args\": {\"notes\": \"### Session Continuation\\nSave/restore session state across g3 invocations using symlink-based approach.\\n\\n- `crates/g3-core/src/session_continuation.rs`\\n - `SessionContinuation` [850..2100] - artifact struct with session state, TODO snapshot, context %\\n - 
`save_continuation()` [5765..7200] - saves to `.g3/sessions//latest.json`, updates symlink\\n - `load_continuation()` [7250..8900] - follows `.g3/session` symlink to restore\\n - `find_incomplete_agent_session()` [10500..13200] - finds sessions with incomplete TODOs for agent resume\"}} +{\"tool\": \"remember\", \"args\": {\"notes\": \"### Session Continuation\\nSave/restore session state across g3 invocations using symlink-based approach.\\n\\n- `crates/g3-core/src/session_continuation.rs`\\n - `SessionContinuation` [850..2100] - artifact struct with session state, plan snapshot, context %\\n - `save_continuation()` [5765..7200] - saves to `.g3/sessions//latest.json`, updates symlink\\n - `load_continuation()` [7250..8900] - follows `.g3/session` symlink to restore\\n - `find_incomplete_agent_session()` [10500..13200] - finds sessions with incomplete plans for agent resume\"}} After discovering a useful pattern: @@ -213,13 +235,17 @@ Short description for providers without native calling specs: - Format: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"path/to/file\", \"diff\": \"--- old\\n-old text\\n+++ new\\n+new text\"}} - Example: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"src/main.rs\", \"diff\": \"--- old\\n-old_code();\\n+++ new\\n+new_code();\"}} -- **todo_read**: Read the current session's TODO list from todo.g3.md (session-scoped) - - Format: {\"tool\": \"todo_read\", \"args\": {}} - - Example: {\"tool\": \"todo_read\", \"args\": {}} +- **plan_read**: Read the current Plan for this session + - Format: {\"tool\": \"plan_read\", \"args\": {}} + - Example: {\"tool\": \"plan_read\", \"args\": {}} -- **todo_write**: Write or overwrite the session's todo.g3.md file (WARNING: overwrites completely, always read first) - - Format: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Task 1\\n- [ ] Task 2\"}} - - Example: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Implement feature\\n - [ ] Write tests\\n - [ ] Run 
tests\"}} +- **plan_write**: Create or update the Plan with YAML content + - Format: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: my-plan\\nitems: [...]\"}} + - Example: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: feature-x\\nitems:\\n - id: I1\\n description: Add feature\\n state: todo\\n touches: [src/lib.rs]\\n checks:\\n happy: {desc: Works, target: lib}\\n negative: {desc: Errors, target: lib}\\n boundary: {desc: Edge, target: lib}\"}} + +- **plan_approve**: Approve the current plan revision (called by user) + - Format: {\"tool\": \"plan_approve\", \"args\": {}} + - Example: {\"tool\": \"plan_approve\", \"args\": {}} - **code_search**: Syntax-aware code search using tree-sitter. Supports Rust, Python, JavaScript, TypeScript. - Format: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"label\", \"query\": \"tree-sitter query\", \"language\": \"rust|python|javascript|typescript\", \"paths\": [\"src/\"], \"context_lines\": 0}]}} @@ -269,11 +295,6 @@ write_file(\"file2.txt\", \"...\") write_file(\"helper.rs\", \"...\") [DONE]"; -const NON_NATIVE_TODO_ADDENDUM: &str = " - -IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format: -`{{Based on the requirements file with SHA256: }}` -This ensures the TODO list is tracked against the specific version of requirements it was generated from."; // ============================================================================ // COMPOSED PROMPTS @@ -284,7 +305,7 @@ pub fn get_system_prompt_for_native() -> String { format!( "{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}", SHARED_INTRO, - SHARED_TODO_SECTION, + SHARED_PLAN_SECTION, SHARED_TEMPORARY_FILES, SHARED_WEB_RESEARCH, SHARED_WORKSPACE_MEMORY, @@ -295,12 +316,11 @@ pub fn get_system_prompt_for_native() -> String { /// System prompt for providers without native tool calling (embedded models) pub fn get_system_prompt_for_non_native() 
-> String { format!( - "{}\n\n{}\n\n{}\n\n{}{}\n\n{}\n\n{}\n\n{}", + "{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}", SHARED_INTRO, NON_NATIVE_TOOL_FORMAT, NON_NATIVE_INSTRUCTIONS, - SHARED_TODO_SECTION, - NON_NATIVE_TODO_ADDENDUM, + SHARED_PLAN_SECTION, SHARED_WEB_RESEARCH, SHARED_WORKSPACE_MEMORY, SHARED_RESPONSE_GUIDELINES @@ -311,7 +331,7 @@ pub fn get_system_prompt_for_non_native() -> String { const G3_IDENTITY_LINE: &str = "You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You analyze given tasks and write code to achieve goals."; /// Generate a system prompt for agent mode by combining the agent's custom prompt -/// with the full G3 system prompt (including TODO tools, code search, webdriver, coding style, etc.) +/// with the full G3 system prompt (including plan tools, code search, webdriver, coding style, etc.) /// /// The agent_prompt replaces only the G3 identity line at the start of the prompt. /// Everything else (tool instructions, coding guidelines, etc.) is preserved. 
@@ -374,12 +394,12 @@ mod tests { } #[test] - fn test_both_prompts_have_todo_section() { + fn test_both_prompts_have_plan_section() { let native = get_system_prompt_for_native(); let non_native = get_system_prompt_for_non_native(); - assert!(native.contains("# Task Management with TODO Tools")); - assert!(non_native.contains("# Task Management with TODO Tools")); + assert!(native.contains("# Task Management with Plan Mode")); + assert!(non_native.contains("# Task Management with Plan Mode")); } #[test] diff --git a/crates/g3-core/src/tool_definitions.rs b/crates/g3-core/src/tool_definitions.rs index 8ddf75c..be581ba 100644 --- a/crates/g3-core/src/tool_definitions.rs +++ b/crates/g3-core/src/tool_definitions.rs @@ -193,29 +193,6 @@ fn create_core_tools(exclude_research: bool) -> Vec { "required": ["path", "window_id"] }), }, - Tool { - name: "todo_read".to_string(), - description: "Read your current TODO list from todo.g3.md file in the session directory. Shows what tasks are planned and their status. Call this at the start of multi-step tasks to check for existing plans, and during execution to review progress before updating. TODO lists are scoped to the current session.".to_string(), - input_schema: json!({ - "type": "object", - "properties": {}, - "required": [] - }), - }, - Tool { - name: "todo_write".to_string(), - description: "Create or update your TODO list in todo.g3.md file with a complete task plan. Use markdown checkboxes: - [ ] for pending, - [x] for complete. This tool replaces the entire file content, so always call todo_read first to preserve existing content. Essential for multi-step tasks. TODO lists are scoped to the current session.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "content": { - "type": "string", - "description": "The TODO list content to save. Use markdown checkbox format: - [ ] for incomplete tasks, - [x] for completed tasks. Support nested tasks with indentation." 
- } - }, - "required": ["content"] - }), - }, Tool { name: "coverage".to_string(), description: "Generate a code coverage report for the entire workspace using cargo llvm-cov. This runs all tests with coverage instrumentation and returns a summary of coverage statistics. Requires llvm-tools-preview and cargo-llvm-cov to be installed (they will be auto-installed if missing).".to_string(), @@ -288,6 +265,62 @@ fn create_core_tools(exclude_research: bool) -> Vec { }); } + // Plan Mode tools + tools.push(Tool { + name: "plan_read".to_string(), + description: "Read the current Plan for this session. Shows the plan structure with items, their states, checks (happy/negative/boundary), evidence, and notes. Use this to review the plan before making updates.".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }); + + tools.push(Tool { + name: "plan_write".to_string(), + description: r#"Create or update the Plan for this session. The plan must be provided as YAML with the following structure: + +- plan_id: Unique identifier for the plan +- revision: Will be auto-incremented +- items: Array of plan items, each with: + - id: Stable identifier (e.g., "I1") + - description: What will be done + - state: todo | doing | done | blocked + - touches: Array of paths/modules affected + - checks: + happy: {desc, target} - Normal successful operation + negative: {desc, target} - Error handling, invalid input + boundary: {desc, target} - Edge cases, limits + - evidence: Array of file:line refs, test names (required when done) + - notes: Implementation explanation (required when done) + +Rules: +- Keep items ≤ 7 by default +- All three checks (happy, negative, boundary) are required +- Cannot remove items from an approved plan (mark as blocked instead) +- Evidence and notes required when marking item as done"#.to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "plan": { + "type": "string", + "description": "The plan 
as YAML. Must include plan_id and items array." + } + }, + "required": ["plan"] + }), + }); + + tools.push(Tool { + name: "plan_approve".to_string(), + description: "Mark the current plan revision as approved. This is called by the user (not the agent) to approve a drafted plan before implementation begins. Once approved, plan items cannot be removed (only marked as blocked). The agent should ask for approval after drafting a plan.".to_string(), + input_schema: json!({ + "type": "object", + "properties": {}, + "required": [] + }), + }); + // Workspace memory tool (memory is auto-loaded at startup, only remember is needed) tools.push(Tool { name: "remember".to_string(), @@ -523,11 +556,11 @@ mod tests { #[test] fn test_core_tools_count() { let tools = create_core_tools(false); - // Should have the core tools: shell, background_process, read_file, read_image, - // write_file, str_replace, screenshot, - // todo_read, todo_write, coverage, code_search, research, research_status, remember - // (15 total - memory is auto-loaded, only remember tool needed) - assert_eq!(tools.len(), 15); + // Core tools: shell, background_process, read_file, read_image, + // write_file, str_replace, screenshot, coverage, code_search, + // research, research_status, remember, plan_read, plan_write, plan_approve + // (16 total - memory is auto-loaded, only remember tool needed) + assert_eq!(tools.len(), 16); } #[test] @@ -541,15 +574,15 @@ mod tests { fn test_create_tool_definitions_core_only() { let config = ToolConfig::default(); let tools = create_tool_definitions(config); - assert_eq!(tools.len(), 15); + assert_eq!(tools.len(), 16); } #[test] fn test_create_tool_definitions_all_enabled() { let config = ToolConfig::new(true, true); let tools = create_tool_definitions(config); - // 15 core + 15 webdriver = 30 - assert_eq!(tools.len(), 30); + // 16 core + 15 webdriver = 31 + assert_eq!(tools.len(), 31); } #[test] @@ -567,8 +600,8 @@ mod tests { let tools_with_research = 
create_core_tools(false); let tools_without_research = create_core_tools(true); - assert_eq!(tools_with_research.len(), 15); - assert_eq!(tools_without_research.len(), 13); // research + research_status both excluded + assert_eq!(tools_with_research.len(), 16); + assert_eq!(tools_without_research.len(), 14); // research + research_status both excluded assert!(tools_with_research.iter().any(|t| t.name == "research")); assert!(!tools_without_research.iter().any(|t| t.name == "research")); diff --git a/crates/g3-core/src/tool_dispatch.rs b/crates/g3-core/src/tool_dispatch.rs index 49ec507..67e1765 100644 --- a/crates/g3-core/src/tool_dispatch.rs +++ b/crates/g3-core/src/tool_dispatch.rs @@ -7,7 +7,7 @@ use anyhow::Result; use tracing::{debug, warn}; use crate::tools::executor::ToolContext; -use crate::tools::{acd, file_ops, memory, misc, research, shell, todo, webdriver}; +use crate::tools::{acd, file_ops, memory, misc, plan, research, shell, webdriver}; use crate::ui_writer::UiWriter; use crate::ToolCall; @@ -32,9 +32,10 @@ pub async fn dispatch_tool( "write_file" => file_ops::execute_write_file(tool_call, ctx).await, "str_replace" => file_ops::execute_str_replace(tool_call, ctx).await, - // TODO management - "todo_read" => todo::execute_todo_read(tool_call, ctx).await, - "todo_write" => todo::execute_todo_write(tool_call, ctx).await, + // Plan Mode + "plan_read" => plan::execute_plan_read(tool_call, ctx).await, + "plan_write" => plan::execute_plan_write(tool_call, ctx).await, + "plan_approve" => plan::execute_plan_approve(tool_call, ctx).await, // Miscellaneous tools "screenshot" => misc::execute_take_screenshot(tool_call, ctx).await, diff --git a/crates/g3-core/src/tools/mod.rs b/crates/g3-core/src/tools/mod.rs index 69d0dfa..fa11b96 100644 --- a/crates/g3-core/src/tools/mod.rs +++ b/crates/g3-core/src/tools/mod.rs @@ -4,7 +4,7 @@ //! Tools are organized by category: //! - `shell` - Shell command execution and background processes //! 
- `file_ops` - File reading, writing, and editing -//! - `todo` - TODO list management +//! - `plan` - Plan Mode for structured task planning //! - `webdriver` - Browser automation via WebDriver //! - `misc` - Other tools (screenshots, code search, etc.) //! - `research` - Web research via scout agent @@ -16,9 +16,9 @@ pub mod acd; pub mod file_ops; pub mod memory; pub mod misc; +pub mod plan; pub mod research; pub mod shell; -pub mod todo; pub mod webdriver; pub use executor::ToolExecutor; diff --git a/crates/g3-core/src/tools/plan.rs b/crates/g3-core/src/tools/plan.rs new file mode 100644 index 0000000..f788b83 --- /dev/null +++ b/crates/g3-core/src/tools/plan.rs @@ -0,0 +1,674 @@ +//! Plan Mode - Structured task planning with cognitive forcing. +//! +//! This module implements Plan Mode, which replaces the TODO system with a +//! checklist-style plan that forces reasoning about: +//! - Happy path +//! - Negative case +//! - Boundary condition +//! +//! A task is done ONLY when all plan items are satisfied with evidence. + +use anyhow::{anyhow, Result}; +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::path::PathBuf; +use tracing::debug; + +use crate::paths::{ensure_session_dir, get_session_logs_dir}; +use crate::ui_writer::UiWriter; +use crate::ToolCall; + +use super::executor::ToolContext; + +// ============================================================================ +// Plan Schema +// ============================================================================ + +/// State of a plan item. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "lowercase")] +pub enum PlanState { + #[default] + Todo, + Doing, + Done, + Blocked, +} + +impl fmt::Display for PlanState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PlanState::Todo => write!(f, "todo"), + PlanState::Doing => write!(f, "doing"), + PlanState::Done => write!(f, "done"), + PlanState::Blocked => write!(f, "blocked"), + } + } +} + +impl std::str::FromStr for PlanState { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s.to_lowercase().as_str() { + "todo" => Ok(PlanState::Todo), + "doing" => Ok(PlanState::Doing), + "done" => Ok(PlanState::Done), + "blocked" => Ok(PlanState::Blocked), + _ => Err(anyhow!("Invalid plan state: {}", s)), + } + } +} + +/// A check with description and target. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct Check { + /// Description of what this check verifies + pub desc: String, + /// Target module/function/file this check applies to + pub target: String, +} + +impl Check { + pub fn new(desc: impl Into<String>, target: impl Into<String>) -> Self { + Self { + desc: desc.into(), + target: target.into(), + } + } + + /// Validate that the check has required fields. + pub fn validate(&self) -> Result<()> { + if self.desc.trim().is_empty() { + return Err(anyhow!("Check description cannot be empty")); + } + if self.target.trim().is_empty() { + return Err(anyhow!("Check target cannot be empty")); + } + Ok(()) + } +} + +/// The three required checks for each plan item. +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +pub struct Checks { + /// Happy path check - normal successful operation + pub happy: Check, + /// Negative case check - error handling, invalid input + pub negative: Check, + /// Boundary condition check - edge cases, limits + pub boundary: Check, +} + +impl Checks { + /// Validate all three checks. 
+ pub fn validate(&self) -> Result<()> { + self.happy.validate().map_err(|e| anyhow!("happy check: {}", e))?; + self.negative.validate().map_err(|e| anyhow!("negative check: {}", e))?; + self.boundary.validate().map_err(|e| anyhow!("boundary check: {}", e))?; + Ok(()) + } +} + +/// A single item in the plan. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PlanItem { + /// Stable identifier (e.g., "I1", "I2") + pub id: String, + /// What will be done + pub description: String, + /// Current state + pub state: PlanState, + /// Paths/modules this affects + pub touches: Vec<String>, + /// The three required checks + pub checks: Checks, + /// Evidence when done (file:line, test names, snippets) + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub evidence: Vec<String>, + /// Short explanation including implementation nuances + #[serde(default, skip_serializing_if = "Option::is_none")] + pub notes: Option<String>, +} + +impl PlanItem { + /// Create a new plan item with required fields. + pub fn new( + id: impl Into<String>, + description: impl Into<String>, + touches: Vec<String>, + checks: Checks, + ) -> Self { + Self { + id: id.into(), + description: description.into(), + state: PlanState::Todo, + touches, + checks, + evidence: Vec::new(), + notes: None, + } + } + + /// Validate the plan item structure. 
+ pub fn validate(&self) -> Result<()> { + if self.id.trim().is_empty() { + return Err(anyhow!("Item id cannot be empty")); + } + if self.description.trim().is_empty() { + return Err(anyhow!("Item description cannot be empty")); + } + if self.touches.is_empty() { + return Err(anyhow!("Item must specify at least one path/module in 'touches'")); + } + self.checks.validate().map_err(|e| anyhow!("Item '{}': {}", self.id, e))?; + + // If done, must have evidence and notes + if self.state == PlanState::Done { + if self.evidence.is_empty() { + return Err(anyhow!( + "Item '{}' is marked done but has no evidence", + self.id + )); + } + if self.notes.as_ref().map(|n| n.trim().is_empty()).unwrap_or(true) { + return Err(anyhow!( + "Item '{}' is marked done but has no notes", + self.id + )); + } + } + + Ok(()) + } + + /// Check if this item is terminal (done or blocked). + pub fn is_terminal(&self) -> bool { + matches!(self.state, PlanState::Done | PlanState::Blocked) + } +} + +/// A complete plan with metadata and items. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Plan { + /// Unique identifier for this plan + pub plan_id: String, + /// Current revision number (increments on each write) + pub revision: u32, + /// The revision that was approved (None if not yet approved) + #[serde(default, skip_serializing_if = "Option::is_none")] + pub approved_revision: Option<u32>, + /// The plan items + pub items: Vec<PlanItem>, +} + +impl Plan { + /// Create a new plan with the given ID. + pub fn new(plan_id: impl Into<String>) -> Self { + Self { + plan_id: plan_id.into(), + revision: 1, + approved_revision: None, + items: Vec::new(), + } + } + + /// Check if the plan has been approved. + pub fn is_approved(&self) -> bool { + self.approved_revision.is_some() + } + + /// Approve the current revision. + pub fn approve(&mut self) { + self.approved_revision = Some(self.revision); + } + + /// Check if all items are terminal (done or blocked). 
+ pub fn is_complete(&self) -> bool { + !self.items.is_empty() && self.items.iter().all(|item| item.is_terminal()) + } + + /// Validate the entire plan structure. + pub fn validate(&self) -> Result<()> { + if self.plan_id.trim().is_empty() { + return Err(anyhow!("Plan ID cannot be empty")); + } + + if self.items.is_empty() { + return Err(anyhow!("Plan must have at least one item")); + } + + if self.items.len() > 7 { + // Warn but don't fail - this is a guideline + debug!("Plan has {} items (recommended max is 7)", self.items.len()); + } + + // Check for duplicate IDs + let mut seen_ids = std::collections::HashSet::new(); + for item in &self.items { + if !seen_ids.insert(&item.id) { + return Err(anyhow!("Duplicate item ID: {}", item.id)); + } + item.validate()?; + } + + Ok(()) + } + + /// Get a summary of the plan status. + pub fn status_summary(&self) -> String { + let total = self.items.len(); + let done = self.items.iter().filter(|i| i.state == PlanState::Done).count(); + let doing = self.items.iter().filter(|i| i.state == PlanState::Doing).count(); + let blocked = self.items.iter().filter(|i| i.state == PlanState::Blocked).count(); + let todo = self.items.iter().filter(|i| i.state == PlanState::Todo).count(); + + let approved_str = if let Some(rev) = self.approved_revision { + format!(" (approved at rev {})", rev) + } else { + " (NOT APPROVED)".to_string() + }; + + format!( + "Plan '{}' rev {}{}: {}/{} done, {} doing, {} blocked, {} todo", + self.plan_id, self.revision, approved_str, done, total, doing, blocked, todo + ) + } +} + +// ============================================================================ +// Plan Storage +// ============================================================================ + +/// Get the path to the plan.g3.md file for a session. +pub fn get_plan_path(session_id: &str) -> PathBuf { + get_session_logs_dir(session_id).join("plan.g3.md") +} + +/// Read a plan from the session's plan.g3.md file. 
+pub fn read_plan(session_id: &str) -> Result> { + let path = get_plan_path(session_id); + if !path.exists() { + return Ok(None); + } + + let content = std::fs::read_to_string(&path)?; + + // Extract YAML from markdown code block + let yaml_content = extract_yaml_from_markdown(&content)?; + + let plan: Plan = serde_yaml::from_str(&yaml_content)?; + Ok(Some(plan)) +} + +/// Write a plan to the session's plan.g3.md file. +pub fn write_plan(session_id: &str, plan: &Plan) -> Result<()> { + // Validate before writing + plan.validate()?; + + let _ = ensure_session_dir(session_id)?; + let path = get_plan_path(session_id); + + // Format as markdown with YAML code block + let content = format_plan_as_markdown(plan); + + std::fs::write(&path, content)?; + Ok(()) +} + +/// Extract YAML content from a markdown file with ```yaml code block. +fn extract_yaml_from_markdown(content: &str) -> Result { + // Look for ```yaml ... ``` block + let start_marker = "```yaml"; + let end_marker = "```"; + + if let Some(start_idx) = content.find(start_marker) { + let yaml_start = start_idx + start_marker.len(); + if let Some(end_idx) = content[yaml_start..].find(end_marker) { + let yaml = content[yaml_start..yaml_start + end_idx].trim(); + return Ok(yaml.to_string()); + } + } + + // If no code block, try parsing the whole content as YAML + Ok(content.to_string()) +} + +/// Format a plan as markdown with embedded YAML. 
+fn format_plan_as_markdown(plan: &Plan) -> String { + let yaml = serde_yaml::to_string(plan).unwrap_or_else(|_| "# Error serializing plan".to_string()); + + let mut md = String::new(); + md.push_str(&format!("# Plan: {}\n\n", plan.plan_id)); + md.push_str(&format!("**Status**: {}\n\n", plan.status_summary())); + md.push_str("## Plan Data\n\n"); + md.push_str("```yaml\n"); + md.push_str(&yaml); + md.push_str("```\n"); + + md +} + +// ============================================================================ +// Plan Verification +// ============================================================================ + +/// Verify a completed plan. Called by the agent loop when all items are done/blocked. +/// +/// This is a placeholder that prints the plan contents. +/// In the future, this could perform additional validation. +pub fn plan_verify(plan: &Plan) { + println!("\n{}", "=".repeat(60)); + println!("PLAN VERIFY CALLED"); + println!("{}", "=".repeat(60)); + println!("Plan ID: {}", plan.plan_id); + println!("Revision: {}", plan.revision); + println!("Approved: {:?}", plan.approved_revision); + println!("Status: {}", plan.status_summary()); + println!(); + + for item in &plan.items { + println!("[{}] {} - {}", item.id, item.state, item.description); + println!(" Touches: {:?}", item.touches); + println!(" Checks:"); + println!(" Happy: {} -> {}", item.checks.happy.desc, item.checks.happy.target); + println!(" Negative: {} -> {}", item.checks.negative.desc, item.checks.negative.target); + println!(" Boundary: {} -> {}", item.checks.boundary.desc, item.checks.boundary.target); + if !item.evidence.is_empty() { + println!(" Evidence:"); + for e in &item.evidence { + println!(" - {}", e); + } + } + if let Some(notes) = &item.notes { + println!(" Notes: {}", notes); + } + println!(); + } + println!("{}\n", "=".repeat(60)); +} + +// ============================================================================ +// Tool Implementations +// 
============================================================================ + +/// Execute the `plan_read` tool. +pub async fn execute_plan_read( + _tool_call: &ToolCall, + ctx: &mut ToolContext<'_, W>, +) -> Result { + debug!("Processing plan_read tool call"); + + let session_id = match ctx.session_id { + Some(id) => id, + None => return Ok("❌ No active session - plans are session-scoped.".to_string()), + }; + + match read_plan(session_id)? { + Some(plan) => { + let yaml = serde_yaml::to_string(&plan)?; + Ok(format!( + "📋 {}\n\n```yaml\n{}```", + plan.status_summary(), + yaml + )) + } + None => Ok("📋 No plan exists for this session. Use plan_write to create one.".to_string()), + } +} + +/// Execute the `plan_write` tool. +pub async fn execute_plan_write( + tool_call: &ToolCall, + ctx: &mut ToolContext<'_, W>, +) -> Result { + debug!("Processing plan_write tool call"); + + let session_id = match ctx.session_id { + Some(id) => id, + None => return Ok("❌ No active session - plans are session-scoped.".to_string()), + }; + + // Get the plan content from args + let plan_yaml = match tool_call.args.get("plan").and_then(|v| v.as_str()) { + Some(p) => p, + None => return Ok("❌ Missing 'plan' argument. Provide the plan as YAML.".to_string()), + }; + + // Parse the YAML + let mut plan: Plan = match serde_yaml::from_str(plan_yaml) { + Ok(p) => p, + Err(e) => return Ok(format!("❌ Invalid plan YAML: {}", e)), + }; + + // Load existing plan to preserve approved_revision and increment revision + if let Some(existing) = read_plan(session_id)? 
{ + // Preserve approved_revision from existing plan + plan.approved_revision = existing.approved_revision; + // Increment revision + plan.revision = existing.revision + 1; + + // If plan was approved, ensure checks are not removed + if existing.is_approved() { + // Verify all existing item IDs still exist + for existing_item in &existing.items { + if !plan.items.iter().any(|i| i.id == existing_item.id) { + return Ok(format!( + "❌ Cannot remove item '{}' from approved plan. Items can only be marked blocked, not removed.", + existing_item.id + )); + } + } + } + } + + // Validate the plan + if let Err(e) = plan.validate() { + return Ok(format!("❌ Plan validation failed: {}", e)); + } + + // Write the plan + if let Err(e) = write_plan(session_id, &plan) { + return Ok(format!("❌ Failed to write plan: {}", e)); + } + + // Check if plan is now complete and trigger verification + if plan.is_complete() && plan.is_approved() { + plan_verify(&plan); + } + + Ok(format!("✅ Plan updated: {}", plan.status_summary())) +} + +/// Execute the `plan_approve` tool. +pub async fn execute_plan_approve( + _tool_call: &ToolCall, + ctx: &mut ToolContext<'_, W>, +) -> Result { + debug!("Processing plan_approve tool call"); + + let session_id = match ctx.session_id { + Some(id) => id, + None => return Ok("❌ No active session - plans are session-scoped.".to_string()), + }; + + // Load existing plan + let mut plan = match read_plan(session_id)? { + Some(p) => p, + None => return Ok("❌ No plan exists to approve. Use plan_write first.".to_string()), + }; + + if plan.is_approved() { + return Ok(format!( + "ℹ️ Plan already approved at revision {}. Current revision: {}", + plan.approved_revision.unwrap(), + plan.revision + )); + } + + // Approve the plan + plan.approve(); + + // Write back + if let Err(e) = write_plan(session_id, &plan) { + return Ok(format!("❌ Failed to save approved plan: {}", e)); + } + + Ok(format!( + "✅ Plan approved at revision {}. 
You may now begin implementation.", + plan.revision + )) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn make_test_check() -> Check { + Check::new("Test description", "test::target") + } + + fn make_test_checks() -> Checks { + Checks { + happy: make_test_check(), + negative: make_test_check(), + boundary: make_test_check(), + } + } + + fn make_test_item(id: &str) -> PlanItem { + PlanItem::new( + id, + "Test item description", + vec!["src/test.rs".to_string()], + make_test_checks(), + ) + } + + #[test] + fn test_plan_state_display() { + assert_eq!(PlanState::Todo.to_string(), "todo"); + assert_eq!(PlanState::Doing.to_string(), "doing"); + assert_eq!(PlanState::Done.to_string(), "done"); + assert_eq!(PlanState::Blocked.to_string(), "blocked"); + } + + #[test] + fn test_plan_state_from_str() { + assert_eq!("todo".parse::().unwrap(), PlanState::Todo); + assert_eq!("DOING".parse::().unwrap(), PlanState::Doing); + assert_eq!("Done".parse::().unwrap(), PlanState::Done); + assert!("invalid".parse::().is_err()); + } + + #[test] + fn test_check_validation() { + let valid = Check::new("desc", "target"); + assert!(valid.validate().is_ok()); + + let empty_desc = Check::new("", "target"); + assert!(empty_desc.validate().is_err()); + + let empty_target = Check::new("desc", ""); + assert!(empty_target.validate().is_err()); + } + + #[test] + fn test_plan_item_validation() { + let item = make_test_item("I1"); + assert!(item.validate().is_ok()); + + // Done item without evidence should fail + let mut done_item = make_test_item("I2"); + done_item.state = PlanState::Done; + assert!(done_item.validate().is_err()); + + // Done item with evidence but no notes should fail + done_item.evidence = vec!["src/test.rs:42".to_string()]; + assert!(done_item.validate().is_err()); + + // Done item with evidence and notes should pass + done_item.notes = Some("Implementation notes".to_string()); + assert!(done_item.validate().is_ok()); + } + + #[test] + fn test_plan_validation() { + let mut 
plan = Plan::new("test-plan"); + + // Empty plan should fail + assert!(plan.validate().is_err()); + + // Plan with item should pass + plan.items.push(make_test_item("I1")); + assert!(plan.validate().is_ok()); + + // Duplicate IDs should fail + plan.items.push(make_test_item("I1")); + assert!(plan.validate().is_err()); + } + + #[test] + fn test_plan_is_complete() { + let mut plan = Plan::new("test"); + plan.items.push(make_test_item("I1")); + plan.items.push(make_test_item("I2")); + + assert!(!plan.is_complete()); + + plan.items[0].state = PlanState::Done; + plan.items[0].evidence = vec!["test".to_string()]; + plan.items[0].notes = Some("notes".to_string()); + assert!(!plan.is_complete()); + + plan.items[1].state = PlanState::Blocked; + assert!(plan.is_complete()); + } + + #[test] + fn test_plan_approval() { + let mut plan = Plan::new("test"); + plan.items.push(make_test_item("I1")); + + assert!(!plan.is_approved()); + assert_eq!(plan.approved_revision, None); + + plan.approve(); + assert!(plan.is_approved()); + assert_eq!(plan.approved_revision, Some(1)); + } + + #[test] + fn test_yaml_extraction() { + let md = r#"# Plan: test + +**Status**: ... 
+ +## Plan Data + +```yaml +plan_id: test +revision: 1 +items: [] +``` +"#; + + let yaml = extract_yaml_from_markdown(md).unwrap(); + assert!(yaml.contains("plan_id: test")); + } + + #[test] + fn test_plan_serialization_roundtrip() { + let mut plan = Plan::new("test-plan"); + plan.items.push(make_test_item("I1")); + plan.approve(); + + let yaml = serde_yaml::to_string(&plan).unwrap(); + let parsed: Plan = serde_yaml::from_str(&yaml).unwrap(); + + assert_eq!(parsed.plan_id, plan.plan_id); + assert_eq!(parsed.revision, plan.revision); + assert_eq!(parsed.approved_revision, plan.approved_revision); + assert_eq!(parsed.items.len(), plan.items.len()); + } +} diff --git a/crates/g3-core/src/tools/todo.rs b/crates/g3-core/src/tools/todo.rs deleted file mode 100644 index e65ff83..0000000 --- a/crates/g3-core/src/tools/todo.rs +++ /dev/null @@ -1,187 +0,0 @@ -//! TODO list management tools. - -use anyhow::Result; -use std::io::Write; -use tracing::debug; - -use crate::ui_writer::UiWriter; -use crate::ToolCall; - -use super::executor::ToolContext; - -/// Execute the `todo_read` tool. 
-pub async fn execute_todo_read( - tool_call: &ToolCall, - ctx: &mut ToolContext<'_, W>, -) -> Result { - debug!("Processing todo_read tool call"); - let _ = tool_call; // unused but kept for consistency - - let todo_path = ctx.get_todo_path(); - - if !todo_path.exists() { - // Also update in-memory content to stay in sync - let mut todo = ctx.todo_content.write().await; - *todo = String::new(); - ctx.ui_writer.print_todo_compact(None, false); - return Ok("📝 TODO list is empty (no todo.g3.md file found)".to_string()); - } - - match std::fs::read_to_string(&todo_path) { - Ok(content) => { - // Update in-memory content to stay in sync - let mut todo = ctx.todo_content.write().await; - *todo = content.clone(); - - // Check for staleness if enabled and we have a requirements SHA - if ctx.config.agent.check_todo_staleness { - if let Some(req_sha) = ctx.requirements_sha { - if let Some(staleness_result) = check_todo_staleness(&content, req_sha, ctx.ui_writer) { - return Ok(staleness_result); - } - } - } - - if content.trim().is_empty() { - ctx.ui_writer.print_todo_compact(None, false); - Ok("📝 TODO list is empty".to_string()) - } else { - ctx.ui_writer.print_todo_compact(Some(&content), false); - Ok(format!("📝 TODO list:\n{}", content)) - } - } - Err(e) => Ok(format!("❌ Failed to read TODO.md: {}", e)), - } -} - -/// Execute the `todo_write` tool. 
-pub async fn execute_todo_write( - tool_call: &ToolCall, - ctx: &mut ToolContext<'_, W>, -) -> Result { - debug!("Processing todo_write tool call"); - - let content_str = match tool_call.args.get("content").and_then(|v| v.as_str()) { - Some(c) => c, - None => return Ok("❌ Missing content argument".to_string()), - }; - - let char_count = content_str.chars().count(); - let max_chars = std::env::var("G3_TODO_MAX_CHARS") - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or(50_000); - - if max_chars > 0 && char_count > max_chars { - return Ok(format!( - "❌ TODO list too large: {} chars (max: {})", - char_count, max_chars - )); - } - - // Check if all todos are completed (all checkboxes are checked) - let has_incomplete = content_str - .lines() - .any(|line| line.trim().starts_with("- [ ]")); - - // If all todos are complete, delete the file instead of writing - // EXCEPT in planner mode (G3_TODO_PATH is set) - preserve for rename to completed_todo_*.md - let in_planner_mode = std::env::var("G3_TODO_PATH").is_ok(); - let todo_path = ctx.get_todo_path(); - - if !in_planner_mode - && !has_incomplete - && (content_str.contains("- [x]") || content_str.contains("- [X]")) - && todo_path.exists() - { - match std::fs::remove_file(&todo_path) { - Ok(_) => { - let mut todo = ctx.todo_content.write().await; - *todo = String::new(); - // Show the final completed TODOs - ctx.ui_writer.print_todo_compact(Some(content_str), true); - let mut result = String::from("✅ All TODOs completed! 
Removed todo.g3.md\n\nFinal status:\n"); - result.push_str(content_str); - return Ok(result); - } - Err(e) => return Ok(format!("❌ Failed to remove todo.g3.md: {}", e)), - } - } - - match std::fs::write(&todo_path, content_str) { - Ok(_) => { - // Also update in-memory content to stay in sync - let mut todo = ctx.todo_content.write().await; - *todo = content_str.to_string(); - ctx.ui_writer.print_todo_compact(Some(content_str), true); - Ok(format!( - "✅ TODO list updated ({} chars) and saved to todo.g3.md:\n{}", - char_count, content_str - )) - } - Err(e) => Ok(format!("❌ Failed to write todo.g3.md: {}", e)), - } -} - -/// Check if the TODO list is stale (generated from a different requirements file). -/// Returns Some(message) if staleness was detected and handled, None otherwise. -fn check_todo_staleness( - content: &str, - req_sha: &str, - ui_writer: &W, -) -> Option { - // Parse the first line for the SHA header - let first_line = content.lines().next()?; - - if !first_line.starts_with("{{Based on the requirements file with SHA256:") { - return None; - } - - let parts: Vec<&str> = first_line.split("SHA256:").collect(); - if parts.len() <= 1 { - return None; - } - - let todo_sha = parts[1].trim().trim_end_matches("}}").trim(); - if todo_sha == req_sha { - return None; - } - - let warning = format!( - "⚠️ TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA: {}", - req_sha, todo_sha - ); - ui_writer.print_context_status(&warning); - - // Beep 6 times - print!("\x07\x07\x07\x07\x07\x07"); - let _ = std::io::stdout().flush(); - - let options = [ - "Ignore and Continue", - "Mark as Stale", - "Quit Application", - ]; - let choice = ui_writer.prompt_user_choice( - "Requirements have changed! 
What would you like to do?", - &options, - ); - - match choice { - 0 => { - // Ignore and Continue - ui_writer.print_context_status("⚠️ Ignoring staleness warning."); - None - } - 1 => { - // Mark as Stale - Some("⚠️ TODO list is stale (requirements changed). Please regenerate the TODO list to match the new requirements.".to_string()) - } - 2 => { - // Quit Application - ui_writer.print_context_status("❌ Quitting application as requested."); - std::process::exit(0); - } - _ => None, - } -} diff --git a/crates/g3-core/tests/stream_completion_characterization_test.rs b/crates/g3-core/tests/stream_completion_characterization_test.rs index 33be9b6..15192d3 100644 --- a/crates/g3-core/tests/stream_completion_characterization_test.rs +++ b/crates/g3-core/tests/stream_completion_characterization_test.rs @@ -589,36 +589,56 @@ mod tool_execution_integration { ); } - /// CHARACTERIZATION: TODO tools work through agent + /// CHARACTERIZATION: Plan tools work through agent #[tokio::test] #[serial] - async fn todo_tools_work() { + async fn plan_tools_work() { let temp_dir = TempDir::new().unwrap(); let mut agent = create_test_agent(&temp_dir).await; - // Write TODO + // Initialize session ID for plan tools (they are session-scoped) + agent.init_session_id_for_test("plan-tools-test"); + + // Write Plan let write_call = ToolCall { - tool: "todo_write".to_string(), + tool: "plan_write".to_string(), args: serde_json::json!({ - "content": "- [ ] Test task\n- [x] Done task" + "plan": r#"plan_id: test-plan +revision: 1 +items: + - id: I1 + description: Test task + state: todo + touches: + - src/test.rs + checks: + happy: + desc: Works correctly + target: test::module + negative: + desc: Handles errors + target: test::module + boundary: + desc: Edge cases + target: test::module"# }), }; let write_result = agent.execute_tool(&write_call).await.unwrap(); assert!( write_result.contains("✅"), - "Write should succeed: {}", + "Plan write should succeed: {}", write_result ); - // Read TODO + 
// Read Plan let read_call = ToolCall { - tool: "todo_read".to_string(), + tool: "plan_read".to_string(), args: serde_json::json!({}), }; let read_result = agent.execute_tool(&read_call).await.unwrap(); assert!( - read_result.contains("Test task"), - "Should read back TODO: {}", + read_result.contains("test-plan"), + "Should read back plan: {}", read_result ); } diff --git a/crates/g3-core/tests/test_todo_persistence.rs b/crates/g3-core/tests/test_todo_persistence.rs deleted file mode 100644 index bca8d6b..0000000 --- a/crates/g3-core/tests/test_todo_persistence.rs +++ /dev/null @@ -1,388 +0,0 @@ -use g3_core::ui_writer::NullUiWriter; -use g3_core::Agent; -use serial_test::serial; -use std::fs; -use std::path::PathBuf; -use tempfile::TempDir; - -/// Helper to create a test agent in a temporary directory -async fn create_test_agent_in_dir(temp_dir: &TempDir) -> Agent { - // Change to temp directory - std::env::set_current_dir(temp_dir.path()).unwrap(); - - // Create a minimal config - let config = g3_config::Config::default(); - let ui_writer = NullUiWriter; - - Agent::new(config, ui_writer).await.unwrap() -} - -/// Helper to get todo.g3.md path in temp directory -fn get_todo_path(temp_dir: &TempDir) -> PathBuf { - temp_dir.path().join("todo.g3.md") -} - -#[tokio::test] -#[serial] -async fn test_todo_write_creates_file() { - let temp_dir = TempDir::new().unwrap(); - let mut agent = create_test_agent_in_dir(&temp_dir).await; - let todo_path = get_todo_path(&temp_dir); - - // Initially, todo.g3.md should not exist - assert!(!todo_path.exists(), "todo.g3.md should not exist initially"); - - // Create a tool call to write TODO - let tool_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3" - }), - }; - - // Execute the tool - let result = agent.execute_tool(&tool_call).await.unwrap(); - - // Should report success - assert!(result.contains("✅"), "Should report success: {}", 
result); - assert!( - result.contains("todo.g3.md"), - "Should mention todo.g3.md: {}", - result - ); - - // File should now exist - assert!(todo_path.exists(), "todo.g3.md should exist after write"); - - // File should contain the correct content - let content = fs::read_to_string(&todo_path).unwrap(); - assert_eq!(content, "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3"); -} - -#[tokio::test] -#[serial] -async fn test_todo_read_from_file() { - let temp_dir = TempDir::new().unwrap(); - let todo_path = get_todo_path(&temp_dir); - - // Pre-create a todo.g3.md file - let test_content = "# My TODO\n\n- [ ] First task\n- [x] Completed task"; - fs::write(&todo_path, test_content).unwrap(); - - // Create agent (should load from file) - let mut agent = create_test_agent_in_dir(&temp_dir).await; - - // Create a tool call to read TODO - let tool_call = g3_core::ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - - // Execute the tool - let result = agent.execute_tool(&tool_call).await.unwrap(); - - // Should contain the TODO content - assert!( - result.contains("📝 TODO list:"), - "Should have TODO list header: {}", - result - ); - assert!( - result.contains("First task"), - "Should contain first task: {}", - result - ); - assert!( - result.contains("Completed task"), - "Should contain completed task: {}", - result - ); -} - -#[tokio::test] -#[serial] -async fn test_todo_read_empty_file() { - let temp_dir = TempDir::new().unwrap(); - let mut agent = create_test_agent_in_dir(&temp_dir).await; - - // Create a tool call to read TODO (file doesn't exist) - let tool_call = g3_core::ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - - // Execute the tool - let result = agent.execute_tool(&tool_call).await.unwrap(); - - // Should report empty - assert!(result.contains("empty"), "Should report empty: {}", result); -} - -#[tokio::test] -#[serial] -async fn test_todo_persistence_across_agents() { - let temp_dir = 
TempDir::new().unwrap(); - let todo_path = get_todo_path(&temp_dir); - - // Agent 1: Write TODO - { - let mut agent = create_test_agent_in_dir(&temp_dir).await; - let tool_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": "- [ ] Persistent task\n- [x] Done task" - }), - }; - agent.execute_tool(&tool_call).await.unwrap(); - } - - // Verify file exists - assert!( - todo_path.exists(), - "todo.g3.md should persist after agent drops" - ); - - // Agent 2: Read TODO (new agent instance) - { - let mut agent = create_test_agent_in_dir(&temp_dir).await; - let tool_call = g3_core::ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - let result = agent.execute_tool(&tool_call).await.unwrap(); - - // Should read the persisted content - assert!( - result.contains("Persistent task"), - "Should read persisted task: {}", - result - ); - assert!( - result.contains("Done task"), - "Should read done task: {}", - result - ); - } -} - -#[tokio::test] -#[serial] -async fn test_todo_update_preserves_file() { - let temp_dir = TempDir::new().unwrap(); - let mut agent = create_test_agent_in_dir(&temp_dir).await; - let todo_path = get_todo_path(&temp_dir); - - // Write initial TODO - let write_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": "- [ ] Task 1\n- [ ] Task 2" - }), - }; - agent.execute_tool(&write_call).await.unwrap(); - - // Update TODO - let update_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3" - }), - }; - agent.execute_tool(&update_call).await.unwrap(); - - // Verify file has updated content - let content = fs::read_to_string(&todo_path).unwrap(); - assert_eq!(content, "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3"); -} - -#[tokio::test] -#[serial] -async fn test_todo_handles_large_content() { - let temp_dir = TempDir::new().unwrap(); - let mut agent = 
create_test_agent_in_dir(&temp_dir).await; - let todo_path = get_todo_path(&temp_dir); - - // Create a large TODO (but under the 50k limit) - let mut large_content = String::from("# Large TODO\n\n"); - for i in 0..100 { - large_content.push_str(&format!( - "- [ ] Task {} with a long description that exceeds normal line lengths\n", - i - )); - } - - let tool_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": large_content - }), - }; - - let result = agent.execute_tool(&tool_call).await.unwrap(); - assert!( - result.contains("✅"), - "Should handle large content: {}", - result - ); - - // Verify file contains all content - let file_content = fs::read_to_string(&todo_path).unwrap(); - assert_eq!(file_content, large_content); - assert!(file_content.contains("Task 99"), "Should contain all tasks"); -} - -#[tokio::test] -#[serial] -async fn test_todo_respects_size_limit() { - let temp_dir = TempDir::new().unwrap(); - let mut agent = create_test_agent_in_dir(&temp_dir).await; - - // Create content that exceeds the default 50k limit - let huge_content = "x".repeat(60_000); - - let tool_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": huge_content - }), - }; - - let result = agent.execute_tool(&tool_call).await.unwrap(); - - // Should reject content that's too large - assert!( - result.contains("❌"), - "Should reject oversized content: {}", - result - ); - assert!( - result.contains("too large"), - "Should mention size limit: {}", - result - ); -} - -#[tokio::test] -#[serial] -async fn test_todo_agent_initialization_loads_file() { - let temp_dir = TempDir::new().unwrap(); - let todo_path = get_todo_path(&temp_dir); - - // Pre-create todo.g3.md before agent initialization - let initial_content = "- [ ] Pre-existing task"; - fs::write(&todo_path, initial_content).unwrap(); - - // Create agent - should load the file during initialization - let mut agent = 
create_test_agent_in_dir(&temp_dir).await; - - // Read TODO - should return the pre-existing content - let tool_call = g3_core::ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - - let result = agent.execute_tool(&tool_call).await.unwrap(); - assert!( - result.contains("Pre-existing task"), - "Should load file on init: {}", - result - ); -} - -#[tokio::test] -#[serial] -async fn test_todo_handles_unicode_content() { - let temp_dir = TempDir::new().unwrap(); - let mut agent = create_test_agent_in_dir(&temp_dir).await; - let todo_path = get_todo_path(&temp_dir); - - // Create TODO with unicode characters - let unicode_content = "- [ ] 日本語タスク\n- [ ] Émoji task 🚀\n- [x] Ελληνικά task"; - - let tool_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": unicode_content - }), - }; - - agent.execute_tool(&tool_call).await.unwrap(); - - // Verify file preserves unicode - let file_content = fs::read_to_string(&todo_path).unwrap(); - assert_eq!(file_content, unicode_content); - - // Verify reading back works - let read_call = g3_core::ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - - let result = agent.execute_tool(&read_call).await.unwrap(); - assert!( - result.contains("日本語"), - "Should preserve Japanese: {}", - result - ); - assert!(result.contains("🚀"), "Should preserve emoji: {}", result); - assert!( - result.contains("Ελληνικά"), - "Should preserve Greek: {}", - result - ); -} - -#[tokio::test] -#[serial] -async fn test_todo_empty_content_creates_empty_file() { - let temp_dir = TempDir::new().unwrap(); - let mut agent = create_test_agent_in_dir(&temp_dir).await; - let todo_path = get_todo_path(&temp_dir); - - // Write empty TODO - let tool_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": "" - }), - }; - - agent.execute_tool(&tool_call).await.unwrap(); - - // File should exist but be empty - 
assert!(todo_path.exists(), "Empty todo.g3.md should create file"); - let content = fs::read_to_string(&todo_path).unwrap(); - assert_eq!(content, ""); -} - -#[tokio::test] -#[serial] -async fn test_todo_whitespace_only_content() { - let temp_dir = TempDir::new().unwrap(); - let mut agent = create_test_agent_in_dir(&temp_dir).await; - - // Write whitespace-only TODO - let tool_call = g3_core::ToolCall { - tool: "todo_write".to_string(), - args: serde_json::json!({ - "content": " \n\n \t \n" - }), - }; - - agent.execute_tool(&tool_call).await.unwrap(); - - // Read it back - let read_call = g3_core::ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - - let result = agent.execute_tool(&read_call).await.unwrap(); - - // Should report as empty (whitespace is trimmed) - assert!( - result.contains("empty"), - "Whitespace-only should be empty: {}", - result - ); -} diff --git a/crates/g3-core/tests/todo_staleness_test.rs b/crates/g3-core/tests/todo_staleness_test.rs deleted file mode 100644 index cbaf714..0000000 --- a/crates/g3-core/tests/todo_staleness_test.rs +++ /dev/null @@ -1,223 +0,0 @@ -use g3_config::Config; -use g3_core::ui_writer::UiWriter; -use g3_core::{Agent, ToolCall}; -use serial_test::serial; -use std::sync::{Arc, Mutex}; -use tempfile::TempDir; - -// Mock UI Writer for testing -#[derive(Clone)] -struct MockUiWriter { - output: Arc>>, - prompt_responses: Arc>>, - choice_responses: Arc>>, -} - -impl MockUiWriter { - fn new() -> Self { - Self { - output: Arc::new(Mutex::new(Vec::new())), - prompt_responses: Arc::new(Mutex::new(Vec::new())), - choice_responses: Arc::new(Mutex::new(Vec::new())), - } - } - - #[allow(dead_code)] - fn set_prompt_response(&self, response: bool) { - self.prompt_responses.lock().unwrap().push(response); - } - - #[allow(dead_code)] - fn set_choice_response(&self, response: usize) { - self.choice_responses.lock().unwrap().push(response); - } - - #[allow(dead_code)] - fn get_output(&self) -> Vec { - 
self.output.lock().unwrap().clone() - } -} - -impl UiWriter for MockUiWriter { - fn print(&self, message: &str) { - self.output.lock().unwrap().push(message.to_string()); - } - fn println(&self, message: &str) { - self.output.lock().unwrap().push(message.to_string()); - } - fn print_inline(&self, message: &str) { - self.output.lock().unwrap().push(message.to_string()); - } - fn print_system_prompt(&self, _prompt: &str) {} - fn print_context_status(&self, message: &str) { - self.output - .lock() - .unwrap() - .push(format!("STATUS: {}", message)); - } - fn print_g3_progress(&self, _message: &str) {} - fn print_g3_status(&self, _message: &str, _status: &str) {} - fn print_thin_result(&self, _result: &g3_core::ThinResult) {} - fn print_tool_header(&self, _tool_name: &str, _tool_args: Option<&serde_json::Value>) {} - fn print_tool_arg(&self, _key: &str, _value: &str) {} - fn print_tool_output_header(&self) {} - fn update_tool_output_line(&self, _line: &str) {} - fn print_tool_output_line(&self, _line: &str) {} - fn print_tool_output_summary(&self, _hidden_count: usize) {} - fn print_tool_timing(&self, _duration_str: &str, _tokens_delta: u32, _context_percentage: f32) {} - fn print_agent_prompt(&self) {} - fn print_agent_response(&self, _content: &str) {} - fn notify_sse_received(&self) {} - fn flush(&self) {} - fn wants_full_output(&self) -> bool { - false - } - fn prompt_user_yes_no(&self, message: &str) -> bool { - self.output - .lock() - .unwrap() - .push(format!("PROMPT: {}", message)); - self.prompt_responses.lock().unwrap().pop().unwrap_or(true) - } - fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize { - self.output - .lock() - .unwrap() - .push(format!("CHOICE: {} Options: {:?}", message, options)); - self.choice_responses.lock().unwrap().pop().unwrap_or(0) - } - fn print_tool_streaming_hint(&self, _tool_name: &str) {} - fn print_tool_streaming_active(&self) {} -} - -#[tokio::test] -#[serial] -async fn 
test_todo_staleness_check_matching_sha() { - let temp_dir = TempDir::new().unwrap(); - let todo_path = temp_dir.path().join("todo.g3.md"); - std::env::set_current_dir(&temp_dir).unwrap(); - - let sha = "abc123hash"; - let content = format!( - "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", - sha - ); - std::fs::write(&todo_path, content).unwrap(); - - let mut config = Config::default(); - config.agent.check_todo_staleness = true; - - let ui_writer = MockUiWriter::new(); - let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap(); - agent.set_requirements_sha(sha.to_string()); - - let tool_call = ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - let result = agent.execute_tool(&tool_call).await.unwrap(); - - assert!(result.contains("📝 TODO list:")); - assert!(!result.contains("⚠️ TODO list is stale")); -} - -#[tokio::test] -#[serial] -async fn test_todo_staleness_check_mismatch_sha_ignore() { - let temp_dir = TempDir::new().unwrap(); - let todo_path = temp_dir.path().join("todo.g3.md"); - std::env::set_current_dir(&temp_dir).unwrap(); - - let sha_file = "old_sha"; - let sha_req = "new_sha"; - let content = format!( - "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", - sha_file - ); - std::fs::write(&todo_path, content).unwrap(); - - let mut config = Config::default(); - config.agent.check_todo_staleness = true; - - let ui_writer = MockUiWriter::new(); - ui_writer.set_choice_response(0); // Ignore - - let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap(); - agent.set_requirements_sha(sha_req.to_string()); - - let tool_call = ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - let result = agent.execute_tool(&tool_call).await.unwrap(); - - assert!(result.contains("📝 TODO list:")); -} - -#[tokio::test] -#[serial] -async fn test_todo_staleness_check_mismatch_sha_mark_stale() { - let temp_dir = TempDir::new().unwrap(); - let 
todo_path = temp_dir.path().join("todo.g3.md"); - std::env::set_current_dir(&temp_dir).unwrap(); - - let sha_file = "old_sha"; - let sha_req = "new_sha"; - let content = format!( - "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", - sha_file - ); - std::fs::write(&todo_path, content).unwrap(); - - let mut config = Config::default(); - config.agent.check_todo_staleness = true; - - let ui_writer = MockUiWriter::new(); - ui_writer.set_choice_response(1); // Mark as Stale - - let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap(); - agent.set_requirements_sha(sha_req.to_string()); - - let tool_call = ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - let result = agent.execute_tool(&tool_call).await.unwrap(); - - assert!(result.contains("⚠️ TODO list is stale")); - assert!(result.contains("Please regenerate")); -} - -// Note: We cannot easily test "Quit" (index 2) because it calls std::process::exit(0) -// which would kill the test runner. We skip that test case here. 
- -#[tokio::test] -#[serial] -async fn test_todo_staleness_check_disabled() { - let temp_dir = TempDir::new().unwrap(); - let todo_path = temp_dir.path().join("todo.g3.md"); - std::env::set_current_dir(&temp_dir).unwrap(); - - let sha_file = "old_sha"; - let sha_req = "new_sha"; - let content = format!( - "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1", - sha_file - ); - std::fs::write(&todo_path, content).unwrap(); - - let mut config = Config::default(); - config.agent.check_todo_staleness = false; - - let ui_writer = MockUiWriter::new(); - let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap(); - agent.set_requirements_sha(sha_req.to_string()); - - let tool_call = ToolCall { - tool: "todo_read".to_string(), - args: serde_json::json!({}), - }; - let result = agent.execute_tool(&tool_call).await.unwrap(); - - assert!(result.contains("📝 TODO list:")); -} diff --git a/crates/g3-core/tests/tool_execution_roundtrip_test.rs b/crates/g3-core/tests/tool_execution_roundtrip_test.rs index c4cf30a..3bfeac4 100644 --- a/crates/g3-core/tests/tool_execution_roundtrip_test.rs +++ b/crates/g3-core/tests/tool_execution_roundtrip_test.rs @@ -393,21 +393,38 @@ mod str_replace_execution { // Test: TODO tool execution // ============================================================================= -mod todo_execution { +mod plan_execution { use super::*; - /// Test writing and reading TODO + /// Test writing and reading Plan #[tokio::test] #[serial] - async fn test_todo_write_and_read() { + async fn test_plan_write_and_read() { let temp_dir = TempDir::new().unwrap(); let mut agent = create_test_agent(&temp_dir).await; + agent.init_session_id_for_test("plan-test"); - // Write TODO + // Write Plan let write_call = make_tool_call( - "todo_write", + "plan_write", serde_json::json!({ - "content": "- [ ] Task 1\n- [x] Task 2\n- [ ] Task 3" + "plan": r#"plan_id: test-plan +revision: 1 +items: + - id: I1 + description: Task 1 + state: todo + touches: 
["src/test.rs"] + checks: + happy: + desc: Works + target: test + negative: + desc: Errors + target: test + boundary: + desc: Edge + target: test"# }), ); @@ -415,52 +432,61 @@ mod todo_execution { assert!(write_result.contains("✅") || write_result.to_lowercase().contains("success"), "Write should succeed: {}", write_result); - // Read TODO - let read_call = make_tool_call("todo_read", serde_json::json!({})); + // Read Plan + let read_call = make_tool_call("plan_read", serde_json::json!({})); let read_result = agent.execute_tool(&read_call).await.unwrap(); + assert!(read_result.contains("test-plan"), "Should contain plan id: {}", read_result); assert!(read_result.contains("Task 1"), "Should contain Task 1: {}", read_result); - assert!(read_result.contains("Task 2"), "Should contain Task 2: {}", read_result); - assert!(read_result.contains("Task 3"), "Should contain Task 3: {}", read_result); } - /// Test reading empty TODO + /// Test reading empty Plan #[tokio::test] #[serial] - async fn test_todo_read_empty() { + async fn test_plan_read_empty() { let temp_dir = TempDir::new().unwrap(); let mut agent = create_test_agent(&temp_dir).await; + agent.init_session_id_for_test("plan-empty-test"); - let read_call = make_tool_call("todo_read", serde_json::json!({})); + let read_call = make_tool_call("plan_read", serde_json::json!({})); let result = agent.execute_tool(&read_call).await.unwrap(); - assert!(result.to_lowercase().contains("empty") || result.contains("no todo"), + assert!(result.contains("No plan") || result.to_lowercase().contains("no plan"), "Should indicate empty: {}", result); } - /// Test TODO persists to file + /// Test Plan approval #[tokio::test] #[serial] - async fn test_todo_persists_to_file() { + async fn test_plan_approve() { let temp_dir = TempDir::new().unwrap(); - let todo_path = temp_dir.path().join("todo.g3.md"); + let mut agent = create_test_agent(&temp_dir).await; + agent.init_session_id_for_test("plan-approve-test"); - { - let mut agent = 
create_test_agent(&temp_dir).await; - - let write_call = make_tool_call( - "todo_write", - serde_json::json!({ - "content": "- [ ] Persistent task" - }), - ); - - agent.execute_tool(&write_call).await.unwrap(); - } + // First write a plan + let write_call = make_tool_call( + "plan_write", + serde_json::json!({ + "plan": r#"plan_id: approve-test +revision: 1 +items: + - id: I1 + description: Test task + state: todo + touches: ["src/test.rs"] + checks: + happy: {desc: Works, target: test} + negative: {desc: Errors, target: test} + boundary: {desc: Edge, target: test}"# + }), + ); + agent.execute_tool(&write_call).await.unwrap(); - // File should exist after agent is dropped - assert!(todo_path.exists(), "TODO file should persist"); - let content = fs::read_to_string(&todo_path).unwrap(); - assert!(content.contains("Persistent task"), "Content should persist: {}", content); + // Approve the plan + let approve_call = make_tool_call("plan_approve", serde_json::json!({})); + let result = agent.execute_tool(&approve_call).await.unwrap(); + + assert!(result.contains("✅") && result.contains("approved"), + "Should approve plan: {}", result); } }