From a63950d8f5cb3765fc1d3703faca677542a6911b Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna" <d@wideplay.com>
Date: Mon, 2 Feb 2026 14:38:25 +1100
Subject: [PATCH] Add Plan Mode to replace TODO system

Plan Mode is a cognitive forcing system that requires reasoning about:
- Happy path
- Negative case
- Boundary condition

New tools:
- plan_read: Read current plan for session
- plan_write: Create/update plan with YAML content (validates structure)
- plan_approve: Mark current revision as approved

New command:
- /feature <description>: Start Plan Mode for a new feature

Plan schema requires:
- plan_id, revision, approved_revision
- items with id, description, state, touches, checks (happy/negative/boundary)
- evidence and notes required when marking items done

Verification:
- plan_verify() called automatically when all items are done/blocked

Removed:
- todo_read, todo_write tools
- todo.rs module and related tests
---
 analysis/memory.md                            |  53 +-
 crates/g3-cli/src/commands.rs                 |  30 +
 crates/g3-core/src/prompts.rs                 | 140 ++--
 crates/g3-core/src/tool_definitions.rs        |  99 ++-
 crates/g3-core/src/tool_dispatch.rs           |   9 +-
 crates/g3-core/src/tools/mod.rs               |   4 +-
 crates/g3-core/src/tools/plan.rs              | 674 ++++++++++++++++++
 crates/g3-core/src/tools/todo.rs              | 187 -----
 ...stream_completion_characterization_test.rs |  40 +-
 crates/g3-core/tests/test_todo_persistence.rs | 388 ----------
 crates/g3-core/tests/todo_staleness_test.rs   | 223 ------
 .../tests/tool_execution_roundtrip_test.rs    |  92 ++-
 12 files changed, 997 insertions(+), 942 deletions(-)
 create mode 100644 crates/g3-core/src/tools/plan.rs
 delete mode 100644 crates/g3-core/src/tools/todo.rs
 delete mode 100644 crates/g3-core/tests/test_todo_persistence.rs
 delete mode 100644 crates/g3-core/tests/todo_staleness_test.rs
diff --git a/analysis/memory.md b/analysis/memory.md
index f477225..6763227 100644
--- a/analysis/memory.md
+++ b/analysis/memory.md
@@ -1,5 +1,5 @@
 # Workspace Memory
-> Updated: 2026-01-30T01:10:54Z | Size: 13.2k chars
+> Updated: 2026-02-02T03:16:47Z | Size: 15.3k chars
 
 ### Remember Tool Wiring
 - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()`
@@ -240,4 +240,53 @@ Research tool is asynchronous - spawns scout agent in background, returns immedi
 2. Scout agent runs in background tokio task
 3. On completion, `PendingResearchManager.complete()` stores result
 4. At next iteration start or user prompt, `inject_completed_research()` adds to context
-5. Agent can check status with `research_status` tool or user with `/research` command
\ No newline at end of file
+5. Agent can check status with `research_status` tool or user with `/research` command
+
+### Plan Mode (replaces TODO system)
+Structured task planning with cognitive forcing - requires happy/negative/boundary checks.
+
+- `crates/g3-core/src/tools/plan.rs`
+  - `Plan` [200..240] - plan_id, revision, approved_revision, items[]
+  - `PlanItem` [110..145] - id, description, state, touches, checks, evidence, notes
+  - `PlanState` [25..45] - enum: Todo, Doing, Done, Blocked
+  - `Check` [60..85] - desc, target fields
+  - `Checks` [90..105] - happy, negative, boundary
+  - `get_plan_path()` [280..285] - returns `.g3/sessions/<id>/plan.g3.md`
+  - `read_plan()` [290..310] - loads plan from YAML in markdown
+  - `write_plan()` [315..335] - validates and saves plan
+  - `plan_verify()` [355..390] - placeholder called when all items done/blocked
+  - `execute_plan_read()` [395..420] - plan_read tool
+  - `execute_plan_write()` [425..490] - plan_write tool with validation
+  - `execute_plan_approve()` [495..530] - plan_approve tool
+
+- `crates/g3-core/src/tool_definitions.rs` [263..330] - plan_read, plan_write, plan_approve definitions
+- `crates/g3-core/src/tool_dispatch.rs` [36..38] - dispatch cases for plan tools
+- `crates/g3-cli/src/commands.rs` [460..490] - `/feature` command starts Plan Mode
+- `crates/g3-core/src/prompts.rs` [21..130] - SHARED_PLAN_SECTION replaces TODO section
+
+**Plan Schema (YAML)**:
+```yaml
+plan_id: feature-name
+revision: 1
+approved_revision: 1  # set by plan_approve
+items:
+  - id: I1
+    description: What to do
+    state: todo|doing|done|blocked
+    touches: [paths/modules]
+    checks:
+      happy: {desc, target}
+      negative: {desc, target}
+      boundary: {desc, target}
+    evidence: [file:line, test names]  # required when done
+    notes: Implementation explanation   # required when done
+```
+
+**Workflow**: `/feature <desc>` → agent drafts plan → user approves → agent implements → plan_verify() called when complete
+
+### Plan Mode Tool Names (IMPORTANT)
+Tool names must use underscores, not dots (Anthropic API restriction: `^[a-zA-Z0-9_-]{1,128}$`).
+
+- `plan_read` - Read current plan
+- `plan_write` - Create/update plan
+- `plan_approve` - Approve plan revision
\ No newline at end of file
diff --git a/crates/g3-cli/src/commands.rs b/crates/g3-cli/src/commands.rs
index 4d39eea..5f5e738 100644
--- a/crates/g3-cli/src/commands.rs
+++ b/crates/g3-cli/src/commands.rs
@@ -74,6 +74,7 @@ pub async fn handle_command<W: UiWriter>(
             output.print("  /readme    - Reload README.md and AGENTS.md from disk");
             output.print("  /stats     - Show detailed context and performance statistics");
             output.print("  /run <file> - Read file and execute as prompt");
+            output.print("  /feature <description> - Start Plan Mode for a new feature");
             output.print("  /help      - Show this help message");
             output.print("  exit/quit  - Exit the interactive session");
             output.print("");
@@ -452,6 +453,35 @@ pub async fn handle_command<W: UiWriter>(
             }
             Ok(true)
         }
+        // Accept "/feature" only as a whole word (alone or followed by whitespace) so that
+        // longer commands sharing the prefix, e.g. "/features", are not captured by this arm.
+        cmd if cmd.strip_prefix("/feature").map_or(false, |r| r.is_empty() || r.starts_with(char::is_whitespace)) => {
+            let feature_description = cmd.strip_prefix("/feature").unwrap_or_default().trim();
+            if feature_description.is_empty() {
+                output.print("Usage: /feature <description>");
+                output.print("Starts Plan Mode for a new feature. The agent will:");
+                output.print("  1. Research and draft a Plan with checks (happy/negative/boundary)");
+                output.print("  2. Ask clarifying questions if needed");
+                output.print("  3. Request approval before coding");
+                output.print("");
+                output.print("Example: /feature Add CSV import for comic book metadata");
+            } else {
+                // Construct the feature prompt that instructs the agent to use Plan Mode
+                let prompt = format!(
+                    "I want to implement a new feature: {}\n\n\
+                    Please use Plan Mode to help me implement this:\n\
+                    1. First, research the codebase to understand where this feature should live\n\
+                    2. Draft a Plan using `plan_write` with items that have all three checks (happy, negative, boundary)\n\
+                    3. Ask me any clarifying questions if needed\n\
+                    4. Then ask me to approve the plan before you start coding\n\n\
+                    Do NOT start coding until I approve the plan.",
+                    feature_description
+                );
+
+                execute_task_with_retry(agent, &prompt, show_prompt, show_code, output).await;
+            }
+            Ok(true)
+        }
         "/unproject" => {
             if active_project.is_some() {
                 use crate::g3_status::G3Status;
diff --git a/crates/g3-core/src/prompts.rs b/crates/g3-core/src/prompts.rs
index db63cc6..0acd235 100644
--- a/crates/g3-core/src/prompts.rs
+++ b/crates/g3-core/src/prompts.rs
@@ -18,70 +18,92 @@ IMPORTANT: You must call tools to achieve goals. When you receive a request:
 For shell commands: Use the shell tool with the exact command needed. Always use `rg` (ripgrep) instead of `grep` - it's faster, has better defaults, and respects .gitignore. Avoid commands that produce a large amount of output, and consider piping those outputs to files. Example: If asked to list files, immediately call the shell tool with command parameter \"ls\".
 If you create temporary files for verification, place these in a subdir named 'tmp'. Do NOT pollute the current dir.";
 
-const SHARED_TODO_SECTION: &str = "\
-# Task Management with TODO Tools
+const SHARED_PLAN_SECTION: &str = "\
+# Task Management with Plan Mode
 
-**REQUIRED for multi-step tasks.** Use TODO tools when your task involves ANY of:
+**REQUIRED for multi-step tasks.** Use Plan Mode when your task involves ANY of:
 - Multiple files to create/modify (2+)
 - Multiple distinct steps (3+)
 - Dependencies between steps
 - Testing or verification needed
 - Uncertainty about approach
 
+Plan Mode is a cognitive forcing system that prevents:
+- Attention collapse
+- False claims of completeness
+- Happy-path-only implementations
+- Duplication/contradiction with existing code
+
 ## Workflow
 
-Every multi-step task follows this pattern:
-1. **Start**: Call todo_read, then todo_write to create your plan
-2. **During**: Execute steps, then todo_read and todo_write to mark progress
-3. **End**: Call todo_read to verify all items complete
-4. **Finally**, call `remember` to save info on new features created or discovered
+1. **Draft**: Call `plan_read` to check for existing plan, then `plan_write` to create/update
+2. **Approval**: Ask user to approve before coding (\"'approve', or edit plan?\")
+3. **Execute**: Implement items, updating plan with `plan_write` to mark progress
+4. **Complete**: When all items are done/blocked, verification runs automatically
+5. **Remember**: Call `remember` to save discovered code locations
 
-Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists are scoped to the current session and stored in the session directory.
+## Plan Schema
 
-## Examples
+Each plan item MUST have:
+- `id`: Stable identifier (e.g., \"I1\", \"I2\")
+- `description`: What will be done
+- `state`: todo | doing | done | blocked
+- `touches`: Paths/modules this affects (forces \"where does this live?\")
+- `checks`: Three required perspectives:
+  - `happy`: {desc, target} - Normal successful operation
+  - `negative`: {desc, target} - Error handling, invalid input
+  - `boundary`: {desc, target} - Edge cases, limits
+- `evidence`: (required when done) File:line refs, test names
+- `notes`: (required when done) Short implementation explanation
 
-**Example 1: Feature Implementation**
-User asks: \"Add user authentication with tests\"
+## Rules
 
-First action:
-{\"tool\": \"todo_read\", \"args\": {}}
+When drafting a plan, you MUST:
+- Keep items ≤ 7 by default
+- Commit to where the work will live (touches)
+- Provide all three checks (happy, negative, boundary)
 
-Then create plan:
-{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n  - [ ] Create User struct\\n  - [ ] Add login endpoint\\n  - [ ] Add password hashing\\n  - [ ] Write unit tests\\n  - [ ] Write integration tests\"}}
+When updating a plan:
+- Cannot remove items from an approved plan (mark as blocked instead)
+- Must provide evidence and notes when marking item as done
 
-After completing User struct:
-{\"tool\": \"todo_read\", \"args\": {}}
-{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Add user authentication\\n  - [x] Create User struct\\n  - [ ] Add login endpoint\\n  - [ ] Add password hashing\\n  - [ ] Write unit tests\\n  - [ ] Write integration tests\"}}
+## Example Plan Item
 
-**Example 2: Bug Fix**
-User asks: \"Fix the memory leak in cache module\"
+```yaml
+- id: I1
+  description: \"Add CSV import for comic book metadata\"
+  state: todo
+  touches: [\"src/import\", \"src/library\"]
+  checks:
+    happy:
+      desc: \"Valid CSV imports 3 comics\"
+      target: \"import::csv\"
+    negative:
+      desc: \"Missing column errors with MissingColumn\"
+      target: \"import::csv\"
+    boundary:
+      desc: \"Empty file yields empty import without error\"
+      target: \"import::csv\"
+```
 
-{\"tool\": \"todo_read\", \"args\": {}}
-{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Fix memory leak\\n  - [ ] Review cache.rs\\n  - [ ] Check for unclosed resources\\n  - [ ] Add drop implementation\\n  - [ ] Write test to verify fix\"}}
-
-**Example 3: Refactoring**
-User asks: \"Refactor database layer to use async/await\"
-
-{\"tool\": \"todo_read\", \"args\": {}}
-{\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Refactor to async\\n  - [ ] Update function signatures\\n  - [ ] Replace blocking calls\\n  - [ ] Update all callers\\n  - [ ] Update tests\"}}
-
-## Format
-
-Use markdown checkboxes:
-- \"- [ ]\" for incomplete tasks
-- \"- [x]\" for completed tasks
-- Indent with 2 spaces for subtasks
-
-Keep items short, specific, and action-oriented.
+When done, add evidence and notes:
+```yaml
+  state: done
+  evidence:
+    - \"src/import/csv.rs:42-118\"
+    - \"tests/import_csv.rs::test_valid_csv\"
+  notes: \"Extended existing parser instead of creating duplicate\"
+```
 
 ## Benefits
 
 ✓ Prevents missed steps
 ✓ Makes progress visible
 ✓ Helps recover from interruptions
-✓ Creates better summaries
+✓ Forces consideration of edge cases
+✓ Provides audit trail with evidence
 
-If you can complete it with 1-2 tool calls, skip TODO.";
+If you can complete it with 1-2 tool calls, skip Plan Mode.";
 
 const SHARED_TEMPORARY_FILES: &str = "\
 # Temporary files
@@ -153,7 +175,7 @@ Do NOT save duplicates - check the Workspace Memory section (loaded at startup)
 
 After discovering how session continuation works:
 
-{\"tool\": \"remember\", \"args\": {\"notes\": \"### Session Continuation\\nSave/restore session state across g3 invocations using symlink-based approach.\\n\\n- `crates/g3-core/src/session_continuation.rs`\\n  - `SessionContinuation` [850..2100] - artifact struct with session state, TODO snapshot, context %\\n  - `save_continuation()` [5765..7200] - saves to `.g3/sessions/<id>/latest.json`, updates symlink\\n  - `load_continuation()` [7250..8900] - follows `.g3/session` symlink to restore\\n  - `find_incomplete_agent_session()` [10500..13200] - finds sessions with incomplete TODOs for agent resume\"}}
+{\"tool\": \"remember\", \"args\": {\"notes\": \"### Session Continuation\\nSave/restore session state across g3 invocations using symlink-based approach.\\n\\n- `crates/g3-core/src/session_continuation.rs`\\n  - `SessionContinuation` [850..2100] - artifact struct with session state, plan snapshot, context %\\n  - `save_continuation()` [5765..7200] - saves to `.g3/sessions/<id>/latest.json`, updates symlink\\n  - `load_continuation()` [7250..8900] - follows `.g3/session` symlink to restore\\n  - `find_incomplete_agent_session()` [10500..13200] - finds sessions with incomplete plans for agent resume\"}}
 
 After discovering a useful pattern:
 
@@ -213,13 +235,17 @@ Short description for providers without native calling specs:
   - Format: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"path/to/file\", \"diff\": \"--- old\\n-old text\\n+++ new\\n+new text\"}}
   - Example: {\"tool\": \"str_replace\", \"args\": {\"file_path\": \"src/main.rs\", \"diff\": \"--- old\\n-old_code();\\n+++ new\\n+new_code();\"}}
 
-- **todo_read**: Read the current session's TODO list from todo.g3.md (session-scoped)
-  - Format: {\"tool\": \"todo_read\", \"args\": {}}
-  - Example: {\"tool\": \"todo_read\", \"args\": {}}
+- **plan_read**: Read the current Plan for this session
+  - Format: {\"tool\": \"plan_read\", \"args\": {}}
+  - Example: {\"tool\": \"plan_read\", \"args\": {}}
 
-- **todo_write**: Write or overwrite the session's todo.g3.md file (WARNING: overwrites completely, always read first)
-  - Format: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Task 1\\n- [ ] Task 2\"}}
-  - Example: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Implement feature\\n  - [ ] Write tests\\n  - [ ] Run tests\"}}
+- **plan_write**: Create or update the Plan with YAML content
+  - Format: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: my-plan\\nitems: [...]\"}}
+  - Example: {\"tool\": \"plan_write\", \"args\": {\"plan\": \"plan_id: feature-x\\nitems:\\n  - id: I1\\n    description: Add feature\\n    state: todo\\n    touches: [src/lib.rs]\\n    checks:\\n      happy: {desc: Works, target: lib}\\n      negative: {desc: Errors, target: lib}\\n      boundary: {desc: Edge, target: lib}\"}}
+
+- **plan_approve**: Approve the current plan revision (called by user)
+  - Format: {\"tool\": \"plan_approve\", \"args\": {}}
+  - Example: {\"tool\": \"plan_approve\", \"args\": {}}
 
 - **code_search**: Syntax-aware code search using tree-sitter. Supports Rust, Python, JavaScript, TypeScript.
   - Format: {\"tool\": \"code_search\", \"args\": {\"searches\": [{\"name\": \"label\", \"query\": \"tree-sitter query\", \"language\": \"rust|python|javascript|typescript\", \"paths\": [\"src/\"], \"context_lines\": 0}]}}
@@ -269,11 +295,6 @@ write_file(\"file2.txt\", \"...\")
 write_file(\"helper.rs\", \"...\")
 [DONE]";
 
-const NON_NATIVE_TODO_ADDENDUM: &str = "
-
-IMPORTANT: If you are provided with a SHA256 hash of the requirements file, you MUST include it as the very first line of the todo.g3.md file in the following format:
-`{{Based on the requirements file with SHA256: <SHA>}}`
-This ensures the TODO list is tracked against the specific version of requirements it was generated from.";
 
 // ============================================================================
 // COMPOSED PROMPTS
@@ -284,7 +305,7 @@ pub fn get_system_prompt_for_native() -> String {
     format!(
         "{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}",
         SHARED_INTRO,
-        SHARED_TODO_SECTION,
+        SHARED_PLAN_SECTION,
         SHARED_TEMPORARY_FILES,
         SHARED_WEB_RESEARCH,
         SHARED_WORKSPACE_MEMORY,
@@ -295,12 +316,11 @@ pub fn get_system_prompt_for_native() -> String {
 /// System prompt for providers without native tool calling (embedded models)
 pub fn get_system_prompt_for_non_native() -> String {
     format!(
-        "{}\n\n{}\n\n{}\n\n{}{}\n\n{}\n\n{}\n\n{}",
+        "{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}\n\n{}",
         SHARED_INTRO,
         NON_NATIVE_TOOL_FORMAT,
         NON_NATIVE_INSTRUCTIONS,
-        SHARED_TODO_SECTION,
-        NON_NATIVE_TODO_ADDENDUM,
+        SHARED_PLAN_SECTION,
         SHARED_WEB_RESEARCH,
         SHARED_WORKSPACE_MEMORY,
         SHARED_RESPONSE_GUIDELINES
@@ -311,7 +331,7 @@ pub fn get_system_prompt_for_non_native() -> String {
 const G3_IDENTITY_LINE: &str = "You are G3, an AI programming agent of the same skill level as a seasoned engineer at a major technology company. You analyze given tasks and write code to achieve goals.";
 
 /// Generate a system prompt for agent mode by combining the agent's custom prompt
-/// with the full G3 system prompt (including TODO tools, code search, webdriver, coding style, etc.)
+/// with the full G3 system prompt (including plan tools, code search, webdriver, coding style, etc.)
 ///
 /// The agent_prompt replaces only the G3 identity line at the start of the prompt.
 /// Everything else (tool instructions, coding guidelines, etc.) is preserved.
@@ -374,12 +394,12 @@ mod tests {
     }
 
     #[test]
-    fn test_both_prompts_have_todo_section() {
+    fn test_both_prompts_have_plan_section() {
         let native = get_system_prompt_for_native();
         let non_native = get_system_prompt_for_non_native();
         
-        assert!(native.contains("# Task Management with TODO Tools"));
-        assert!(non_native.contains("# Task Management with TODO Tools"));
+        assert!(native.contains("# Task Management with Plan Mode"));
+        assert!(non_native.contains("# Task Management with Plan Mode"));
     }
 
     #[test]
diff --git a/crates/g3-core/src/tool_definitions.rs b/crates/g3-core/src/tool_definitions.rs
index 8ddf75c..be581ba 100644
--- a/crates/g3-core/src/tool_definitions.rs
+++ b/crates/g3-core/src/tool_definitions.rs
@@ -193,29 +193,6 @@ fn create_core_tools(exclude_research: bool) -> Vec<Tool> {
                 "required": ["path", "window_id"]
             }),
         },
-        Tool {
-            name: "todo_read".to_string(),
-            description: "Read your current TODO list from todo.g3.md file in the session directory. Shows what tasks are planned and their status. Call this at the start of multi-step tasks to check for existing plans, and during execution to review progress before updating. TODO lists are scoped to the current session.".to_string(),
-            input_schema: json!({
-                "type": "object",
-                "properties": {},
-                "required": []
-            }),
-        },
-        Tool {
-            name: "todo_write".to_string(),
-            description: "Create or update your TODO list in todo.g3.md file with a complete task plan. Use markdown checkboxes: - [ ] for pending, - [x] for complete. This tool replaces the entire file content, so always call todo_read first to preserve existing content. Essential for multi-step tasks. TODO lists are scoped to the current session.".to_string(),
-            input_schema: json!({
-                "type": "object",
-                "properties": {
-                    "content": {
-                        "type": "string",
-                        "description": "The TODO list content to save. Use markdown checkbox format: - [ ] for incomplete tasks, - [x] for completed tasks. Support nested tasks with indentation."
-                    }
-                },
-                "required": ["content"]
-            }),
-        },
         Tool {
             name: "coverage".to_string(),
             description: "Generate a code coverage report for the entire workspace using cargo llvm-cov. This runs all tests with coverage instrumentation and returns a summary of coverage statistics. Requires llvm-tools-preview and cargo-llvm-cov to be installed (they will be auto-installed if missing).".to_string(),
@@ -288,6 +265,62 @@ fn create_core_tools(exclude_research: bool) -> Vec<Tool> {
         });
     }
 
+    // Plan Mode tools
+    tools.push(Tool {
+        name: "plan_read".to_string(),
+        description: "Read the current Plan for this session. Shows the plan structure with items, their states, checks (happy/negative/boundary), evidence, and notes. Use this to review the plan before making updates.".to_string(),
+        input_schema: json!({
+            "type": "object",
+            "properties": {},
+            "required": []
+        }),
+    });
+
+    tools.push(Tool {
+        name: "plan_write".to_string(),
+        description: r#"Create or update the Plan for this session. The plan must be provided as YAML with the following structure:
+
+- plan_id: Unique identifier for the plan
+- revision: Will be auto-incremented
+- items: Array of plan items, each with:
+  - id: Stable identifier (e.g., "I1")
+  - description: What will be done
+  - state: todo | doing | done | blocked
+  - touches: Array of paths/modules affected
+  - checks:
+      happy: {desc, target} - Normal successful operation
+      negative: {desc, target} - Error handling, invalid input
+      boundary: {desc, target} - Edge cases, limits
+  - evidence: Array of file:line refs, test names (required when done)
+  - notes: Implementation explanation (required when done)
+
+Rules:
+- Keep items ≤ 7 by default
+- All three checks (happy, negative, boundary) are required
+- Cannot remove items from an approved plan (mark as blocked instead)
+- Evidence and notes required when marking item as done"#.to_string(),
+        input_schema: json!({
+            "type": "object",
+            "properties": {
+                "plan": {
+                    "type": "string",
+                    "description": "The plan as YAML. Must include plan_id and items array."
+                }
+            },
+            "required": ["plan"]
+        }),
+    });
+
+    tools.push(Tool {
+        name: "plan_approve".to_string(),
+        description: "Mark the current plan revision as approved. This is called by the user (not the agent) to approve a drafted plan before implementation begins. Once approved, plan items cannot be removed (only marked as blocked). The agent should ask for approval after drafting a plan.".to_string(),
+        input_schema: json!({
+            "type": "object",
+            "properties": {},
+            "required": []
+        }),
+    });
+
     // Workspace memory tool (memory is auto-loaded at startup, only remember is needed)
     tools.push(Tool {
         name: "remember".to_string(),
@@ -523,11 +556,11 @@ mod tests {
     #[test]
     fn test_core_tools_count() {
         let tools = create_core_tools(false);
-        // Should have the core tools: shell, background_process, read_file, read_image,
-        // write_file, str_replace, screenshot,
-        // todo_read, todo_write, coverage, code_search, research, research_status, remember
-        // (15 total - memory is auto-loaded, only remember tool needed)
-        assert_eq!(tools.len(), 15);
+        // Core tools: shell, background_process, read_file, read_image,
+        // write_file, str_replace, screenshot, coverage, code_search,
+        // research, research_status, remember, plan_read, plan_write, plan_approve
+        // (16 total - memory is auto-loaded, only remember tool needed)
+        assert_eq!(tools.len(), 16);
     }
 
     #[test]
@@ -541,15 +574,15 @@ mod tests {
     fn test_create_tool_definitions_core_only() {
         let config = ToolConfig::default();
         let tools = create_tool_definitions(config);
-        assert_eq!(tools.len(), 15);
+        assert_eq!(tools.len(), 16);
     }
 
     #[test]
     fn test_create_tool_definitions_all_enabled() {
         let config = ToolConfig::new(true, true);
         let tools = create_tool_definitions(config);
-        // 15 core + 15 webdriver = 30
-        assert_eq!(tools.len(), 30);
+        // 16 core + 15 webdriver = 31
+        assert_eq!(tools.len(), 31);
     }
 
     #[test]
@@ -567,8 +600,8 @@ mod tests {
         let tools_with_research = create_core_tools(false);
         let tools_without_research = create_core_tools(true);
         
-        assert_eq!(tools_with_research.len(), 15);
-        assert_eq!(tools_without_research.len(), 13);  // research + research_status both excluded
+        assert_eq!(tools_with_research.len(), 16);
+        assert_eq!(tools_without_research.len(), 14);  // research + research_status both excluded
         
         assert!(tools_with_research.iter().any(|t| t.name == "research"));
         assert!(!tools_without_research.iter().any(|t| t.name == "research"));
diff --git a/crates/g3-core/src/tool_dispatch.rs b/crates/g3-core/src/tool_dispatch.rs
index 49ec507..67e1765 100644
--- a/crates/g3-core/src/tool_dispatch.rs
+++ b/crates/g3-core/src/tool_dispatch.rs
@@ -7,7 +7,7 @@ use anyhow::Result;
 use tracing::{debug, warn};
 
 use crate::tools::executor::ToolContext;
-use crate::tools::{acd, file_ops, memory, misc, research, shell, todo, webdriver};
+use crate::tools::{acd, file_ops, memory, misc, plan, research, shell, webdriver};
 use crate::ui_writer::UiWriter;
 use crate::ToolCall;
 
@@ -32,9 +32,10 @@ pub async fn dispatch_tool<W: UiWriter>(
         "write_file" => file_ops::execute_write_file(tool_call, ctx).await,
         "str_replace" => file_ops::execute_str_replace(tool_call, ctx).await,
 
-        // TODO management
-        "todo_read" => todo::execute_todo_read(tool_call, ctx).await,
-        "todo_write" => todo::execute_todo_write(tool_call, ctx).await,
+        // Plan Mode
+        "plan_read" => plan::execute_plan_read(tool_call, ctx).await,
+        "plan_write" => plan::execute_plan_write(tool_call, ctx).await,
+        "plan_approve" => plan::execute_plan_approve(tool_call, ctx).await,
 
         // Miscellaneous tools
         "screenshot" => misc::execute_take_screenshot(tool_call, ctx).await,
diff --git a/crates/g3-core/src/tools/mod.rs b/crates/g3-core/src/tools/mod.rs
index 69d0dfa..fa11b96 100644
--- a/crates/g3-core/src/tools/mod.rs
+++ b/crates/g3-core/src/tools/mod.rs
@@ -4,7 +4,7 @@
 //! Tools are organized by category:
 //! - `shell` - Shell command execution and background processes
 //! - `file_ops` - File reading, writing, and editing
-//! - `todo` - TODO list management
+//! - `plan` - Plan Mode for structured task planning
 //! - `webdriver` - Browser automation via WebDriver
 //! - `misc` - Other tools (screenshots, code search, etc.)
 //! - `research` - Web research via scout agent
@@ -16,9 +16,9 @@ pub mod acd;
 pub mod file_ops;
 pub mod memory;
 pub mod misc;
+pub mod plan;
 pub mod research;
 pub mod shell;
-pub mod todo;
 pub mod webdriver;
 
 pub use executor::ToolExecutor;
diff --git a/crates/g3-core/src/tools/plan.rs b/crates/g3-core/src/tools/plan.rs
new file mode 100644
index 0000000..f788b83
--- /dev/null
+++ b/crates/g3-core/src/tools/plan.rs
@@ -0,0 +1,674 @@
+//! Plan Mode - Structured task planning with cognitive forcing.
+//!
+//! This module implements Plan Mode, which replaces the TODO system with a
+//! checklist-style plan that forces reasoning about:
+//! - Happy path
+//! - Negative case  
+//! - Boundary condition
+//!
+//! A task is done ONLY when all plan items are satisfied with evidence.
+
+use anyhow::{anyhow, Result};
+use serde::{Deserialize, Serialize};
+use std::fmt;
+use std::path::PathBuf;
+use tracing::debug;
+
+use crate::paths::{ensure_session_dir, get_session_logs_dir};
+use crate::ui_writer::UiWriter;
+use crate::ToolCall;
+
+use super::executor::ToolContext;
+
+// ============================================================================
+// Plan Schema
+// ============================================================================
+
+/// State of a plan item.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(rename_all = "lowercase")]
+pub enum PlanState {
+    #[default]
+    Todo,
+    Doing,
+    Done,
+    Blocked,
+}
+
+impl fmt::Display for PlanState {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            PlanState::Todo => "todo",
+            PlanState::Doing => "doing",
+            PlanState::Done => "done",
+            PlanState::Blocked => "blocked",
+        })
+    }
+}
+
+impl std::str::FromStr for PlanState {
+    type Err = anyhow::Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.to_lowercase().as_str() {
+            "todo" => Ok(PlanState::Todo),
+            "doing" => Ok(PlanState::Doing),
+            "done" => Ok(PlanState::Done),
+            "blocked" => Ok(PlanState::Blocked),
+            _ => Err(anyhow!("Invalid plan state: {}", s)),
+        }
+    }
+}
+
+/// A check with description and target.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct Check {
+    /// Description of what this check verifies
+    pub desc: String,
+    /// Target module/function/file this check applies to
+    pub target: String,
+}
+
+impl Check {
+    pub fn new(desc: impl Into<String>, target: impl Into<String>) -> Self {
+        Self {
+            desc: desc.into(),
+            target: target.into(),
+        }
+    }
+
+    /// Validate that the check has required fields.
+    pub fn validate(&self) -> Result<()> {
+        if self.desc.trim().is_empty() {
+            return Err(anyhow!("Check description cannot be empty"));
+        }
+        if self.target.trim().is_empty() {
+            return Err(anyhow!("Check target cannot be empty"));
+        }
+        Ok(())
+    }
+}
+
+/// The three required checks for each plan item.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct Checks {
+    /// Happy path check - normal successful operation
+    pub happy: Check,
+    /// Negative case check - error handling, invalid input
+    pub negative: Check,
+    /// Boundary condition check - edge cases, limits
+    pub boundary: Check,
+}
+
+impl Checks {
+    /// Validate all three checks.
+    pub fn validate(&self) -> Result<()> {
+        self.happy.validate().map_err(|e| anyhow!("happy check: {}", e))?;
+        self.negative.validate().map_err(|e| anyhow!("negative check: {}", e))?;
+        self.boundary.validate().map_err(|e| anyhow!("boundary check: {}", e))?;
+        Ok(())
+    }
+}
+
+/// A single item in the plan.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PlanItem {
+    /// Stable identifier (e.g., "I1", "I2")
+    pub id: String,
+    /// What will be done
+    pub description: String,
+    /// Current state
+    pub state: PlanState,
+    /// Paths/modules this affects
+    pub touches: Vec<String>,
+    /// The three required checks
+    pub checks: Checks,
+    /// Evidence when done (file:line, test names, snippets)
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub evidence: Vec<String>,
+    /// Short explanation including implementation nuances
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub notes: Option<String>,
+}
+
+impl PlanItem {
+    /// Create a new plan item with required fields.
+    pub fn new(
+        id: impl Into<String>,
+        description: impl Into<String>,
+        touches: Vec<String>,
+        checks: Checks,
+    ) -> Self {
+        Self {
+            id: id.into(),
+            description: description.into(),
+            state: PlanState::Todo,
+            touches,
+            checks,
+            evidence: Vec::new(),
+            notes: None,
+        }
+    }
+
+    /// Validate the plan item structure.
+    pub fn validate(&self) -> Result<()> {
+        if self.id.trim().is_empty() {
+            return Err(anyhow!("Item id cannot be empty"));
+        }
+        if self.description.trim().is_empty() {
+            return Err(anyhow!("Item description cannot be empty"));
+        }
+        if self.touches.is_empty() {
+            return Err(anyhow!("Item must specify at least one path/module in 'touches'"));
+        }
+        self.checks.validate().map_err(|e| anyhow!("Item '{}': {}", self.id, e))?;
+
+        // If done, must have evidence and notes
+        if self.state == PlanState::Done {
+            if self.evidence.is_empty() {
+                return Err(anyhow!(
+                    "Item '{}' is marked done but has no evidence",
+                    self.id
+                ));
+            }
+            if self.notes.as_ref().map(|n| n.trim().is_empty()).unwrap_or(true) {
+                return Err(anyhow!(
+                    "Item '{}' is marked done but has no notes",
+                    self.id
+                ));
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Check if this item is terminal (done or blocked).
+    pub fn is_terminal(&self) -> bool {
+        matches!(self.state, PlanState::Done | PlanState::Blocked)
+    }
+}
+
+/// A complete plan with metadata and items.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Plan {
+    /// Unique identifier for this plan
+    pub plan_id: String,
+    /// Current revision number (increments on each write)
+    pub revision: u32,
+    /// The revision that was approved (None if not yet approved)
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub approved_revision: Option<u32>,
+    /// The plan items
+    pub items: Vec<PlanItem>,
+}
+
+impl Plan {
+    /// Create a new plan with the given ID.
+    pub fn new(plan_id: impl Into<String>) -> Self {
+        Self {
+            plan_id: plan_id.into(),
+            revision: 1,
+            approved_revision: None,
+            items: Vec::new(),
+        }
+    }
+
+    /// Check if the plan has been approved.
+    pub fn is_approved(&self) -> bool {
+        self.approved_revision.is_some()
+    }
+
+    /// Approve the current revision.
+    pub fn approve(&mut self) {
+        self.approved_revision = Some(self.revision);
+    }
+
+    /// Check if all items are terminal (done or blocked).
+    pub fn is_complete(&self) -> bool {
+        !self.items.is_empty() && self.items.iter().all(|item| item.is_terminal())
+    }
+
+    /// Validate the entire plan structure.
+    pub fn validate(&self) -> Result<()> {
+        if self.plan_id.trim().is_empty() {
+            return Err(anyhow!("Plan ID cannot be empty"));
+        }
+
+        if self.items.is_empty() {
+            return Err(anyhow!("Plan must have at least one item"));
+        }
+
+        if self.items.len() > 7 {
+            // Warn but don't fail - this is a guideline
+            debug!("Plan has {} items (recommended max is 7)", self.items.len());
+        }
+
+        // Check for duplicate IDs
+        let mut seen_ids = std::collections::HashSet::new();
+        for item in &self.items {
+            if !seen_ids.insert(&item.id) {
+                return Err(anyhow!("Duplicate item ID: {}", item.id));
+            }
+            item.validate()?;
+        }
+
+        Ok(())
+    }
+
+    /// Get a summary of the plan status.
+    pub fn status_summary(&self) -> String {
+        let total = self.items.len();
+        let done = self.items.iter().filter(|i| i.state == PlanState::Done).count();
+        let doing = self.items.iter().filter(|i| i.state == PlanState::Doing).count();
+        let blocked = self.items.iter().filter(|i| i.state == PlanState::Blocked).count();
+        let todo = self.items.iter().filter(|i| i.state == PlanState::Todo).count();
+
+        let approved_str = if let Some(rev) = self.approved_revision {
+            format!(" (approved at rev {})", rev)
+        } else {
+            " (NOT APPROVED)".to_string()
+        };
+
+        format!(
+            "Plan '{}' rev {}{}: {}/{} done, {} doing, {} blocked, {} todo",
+            self.plan_id, self.revision, approved_str, done, total, doing, blocked, todo
+        )
+    }
+}
+
+// ============================================================================
+// Plan Storage
+// ============================================================================
+
+/// Get the path to the plan.g3.md file for a session.
+pub fn get_plan_path(session_id: &str) -> PathBuf {
+    get_session_logs_dir(session_id).join("plan.g3.md")
+}
+
+/// Read a plan from the session's plan.g3.md file.
+///
+/// Returns `Ok(None)` when the file is missing; other I/O or parse errors are `Err`.
+pub fn read_plan(session_id: &str) -> Result<Option<Plan>> {
+    // Read directly instead of checking `exists()` first: that check raced with
+    // the read and also treated an unreadable path as "no plan".
+    let content = match std::fs::read_to_string(get_plan_path(session_id)) {
+        Ok(text) => text,
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
+        Err(e) => return Err(e.into()),
+    };
+    let yaml_content = extract_yaml_from_markdown(&content)?;
+    let plan: Plan = serde_yaml::from_str(&yaml_content)?;
+    Ok(Some(plan))
+}
+
+/// Write a plan to the session's plan.g3.md file.
+pub fn write_plan(session_id: &str, plan: &Plan) -> Result<()> {
+    // Validate before writing
+    plan.validate()?;
+
+    let _ = ensure_session_dir(session_id)?;
+    let path = get_plan_path(session_id);
+
+    // Format as markdown with YAML code block
+    let content = format_plan_as_markdown(plan);
+    
+    std::fs::write(&path, content)?;
+    Ok(())
+}
+
+/// Extract the YAML payload from a markdown document containing a ```yaml fence.
+///
+/// The closing fence is only recognised at the start of a line, so backticks
+/// embedded in values (e.g. code snippets in `evidence`) cannot truncate the
+/// payload. Without a complete fenced block the whole input is returned so the
+/// caller can try to parse it as bare YAML.
+fn extract_yaml_from_markdown(content: &str) -> Result<String> {
+    const START_MARKER: &str = "```yaml";
+    // Leading newline anchors the closing fence to column 0.
+    const END_MARKER: &str = "\n```";
+
+    let fenced = content.split_once(START_MARKER).and_then(|(_, rest)| {
+        rest.find(END_MARKER).map(|end_idx| rest[..end_idx].trim().to_string())
+    });
+
+    Ok(fenced.unwrap_or_else(|| content.to_string()))
+}
+
+/// Format a plan as markdown with embedded YAML.
+fn format_plan_as_markdown(plan: &Plan) -> String {
+    let yaml = serde_yaml::to_string(plan).unwrap_or_else(|_| "# Error serializing plan".to_string());
+    
+    let mut md = String::new();
+    md.push_str(&format!("# Plan: {}\n\n", plan.plan_id));
+    md.push_str(&format!("**Status**: {}\n\n", plan.status_summary()));
+    md.push_str("## Plan Data\n\n");
+    md.push_str("```yaml\n");
+    md.push_str(&yaml);
+    md.push_str("```\n");
+    
+    md
+}
+
+// ============================================================================
+// Plan Verification
+// ============================================================================
+
+/// Verify a completed plan. In this file it is invoked by `execute_plan_write` once an approved plan is complete.
+///
+/// This is a placeholder that only prints the plan contents to stdout via `println!`.
+/// In the future, this could perform additional validation.
+pub fn plan_verify(plan: &Plan) {
+    println!("\n{}", "=".repeat(60));
+    println!("PLAN VERIFY CALLED");
+    println!("{}", "=".repeat(60));
+    println!("Plan ID: {}", plan.plan_id);
+    println!("Revision: {}", plan.revision);
+    println!("Approved: {:?}", plan.approved_revision);
+    println!("Status: {}", plan.status_summary());
+    println!();
+    
+    for item in &plan.items {
+        println!("[{}] {} - {}", item.id, item.state, item.description);
+        println!("  Touches: {:?}", item.touches);
+        println!("  Checks:");
+        println!("    Happy: {} -> {}", item.checks.happy.desc, item.checks.happy.target);
+        println!("    Negative: {} -> {}", item.checks.negative.desc, item.checks.negative.target);
+        println!("    Boundary: {} -> {}", item.checks.boundary.desc, item.checks.boundary.target);
+        if !item.evidence.is_empty() {
+            println!("  Evidence:");
+            for e in &item.evidence {
+                println!("    - {}", e);
+            }
+        }
+        if let Some(notes) = &item.notes {
+            println!("  Notes: {}", notes);
+        }
+        println!();
+    }
+    println!("{}\n", "=".repeat(60));
+}
+
+// ============================================================================
+// Tool Implementations
+// ============================================================================
+
+/// Execute the `plan_read` tool.
+///
+/// Returns the stored plan as a status line followed by a fenced YAML block,
+/// or a hint to call `plan_write` when the session has no plan yet.
+pub async fn execute_plan_read<W: UiWriter>(
+    _tool_call: &ToolCall,
+    ctx: &mut ToolContext<'_, W>,
+) -> Result<String> {
+    debug!("Processing plan_read tool call");
+
+    let session_id = match ctx.session_id {
+        None => return Ok("❌ No active session - plans are session-scoped.".to_string()),
+        Some(id) => id,
+    };
+
+    let stored = match read_plan(session_id)? {
+        None => return Ok("📋 No plan exists for this session. Use plan_write to create one.".to_string()),
+        Some(found) => found,
+    };
+
+    let summary = stored.status_summary();
+    let body = serde_yaml::to_string(&stored)?;
+    Ok(format!("📋 {}\n\n```yaml\n{}```", summary, body))
+}
+
+/// Execute the `plan_write` tool.
+///
+/// Parses the `plan` argument as YAML, merges tool-managed metadata from any
+/// stored plan (approval is preserved, revision is bumped), validates and saves
+/// the result, and runs `plan_verify` once an approved plan is complete.
+/// Recoverable problems are returned as `Ok` messages prefixed with ❌.
+pub async fn execute_plan_write<W: UiWriter>(
+    tool_call: &ToolCall,
+    ctx: &mut ToolContext<'_, W>,
+) -> Result<String> {
+    debug!("Processing plan_write tool call");
+
+    let session_id = match ctx.session_id {
+        Some(id) => id,
+        None => return Ok("❌ No active session - plans are session-scoped.".to_string()),
+    };
+
+    let plan_yaml = match tool_call.args.get("plan").and_then(|v| v.as_str()) {
+        Some(p) => p,
+        None => return Ok("❌ Missing 'plan' argument. Provide the plan as YAML.".to_string()),
+    };
+
+    let mut plan: Plan = match serde_yaml::from_str(plan_yaml) {
+        Ok(p) => p,
+        Err(e) => return Ok(format!("❌ Invalid plan YAML: {}", e)),
+    };
+
+    if let Some(existing) = read_plan(session_id)? {
+        // Approval and revision are owned by the stored plan, not the caller's YAML.
+        plan.approved_revision = existing.approved_revision;
+        // Saturate: the stored revision may originate from caller-supplied YAML,
+        // so `+ 1` could overflow (panic in debug builds, wrap to 0 in release).
+        plan.revision = existing.revision.saturating_add(1);
+
+        // Once approved, items may be marked blocked but never dropped.
+        if existing.is_approved() {
+            let removed = existing
+                .items
+                .iter()
+                .find(|old| !plan.items.iter().any(|new| new.id == old.id));
+            if let Some(existing_item) = removed {
+                return Ok(format!(
+                    "❌ Cannot remove item '{}' from approved plan. Items can only be marked blocked, not removed.",
+                    existing_item.id
+                ));
+            }
+        }
+    }
+
+    // write_plan validates again; checking here yields the dedicated validation message.
+    if let Err(e) = plan.validate() {
+        return Ok(format!("❌ Plan validation failed: {}", e));
+    }
+    if let Err(e) = write_plan(session_id, &plan) {
+        return Ok(format!("❌ Failed to write plan: {}", e));
+    }
+
+    if plan.is_complete() && plan.is_approved() {
+        plan_verify(&plan);
+    }
+    Ok(format!("✅ Plan updated: {}", plan.status_summary()))
+}
+
+/// Execute the `plan_approve` tool.
+///
+/// Marks the current revision of the session's plan as approved and saves it.
+/// User-facing failures (no session, no plan, already approved, save error)
+/// are returned as `Ok` messages; only a failed plan read propagates as `Err`.
+pub async fn execute_plan_approve<W: UiWriter>(
+    _tool_call: &ToolCall,
+    ctx: &mut ToolContext<'_, W>,
+) -> Result<String> {
+    debug!("Processing plan_approve tool call");
+
+    let session_id = match ctx.session_id {
+        Some(id) => id,
+        None => return Ok("❌ No active session - plans are session-scoped.".to_string()),
+    };
+
+    let mut plan = match read_plan(session_id)? {
+        Some(existing) => existing,
+        None => return Ok("❌ No plan exists to approve. Use plan_write first.".to_string()),
+    };
+
+    // Approval is one-shot: report the recorded revision rather than re-approving.
+    if let Some(approved_rev) = plan.approved_revision {
+        return Ok(format!(
+            "ℹ️ Plan already approved at revision {}. Current revision: {}",
+            approved_rev, plan.revision
+        ));
+    }
+
+    plan.approve();
+    if let Err(e) = write_plan(session_id, &plan) {
+        return Ok(format!("❌ Failed to save approved plan: {}", e));
+    }
+
+    Ok(format!(
+        "✅ Plan approved at revision {}. You may now begin implementation.",
+        plan.revision
+    ))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_test_check() -> Check {
+        Check::new("Test description", "test::target")
+    }
+
+    fn make_test_checks() -> Checks {
+        Checks {
+            happy: make_test_check(),
+            negative: make_test_check(),
+            boundary: make_test_check(),
+        }
+    }
+
+    fn make_test_item(id: &str) -> PlanItem {
+        PlanItem::new(
+            id,
+            "Test item description",
+            vec!["src/test.rs".to_string()],
+            make_test_checks(),
+        )
+    }
+
+    #[test]
+    fn test_plan_state_display() {
+        assert_eq!(PlanState::Todo.to_string(), "todo");
+        assert_eq!(PlanState::Doing.to_string(), "doing");
+        assert_eq!(PlanState::Done.to_string(), "done");
+        assert_eq!(PlanState::Blocked.to_string(), "blocked");
+    }
+
+    #[test]
+    fn test_plan_state_from_str() {
+        assert_eq!("todo".parse::<PlanState>().unwrap(), PlanState::Todo);
+        assert_eq!("DOING".parse::<PlanState>().unwrap(), PlanState::Doing);
+        assert_eq!("Done".parse::<PlanState>().unwrap(), PlanState::Done);
+        assert!("invalid".parse::<PlanState>().is_err());
+    }
+
+    #[test]
+    fn test_check_validation() {
+        let valid = Check::new("desc", "target");
+        assert!(valid.validate().is_ok());
+
+        let empty_desc = Check::new("", "target");
+        assert!(empty_desc.validate().is_err());
+
+        let empty_target = Check::new("desc", "");
+        assert!(empty_target.validate().is_err());
+    }
+
+    #[test]
+    fn test_plan_item_validation() {
+        let item = make_test_item("I1");
+        assert!(item.validate().is_ok());
+
+        // Done item without evidence should fail
+        let mut done_item = make_test_item("I2");
+        done_item.state = PlanState::Done;
+        assert!(done_item.validate().is_err());
+
+        // Done item with evidence but no notes should fail
+        done_item.evidence = vec!["src/test.rs:42".to_string()];
+        assert!(done_item.validate().is_err());
+
+        // Done item with evidence and notes should pass
+        done_item.notes = Some("Implementation notes".to_string());
+        assert!(done_item.validate().is_ok());
+    }
+
+    #[test]
+    fn test_plan_validation() {
+        let mut plan = Plan::new("test-plan");
+        
+        // Empty plan should fail
+        assert!(plan.validate().is_err());
+
+        // Plan with item should pass
+        plan.items.push(make_test_item("I1"));
+        assert!(plan.validate().is_ok());
+
+        // Duplicate IDs should fail
+        plan.items.push(make_test_item("I1"));
+        assert!(plan.validate().is_err());
+    }
+
+    #[test]
+    fn test_plan_is_complete() {
+        let mut plan = Plan::new("test");
+        plan.items.push(make_test_item("I1"));
+        plan.items.push(make_test_item("I2"));
+
+        assert!(!plan.is_complete());
+
+        plan.items[0].state = PlanState::Done;
+        plan.items[0].evidence = vec!["test".to_string()];
+        plan.items[0].notes = Some("notes".to_string());
+        assert!(!plan.is_complete());
+
+        plan.items[1].state = PlanState::Blocked;
+        assert!(plan.is_complete());
+    }
+
+    #[test]
+    fn test_plan_approval() {
+        let mut plan = Plan::new("test");
+        plan.items.push(make_test_item("I1"));
+
+        assert!(!plan.is_approved());
+        assert_eq!(plan.approved_revision, None);
+
+        plan.approve();
+        assert!(plan.is_approved());
+        assert_eq!(plan.approved_revision, Some(1));
+    }
+
+    #[test]
+    fn test_yaml_extraction() {
+        let md = r#"# Plan: test
+
+**Status**: ...
+
+## Plan Data
+
+```yaml
+plan_id: test
+revision: 1
+items: []
+```
+"#;
+
+        let yaml = extract_yaml_from_markdown(md).unwrap();
+        assert!(yaml.contains("plan_id: test"));
+    }
+
+    #[test]
+    fn test_plan_serialization_roundtrip() {
+        let mut plan = Plan::new("test-plan");
+        plan.items.push(make_test_item("I1"));
+        plan.approve();
+
+        let yaml = serde_yaml::to_string(&plan).unwrap();
+        let parsed: Plan = serde_yaml::from_str(&yaml).unwrap();
+
+        assert_eq!(parsed.plan_id, plan.plan_id);
+        assert_eq!(parsed.revision, plan.revision);
+        assert_eq!(parsed.approved_revision, plan.approved_revision);
+        assert_eq!(parsed.items.len(), plan.items.len());
+    }
+}
diff --git a/crates/g3-core/src/tools/todo.rs b/crates/g3-core/src/tools/todo.rs
deleted file mode 100644
index e65ff83..0000000
--- a/crates/g3-core/src/tools/todo.rs
+++ /dev/null
@@ -1,187 +0,0 @@
-//! TODO list management tools.
-
-use anyhow::Result;
-use std::io::Write;
-use tracing::debug;
-
-use crate::ui_writer::UiWriter;
-use crate::ToolCall;
-
-use super::executor::ToolContext;
-
-/// Execute the `todo_read` tool.
-pub async fn execute_todo_read<W: UiWriter>(
-    tool_call: &ToolCall,
-    ctx: &mut ToolContext<'_, W>,
-) -> Result<String> {
-    debug!("Processing todo_read tool call");
-    let _ = tool_call; // unused but kept for consistency
-    
-    let todo_path = ctx.get_todo_path();
-
-    if !todo_path.exists() {
-        // Also update in-memory content to stay in sync
-        let mut todo = ctx.todo_content.write().await;
-        *todo = String::new();
-        ctx.ui_writer.print_todo_compact(None, false);
-        return Ok("📝 TODO list is empty (no todo.g3.md file found)".to_string());
-    }
-
-    match std::fs::read_to_string(&todo_path) {
-        Ok(content) => {
-            // Update in-memory content to stay in sync
-            let mut todo = ctx.todo_content.write().await;
-            *todo = content.clone();
-
-            // Check for staleness if enabled and we have a requirements SHA
-            if ctx.config.agent.check_todo_staleness {
-                if let Some(req_sha) = ctx.requirements_sha {
-                    if let Some(staleness_result) = check_todo_staleness(&content, req_sha, ctx.ui_writer) {
-                        return Ok(staleness_result);
-                    }
-                }
-            }
-
-            if content.trim().is_empty() {
-                ctx.ui_writer.print_todo_compact(None, false);
-                Ok("📝 TODO list is empty".to_string())
-            } else {
-                ctx.ui_writer.print_todo_compact(Some(&content), false);
-                Ok(format!("📝 TODO list:\n{}", content))
-            }
-        }
-        Err(e) => Ok(format!("❌ Failed to read TODO.md: {}", e)),
-    }
-}
-
-/// Execute the `todo_write` tool.
-pub async fn execute_todo_write<W: UiWriter>(
-    tool_call: &ToolCall,
-    ctx: &mut ToolContext<'_, W>,
-) -> Result<String> {
-    debug!("Processing todo_write tool call");
-    
-    let content_str = match tool_call.args.get("content").and_then(|v| v.as_str()) {
-        Some(c) => c,
-        None => return Ok("❌ Missing content argument".to_string()),
-    };
-
-    let char_count = content_str.chars().count();
-    let max_chars = std::env::var("G3_TODO_MAX_CHARS")
-        .ok()
-        .and_then(|s| s.parse().ok())
-        .unwrap_or(50_000);
-
-    if max_chars > 0 && char_count > max_chars {
-        return Ok(format!(
-            "❌ TODO list too large: {} chars (max: {})",
-            char_count, max_chars
-        ));
-    }
-
-    // Check if all todos are completed (all checkboxes are checked)
-    let has_incomplete = content_str
-        .lines()
-        .any(|line| line.trim().starts_with("- [ ]"));
-
-    // If all todos are complete, delete the file instead of writing
-    // EXCEPT in planner mode (G3_TODO_PATH is set) - preserve for rename to completed_todo_*.md
-    let in_planner_mode = std::env::var("G3_TODO_PATH").is_ok();
-    let todo_path = ctx.get_todo_path();
-
-    if !in_planner_mode
-        && !has_incomplete
-        && (content_str.contains("- [x]") || content_str.contains("- [X]"))
-        && todo_path.exists()
-    {
-        match std::fs::remove_file(&todo_path) {
-            Ok(_) => {
-                let mut todo = ctx.todo_content.write().await;
-                *todo = String::new();
-                // Show the final completed TODOs
-                ctx.ui_writer.print_todo_compact(Some(content_str), true);
-                let mut result = String::from("✅ All TODOs completed! Removed todo.g3.md\n\nFinal status:\n");
-                result.push_str(content_str);
-                return Ok(result);
-            }
-            Err(e) => return Ok(format!("❌ Failed to remove todo.g3.md: {}", e)),
-        }
-    }
-
-    match std::fs::write(&todo_path, content_str) {
-        Ok(_) => {
-            // Also update in-memory content to stay in sync
-            let mut todo = ctx.todo_content.write().await;
-            *todo = content_str.to_string();
-            ctx.ui_writer.print_todo_compact(Some(content_str), true);
-            Ok(format!(
-                "✅ TODO list updated ({} chars) and saved to todo.g3.md:\n{}",
-                char_count, content_str
-            ))
-        }
-        Err(e) => Ok(format!("❌ Failed to write todo.g3.md: {}", e)),
-    }
-}
-
-/// Check if the TODO list is stale (generated from a different requirements file).
-/// Returns Some(message) if staleness was detected and handled, None otherwise.
-fn check_todo_staleness<W: UiWriter>(
-    content: &str,
-    req_sha: &str,
-    ui_writer: &W,
-) -> Option<String> {
-    // Parse the first line for the SHA header
-    let first_line = content.lines().next()?;
-    
-    if !first_line.starts_with("{{Based on the requirements file with SHA256:") {
-        return None;
-    }
-
-    let parts: Vec<&str> = first_line.split("SHA256:").collect();
-    if parts.len() <= 1 {
-        return None;
-    }
-
-    let todo_sha = parts[1].trim().trim_end_matches("}}").trim();
-    if todo_sha == req_sha {
-        return None;
-    }
-
-    let warning = format!(
-        "⚠️ TODO list is stale! It was generated from a different requirements file.\nExpected SHA: {}\nFound SHA:    {}",
-        req_sha, todo_sha
-    );
-    ui_writer.print_context_status(&warning);
-
-    // Beep 6 times
-    print!("\x07\x07\x07\x07\x07\x07");
-    let _ = std::io::stdout().flush();
-
-    let options = [
-        "Ignore and Continue",
-        "Mark as Stale",
-        "Quit Application",
-    ];
-    let choice = ui_writer.prompt_user_choice(
-        "Requirements have changed! What would you like to do?",
-        &options,
-    );
-
-    match choice {
-        0 => {
-            // Ignore and Continue
-            ui_writer.print_context_status("⚠️ Ignoring staleness warning.");
-            None
-        }
-        1 => {
-            // Mark as Stale
-            Some("⚠️ TODO list is stale (requirements changed). Please regenerate the TODO list to match the new requirements.".to_string())
-        }
-        2 => {
-            // Quit Application
-            ui_writer.print_context_status("❌ Quitting application as requested.");
-            std::process::exit(0);
-        }
-        _ => None,
-    }
-}
diff --git a/crates/g3-core/tests/stream_completion_characterization_test.rs b/crates/g3-core/tests/stream_completion_characterization_test.rs
index 33be9b6..15192d3 100644
--- a/crates/g3-core/tests/stream_completion_characterization_test.rs
+++ b/crates/g3-core/tests/stream_completion_characterization_test.rs
@@ -589,36 +589,56 @@ mod tool_execution_integration {
         );
     }
 
-    /// CHARACTERIZATION: TODO tools work through agent
+    /// CHARACTERIZATION: Plan tools work through agent
     #[tokio::test]
     #[serial]
-    async fn todo_tools_work() {
+    async fn plan_tools_work() {
         let temp_dir = TempDir::new().unwrap();
         let mut agent = create_test_agent(&temp_dir).await;
 
-        // Write TODO
+        // Initialize session ID for plan tools (they are session-scoped)
+        agent.init_session_id_for_test("plan-tools-test");
+
+        // Write Plan
         let write_call = ToolCall {
-            tool: "todo_write".to_string(),
+            tool: "plan_write".to_string(),
             args: serde_json::json!({
-                "content": "- [ ] Test task\n- [x] Done task"
+                "plan": r#"plan_id: test-plan
+revision: 1
+items:
+  - id: I1
+    description: Test task
+    state: todo
+    touches:
+      - src/test.rs
+    checks:
+      happy:
+        desc: Works correctly
+        target: test::module
+      negative:
+        desc: Handles errors
+        target: test::module
+      boundary:
+        desc: Edge cases
+        target: test::module"#
             }),
         };
         let write_result = agent.execute_tool(&write_call).await.unwrap();
         assert!(
             write_result.contains("✅"),
-            "Write should succeed: {}",
+            "Plan write should succeed: {}",
             write_result
         );
 
-        // Read TODO
+        // Read Plan
         let read_call = ToolCall {
-            tool: "todo_read".to_string(),
+            tool: "plan_read".to_string(),
             args: serde_json::json!({}),
         };
         let read_result = agent.execute_tool(&read_call).await.unwrap();
         assert!(
-            read_result.contains("Test task"),
-            "Should read back TODO: {}",
+            read_result.contains("test-plan"),
+            "Should read back plan: {}",
             read_result
         );
     }
diff --git a/crates/g3-core/tests/test_todo_persistence.rs b/crates/g3-core/tests/test_todo_persistence.rs
deleted file mode 100644
index bca8d6b..0000000
--- a/crates/g3-core/tests/test_todo_persistence.rs
+++ /dev/null
@@ -1,388 +0,0 @@
-use g3_core::ui_writer::NullUiWriter;
-use g3_core::Agent;
-use serial_test::serial;
-use std::fs;
-use std::path::PathBuf;
-use tempfile::TempDir;
-
-/// Helper to create a test agent in a temporary directory
-async fn create_test_agent_in_dir(temp_dir: &TempDir) -> Agent<NullUiWriter> {
-    // Change to temp directory
-    std::env::set_current_dir(temp_dir.path()).unwrap();
-
-    // Create a minimal config
-    let config = g3_config::Config::default();
-    let ui_writer = NullUiWriter;
-
-    Agent::new(config, ui_writer).await.unwrap()
-}
-
-/// Helper to get todo.g3.md path in temp directory
-fn get_todo_path(temp_dir: &TempDir) -> PathBuf {
-    temp_dir.path().join("todo.g3.md")
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_write_creates_file() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-    let todo_path = get_todo_path(&temp_dir);
-
-    // Initially, todo.g3.md should not exist
-    assert!(!todo_path.exists(), "todo.g3.md should not exist initially");
-
-    // Create a tool call to write TODO
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_write".to_string(),
-        args: serde_json::json!({
-            "content": "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3"
-        }),
-    };
-
-    // Execute the tool
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-
-    // Should report success
-    assert!(result.contains("✅"), "Should report success: {}", result);
-    assert!(
-        result.contains("todo.g3.md"),
-        "Should mention todo.g3.md: {}",
-        result
-    );
-
-    // File should now exist
-    assert!(todo_path.exists(), "todo.g3.md should exist after write");
-
-    // File should contain the correct content
-    let content = fs::read_to_string(&todo_path).unwrap();
-    assert_eq!(content, "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3");
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_read_from_file() {
-    let temp_dir = TempDir::new().unwrap();
-    let todo_path = get_todo_path(&temp_dir);
-
-    // Pre-create a todo.g3.md file
-    let test_content = "# My TODO\n\n- [ ] First task\n- [x] Completed task";
-    fs::write(&todo_path, test_content).unwrap();
-
-    // Create agent (should load from file)
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-
-    // Create a tool call to read TODO
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-
-    // Execute the tool
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-
-    // Should contain the TODO content
-    assert!(
-        result.contains("📝 TODO list:"),
-        "Should have TODO list header: {}",
-        result
-    );
-    assert!(
-        result.contains("First task"),
-        "Should contain first task: {}",
-        result
-    );
-    assert!(
-        result.contains("Completed task"),
-        "Should contain completed task: {}",
-        result
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_read_empty_file() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-
-    // Create a tool call to read TODO (file doesn't exist)
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-
-    // Execute the tool
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-
-    // Should report empty
-    assert!(result.contains("empty"), "Should report empty: {}", result);
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_persistence_across_agents() {
-    let temp_dir = TempDir::new().unwrap();
-    let todo_path = get_todo_path(&temp_dir);
-
-    // Agent 1: Write TODO
-    {
-        let mut agent = create_test_agent_in_dir(&temp_dir).await;
-        let tool_call = g3_core::ToolCall {
-            tool: "todo_write".to_string(),
-            args: serde_json::json!({
-                "content": "- [ ] Persistent task\n- [x] Done task"
-            }),
-        };
-        agent.execute_tool(&tool_call).await.unwrap();
-    }
-
-    // Verify file exists
-    assert!(
-        todo_path.exists(),
-        "todo.g3.md should persist after agent drops"
-    );
-
-    // Agent 2: Read TODO (new agent instance)
-    {
-        let mut agent = create_test_agent_in_dir(&temp_dir).await;
-        let tool_call = g3_core::ToolCall {
-            tool: "todo_read".to_string(),
-            args: serde_json::json!({}),
-        };
-        let result = agent.execute_tool(&tool_call).await.unwrap();
-
-        // Should read the persisted content
-        assert!(
-            result.contains("Persistent task"),
-            "Should read persisted task: {}",
-            result
-        );
-        assert!(
-            result.contains("Done task"),
-            "Should read done task: {}",
-            result
-        );
-    }
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_update_preserves_file() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-    let todo_path = get_todo_path(&temp_dir);
-
-    // Write initial TODO
-    let write_call = g3_core::ToolCall {
-        tool: "todo_write".to_string(),
-        args: serde_json::json!({
-            "content": "- [ ] Task 1\n- [ ] Task 2"
-        }),
-    };
-    agent.execute_tool(&write_call).await.unwrap();
-
-    // Update TODO
-    let update_call = g3_core::ToolCall {
-        tool: "todo_write".to_string(),
-        args: serde_json::json!({
-            "content": "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3"
-        }),
-    };
-    agent.execute_tool(&update_call).await.unwrap();
-
-    // Verify file has updated content
-    let content = fs::read_to_string(&todo_path).unwrap();
-    assert_eq!(content, "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3");
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_handles_large_content() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-    let todo_path = get_todo_path(&temp_dir);
-
-    // Create a large TODO (but under the 50k limit)
-    let mut large_content = String::from("# Large TODO\n\n");
-    for i in 0..100 {
-        large_content.push_str(&format!(
-            "- [ ] Task {} with a long description that exceeds normal line lengths\n",
-            i
-        ));
-    }
-
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_write".to_string(),
-        args: serde_json::json!({
-            "content": large_content
-        }),
-    };
-
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-    assert!(
-        result.contains("✅"),
-        "Should handle large content: {}",
-        result
-    );
-
-    // Verify file contains all content
-    let file_content = fs::read_to_string(&todo_path).unwrap();
-    assert_eq!(file_content, large_content);
-    assert!(file_content.contains("Task 99"), "Should contain all tasks");
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_respects_size_limit() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-
-    // Create content that exceeds the default 50k limit
-    let huge_content = "x".repeat(60_000);
-
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_write".to_string(),
-        args: serde_json::json!({
-            "content": huge_content
-        }),
-    };
-
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-
-    // Should reject content that's too large
-    assert!(
-        result.contains("❌"),
-        "Should reject oversized content: {}",
-        result
-    );
-    assert!(
-        result.contains("too large"),
-        "Should mention size limit: {}",
-        result
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_agent_initialization_loads_file() {
-    let temp_dir = TempDir::new().unwrap();
-    let todo_path = get_todo_path(&temp_dir);
-
-    // Pre-create todo.g3.md before agent initialization
-    let initial_content = "- [ ] Pre-existing task";
-    fs::write(&todo_path, initial_content).unwrap();
-
-    // Create agent - should load the file during initialization
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-
-    // Read TODO - should return the pre-existing content
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-    assert!(
-        result.contains("Pre-existing task"),
-        "Should load file on init: {}",
-        result
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_handles_unicode_content() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-    let todo_path = get_todo_path(&temp_dir);
-
-    // Create TODO with unicode characters
-    let unicode_content = "- [ ] 日本語タスク\n- [ ] Émoji task 🚀\n- [x] Ελληνικά task";
-
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_write".to_string(),
-        args: serde_json::json!({
-            "content": unicode_content
-        }),
-    };
-
-    agent.execute_tool(&tool_call).await.unwrap();
-
-    // Verify file preserves unicode
-    let file_content = fs::read_to_string(&todo_path).unwrap();
-    assert_eq!(file_content, unicode_content);
-
-    // Verify reading back works
-    let read_call = g3_core::ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-
-    let result = agent.execute_tool(&read_call).await.unwrap();
-    assert!(
-        result.contains("日本語"),
-        "Should preserve Japanese: {}",
-        result
-    );
-    assert!(result.contains("🚀"), "Should preserve emoji: {}", result);
-    assert!(
-        result.contains("Ελληνικά"),
-        "Should preserve Greek: {}",
-        result
-    );
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_empty_content_creates_empty_file() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-    let todo_path = get_todo_path(&temp_dir);
-
-    // Write empty TODO
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_write".to_string(),
-        args: serde_json::json!({
-            "content": ""
-        }),
-    };
-
-    agent.execute_tool(&tool_call).await.unwrap();
-
-    // File should exist but be empty
-    assert!(todo_path.exists(), "Empty todo.g3.md should create file");
-    let content = fs::read_to_string(&todo_path).unwrap();
-    assert_eq!(content, "");
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_whitespace_only_content() {
-    let temp_dir = TempDir::new().unwrap();
-    let mut agent = create_test_agent_in_dir(&temp_dir).await;
-
-    // Write whitespace-only TODO
-    let tool_call = g3_core::ToolCall {
-        tool: "todo_write".to_string(),
-        args: serde_json::json!({
-            "content": "   \n\n  \t  \n"
-        }),
-    };
-
-    agent.execute_tool(&tool_call).await.unwrap();
-
-    // Read it back
-    let read_call = g3_core::ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-
-    let result = agent.execute_tool(&read_call).await.unwrap();
-
-    // Should report as empty (whitespace is trimmed)
-    assert!(
-        result.contains("empty"),
-        "Whitespace-only should be empty: {}",
-        result
-    );
-}
diff --git a/crates/g3-core/tests/todo_staleness_test.rs b/crates/g3-core/tests/todo_staleness_test.rs
deleted file mode 100644
index cbaf714..0000000
--- a/crates/g3-core/tests/todo_staleness_test.rs
+++ /dev/null
@@ -1,223 +0,0 @@
-use g3_config::Config;
-use g3_core::ui_writer::UiWriter;
-use g3_core::{Agent, ToolCall};
-use serial_test::serial;
-use std::sync::{Arc, Mutex};
-use tempfile::TempDir;
-
-// Mock UI Writer for testing
-#[derive(Clone)]
-struct MockUiWriter {
-    output: Arc<Mutex<Vec<String>>>,
-    prompt_responses: Arc<Mutex<Vec<bool>>>,
-    choice_responses: Arc<Mutex<Vec<usize>>>,
-}
-
-impl MockUiWriter {
-    fn new() -> Self {
-        Self {
-            output: Arc::new(Mutex::new(Vec::new())),
-            prompt_responses: Arc::new(Mutex::new(Vec::new())),
-            choice_responses: Arc::new(Mutex::new(Vec::new())),
-        }
-    }
-
-    #[allow(dead_code)]
-    fn set_prompt_response(&self, response: bool) {
-        self.prompt_responses.lock().unwrap().push(response);
-    }
-
-    #[allow(dead_code)]
-    fn set_choice_response(&self, response: usize) {
-        self.choice_responses.lock().unwrap().push(response);
-    }
-
-    #[allow(dead_code)]
-    fn get_output(&self) -> Vec<String> {
-        self.output.lock().unwrap().clone()
-    }
-}
-
-impl UiWriter for MockUiWriter {
-    fn print(&self, message: &str) {
-        self.output.lock().unwrap().push(message.to_string());
-    }
-    fn println(&self, message: &str) {
-        self.output.lock().unwrap().push(message.to_string());
-    }
-    fn print_inline(&self, message: &str) {
-        self.output.lock().unwrap().push(message.to_string());
-    }
-    fn print_system_prompt(&self, _prompt: &str) {}
-    fn print_context_status(&self, message: &str) {
-        self.output
-            .lock()
-            .unwrap()
-            .push(format!("STATUS: {}", message));
-    }
-    fn print_g3_progress(&self, _message: &str) {}
-    fn print_g3_status(&self, _message: &str, _status: &str) {}
-    fn print_thin_result(&self, _result: &g3_core::ThinResult) {}
-    fn print_tool_header(&self, _tool_name: &str, _tool_args: Option<&serde_json::Value>) {}
-    fn print_tool_arg(&self, _key: &str, _value: &str) {}
-    fn print_tool_output_header(&self) {}
-    fn update_tool_output_line(&self, _line: &str) {}
-    fn print_tool_output_line(&self, _line: &str) {}
-    fn print_tool_output_summary(&self, _hidden_count: usize) {}
-    fn print_tool_timing(&self, _duration_str: &str, _tokens_delta: u32, _context_percentage: f32) {}
-    fn print_agent_prompt(&self) {}
-    fn print_agent_response(&self, _content: &str) {}
-    fn notify_sse_received(&self) {}
-    fn flush(&self) {}
-    fn wants_full_output(&self) -> bool {
-        false
-    }
-    fn prompt_user_yes_no(&self, message: &str) -> bool {
-        self.output
-            .lock()
-            .unwrap()
-            .push(format!("PROMPT: {}", message));
-        self.prompt_responses.lock().unwrap().pop().unwrap_or(true)
-    }
-    fn prompt_user_choice(&self, message: &str, options: &[&str]) -> usize {
-        self.output
-            .lock()
-            .unwrap()
-            .push(format!("CHOICE: {} Options: {:?}", message, options));
-        self.choice_responses.lock().unwrap().pop().unwrap_or(0)
-    }
-    fn print_tool_streaming_hint(&self, _tool_name: &str) {}
-    fn print_tool_streaming_active(&self) {}
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_staleness_check_matching_sha() {
-    let temp_dir = TempDir::new().unwrap();
-    let todo_path = temp_dir.path().join("todo.g3.md");
-    std::env::set_current_dir(&temp_dir).unwrap();
-
-    let sha = "abc123hash";
-    let content = format!(
-        "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
-        sha
-    );
-    std::fs::write(&todo_path, content).unwrap();
-
-    let mut config = Config::default();
-    config.agent.check_todo_staleness = true;
-
-    let ui_writer = MockUiWriter::new();
-    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
-    agent.set_requirements_sha(sha.to_string());
-
-    let tool_call = ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-
-    assert!(result.contains("📝 TODO list:"));
-    assert!(!result.contains("⚠️ TODO list is stale"));
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_staleness_check_mismatch_sha_ignore() {
-    let temp_dir = TempDir::new().unwrap();
-    let todo_path = temp_dir.path().join("todo.g3.md");
-    std::env::set_current_dir(&temp_dir).unwrap();
-
-    let sha_file = "old_sha";
-    let sha_req = "new_sha";
-    let content = format!(
-        "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
-        sha_file
-    );
-    std::fs::write(&todo_path, content).unwrap();
-
-    let mut config = Config::default();
-    config.agent.check_todo_staleness = true;
-
-    let ui_writer = MockUiWriter::new();
-    ui_writer.set_choice_response(0); // Ignore
-
-    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
-    agent.set_requirements_sha(sha_req.to_string());
-
-    let tool_call = ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-
-    assert!(result.contains("📝 TODO list:"));
-}
-
-#[tokio::test]
-#[serial]
-async fn test_todo_staleness_check_mismatch_sha_mark_stale() {
-    let temp_dir = TempDir::new().unwrap();
-    let todo_path = temp_dir.path().join("todo.g3.md");
-    std::env::set_current_dir(&temp_dir).unwrap();
-
-    let sha_file = "old_sha";
-    let sha_req = "new_sha";
-    let content = format!(
-        "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
-        sha_file
-    );
-    std::fs::write(&todo_path, content).unwrap();
-
-    let mut config = Config::default();
-    config.agent.check_todo_staleness = true;
-
-    let ui_writer = MockUiWriter::new();
-    ui_writer.set_choice_response(1); // Mark as Stale
-
-    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
-    agent.set_requirements_sha(sha_req.to_string());
-
-    let tool_call = ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-
-    assert!(result.contains("⚠️ TODO list is stale"));
-    assert!(result.contains("Please regenerate"));
-}
-
-// Note: We cannot easily test "Quit" (index 2) because it calls std::process::exit(0)
-// which would kill the test runner. We skip that test case here.
-
-#[tokio::test]
-#[serial]
-async fn test_todo_staleness_check_disabled() {
-    let temp_dir = TempDir::new().unwrap();
-    let todo_path = temp_dir.path().join("todo.g3.md");
-    std::env::set_current_dir(&temp_dir).unwrap();
-
-    let sha_file = "old_sha";
-    let sha_req = "new_sha";
-    let content = format!(
-        "{{{{Based on the requirements file with SHA256: {}}}}}\n- [ ] Task 1",
-        sha_file
-    );
-    std::fs::write(&todo_path, content).unwrap();
-
-    let mut config = Config::default();
-    config.agent.check_todo_staleness = false;
-
-    let ui_writer = MockUiWriter::new();
-    let mut agent = Agent::new_autonomous(config, ui_writer).await.unwrap();
-    agent.set_requirements_sha(sha_req.to_string());
-
-    let tool_call = ToolCall {
-        tool: "todo_read".to_string(),
-        args: serde_json::json!({}),
-    };
-    let result = agent.execute_tool(&tool_call).await.unwrap();
-
-    assert!(result.contains("📝 TODO list:"));
-}
diff --git a/crates/g3-core/tests/tool_execution_roundtrip_test.rs b/crates/g3-core/tests/tool_execution_roundtrip_test.rs
index c4cf30a..3bfeac4 100644
--- a/crates/g3-core/tests/tool_execution_roundtrip_test.rs
+++ b/crates/g3-core/tests/tool_execution_roundtrip_test.rs
@@ -393,21 +393,38 @@ mod str_replace_execution {
 // Test: TODO tool execution
 // =============================================================================
 
-mod todo_execution {
+mod plan_execution {
     use super::*;
 
-    /// Test writing and reading TODO
+    /// Test writing and reading Plan
     #[tokio::test]
     #[serial]
-    async fn test_todo_write_and_read() {
+    async fn test_plan_write_and_read() {
         let temp_dir = TempDir::new().unwrap();
         let mut agent = create_test_agent(&temp_dir).await;
+        agent.init_session_id_for_test("plan-test");
         
-        // Write TODO
+        // Write Plan
         let write_call = make_tool_call(
-            "todo_write",
+            "plan_write",
             serde_json::json!({
-                "content": "- [ ] Task 1\n- [x] Task 2\n- [ ] Task 3"
+                "plan": r#"plan_id: test-plan
+revision: 1
+items:
+  - id: I1
+    description: Task 1
+    state: todo
+    touches: ["src/test.rs"]
+    checks:
+      happy:
+        desc: Works
+        target: test
+      negative:
+        desc: Errors
+        target: test
+      boundary:
+        desc: Edge
+        target: test"#
             }),
         );
         
@@ -415,52 +432,61 @@ mod todo_execution {
         assert!(write_result.contains("✅") || write_result.to_lowercase().contains("success"),
             "Write should succeed: {}", write_result);
         
-        // Read TODO
-        let read_call = make_tool_call("todo_read", serde_json::json!({}));
+        // Read Plan
+        let read_call = make_tool_call("plan_read", serde_json::json!({}));
         let read_result = agent.execute_tool(&read_call).await.unwrap();
         
+        assert!(read_result.contains("test-plan"), "Should contain plan id: {}", read_result);
         assert!(read_result.contains("Task 1"), "Should contain Task 1: {}", read_result);
-        assert!(read_result.contains("Task 2"), "Should contain Task 2: {}", read_result);
-        assert!(read_result.contains("Task 3"), "Should contain Task 3: {}", read_result);
     }
 
-    /// Test reading empty TODO
+    /// Test that plan_read reports "no plan" when nothing was written in the session
     #[tokio::test]
     #[serial]
-    async fn test_todo_read_empty() {
+    async fn test_plan_read_empty() {
         let temp_dir = TempDir::new().unwrap();
         let mut agent = create_test_agent(&temp_dir).await;
+        agent.init_session_id_for_test("plan-empty-test");
         
-        let read_call = make_tool_call("todo_read", serde_json::json!({}));
+        let read_call = make_tool_call("plan_read", serde_json::json!({}));
         let result = agent.execute_tool(&read_call).await.unwrap();
         
-        assert!(result.to_lowercase().contains("empty") || result.contains("no todo"),
+        assert!(result.to_lowercase().contains("no plan"),
             "Should indicate empty: {}", result);
     }
 
-    /// Test TODO persists to file
+    /// Test Plan approval
     #[tokio::test]
     #[serial]
-    async fn test_todo_persists_to_file() {
+    async fn test_plan_approve() {
         let temp_dir = TempDir::new().unwrap();
-        let todo_path = temp_dir.path().join("todo.g3.md");
+        let mut agent = create_test_agent(&temp_dir).await;
+        agent.init_session_id_for_test("plan-approve-test");
         
-        {
-            let mut agent = create_test_agent(&temp_dir).await;
-            
-            let write_call = make_tool_call(
-                "todo_write",
-                serde_json::json!({
-                    "content": "- [ ] Persistent task"
-                }),
-            );
-            
-            agent.execute_tool(&write_call).await.unwrap();
-        }
+        // First write a plan
+        let write_call = make_tool_call(
+            "plan_write",
+            serde_json::json!({
+                "plan": r#"plan_id: approve-test
+revision: 1
+items:
+  - id: I1
+    description: Test task
+    state: todo
+    touches: ["src/test.rs"]
+    checks:
+      happy: {desc: Works, target: test}
+      negative: {desc: Errors, target: test}
+      boundary: {desc: Edge, target: test}"#
+            }),
+        );
+        agent.execute_tool(&write_call).await.unwrap();
         
-        // File should exist after agent is dropped
-        assert!(todo_path.exists(), "TODO file should persist");
-        let content = fs::read_to_string(&todo_path).unwrap();
-        assert!(content.contains("Persistent task"), "Content should persist: {}", content);
+        // Approve the plan
+        let approve_call = make_tool_call("plan_approve", serde_json::json!({}));
+        let result = agent.execute_tool(&approve_call).await.unwrap();
+        
+        assert!(result.contains("✅") && result.contains("approved"),
+            "Should approve plan: {}", result);
     }
 }