From 8eda691cb1f170029e0b9da7533bddb77c580410 Mon Sep 17 00:00:00 2001
From: "Dhanji R. Prasanna" <d@wideplay.com>
Date: Thu, 6 Nov 2025 15:24:57 +1100
Subject: [PATCH] todo persistence

---
 Cargo.lock                                    |  42 +++
 crates/g3-core/Cargo.toml                     |   4 +
 crates/g3-core/src/lib.rs                     |  66 +++-
 .../tests/test_todo_context_thinning.rs       | 214 +++++++++++
 crates/g3-core/tests/test_todo_persistence.rs | 331 ++++++++++++++++++
 5 files changed, 643 insertions(+), 14 deletions(-)
 create mode 100644 crates/g3-core/tests/test_todo_context_thinning.rs
 create mode 100644 crates/g3-core/tests/test_todo_persistence.rs
diff --git a/Cargo.lock b/Cargo.lock
index 98e726e..776b4ef 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1392,8 +1392,10 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_yaml",
+ "serial_test",
  "shellexpand",
  "streaming-iterator",
+ "tempfile",
  "thiserror 1.0.69",
  "tokio",
  "tokio-stream",
@@ -2973,6 +2975,15 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "scc"
+version = "2.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46e6f046b7fef48e2660c57ed794263155d713de679057f2d0c169bfc6e756cc"
+dependencies = [
+ "sdd",
+]
+
 [[package]]
 name = "schannel"
 version = "0.1.28"
@@ -2988,6 +2999,12 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
+[[package]]
+name = "sdd"
+version = "3.0.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "490dcfcbfef26be6800d11870ff2df8774fa6e86d047e3e8c8a76b25655e41ca"
+
 [[package]]
 name = "security-framework"
 version = "2.11.1"
@@ -3105,6 +3122,31 @@ dependencies = [
  "unsafe-libyaml",
 ]
 
+[[package]]
+name = "serial_test"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b258109f244e1d6891bf1053a55d63a5cd4f8f4c30cf9a1280989f80e7a1fa9"
+dependencies = [
+ "futures",
+ "log",
+ "once_cell",
+ "parking_lot",
+ "scc",
+ "serial_test_derive",
+]
+
+[[package]]
+name = "serial_test_derive"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "sha2"
 version = "0.10.9"
diff --git a/crates/g3-core/Cargo.toml b/crates/g3-core/Cargo.toml
index 994442a..f9935f0 100644
--- a/crates/g3-core/Cargo.toml
+++ b/crates/g3-core/Cargo.toml
@@ -42,3 +42,7 @@ tree-sitter-haskell = { git = "https://github.com/tree-sitter/tree-sitter-haskel
 tree-sitter-scheme = "0.24"
 streaming-iterator = "0.1"
 walkdir = "2.4"
+
+[dev-dependencies]
+tempfile = "3.8"
+serial_test = "3.0"
diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs
index afcd3fe..e425769 100644
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
@@ -917,7 +917,18 @@ impl<W: UiWriter> Agent<W> {
             session_id: None,
             tool_call_metrics: Vec::new(),
             ui_writer,
-            todo_content: std::sync::Arc::new(tokio::sync::RwLock::new(String::new())),
+            todo_content: std::sync::Arc::new(tokio::sync::RwLock::new({
+                // Initialize from the todo.g3.md file in the current directory, if it exists
+                let todo_path = std::env::current_dir()
+                    .ok()
+                    .map(|p| p.join("todo.g3.md"));
+                
+                if let Some(path) = todo_path {
+                    std::fs::read_to_string(&path).unwrap_or_default()
+                } else {
+                    String::new()
+                }
+            })),
             is_autonomous,
             quiet,
             computer_controller,
@@ -1121,7 +1132,7 @@ Every multi-step task follows this pattern:
 2. **During**: Execute steps, then todo_read and todo_write to mark progress
 3. **End**: Call todo_read to verify all items complete
 
-Note: todo_write replaces the entire list, so always read first to preserve content.
+Note: todo_write replaces the entire todo.g3.md file, so always read first to preserve content. TODO lists persist across g3 sessions in the workspace directory.
 
 ## Examples
 
@@ -1270,11 +1281,11 @@ Short description for providers without native calling specs:
 - **final_output**: Signal task completion with a detailed summary of work done in markdown format
   - Format: {\"tool\": \"final_output\", \"args\": {\"summary\": \"what_was_accomplished\"}
 
-- **todo_read**: Read the entire TODO list content
+- **todo_read**: Read the entire TODO list from todo.g3.md file in workspace directory
   - Format: {\"tool\": \"todo_read\", \"args\": {}}
   - Example: {\"tool\": \"todo_read\", \"args\": {}}
 
-- **todo_write**: Write or overwrite the entire TODO list (WARNING: overwrites completely, always read first)
+- **todo_write**: Write or overwrite the entire todo.g3.md file (WARNING: overwrites completely, always read first)
   - Format: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Task 1\\n- [ ] Task 2\"}}
   - Example: {\"tool\": \"todo_write\", \"args\": {\"content\": \"- [ ] Implement feature\\n  - [ ] Write tests\\n  - [ ] Run tests\"}}
 
@@ -2029,7 +2040,7 @@ If you can complete it with 1-2 tool calls, skip TODO.
             },
             Tool {
                 name: "todo_read".to_string(),
-                description: "Read your current TODO list to see what tasks are planned and their status. Call this at the start of multi-step tasks to check for existing plans, and during execution to review progress before updating.".to_string(),
+                description: "Read your current TODO list from todo.g3.md file in the workspace directory. Shows what tasks are planned and their status. Call this at the start of multi-step tasks to check for existing plans, and during execution to review progress before updating. TODO lists persist across g3 sessions.".to_string(),
                 input_schema: json!({
                     "type": "object",
                     "properties": {},
@@ -2038,7 +2049,7 @@ If you can complete it with 1-2 tool calls, skip TODO.
             },
             Tool {
                 name: "todo_write".to_string(),
-                description: "Create or update your TODO list with a complete task plan. Use markdown checkboxes: - [ ] for pending, - [x] for complete. This tool replaces the entire list, so always call todo_read first to preserve existing content. Essential for multi-step tasks.".to_string(),
+                description: "Create or update your TODO list in todo.g3.md file with a complete task plan. Use markdown checkboxes: - [ ] for pending, - [x] for complete. This tool replaces the entire file content, so always call todo_read first to preserve existing content. Essential for multi-step tasks. Changes persist across g3 sessions.".to_string(),
                 input_schema: json!({
                     "type": "object",
                     "properties": {
@@ -3394,7 +3405,7 @@ If you can complete it with 1-2 tool calls, skip TODO.
         Ok(TaskResult::new(final_response, self.context_window.clone()))
     }
 
-    async fn execute_tool(&self, tool_call: &ToolCall) -> Result<String> {
+    pub async fn execute_tool(&self, tool_call: &ToolCall) -> Result<String> {
         debug!("=== EXECUTING TOOL ===");
         debug!("Tool name: {}", tool_call.tool);
         debug!("Tool args (raw): {:?}", tool_call.args);
@@ -3922,11 +3933,29 @@ If you can complete it with 1-2 tool calls, skip TODO.
             }
             "todo_read" => {
                 debug!("Processing todo_read tool call");
-                let content = self.todo_content.read().await;
-                if content.is_empty() {
-                    Ok("📝 TODO list is empty".to_string())
+                // Read from todo.g3.md file in current workspace directory
+                let todo_path = std::env::current_dir()?.join("todo.g3.md");
+                
+                if !todo_path.exists() {
+                    // Also update in-memory content to stay in sync
+                    let mut todo = self.todo_content.write().await;
+                    *todo = String::new();
+                    Ok("📝 TODO list is empty (no todo.g3.md file found)".to_string())
                 } else {
-                    Ok(format!("📝 TODO list:\n{}", content.as_str()))
+                    match std::fs::read_to_string(&todo_path) {
+                        Ok(content) => {
+                            let reply = if content.trim().is_empty() {
+                                "📝 TODO list is empty".to_string()
+                            } else {
+                                format!("📝 TODO list:\n{}", content)
+                            };
+                            // Reply is built first so the file text can be moved (not cloned) into the in-memory copy
+                            *self.todo_content.write().await = content;
+                            Ok(reply)
+                        }
+                        // Read failures are returned as tool output and name the file that was actually read
+                        Err(e) => Ok(format!("❌ Failed to read todo.g3.md: {}", e)),
+                    }
                 }
             }
             "todo_write" => {
@@ -3943,9 +3972,18 @@ If you can complete it with 1-2 tool calls, skip TODO.
                             return Ok(format!("❌ TODO list too large: {} chars (max: {})", char_count, max_chars));
                         }
 
-                        let mut todo = self.todo_content.write().await;
-                        *todo = content_str.to_string();
-                        Ok(format!("✅ TODO list updated ({} chars)", char_count))
+                        // Write to todo.g3.md file in current workspace directory
+                        let todo_path = std::env::current_dir()?.join("todo.g3.md");
+                        
+                        match std::fs::write(&todo_path, content_str) {
+                            Ok(_) => {
+                                // Also update in-memory content to stay in sync
+                                let mut todo = self.todo_content.write().await;
+                                *todo = content_str.to_string();
+                                Ok(format!("✅ TODO list updated ({} chars) and saved to todo.g3.md", char_count))
+                            }
+                            Err(e) => Ok(format!("❌ Failed to write todo.g3.md: {}", e)),
+                        }
                     } else {
                         Ok("❌ Invalid content argument".to_string())
                     }
diff --git a/crates/g3-core/tests/test_todo_context_thinning.rs b/crates/g3-core/tests/test_todo_context_thinning.rs
new file mode 100644
index 0000000..016e3e6
--- /dev/null
+++ b/crates/g3-core/tests/test_todo_context_thinning.rs
@@ -0,0 +1,214 @@
+use g3_core::ContextWindow;
+use g3_providers::{Message, MessageRole};
+use serial_test::serial;
+
+#[test]
+#[serial]
+fn test_todo_read_results_not_thinned() {
+    let mut context = ContextWindow::new(10000);
+    
+    // Add a todo_read tool call
+    context.add_message(Message {
+        role: MessageRole::Assistant,
+        content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
+    });
+    
+    // Add a large TODO result (> 500 chars)
+    let large_todo_result = format!(
+        "Tool result: 📝 TODO list:\n{}",
+        "- [ ] Task with long description\n".repeat(50)
+    );
+    context.add_message(Message {
+        role: MessageRole::User,
+        content: large_todo_result.clone(),
+    });
+    
+    // Add more messages to ensure we have enough for "first third" logic
+    for i in 0..6 {
+        context.add_message(Message {
+            role: MessageRole::Assistant,
+            content: format!("Response {}", i),
+        });
+    }
+    
+    // Trigger thinning at 50%
+    context.used_tokens = 5000;
+    let (summary, _chars_saved) = context.thin_context();
+    
+    println!("Thinning summary: {}", summary);
+    
+    // Check that the TODO result was NOT thinned
+    let first_third_end = context.conversation_history.len() / 3;
+    for i in 0..first_third_end {
+        if let Some(msg) = context.conversation_history.get(i) {
+            if matches!(msg.role, MessageRole::User) && msg.content.starts_with("Tool result:") {
+                // TODO result should still be large (not thinned)
+                assert!(
+                    msg.content.len() > 500,
+                    "TODO result at index {} should not have been thinned. Content: {}",
+                    i,
+                    msg.content
+                );
+                assert!(
+                    msg.content.contains("📝 TODO list:"),
+                    "TODO result should still contain full content"
+                );
+            }
+        }
+    }
+}
+
+#[test]
+#[serial]
+fn test_todo_write_results_not_thinned() {
+    let mut context = ContextWindow::new(10000);
+    
+    // Add a todo_write tool call whose content argument is 100 repeated task lines
+    let large_content = "- [ ] Task\n".repeat(100);
+    context.add_message(Message {
+        role: MessageRole::Assistant,
+        content: format!(r#"{{"tool": "todo_write", "args": {{"content": "{}"}}}}"#, large_content),
+    });
+    
+    // Add the todo_write confirmation result (a short message that mentions todo.g3.md)
+    let large_todo_result = format!(
+        "Tool result: ✅ TODO list updated ({} chars) and saved to todo.g3.md",
+        large_content.len()
+    );
+    context.add_message(Message {
+        role: MessageRole::User,
+        content: large_todo_result.clone(),
+    });
+    
+    // Add six more assistant messages (8 total, so the first third is indices 0 and 1)
+    for i in 0..6 {
+        context.add_message(Message {
+            role: MessageRole::Assistant,
+            content: format!("Response {}", i),
+        });
+    }
+    
+    // Report 5000 of 10000 tokens used (50%), then run thinning
+    context.used_tokens = 5000;
+    let (summary, _chars_saved) = context.thin_context();
+    
+    println!("Thinning summary: {}", summary);
+    
+    // Check every "Tool result:" user message in the first third of the history
+    let first_third_end = context.conversation_history.len() / 3;
+    for i in 0..first_third_end {
+        if let Some(msg) = context.conversation_history.get(i) {
+            if matches!(msg.role, MessageRole::User) && msg.content.starts_with("Tool result:") {
+                // Must not have been replaced with a "Tool result saved to" file reference
+                assert!(
+                    !msg.content.contains("Tool result saved to"),
+                    "TODO write result should not be thinned to file reference"
+                );
+                assert!(
+                    msg.content.contains("todo.g3.md"),
+                    "TODO write result should still contain todo.g3.md reference"
+                );
+            }
+        }
+    }
+}
+
+#[test]
+#[serial]
+fn test_non_todo_results_still_thinned() {
+    let mut context = ContextWindow::new(10000);
+    
+    // Add a non-TODO tool call (e.g., read_file)
+    context.add_message(Message {
+        role: MessageRole::Assistant,
+        content: r#"{"tool": "read_file", "args": {"file_path": "test.txt"}}"#.to_string(),
+    });
+    
+    // Add a large read_file result (> 500 chars)
+    let large_result = format!("Tool result: {}", "x".repeat(1500));
+    context.add_message(Message {
+        role: MessageRole::User,
+        content: large_result,
+    });
+    
+    // Add more messages
+    for i in 0..6 {
+        context.add_message(Message {
+            role: MessageRole::Assistant,
+            content: format!("Response {}", i),
+        });
+    }
+    
+    // Trigger thinning at 50%
+    context.used_tokens = 5000;
+    let (summary, _chars_saved) = context.thin_context();
+    
+    println!("Thinning summary: {}", summary);
+    
+    // Should have thinned the non-TODO result
+    assert!(
+        summary.contains("1 tool result") || summary.contains("chars saved"),
+        "Non-TODO results should be thinned"
+    );
+    
+    // Check that the result was actually thinned
+    let first_third_end = context.conversation_history.len() / 3;
+    for i in 0..first_third_end {
+        if let Some(msg) = context.conversation_history.get(i) {
+            if matches!(msg.role, MessageRole::User) && msg.content.starts_with("Tool result:") {
+                // Should be replaced with file reference
+                assert!(
+                    msg.content.contains("Tool result saved to") || msg.content.len() < 1000,
+                    "Non-TODO result should have been thinned"
+                );
+            }
+        }
+    }
+}
+
+#[test]
+#[serial]
+fn test_todo_read_with_spaces_in_tool_name() {
+    let mut context = ContextWindow::new(10000);
+    
+    // NOTE(review): this payload is identical to the one in test_todo_read_results_not_thinned; no spacing variation is exercised
+    context.add_message(Message {
+        role: MessageRole::Assistant,
+        content: r#"{"tool": "todo_read", "args": {}}"#.to_string(),
+    });
+    
+    // Add a TODO result made of 50 repeated task lines (longer than 500 bytes)
+    let large_todo_result = format!(
+        "Tool result: 📝 TODO list:\n{}",
+        "- [ ] Task\n".repeat(50)
+    );
+    context.add_message(Message {
+        role: MessageRole::User,
+        content: large_todo_result.clone(),
+    });
+    
+    // Add six more assistant messages (8 total, so the first third is indices 0 and 1)
+    for i in 0..6 {
+        context.add_message(Message {
+            role: MessageRole::Assistant,
+            content: format!("Response {}", i),
+        });
+    }
+    
+    // Report 5000 of 10000 tokens used, then run thinning
+    context.used_tokens = 5000;
+    let (_summary, _chars_saved) = context.thin_context();
+    
+    // Every "Tool result:" user message in the first third must still be longer than 500 bytes
+    let first_third_end = context.conversation_history.len() / 3;
+    for i in 0..first_third_end {
+        if let Some(msg) = context.conversation_history.get(i) {
+            if matches!(msg.role, MessageRole::User) && msg.content.starts_with("Tool result:") {
+                assert!(
+                    msg.content.len() > 500,
+                    "TODO result should not be thinned even with space in JSON"
+                );
+            }
+        }
+    }
+}
diff --git a/crates/g3-core/tests/test_todo_persistence.rs b/crates/g3-core/tests/test_todo_persistence.rs
new file mode 100644
index 0000000..f43eed3
--- /dev/null
+++ b/crates/g3-core/tests/test_todo_persistence.rs
@@ -0,0 +1,331 @@
+use g3_core::Agent;
+use g3_core::ui_writer::NullUiWriter;
+use serial_test::serial;
+use std::fs;
+use std::path::PathBuf;
+use tempfile::TempDir;
+
+
+/// Helper that makes `temp_dir` the process-wide current directory and builds an agent there (panics on failure)
+async fn create_test_agent_in_dir(temp_dir: &TempDir) -> Agent<NullUiWriter> {
+    // The working directory is process-global state; all tests in this file that call this helper are #[serial]
+    std::env::set_current_dir(temp_dir.path()).unwrap();
+    
+    // Default configuration paired with the NullUiWriter
+    let config = g3_config::Config::default();
+    let ui_writer = NullUiWriter;
+    
+    Agent::new(config, ui_writer).await.unwrap()
+}
+
+/// Helper to get todo.g3.md path in temp directory
+fn get_todo_path(temp_dir: &TempDir) -> PathBuf {
+    temp_dir.path().join("todo.g3.md")
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_write_creates_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let todo_path = get_todo_path(&temp_dir);
+    
+    // Initially, todo.g3.md should not exist
+    assert!(!todo_path.exists(), "todo.g3.md should not exist initially");
+    
+    // Create a tool call to write TODO
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_write".to_string(),
+        args: serde_json::json!({
+            "content": "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3"
+        }),
+    };
+    
+    // Execute the tool
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+    
+    // Should report success
+    assert!(result.contains("✅"), "Should report success: {}", result);
+    assert!(result.contains("todo.g3.md"), "Should mention todo.g3.md: {}", result);
+    
+    // File should now exist
+    assert!(todo_path.exists(), "todo.g3.md should exist after write");
+    
+    // File should contain the correct content
+    let content = fs::read_to_string(&todo_path).unwrap();
+    assert_eq!(content, "- [ ] Task 1\n- [ ] Task 2\n- [x] Task 3");
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_read_from_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let todo_path = get_todo_path(&temp_dir);
+    
+    // Pre-create a todo.g3.md file
+    let test_content = "# My TODO\n\n- [ ] First task\n- [x] Completed task";
+    fs::write(&todo_path, test_content).unwrap();
+    
+    // Create agent (should load from file)
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    
+    // Create a tool call to read TODO
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    
+    // Execute the tool
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+    
+    // Should contain the TODO content
+    assert!(result.contains("📝 TODO list:"), "Should have TODO list header: {}", result);
+    assert!(result.contains("First task"), "Should contain first task: {}", result);
+    assert!(result.contains("Completed task"), "Should contain completed task: {}", result);
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_read_empty_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    
+    // Create a tool call to read TODO (file doesn't exist)
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    
+    // Execute the tool
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+    
+    // Should report empty
+    assert!(result.contains("empty"), "Should report empty: {}", result);
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_persistence_across_agents() {
+    let temp_dir = TempDir::new().unwrap();
+    let todo_path = get_todo_path(&temp_dir);
+    
+    // Agent 1: Write TODO
+    {
+        let agent = create_test_agent_in_dir(&temp_dir).await;
+        let tool_call = g3_core::ToolCall {
+            tool: "todo_write".to_string(),
+            args: serde_json::json!({
+                "content": "- [ ] Persistent task\n- [x] Done task"
+            }),
+        };
+        agent.execute_tool(&tool_call).await.unwrap();
+    }
+    
+    // Verify file exists
+    assert!(todo_path.exists(), "todo.g3.md should persist after agent drops");
+    
+    // Agent 2: Read TODO (new agent instance)
+    {
+        let agent = create_test_agent_in_dir(&temp_dir).await;
+        let tool_call = g3_core::ToolCall {
+            tool: "todo_read".to_string(),
+            args: serde_json::json!({}),
+        };
+        let result = agent.execute_tool(&tool_call).await.unwrap();
+        
+        // Should read the persisted content
+        assert!(result.contains("Persistent task"), "Should read persisted task: {}", result);
+        assert!(result.contains("Done task"), "Should read done task: {}", result);
+    }
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_update_preserves_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let todo_path = get_todo_path(&temp_dir);
+    
+    // Write initial TODO
+    let write_call = g3_core::ToolCall {
+        tool: "todo_write".to_string(),
+        args: serde_json::json!({
+            "content": "- [ ] Task 1\n- [ ] Task 2"
+        }),
+    };
+    agent.execute_tool(&write_call).await.unwrap();
+    
+    // Update TODO
+    let update_call = g3_core::ToolCall {
+        tool: "todo_write".to_string(),
+        args: serde_json::json!({
+            "content": "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3"
+        }),
+    };
+    agent.execute_tool(&update_call).await.unwrap();
+    
+    // Verify file has updated content
+    let content = fs::read_to_string(&todo_path).unwrap();
+    assert_eq!(content, "- [x] Task 1\n- [ ] Task 2\n- [ ] Task 3");
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_handles_large_content() {
+    let temp_dir = TempDir::new().unwrap();
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let todo_path = get_todo_path(&temp_dir);
+    
+    // Create a large TODO (but under the 50k limit)
+    let mut large_content = String::from("# Large TODO\n\n");
+    for i in 0..100 {
+        large_content.push_str(&format!("- [ ] Task {} with a long description that exceeds normal line lengths\n", i));
+    }
+    
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_write".to_string(),
+        args: serde_json::json!({
+            "content": large_content
+        }),
+    };
+    
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+    assert!(result.contains("✅"), "Should handle large content: {}", result);
+    
+    // Verify file contains all content
+    let file_content = fs::read_to_string(&todo_path).unwrap();
+    assert_eq!(file_content, large_content);
+    assert!(file_content.contains("Task 99"), "Should contain all tasks");
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_respects_size_limit() {
+    let temp_dir = TempDir::new().unwrap();
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    
+    // Create content that exceeds the default 50k limit
+    let huge_content = "x".repeat(60_000);
+    
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_write".to_string(),
+        args: serde_json::json!({
+            "content": huge_content
+        }),
+    };
+    
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+    
+    // Should reject content that's too large
+    assert!(result.contains("❌"), "Should reject oversized content: {}", result);
+    assert!(result.contains("too large"), "Should mention size limit: {}", result);
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_agent_initialization_loads_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let todo_path = get_todo_path(&temp_dir);
+    
+    // Pre-create todo.g3.md before agent initialization
+    let initial_content = "- [ ] Pre-existing task";
+    fs::write(&todo_path, initial_content).unwrap();
+    
+    // Create agent - should load the file during initialization
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    
+    // Read TODO - should return the pre-existing content
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    
+    let result = agent.execute_tool(&tool_call).await.unwrap();
+    assert!(result.contains("Pre-existing task"), "Should load file on init: {}", result);
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_handles_unicode_content() {
+    let temp_dir = TempDir::new().unwrap();
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let todo_path = get_todo_path(&temp_dir);
+    
+    // Create TODO with unicode characters
+    let unicode_content = "- [ ] 日本語タスク\n- [ ] Émoji task 🚀\n- [x] Ελληνικά task";
+    
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_write".to_string(),
+        args: serde_json::json!({
+            "content": unicode_content
+        }),
+    };
+    
+    agent.execute_tool(&tool_call).await.unwrap();
+    
+    // Verify file preserves unicode
+    let file_content = fs::read_to_string(&todo_path).unwrap();
+    assert_eq!(file_content, unicode_content);
+    
+    // Verify reading back works
+    let read_call = g3_core::ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    
+    let result = agent.execute_tool(&read_call).await.unwrap();
+    assert!(result.contains("日本語"), "Should preserve Japanese: {}", result);
+    assert!(result.contains("🚀"), "Should preserve emoji: {}", result);
+    assert!(result.contains("Ελληνικά"), "Should preserve Greek: {}", result);
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_empty_content_creates_empty_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    let todo_path = get_todo_path(&temp_dir);
+    
+    // Write empty TODO
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_write".to_string(),
+        args: serde_json::json!({
+            "content": ""
+        }),
+    };
+    
+    agent.execute_tool(&tool_call).await.unwrap();
+    
+    // File should exist but be empty
+    assert!(todo_path.exists(), "Empty todo.g3.md should create file");
+    let content = fs::read_to_string(&todo_path).unwrap();
+    assert_eq!(content, "");
+}
+
+#[tokio::test]
+#[serial]
+async fn test_todo_whitespace_only_content() {
+    let temp_dir = TempDir::new().unwrap();
+    let agent = create_test_agent_in_dir(&temp_dir).await;
+    
+    // Write whitespace-only TODO
+    let tool_call = g3_core::ToolCall {
+        tool: "todo_write".to_string(),
+        args: serde_json::json!({
+            "content": "   \n\n  \t  \n"
+        }),
+    };
+    
+    agent.execute_tool(&tool_call).await.unwrap();
+    
+    // Read it back
+    let read_call = g3_core::ToolCall {
+        tool: "todo_read".to_string(),
+        args: serde_json::json!({}),
+    };
+    
+    let result = agent.execute_tool(&read_call).await.unwrap();
+    
+    // Should report as empty (whitespace is trimmed)
+    assert!(result.contains("empty"), "Whitespace-only should be empty: {}", result);
+}