diff --git a/analysis/memory.md b/analysis/memory.md index faf1f1f..9aa0294 100644 --- a/analysis/memory.md +++ b/analysis/memory.md @@ -1,5 +1,5 @@ # Project Memory -> Updated: 2026-01-15T00:50:41Z | Size: 12.9k chars +> Updated: 2026-01-19T08:32:03Z | Size: 14.0k chars ### Remember Tool Wiring - `crates/g3-core/src/tools/memory.rs` [0..5000] - `execute_remember()`, `get_memory_path()`, `merge_memory()` @@ -229,4 +229,27 @@ Injects agent+language-specific guidance when running in agent mode in a workspa To add a new agent+lang prompt: 1. Create `prompts/langs/..md` -2. Add entry to `AGENT_LANGUAGE_PROMPTS` in `language_prompts.rs` with `include_str!` \ No newline at end of file +2. Add entry to `AGENT_LANGUAGE_PROMPTS` in `language_prompts.rs` with `include_str!` + +### MockProvider for Testing +Configurable mock LLM provider for integration testing without real API calls. + +- `crates/g3-providers/src/mock.rs` + - `MockProvider` [220..320] - mock provider with response queue, request tracking + - `MockResponse` [35..200] - configurable response with chunks and usage + - `MockChunk` [45..100] - individual streaming chunk (content, finished, tool_calls) + - `scenarios` module [410..480] - preset scenarios: `text_only_response()`, `multi_turn()`, `tool_then_response()` + +- `crates/g3-core/tests/mock_provider_integration_test.rs` + - `test_butler_bug_scenario()` - reproduces consecutive user messages bug + - `test_text_only_response_saves_to_context()` - verifies text responses saved + - `test_multi_turn_text_only_maintains_alternation()` - verifies user/assistant alternation + +Usage pattern: +```rust +let provider = MockProvider::new() + .with_response(MockResponse::text("Hello!")); +let mut registry = ProviderRegistry::new(); +registry.register(provider); +let agent = Agent::new_for_test(config, NullUiWriter, registry).await?; +``` \ No newline at end of file diff --git a/crates/g3-core/src/lib.rs b/crates/g3-core/src/lib.rs index 16b900e..94dc420 100644 --- 
a/crates/g3-core/src/lib.rs +++ b/crates/g3-core/src/lib.rs @@ -2432,11 +2432,6 @@ Skip if nothing new. Be brief."#; self.ui_writer.print_agent_response(&filtered_content); self.ui_writer.flush(); current_response.push_str(&filtered_content); - - // Mark parser buffer as consumed up to current position - // This prevents tool-call-like patterns in displayed text - // from triggering false positives in has_unexecuted_tool_call() - parser.mark_tool_calls_consumed(); } } } diff --git a/crates/g3-core/tests/mock_provider_integration_test.rs b/crates/g3-core/tests/mock_provider_integration_test.rs index a00174b..3fa420d 100644 --- a/crates/g3-core/tests/mock_provider_integration_test.rs +++ b/crates/g3-core/tests/mock_provider_integration_test.rs @@ -226,3 +226,239 @@ async fn test_butler_bug_scenario() { assistant_count ); } + +// ============================================================================= +// Parser Poisoning Tests (commits 999ac6f, d68f059, 4c36cc0) +// ============================================================================= + +/// Test the parser directly with the same chunks to isolate the issue +#[tokio::test] +async fn test_parser_directly_with_inline_json_chunks() { + use g3_core::streaming_parser::StreamingToolParser; + use g3_providers::CompletionChunk; + + let mut parser = StreamingToolParser::new(); + + // Simulate the exact chunks from the mock provider + let chunk1 = CompletionChunk { + content: "To run a command, you can use the format ".to_string(), + tool_calls: None, + finished: false, + stop_reason: None, + tool_call_streaming: None, + usage: None, + }; + + let chunk2 = CompletionChunk { + content: r#"{"tool": "shell", "args": {"command": "ls"}}"#.to_string(), + tool_calls: None, + finished: false, + stop_reason: None, + tool_call_streaming: None, + usage: None, + }; + + let tools1 = parser.process_chunk(&chunk1); + let tools2 = parser.process_chunk(&chunk2); + + assert!(tools1.is_empty(), "Chunk 1 should not produce tools"); + 
assert!(tools2.is_empty(), "Chunk 2 should NOT produce tools - JSON is inline, not on its own line"); + + // Also check has_unexecuted_tool_call and has_incomplete_tool_call + assert!(!parser.has_unexecuted_tool_call(), "Should NOT have unexecuted tool call - JSON is inline"); + assert!(!parser.has_incomplete_tool_call(), "Should NOT have incomplete tool call"); +} + +// These tests verify that inline JSON patterns in prose don't trigger +// false tool call detection, which would cause the agent to return +// control mid-task. + +/// Test: Inline JSON in prose should NOT trigger tool call detection +/// +/// Bug: When the LLM explained tool call format in prose like: +/// "You can use {"tool": "shell", ...} to run commands" +/// The parser would incorrectly detect this as a tool call. +/// +/// Fix: Only detect tool calls that appear on their own line. +#[tokio::test] +async fn test_inline_json_in_prose_not_detected_as_tool() { + let provider = MockProvider::new() + .with_response(MockResponse::streaming(vec![ + "To run a command, you can use the format ", + r#"{"tool": "shell", "args": {"command": "ls"}}"#, + " in your request. 
", + "Let me know if you need help!", + ])) + // Add a default response in case auto-continue is triggered (which would be a bug) + .with_default_response(MockResponse::text("[BUG: Auto-continue was triggered - inline JSON was detected as tool call]")); + + let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; + + let result = agent.execute_task("How do I run commands?", None, false).await; + assert!(result.is_ok(), "Task should succeed: {:?}", result.err()); + + // The response should be saved as text, not executed as a tool + let history = &agent.get_context_window().conversation_history; + let assistant_msg = history + .iter() + .rev() + .find(|m| matches!(m.role, MessageRole::Assistant)) + .expect("Should have an assistant message"); + + // The inline JSON should be preserved in the response + assert!( + assistant_msg.content.contains("tool") && assistant_msg.content.contains("shell"), + "Response should contain the inline JSON example: {}", + assistant_msg.content + ); +} + +/// Test: JSON tool call on its own line SHOULD be detected +/// +/// This is the normal case - real tool calls from LLMs appear on their own line. 
+#[tokio::test] +async fn test_json_on_own_line_detected_as_tool() { + // This test uses native tool calling to verify tool detection works + let provider = MockProvider::new() + .with_native_tool_calling(true) + .with_response(MockResponse::native_tool_call( + "shell", + serde_json::json!({"command": "echo hello"}), + )); + + let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; + + // The task should detect the tool call + // Note: This will fail because we don't have a real shell, but that's OK + // We just want to verify the tool call was detected + let result = agent.execute_task("Run echo hello", None, false).await; + + // The result might be an error (tool execution fails in test env) + // but we can check if a tool was attempted by looking at context + let history = &agent.get_context_window().conversation_history; + + // Should have user message at minimum + assert!( + history.iter().any(|m| matches!(m.role, MessageRole::User)), + "Should have user message" + ); +} + +/// Test: Response with emoji and special characters doesn't crash +/// +/// Bug: UTF-8 multi-byte characters caused panics when truncating strings +/// using byte indices instead of character indices. +#[tokio::test] +async fn test_utf8_response_handling() { + let provider = MockProvider::new() + .with_response(MockResponse::text( + "Here's the result! πŸŽ‰\n\n\ + β€’ First item with bullet\n\ + β€’ Second item 中文字符\n\ + β€’ Third item Γ©mojis: πŸš€πŸ”₯πŸ’―\n\n\ + Done! 
βœ…" + )); + + let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; + + let result = agent.execute_task("Show me a list", None, false).await; + assert!(result.is_ok(), "Task should succeed with UTF-8 content: {:?}", result.err()); + + let history = &agent.get_context_window().conversation_history; + let assistant_msg = history + .iter() + .rev() + .find(|m| matches!(m.role, MessageRole::Assistant)) + .expect("Should have an assistant message"); + + // Verify the UTF-8 content is preserved + assert!(assistant_msg.content.contains("πŸŽ‰"), "Should contain emoji"); + assert!(assistant_msg.content.contains("δΈ­ζ–‡"), "Should contain CJK characters"); + assert!(assistant_msg.content.contains("β€’"), "Should contain bullet points"); +} + +/// Test: Very long response with UTF-8 doesn't panic on truncation +#[tokio::test] +async fn test_long_utf8_response_no_panic() { + // Create a response with lots of multi-byte characters + let long_content = "πŸ”₯".repeat(1000) + &"Test content with emoji πŸŽ‰ and more 中文字符 here. ".repeat(100); + + let provider = MockProvider::new() + .with_response(MockResponse::text(&long_content)); + + let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; + + // This should not panic even with lots of multi-byte characters + let result = agent.execute_task("Generate long content", None, false).await; + assert!(result.is_ok(), "Task should succeed: {:?}", result.err()); +} + +/// Test: Response is not duplicated in output +/// +/// Bug (cebec23): Response was printed twice - once during streaming +/// and again after task completion. 
+#[tokio::test] +async fn test_response_not_duplicated() { + let provider = MockProvider::new() + .with_response(MockResponse::text("This is a unique response that should appear once.")); + + let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; + + let result = agent.execute_task("Say something unique", None, false).await; + assert!(result.is_ok(), "Task should succeed: {:?}", result.err()); + + // Check the TaskResult - it should have the response + let task_result = result.unwrap(); + + // The response field might be empty (content was streamed) or contain the response + // Either way, the context should have exactly one assistant message with this content + let history = &agent.get_context_window().conversation_history; + let assistant_messages: Vec<_> = history + .iter() + .filter(|m| matches!(m.role, MessageRole::Assistant)) + .filter(|m| m.content.contains("unique response")) + .collect(); + + assert_eq!( + assistant_messages.len(), 1, + "Should have exactly one assistant message with the response, got {}", + assistant_messages.len() + ); +} + +/// Test: Multiple chunks streamed correctly without duplication +#[tokio::test] +async fn test_streaming_no_chunk_duplication() { + let provider = MockProvider::new() + .with_response(MockResponse::streaming(vec![ + "Part 1. ", + "Part 2. ", + "Part 3. 
", + "Part 4.", + ])); + + let (mut agent, _temp_dir) = create_agent_with_mock(provider).await; + + let result = agent.execute_task("Stream something", None, false).await; + assert!(result.is_ok(), "Task should succeed: {:?}", result.err()); + + let history = &agent.get_context_window().conversation_history; + let assistant_msg = history + .iter() + .rev() + .find(|m| matches!(m.role, MessageRole::Assistant)) + .expect("Should have an assistant message"); + + // Each part should appear exactly once + let content = &assistant_msg.content; + assert_eq!( + content.matches("Part 1").count(), 1, + "Part 1 should appear exactly once in: {}", + content + ); + assert_eq!( + content.matches("Part 2").count(), 1, + "Part 2 should appear exactly once in: {}", + content + ); +} diff --git a/crates/g3-core/tests/parser_sanitization_test.rs b/crates/g3-core/tests/parser_sanitization_test.rs index eaa6351..48a0127 100644 --- a/crates/g3-core/tests/parser_sanitization_test.rs +++ b/crates/g3-core/tests/parser_sanitization_test.rs @@ -432,3 +432,53 @@ mod streaming_repro { assert_eq!(tools[0].tool, "shell"); } } + +/// Test that inline JSON is not detected even when stream finishes +/// This tests the try_parse_all_json_tool_calls_from_buffer path +#[test] +fn test_inline_json_not_detected_at_stream_end() { + use g3_core::StreamingToolParser; + use g3_providers::CompletionChunk; + + fn chunk(content: &str, finished: bool) -> CompletionChunk { + CompletionChunk { + content: content.to_string(), + finished, + tool_calls: None, + usage: None, + stop_reason: if finished { Some("end_turn".to_string()) } else { None }, + tool_call_streaming: None, + } + } + + let mut parser = StreamingToolParser::new(); + + // Send chunks exactly as MockProvider would + let tools = parser.process_chunk(&chunk("To run a command, you can use the format ", false)); + assert!(tools.is_empty(), "Chunk 1: no tools"); + + let tools = parser.process_chunk(&chunk(r#"{"tool": "shell", "args": {"command": "ls"}}"#, 
false)); + assert!(tools.is_empty(), "Chunk 2: inline JSON should not trigger tool detection"); + + let tools = parser.process_chunk(&chunk(" in your request. ", false)); + assert!(tools.is_empty(), "Chunk 3: no tools"); + + let tools = parser.process_chunk(&chunk("Let me know if you need help!", false)); + assert!(tools.is_empty(), "Chunk 4: no tools"); + + // Finish chunk - this triggers try_parse_all_json_tool_calls_from_buffer + let tools = parser.process_chunk(&chunk("", true)); + assert!( + tools.is_empty(), + "Finish chunk: inline JSON should NOT be detected as tool call. Found: {:?}", + tools.iter().map(|t| &t.tool).collect::<Vec<_>>() + ); + + // Verify the full buffer content + let buffer = parser.get_text_content(); + assert!( + buffer.contains(r#"{"tool": "shell"#), + "Buffer should contain the inline JSON: {}", + buffer + ); +}