diff --git a/crates/g3-core/src/context_window.rs b/crates/g3-core/src/context_window.rs
index e0c7cb3..0b2d677 100644
--- a/crates/g3-core/src/context_window.rs
+++ b/crates/g3-core/src/context_window.rs
@@ -13,23 +13,22 @@ use tracing::{debug, warn};
 use crate::paths::get_thinned_dir;
 use crate::ToolCall;
 
+// ============================================================================
+// Types
+// ============================================================================
+
 /// Result of a context thinning operation.
 /// Contains semantic data for the UI layer to format.
 #[derive(Debug, Clone)]
 pub struct ThinResult {
-    /// Scope of the thinning operation
     pub scope: ThinScope,
-    /// Context percentage before thinning
     pub before_percentage: u32,
-    /// Context percentage after thinning
     pub after_percentage: u32,
     /// Number of tool result messages that were thinned
     pub leaned_count: usize,
     /// Number of tool calls in assistant messages that were thinned
     pub tool_call_leaned_count: usize,
-    /// Total characters saved
     pub chars_saved: usize,
-    /// Whether any changes were made
     pub had_changes: bool,
 }
 
@@ -58,20 +57,29 @@ impl ThinScope {
     }
 }
 
-/// Represents a modification to be applied to a message
+/// Represents a modification to be applied to a message during thinning
 #[derive(Debug)]
 enum ThinModification {
-    /// Replace the entire message content
-    ReplaceContent { index: usize, new_content: String, chars_saved: usize },
+    ReplaceContent {
+        index: usize,
+        new_content: String,
+        chars_saved: usize,
+    },
 }
 
+// ============================================================================
+// ContextWindow
+// ============================================================================
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ContextWindow {
     pub used_tokens: u32,
     pub total_tokens: u32,
-    pub cumulative_tokens: u32, // Track cumulative tokens across all interactions
+    /// Track cumulative tokens across all interactions
+    pub cumulative_tokens: u32,
     pub conversation_history: Vec<Message>,
-    pub last_thinning_percentage: u32, // Track the last percentage at which we thinned
+    /// Track the last percentage at which we thinned
+    pub last_thinning_percentage: u32,
 }
 
 impl ContextWindow {
@@ -85,19 +93,21 @@ impl ContextWindow {
         }
     }
 
+    // ========================================================================
+    // Message Management
+    // ========================================================================
+
     pub fn add_message(&mut self, message: Message) {
         self.add_message_with_tokens(message, None);
     }
 
     /// Add a message with optional token count from the provider
     pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option<u32>) {
-        // Skip messages with empty content to avoid API errors
         if message.content.trim().is_empty() {
             warn!("Skipping empty message to avoid API error");
             return;
         }
 
-        // Use provided token count if available, otherwise estimate
         let token_count = tokens.unwrap_or_else(|| Self::estimate_tokens(&message.content));
         self.used_tokens += token_count;
         self.cumulative_tokens += token_count;
@@ -109,66 +119,9 @@ impl ContextWindow {
         );
     }
 
-    /// Update token usage from provider response
-    /// NOTE: This only updates cumulative_tokens (total API usage tracking).
-    /// It does NOT update used_tokens because:
-    /// 1. prompt_tokens represents the ENTIRE context sent to API (already tracked via add_message)
-    /// 2. completion_tokens will be tracked when the assistant message is added via add_message
-    /// Adding total_tokens here would cause double/triple counting and break the 80% threshold check.
-    pub fn update_usage_from_response(&mut self, usage: &Usage) {
-        // Only update cumulative tokens for API usage tracking
-        // Do NOT update used_tokens - that's tracked via add_message to avoid double counting
-        self.cumulative_tokens += usage.total_tokens;
-
-        debug!(
-            "Updated cumulative tokens: {} (used: {}/{}, cumulative: {})",
-            usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
-        );
-    }
-
-    /// More accurate token estimation
-    pub fn estimate_tokens(text: &str) -> u32 {
-        // Better heuristic:
-        // - Average English text: ~4 characters per token
-        // - Code/JSON: ~3 characters per token (more symbols)
-        // - Add 10% buffer for safety
-        let base_estimate = if text.contains("{") || text.contains("```") || text.contains("fn ") {
-            (text.len() as f32 / 3.0).ceil() as u32 // Code/JSON
-        } else {
-            (text.len() as f32 / 4.0).ceil() as u32 // Regular text
-        };
-        (base_estimate as f32 * 1.1).ceil() as u32 // Add 10% buffer
-    }
-
-    pub fn update_usage(&mut self, usage: &Usage) {
-        // Deprecated: Use update_usage_from_response instead
-        self.update_usage_from_response(usage);
-    }
-
-    /// Update cumulative token usage (for streaming) when no provider usage data is available
-    /// NOTE: This only updates cumulative_tokens, not used_tokens.
-    /// The assistant message will be added via add_message which tracks used_tokens.
-    pub fn add_streaming_tokens(&mut self, new_tokens: u32) {
-        // Only update cumulative tokens - used_tokens is tracked via add_message
-        self.cumulative_tokens += new_tokens;
-        debug!(
-            "Updated cumulative streaming tokens: {} (used: {}/{}, cumulative: {})",
-            new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
-        );
-    }
-
-    pub fn percentage_used(&self) -> f32 {
-        if self.total_tokens == 0 {
-            0.0
-        } else {
-            (self.used_tokens as f32 / self.total_tokens as f32) * 100.0
-        }
-    }
-
-    /// Clear the conversation history while preserving system messages
-    /// Used by /clear command to start fresh
+    /// Clear the conversation history while preserving system messages.
+    /// Used by /clear command to start fresh.
     pub fn clear_conversation(&mut self) {
-        // Keep only system messages (system prompt, README, etc.)
         let system_messages: Vec<Message> = self
             .conversation_history
             .iter()
@@ -185,24 +138,102 @@ impl ContextWindow {
         self.last_thinning_percentage = 0;
     }
 
+    // ========================================================================
+    // Token Tracking
+    // ========================================================================
+
+    /// Update token usage from provider response.
+    ///
+    /// NOTE: This only updates cumulative_tokens (total API usage tracking).
+    /// It does NOT update used_tokens because:
+    /// 1. prompt_tokens represents the ENTIRE context sent to API (already tracked via add_message)
+    /// 2. completion_tokens will be tracked when the assistant message is added via add_message
+    /// Adding total_tokens here would cause double/triple counting and break the 80% threshold check.
+    pub fn update_usage_from_response(&mut self, usage: &Usage) {
+        self.cumulative_tokens += usage.total_tokens;
+        debug!(
+            "Updated cumulative tokens: {} (used: {}/{}, cumulative: {})",
+            usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
+        );
+    }
+
+    /// Deprecated: Use update_usage_from_response instead
+    pub fn update_usage(&mut self, usage: &Usage) {
+        self.update_usage_from_response(usage);
+    }
+
+    /// Update cumulative token usage (for streaming) when no provider usage data is available.
+    /// NOTE: This only updates cumulative_tokens, not used_tokens.
+    pub fn add_streaming_tokens(&mut self, new_tokens: u32) {
+        self.cumulative_tokens += new_tokens;
+        debug!(
+            "Updated cumulative streaming tokens: {} (used: {}/{}, cumulative: {})",
+            new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
+        );
+    }
+
+    /// Recalculate token usage based on current conversation history.
+    pub fn recalculate_tokens(&mut self) {
+        self.used_tokens = self
+            .conversation_history
+            .iter()
+            .map(|m| Self::estimate_tokens(&m.content))
+            .sum();
+        debug!("Recalculated tokens after thinning: {} tokens", self.used_tokens);
+    }
+
+    /// More accurate token estimation.
+    pub fn estimate_tokens(text: &str) -> u32 {
+        // Heuristic:
+        // - Average English text: ~4 characters per token
+        // - Code/JSON: ~3 characters per token (more symbols)
+        // - Add 10% buffer for safety
+        let base_estimate = if text.contains('{') || text.contains("```") || text.contains("fn ") {
+            (text.len() as f32 / 3.0).ceil() as u32
+        } else {
+            (text.len() as f32 / 4.0).ceil() as u32
+        };
+        (base_estimate as f32 * 1.1).ceil() as u32
+    }
+
+    // ========================================================================
+    // Capacity Queries
+    // ========================================================================
+
+    pub fn percentage_used(&self) -> f32 {
+        if self.total_tokens == 0 {
+            0.0
+        } else {
+            (self.used_tokens as f32 / self.total_tokens as f32) * 100.0
+        }
+    }
+
     pub fn remaining_tokens(&self) -> u32 {
         self.total_tokens.saturating_sub(self.used_tokens)
     }
 
-    /// Check if we should trigger compaction (at 80% capacity)
+    /// Check if we should trigger compaction (at 80% capacity or 150k tokens).
     pub fn should_compact(&self) -> bool {
-        // Trigger at 80% OR if we're getting close to absolute limits
-        // This prevents issues with models that have large contexts but still hit limits
-        let percentage_trigger = self.percentage_used() >= 80.0;
-
-        // Also trigger if we're approaching common token limits
-        // Most models start having issues around 150k tokens
-        let absolute_trigger = self.used_tokens > 150_000;
-
-        percentage_trigger || absolute_trigger
+        self.percentage_used() >= 80.0 || self.used_tokens > 150_000
     }
 
-    /// Create a summary request prompt for the current conversation
+    /// Check if we should trigger context thinning.
+    /// Triggers at 50%, 60%, 70%, and 80% thresholds.
+    pub fn should_thin(&self) -> bool {
+        let current_percentage = self.percentage_used() as u32;
+        if current_percentage < 50 {
+            return false;
+        }
+
+        let current_threshold = (current_percentage / 10) * 10;
+        current_threshold > self.last_thinning_percentage && current_threshold <= 80
+    }
+
+    // ========================================================================
+    // Compaction / Summary
+    // ========================================================================
+
+    /// Create a summary request prompt for the current conversation.
     pub fn create_summary_prompt(&self) -> String {
         "Please provide a comprehensive summary of our conversation so far. Include:
 
@@ -216,122 +247,56 @@ impl ContextWindow {
 Format this as a detailed but concise summary that can be used to resume the conversation from scratch while maintaining full context.".to_string()
     }
 
-    /// Reset the context window with a summary
-    /// Preserves the original system prompt as the first message
+    /// Reset the context window with a summary.
+    /// Preserves the original system prompt as the first message.
     pub fn reset_with_summary(
         &mut self,
         summary: String,
         latest_user_message: Option<String>,
     ) -> usize {
-        // Calculate chars saved (old history minus new summary)
-        let old_chars: usize = self
-            .conversation_history
-            .iter()
-            .map(|m| m.content.len())
-            .sum();
-
-        // Preserve the original system prompt (first message) and optionally the README (second message)
-        let original_system_prompt = self.conversation_history.first().cloned();
-        let readme_message = self.conversation_history.get(1).and_then(|msg| {
-            if matches!(msg.role, MessageRole::System)
-                && (msg.content.contains("Project README")
-                    || msg.content.contains("Agent Configuration"))
-            {
-                Some(msg.clone())
-            } else {
-                None
-            }
-        });
-
-        // Clear the conversation history
-        self.conversation_history.clear();
-        self.used_tokens = 0;
-
-        // Re-add the original system prompt first (critical invariant)
-        if let Some(system_prompt) = original_system_prompt {
-            self.add_message(system_prompt);
-        }
-
-        // Re-add the README message if it existed
-        if let Some(readme) = readme_message {
-            self.add_message(readme);
-        }
-
-        // Add the summary as a system message
-        let summary_message = Message::new(
-            MessageRole::System,
-            format!("Previous conversation summary:\n\n{}", summary),
-        );
-        self.add_message(summary_message);
-
-        // Add the latest user message if provided
-        if let Some(user_msg) = latest_user_message {
-            self.add_message(Message::new(MessageRole::User, user_msg));
-        }
-
-        let new_chars: usize = self
-            .conversation_history
-            .iter()
-            .map(|m| m.content.len())
-            .sum();
-        old_chars.saturating_sub(new_chars)
+        self.reset_with_summary_and_stub(summary, latest_user_message, None)
     }
 
-    /// Reset context window with a summary and optional ACD stub
-    /// Preserves the original system prompt as the first message
-    /// If stub is provided, it's added as a system message before the summary
+    /// Reset context window with a summary and optional ACD stub.
+    /// Preserves the original system prompt as the first message.
+    /// If stub is provided, it's added as a system message before the summary.
     pub fn reset_with_summary_and_stub(
         &mut self,
         summary: String,
         latest_user_message: Option<String>,
         stub: Option<String>,
     ) -> usize {
-        // Calculate chars saved (old history minus new summary)
         let old_chars: usize = self
             .conversation_history
             .iter()
             .map(|m| m.content.len())
             .sum();
 
-        // Preserve the original system prompt (first message) and optionally the README (second message)
-        let original_system_prompt = self.conversation_history.first().cloned();
-        let readme_message = self.conversation_history.get(1).and_then(|msg| {
-            if matches!(msg.role, MessageRole::System)
-                && (msg.content.contains("Project README")
-                    || msg.content.contains("Agent Configuration"))
-            {
-                Some(msg.clone())
-            } else {
-                None
-            }
-        });
+        // Extract preserved messages before clearing
+        let preserved = self.extract_preserved_messages();
 
-        // Clear the conversation history
+        // Clear and rebuild
         self.conversation_history.clear();
         self.used_tokens = 0;
 
-        // Re-add the original system prompt first (critical invariant)
-        if let Some(system_prompt) = original_system_prompt {
+        // Re-add preserved messages
+        if let Some(system_prompt) = preserved.system_prompt {
             self.add_message(system_prompt);
         }
-
-        // Re-add the README message if it existed
-        if let Some(readme) = readme_message {
+        if let Some(readme) = preserved.readme {
             self.add_message(readme);
         }
 
-        // Add the ACD stub if provided (before summary so LLM knows about dehydrated context)
+        // Add ACD stub if provided (before summary so LLM knows about dehydrated context)
         if let Some(stub_content) = stub {
-            let stub_message = Message::new(MessageRole::System, stub_content);
-            self.add_message(stub_message);
+            self.add_message(Message::new(MessageRole::System, stub_content));
         }
 
-        // Add the summary as a system message
-        let summary_message = Message::new(
+        // Add the summary
+        self.add_message(Message::new(
             MessageRole::System,
             format!("Previous conversation summary:\n\n{}", summary),
-        );
-        self.add_message(summary_message);
+        ));
 
         // Add the latest user message if provided
         if let Some(user_msg) = latest_user_message {
@@ -346,20 +311,39 @@ Format this as a detailed but concise summary that can be used to resume the con
         old_chars.saturating_sub(new_chars)
     }
 
-    /// Check if we should trigger context thinning
-    /// Triggers at 50%, 60%, 70%, and 80% thresholds
-    pub fn should_thin(&self) -> bool {
-        let current_percentage = self.percentage_used() as u32;
+    /// Extract messages that should be preserved across compaction.
+    fn extract_preserved_messages(&self) -> PreservedMessages {
+        let system_prompt = self.conversation_history.first().cloned();
 
-        // Check if we've crossed a new 10% threshold starting at 50%
-        if current_percentage >= 50 {
-            let current_threshold = (current_percentage / 10) * 10; // Round down to nearest 10%
-            if current_threshold > self.last_thinning_percentage && current_threshold <= 80 {
-                return true;
+        let readme = self.conversation_history.get(1).and_then(|msg| {
+            if matches!(msg.role, MessageRole::System)
+                && (msg.content.contains("Project README")
+                    || msg.content.contains("Agent Configuration"))
+            {
+                Some(msg.clone())
+            } else {
+                None
             }
-        }
+        });
 
-        false
+        PreservedMessages {
+            system_prompt,
+            readme,
+        }
+    }
+
+    // ========================================================================
+    // Context Thinning
+    // ========================================================================
+
+    /// Thin context (first third only).
+    pub fn thin_context(&mut self, session_id: Option<&str>) -> ThinResult {
+        self.thin_context_with_scope(session_id, ThinScope::FirstThird)
+    }
+
+    /// Thin entire context (all messages).
+    pub fn thin_context_all(&mut self, session_id: Option<&str>) -> ThinResult {
+        self.thin_context_with_scope(session_id, ThinScope::All)
     }
 
     /// Perform context thinning: scan messages and replace large tool results with file references.
@@ -367,9 +351,6 @@ Format this as a detailed but concise summary that can be used to resume the con
     /// # Arguments
     /// * `session_id` - If provided, thinned content is saved to .g3/session/<session_id>/thinned/
     /// * `scope` - Controls which messages to process (first third or all)
-    ///
-    /// # Returns
-    /// A `ThinResult` with semantic data about the operation
     pub fn thin_context_with_scope(
         &mut self,
         session_id: Option<&str>,
@@ -383,73 +364,58 @@ Format this as a detailed but concise summary that can be used to resume the con
             self.last_thinning_percentage = current_threshold;
         }
 
-        // Calculate message range based on scope
-        let total_messages = self.conversation_history.len();
-        let end_index = match scope {
-            ThinScope::FirstThird => (total_messages / 3).max(1),
-            ThinScope::All => total_messages,
-        };
-
-        // Determine output directory: use session dir if available, otherwise ~/tmp
+        // Resolve output directory
         let tmp_dir = match Self::resolve_thinned_dir(session_id, scope) {
             Ok(dir) => dir,
-            Err(_) => {
-                return ThinResult {
-                    scope,
-                    before_percentage: current_percentage,
-                    after_percentage: current_percentage,
-                    leaned_count: 0,
-                    tool_call_leaned_count: 0,
-                    chars_saved: 0,
-                    had_changes: false,
-                };
-            }
+            Err(_) => return ThinResult::no_changes(scope, current_percentage),
         };
 
-        // Collect modifications to apply (avoids borrow checker issues)
-        let modifications = self.collect_thin_modifications(end_index, &tmp_dir, scope.file_prefix());
+        // Calculate message range based on scope
+        let end_index = match scope {
+            ThinScope::FirstThird => (self.conversation_history.len() / 3).max(1),
+            ThinScope::All => self.conversation_history.len(),
+        };
 
-        // Count results
-        let mut leaned_count = 0;
-        let mut tool_call_leaned_count = 0;
-        let mut chars_saved = 0;
-
-        // Apply modifications
-        for modification in &modifications {
-            match modification {
-                ThinModification::ReplaceContent { index, new_content, chars_saved: saved } => {
-                    if let Some(msg) = self.conversation_history.get_mut(*index) {
-                        // Determine if this was a tool result or tool call based on content
-                        if msg.content.starts_with("Tool result:") {
-                            leaned_count += 1;
-                        } else {
-                            tool_call_leaned_count += 1;
-                        }
-                        msg.content = new_content.clone();
-                        chars_saved += saved;
-                    }
-                }
-            }
-        }
+        // Collect and apply modifications
+        let modifications =
+            self.collect_thin_modifications(end_index, &tmp_dir, scope.file_prefix());
+        let (leaned_count, tool_call_leaned_count, chars_saved) =
+            self.apply_thin_modifications(&modifications);
 
         // Recalculate token usage after thinning
         self.recalculate_tokens();
 
-        // Get new percentage after thinning
-        let new_percentage = self.percentage_used() as u32;
-
-        // Build result message
-        self.build_thin_result(
+        ThinResult {
             scope,
-            current_percentage,
-            new_percentage,
+            before_percentage: current_percentage,
+            after_percentage: self.percentage_used() as u32,
             leaned_count,
             tool_call_leaned_count,
             chars_saved,
-        )
+            had_changes: leaned_count > 0 || tool_call_leaned_count > 0,
+        }
     }
 
-    /// Collect all modifications needed for thinning without mutating
+    /// Resolve the directory for storing thinned content.
+    fn resolve_thinned_dir(session_id: Option<&str>, scope: ThinScope) -> Result<String, String> {
+        let dir = if let Some(sid) = session_id {
+            get_thinned_dir(sid).to_string_lossy().to_string()
+        } else {
+            shellexpand::tilde("~/tmp").to_string()
+        };
+
+        if let Err(e) = std::fs::create_dir_all(&dir) {
+            warn!("Failed to create thinned directory: {}", e);
+            return Err(format!(
+                "⚠️  Context {} failed: could not create directory",
+                scope.error_action()
+            ));
+        }
+
+        Ok(dir)
+    }
+
+    /// Collect all modifications needed for thinning without mutating.
     fn collect_thin_modifications(
         &self,
         end_index: usize,
@@ -459,36 +425,29 @@ Format this as a detailed but concise summary that can be used to resume the con
         let mut modifications = Vec::new();
 
         for i in 0..end_index {
-            if let Some(message) = self.conversation_history.get(i) {
-                // Check if the previous message was a TODO tool call
-                let is_todo_result = self.is_todo_tool_result(i);
+            let Some(message) = self.conversation_history.get(i) else {
+                continue;
+            };
 
-                // Process User messages that look like tool results
-                if matches!(message.role, MessageRole::User)
-                    && message.content.starts_with("Tool result:")
-                    && !is_todo_result
-                    && message.content.len() > 500
+            // Process User messages that look like tool results
+            if matches!(message.role, MessageRole::User)
+                && message.content.starts_with("Tool result:")
+                && !self.is_todo_tool_result(i)
+                && message.content.len() > 500
+            {
+                if let Some(m) =
+                    Self::create_tool_result_modification(&message.content, i, tmp_dir, file_prefix)
                 {
-                    if let Some(modification) = Self::create_tool_result_modification(
-                        &message.content,
-                        i,
-                        tmp_dir,
-                        file_prefix,
-                    ) {
-                        modifications.push(modification);
-                    }
+                    modifications.push(m);
                 }
+            }
 
-                // Process Assistant messages that contain tool calls with large arguments
-                if matches!(message.role, MessageRole::Assistant) {
-                    if let Some(modification) = Self::create_tool_call_modification(
-                        &message.content,
-                        i,
-                        tmp_dir,
-                        file_prefix,
-                    ) {
-                        modifications.push(modification);
-                    }
+            // Process Assistant messages that contain tool calls with large arguments
+            if matches!(message.role, MessageRole::Assistant) {
+                if let Some(m) =
+                    Self::create_tool_call_modification(&message.content, i, tmp_dir, file_prefix)
+                {
+                    modifications.push(m);
                 }
             }
         }
@@ -496,59 +455,55 @@ Format this as a detailed but concise summary that can be used to resume the con
         modifications
     }
 
-    /// Thin context (first third only)
-    pub fn thin_context(&mut self, session_id: Option<&str>) -> ThinResult {
-        self.thin_context_with_scope(session_id, ThinScope::FirstThird)
-    }
+    /// Apply collected modifications and return counts.
+    fn apply_thin_modifications(
+        &mut self,
+        modifications: &[ThinModification],
+    ) -> (usize, usize, usize) {
+        let mut leaned_count = 0;
+        let mut tool_call_leaned_count = 0;
+        let mut chars_saved = 0;
 
-    /// Thin entire context (all messages)
-    pub fn thin_context_all(&mut self, session_id: Option<&str>) -> ThinResult {
-        self.thin_context_with_scope(session_id, ThinScope::All)
-    }
+        for modification in modifications {
+            let ThinModification::ReplaceContent {
+                index,
+                new_content,
+                chars_saved: saved,
+            } = modification;
 
-    /// Resolve the directory for storing thinned content
-    fn resolve_thinned_dir(session_id: Option<&str>, scope: ThinScope) -> Result<String, String> {
-        if let Some(sid) = session_id {
-            let thinned_dir = get_thinned_dir(sid);
-            if let Err(e) = std::fs::create_dir_all(&thinned_dir) {
-                warn!("Failed to create thinned directory: {}", e);
-                return Err(format!(
-                    "⚠️  Context {} failed: could not create thinned directory",
-                    scope.error_action()
-                ));
+            if let Some(msg) = self.conversation_history.get_mut(*index) {
+                if msg.content.starts_with("Tool result:") {
+                    leaned_count += 1;
+                } else {
+                    tool_call_leaned_count += 1;
+                }
+                msg.content = new_content.clone();
+                chars_saved += saved;
             }
-            Ok(thinned_dir.to_string_lossy().to_string())
-        } else {
-            let fallback_dir = shellexpand::tilde("~/tmp").to_string();
-            if let Err(e) = std::fs::create_dir_all(&fallback_dir) {
-                warn!("Failed to create ~/tmp directory: {}", e);
-                return Err(format!(
-                    "⚠️  Context {} failed: could not create ~/tmp directory",
-                    scope.error_action()
-                ));
-            }
-            Ok(fallback_dir)
         }
+
+        (leaned_count, tool_call_leaned_count, chars_saved)
     }
 
-    /// Check if message at index i is a result of a TODO tool call
+    /// Check if message at index i is a result of a TODO tool call.
     fn is_todo_tool_result(&self, i: usize) -> bool {
         if i == 0 {
             return false;
         }
 
-        if let Some(prev_message) = self.conversation_history.get(i - 1) {
-            if matches!(prev_message.role, MessageRole::Assistant) {
-                return prev_message.content.contains(r#""tool":"todo_read""#)
-                    || prev_message.content.contains(r#""tool":"todo_write""#)
-                    || prev_message.content.contains(r#""tool": "todo_read""#)
-                    || prev_message.content.contains(r#""tool": "todo_write""#);
-            }
-        }
-        false
+        self.conversation_history
+            .get(i - 1)
+            .map(|prev| {
+                matches!(prev.role, MessageRole::Assistant)
+                    && (prev.content.contains(r#""tool":"todo_read""#)
+                        || prev.content.contains(r#""tool":"todo_write""#)
+                        || prev.content.contains(r#""tool": "todo_read""#)
+                        || prev.content.contains(r#""tool": "todo_write""#))
+            })
+            .unwrap_or(false)
     }
 
-    /// Create a modification for thinning a tool result message
+    /// Create a modification for thinning a tool result message.
     fn create_tool_result_modification(
         content: &str,
         index: usize,
@@ -583,7 +538,7 @@ Format this as a detailed but concise summary that can be used to resume the con
         })
     }
 
-    /// Create a modification for thinning tool calls in an assistant message
+    /// Create a modification for thinning tool calls in an assistant message.
     fn create_tool_call_modification(
         content: &str,
         index: usize,
@@ -644,8 +599,8 @@ Format this as a detailed but concise summary that can be used to resume the con
         })
     }
 
-    /// Thin write_file args by saving content to file
-    /// Returns (chars_saved, new_args) if thinned
+    /// Thin write_file args by saving content to file.
+    /// Returns (chars_saved, new_args) if thinned.
     fn thin_write_file_args(
         args: &serde_json::Value,
         index: usize,
@@ -654,9 +609,8 @@ Format this as a detailed but concise summary that can be used to resume the con
     ) -> Option<(usize, serde_json::Value)> {
         let args_obj = args.as_object()?;
         let content_str = args_obj.get("content")?.as_str()?;
-        let content_len = content_str.len();
 
-        if content_len <= 500 {
+        if content_str.len() <= 500 {
             return None;
         }
 
@@ -664,13 +618,15 @@ Format this as a detailed but concise summary that can be used to resume the con
             .duration_since(std::time::UNIX_EPOCH)
             .unwrap_or_default()
             .as_secs();
-        let filename = format!("{}_write_file_content_{}_{}.txt", file_prefix, timestamp, index);
+        let filename = format!(
+            "{}_write_file_content_{}_{}.txt",
+            file_prefix, timestamp, index
+        );
         let file_path = format!("{}/{}", tmp_dir, filename);
 
-        if std::fs::write(&file_path, content_str).is_err() {
-            return None;
-        }
+        std::fs::write(&file_path, content_str).ok()?;
 
+        let content_len = content_str.len();
         let mut new_args = args_obj.clone();
         new_args.insert(
             "content".to_string(),
@@ -685,8 +641,8 @@ Format this as a detailed but concise summary that can be used to resume the con
         Some((content_len, serde_json::Value::Object(new_args)))
     }
 
-    /// Thin str_replace args by saving diff to file
-    /// Returns (chars_saved, new_args) if thinned
+    /// Thin str_replace args by saving diff to file.
+    /// Returns (chars_saved, new_args) if thinned.
     fn thin_str_replace_args(
         args: &serde_json::Value,
         index: usize,
@@ -695,9 +651,8 @@ Format this as a detailed but concise summary that can be used to resume the con
     ) -> Option<(usize, serde_json::Value)> {
         let args_obj = args.as_object()?;
         let diff_str = args_obj.get("diff")?.as_str()?;
-        let diff_len = diff_str.len();
 
-        if diff_len <= 500 {
+        if diff_str.len() <= 500 {
             return None;
         }
 
@@ -705,13 +660,15 @@ Format this as a detailed but concise summary that can be used to resume the con
             .duration_since(std::time::UNIX_EPOCH)
             .unwrap_or_default()
             .as_secs();
-        let filename = format!("{}_str_replace_diff_{}_{}.txt", file_prefix, timestamp, index);
+        let filename = format!(
+            "{}_str_replace_diff_{}_{}.txt",
+            file_prefix, timestamp, index
+        );
         let file_path = format!("{}/{}", tmp_dir, filename);
 
-        if std::fs::write(&file_path, diff_str).is_err() {
-            return None;
-        }
+        std::fs::write(&file_path, diff_str).ok()?;
 
+        let diff_len = diff_str.len();
         let mut new_args = args_obj.clone();
         new_args.insert(
             "diff".to_string(),
@@ -726,41 +683,11 @@ Format this as a detailed but concise summary that can be used to resume the con
         Some((diff_len, serde_json::Value::Object(new_args)))
     }
 
-    /// Build the result message for thinning operations
-    fn build_thin_result(
-        &self,
-        scope: ThinScope,
-        current_percentage: u32,
-        new_percentage: u32,
-        leaned_count: usize,
-        tool_call_leaned_count: usize,
-        chars_saved: usize,
-    ) -> ThinResult {
-        let had_changes = leaned_count > 0 || tool_call_leaned_count > 0;
-        ThinResult {
-            scope,
-            before_percentage: current_percentage,
-            after_percentage: new_percentage,
-            leaned_count,
-            tool_call_leaned_count,
-            chars_saved: if had_changes { chars_saved } else { 0 },
-            had_changes,
-        }
-    }
+    // ========================================================================
+    // JSON Utilities
+    // ========================================================================
 
-    /// Recalculate token usage based on current conversation history
-    /// Recalculate the token count based on current conversation history.
-    pub fn recalculate_tokens(&mut self) {
-        let mut total = 0;
-        for message in &self.conversation_history {
-            total += Self::estimate_tokens(&message.content);
-        }
-        self.used_tokens = total;
-
-        debug!("Recalculated tokens after thinning: {} tokens", total);
-    }
-
-    /// Helper function to find the end of a JSON object
+    /// Find the end position of a JSON object.
     pub fn find_json_end(json_str: &str) -> Option<usize> {
         let mut brace_count = 0;
         let mut in_string = false;
@@ -790,6 +717,35 @@ Format this as a detailed but concise summary that can be used to resume the con
     }
 }
 
+// ============================================================================
+// Helper Types
+// ============================================================================
+
+/// Messages preserved across compaction.
+struct PreservedMessages {
+    system_prompt: Option<Message>,
+    readme: Option<Message>,
+}
+
+impl ThinResult {
+    /// Create a ThinResult indicating no changes were made.
+    fn no_changes(scope: ThinScope, percentage: u32) -> Self {
+        Self {
+            scope,
+            before_percentage: percentage,
+            after_percentage: percentage,
+            leaned_count: 0,
+            tool_call_leaned_count: 0,
+            chars_saved: 0,
+            had_changes: false,
+        }
+    }
+}
+
+// ============================================================================
+// Tests
+// ============================================================================
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -836,20 +792,20 @@ mod tests {
     #[test]
     fn test_should_thin_thresholds() {
         let mut cw = ContextWindow::new(100);
-        
+
         // Below 50% - should not thin
         cw.used_tokens = 49;
         assert!(!cw.should_thin());
-        
+
         // At 50% - should thin (first time)
         cw.used_tokens = 50;
         assert!(cw.should_thin());
-        
+
         // After thinning at 50%, shouldn't thin again until 60%
         cw.last_thinning_percentage = 50;
         cw.used_tokens = 55;
         assert!(!cw.should_thin());
-        
+
         // At 60% - should thin again
         cw.used_tokens = 60;
         assert!(cw.should_thin());
@@ -859,7 +815,6 @@ mod tests {
     fn test_estimate_tokens_regular_text() {
         let text = "Hello world, this is a test.";
         let tokens = ContextWindow::estimate_tokens(text);
-        // ~28 chars / 4 * 1.1 = ~8 tokens
         assert!(tokens > 0 && tokens < 20);
     }
 
@@ -867,7 +822,6 @@ mod tests {
     fn test_estimate_tokens_code() {
         let code = "fn main() { println!(\"hello\"); }";
         let tokens = ContextWindow::estimate_tokens(code);
-        // Code uses 3 chars per token estimate
         assert!(tokens > 0);
     }
 
diff --git a/crates/g3-core/src/streaming_parser.rs b/crates/g3-core/src/streaming_parser.rs
index 9150d38..7d3ebe6 100644
--- a/crates/g3-core/src/streaming_parser.rs
+++ b/crates/g3-core/src/streaming_parser.rs
@@ -20,20 +20,285 @@ const TOOL_CALL_PATTERNS: [&str; 4] = [
     r#"{ "tool" :"#,
 ];
 
-/// Search direction for tool call pattern matching.
-#[derive(Clone, Copy, PartialEq)]
-enum SearchDirection {
-    Forward,
-    Backward,
+// ============================================================================
+// Code Fence Tracking
+// ============================================================================
+
+/// Tracks whether we're inside a markdown code fence (``` block).
+///
+/// Used during streaming to avoid parsing JSON examples inside code blocks
+/// as tool calls.
+#[derive(Debug, Default)]
+struct CodeFenceTracker {
+    /// Whether we're currently inside a code fence
+    in_fence: bool,
+    /// Buffer for the current incomplete line (text since last newline)
+    current_line: String,
 }
 
+impl CodeFenceTracker {
+    fn new() -> Self {
+        Self::default()
+    }
+
+    /// Update fence state based on new streaming content.
+    fn process(&mut self, content: &str) {
+        for ch in content.chars() {
+            if ch == '\n' {
+                self.check_and_toggle_fence();
+                self.current_line.clear();
+            } else {
+                self.current_line.push(ch);
+            }
+        }
+    }
+
+    /// Check if current_line is a code fence marker and toggle state if so.
+    fn check_and_toggle_fence(&mut self) {
+        let trimmed = self.current_line.trim_start();
+        if trimmed.starts_with("```") && trimmed.chars().take_while(|&c| c == '`').count() >= 3 {
+            self.in_fence = !self.in_fence;
+            debug!(
+                "Code fence toggled: in_fence={} (line: {:?})",
+                self.in_fence, self.current_line
+            );
+        }
+    }
+
+    fn is_in_fence(&self) -> bool {
+        self.in_fence
+    }
+
+    fn reset(&mut self) {
+        self.in_fence = false;
+        self.current_line.clear();
+    }
+}
+
+/// Find all code fence ranges in text (for batch processing).
+///
+/// Returns a vector of (start, end) byte positions where code fences are.
+/// Each range represents content INSIDE a fence (between ``` markers).
+fn find_code_fence_ranges(text: &str) -> Vec<(usize, usize)> {
+    let mut ranges = Vec::new();
+    let mut in_fence = false;
+    let mut fence_start = 0;
+    let mut line_start = 0;
+
+    for (i, ch) in text.char_indices() {
+        if ch == '\n' {
+            let line = &text[line_start..i];
+            let trimmed = line.trim_start();
+
+            if trimmed.starts_with("```")
+                && trimmed.chars().take_while(|&c| c == '`').count() >= 3
+            {
+                if in_fence {
+                    ranges.push((fence_start, line_start));
+                    in_fence = false;
+                } else {
+                    fence_start = i + 1; // +1 to skip the newline
+                    in_fence = true;
+                }
+            }
+            line_start = i + 1;
+        }
+    }
+
+    // If we ended while still in a fence, include everything to the end
+    if in_fence {
+        ranges.push((fence_start, text.len()));
+    }
+
+    ranges
+}
+
+fn is_position_in_fence_ranges(pos: usize, ranges: &[(usize, usize)]) -> bool {
+    ranges.iter().any(|(start, end)| pos >= *start && pos < *end)
+}
+
+// ============================================================================
+// JSON Parsing Utilities
+// ============================================================================
+
+/// Find the end position (byte index) of a complete JSON object in the text.
+/// Returns None if no complete JSON object is found.
+fn find_json_object_end(text: &str) -> Option<usize> {
+    let mut brace_count = 0;
+    let mut in_string = false;
+    let mut escape_next = false;
+    let mut found_start = false;
+
+    for (i, ch) in text.char_indices() {
+        if escape_next {
+            escape_next = false;
+            continue;
+        }
+
+        match ch {
+            '\\' => escape_next = true,
+            '"' => in_string = !in_string,
+            '{' if !in_string => {
+                brace_count += 1;
+                found_start = true;
+            }
+            '}' if !in_string => {
+                brace_count -= 1;
+                if brace_count == 0 && found_start {
+                    return Some(i);
+                }
+            }
+            _ => {}
+        }
+    }
+
+    None
+}
+
+/// Check if a partial JSON tool call has been invalidated by subsequent content.
+///
+/// Detects two invalidation cases:
+/// 1. Unescaped newline inside a JSON string (invalid JSON)
+/// 2. Newline followed by non-JSON prose (e.g., regular text, not `"`, `{`, `}`, etc.)
+fn is_json_invalidated(json_text: &str) -> bool {
+    let mut in_string = false;
+    let mut escape_next = false;
+    let mut chars = json_text.char_indices().peekable();
+
+    while let Some((_, ch)) = chars.next() {
+        if escape_next {
+            escape_next = false;
+            continue;
+        }
+
+        match ch {
+            '\\' => escape_next = true,
+            '"' => in_string = !in_string,
+            '\n' if in_string => return true, // Unescaped newline in string = invalid
+            '\n' if !in_string => {
+                // Skip whitespace after newline
+                while let Some(&(_, next_ch)) = chars.peek() {
+                    if next_ch != ' ' && next_ch != '\t' {
+                        break;
+                    }
+                    chars.next();
+                }
+
+                // Check if next char is valid JSON continuation
+                if let Some(&(_, next_ch)) = chars.peek() {
+                    let valid_json_char = matches!(
+                        next_ch,
+                        '"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' | '0'..='9' | 't' | 'f' | 'n' | '\n'
+                    );
+                    if !valid_json_char {
+                        return true;
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+
+    false
+}
+
+/// Detect malformed tool calls where LLM prose leaked into JSON keys.
+///
+/// When the LLM "stutters" or mixes formats, it sometimes emits JSON where
+/// the keys are actually fragments of conversational text rather than valid
+/// parameter names.
+fn args_contain_prose_fragments(args: &serde_json::Map<String, serde_json::Value>) -> bool {
+    const PROSE_MARKERS: &[&str] = &[
+        "I'll", "Let me", "Here's", "I can", "I need", "First", "Now", "The ",
+    ];
+
+    args.keys().any(|key| {
+        key.len() > 100
+            || key.contains('\n')
+            || PROSE_MARKERS.iter().any(|marker| key.contains(marker))
+    })
+}
+
+// ============================================================================
+// Tool Call Pattern Matching
+// ============================================================================
+
+/// Check if a position in text is "on its own line" - meaning it's either
+/// at the start of the text, or preceded by a newline with only whitespace
+/// between the newline and the position.
+fn is_on_own_line(text: &str, pos: usize) -> bool {
+    if pos == 0 {
+        return true;
+    }
+    let line_start = text[..pos].rfind('\n').map(|p| p + 1).unwrap_or(0);
+    text[line_start..pos].chars().all(|c| c.is_whitespace())
+}
+
+/// Find the first tool call pattern that appears on its own line.
+fn find_first_tool_call_start(text: &str) -> Option<usize> {
+    find_tool_call_start(text, false)
+}
+
+/// Find the last tool call pattern that appears on its own line.
+fn find_last_tool_call_start(text: &str) -> Option<usize> {
+    find_tool_call_start(text, true)
+}
+
+/// Find a tool call pattern in text, optionally searching backwards.
+/// Only matches patterns on their own line (at start or after newline + whitespace).
+fn find_tool_call_start(text: &str, find_last: bool) -> Option<usize> {
+    let mut best_pos: Option<usize> = None;
+
+    for pattern in &TOOL_CALL_PATTERNS {
+        if find_last {
+            // Search backwards
+            let mut search_end = text.len();
+            while search_end > 0 {
+                if let Some(pos) = text[..search_end].rfind(pattern) {
+                    if is_on_own_line(text, pos) {
+                        if best_pos.map_or(true, |best| pos > best) {
+                            best_pos = Some(pos);
+                        }
+                        break;
+                    }
+                    search_end = pos;
+                } else {
+                    break;
+                }
+            }
+        } else {
+            // Search forwards
+            let mut search_start = 0;
+            while search_start < text.len() {
+                if let Some(rel) = text[search_start..].find(pattern) {
+                    let pos = search_start + rel;
+                    if is_on_own_line(text, pos) {
+                        if best_pos.map_or(true, |best| pos < best) {
+                            best_pos = Some(pos);
+                        }
+                        break;
+                    }
+                    search_start = pos + 1;
+                } else {
+                    break;
+                }
+            }
+        }
+    }
+
+    best_pos
+}
+
+// ============================================================================
+// StreamingToolParser
+// ============================================================================
+
 /// Modern streaming tool parser that properly handles native tool calls and SSE chunks.
 #[derive(Debug)]
 pub struct StreamingToolParser {
     /// Buffer for accumulating text content
     text_buffer: String,
     /// Position in text_buffer up to which tool calls have been consumed/executed.
-    /// This prevents has_unexecuted_tool_call() from returning true for already-executed tools.
     last_consumed_position: usize,
     /// Whether we've received a message_stop event
     message_stopped: bool,
@@ -41,13 +306,8 @@ pub struct StreamingToolParser {
     in_json_tool_call: bool,
     /// Start position of JSON tool call (for fallback parsing)
     json_tool_start: Option<usize>,
-    /// Whether we're currently inside a code fence (``` block)
-    /// When true, tool call detection is disabled to prevent false positives
-    /// from JSON examples in code blocks.
-    in_code_fence: bool,
-    /// Buffer for the current incomplete line (text since last newline)
-    /// Used to detect code fence markers which must be at line start.
-    current_line: String,
+    /// Tracks code fence state during streaming
+    fence_tracker: CodeFenceTracker,
 }
 
 impl Default for StreamingToolParser {
@@ -64,125 +324,28 @@ impl StreamingToolParser {
             message_stopped: false,
             in_json_tool_call: false,
             json_tool_start: None,
-            in_code_fence: false,
-            current_line: String::new(),
+            fence_tracker: CodeFenceTracker::new(),
         }
     }
 
-    /// Find a tool call pattern in text, searching in the specified direction.
-    /// Only matches patterns on their own line (at start or after newline + whitespace).
-    fn find_tool_call_start(text: &str, direction: SearchDirection) -> Option<usize> {
-        let mut best_start: Option<usize> = None;
-
-        for pattern in &TOOL_CALL_PATTERNS {
-            match direction {
-                SearchDirection::Forward => {
-                    let mut search_start = 0;
-                    while search_start < text.len() {
-                        if let Some(rel) = text[search_start..].find(pattern) {
-                            let pos = search_start + rel;
-                            if Self::is_on_own_line(text, pos) {
-                                if best_start.map_or(true, |best| pos < best) {
-                                    best_start = Some(pos);
-                                }
-                                break;
-                            }
-                            search_start = pos + 1;
-                        } else {
-                            break;
-                        }
-                    }
-                }
-                SearchDirection::Backward => {
-                    let mut search_end = text.len();
-                    while search_end > 0 {
-                        if let Some(pos) = text[..search_end].rfind(pattern) {
-                            if Self::is_on_own_line(text, pos) {
-                                if best_start.map_or(true, |best| pos > best) {
-                                    best_start = Some(pos);
-                                }
-                                break;
-                            }
-                            search_end = pos;
-                        } else {
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        best_start
-    }
-
-    /// Find the starting position of the FIRST tool call pattern on its own line.
-    pub fn find_first_tool_call_start(text: &str) -> Option<usize> {
-        Self::find_tool_call_start(text, SearchDirection::Forward)
-    }
-
-    /// Find the starting position of the LAST tool call pattern on its own line.
-    pub fn find_last_tool_call_start(text: &str) -> Option<usize> {
-        Self::find_tool_call_start(text, SearchDirection::Backward)
-    }
-
-    /// Check if a position in text is "on its own line" - meaning it's either
-    /// at the start of the text, or preceded by a newline with only whitespace
-    /// between the newline and the position.
-    pub fn is_on_own_line(text: &str, pos: usize) -> bool {
-        if pos == 0 {
-            return true;
-        }
-        // Find the start of the current line (position after the last newline before pos)
-        let line_start = text[..pos].rfind('\n').map(|p| p + 1).unwrap_or(0);
-        // Check if everything between line_start and pos is whitespace
-        text[line_start..pos].chars().all(|c| c.is_whitespace())
-    }
-
-    /// Detect malformed tool calls where LLM prose leaked into JSON keys.
-    ///
-    /// When the LLM "stutters" or mixes formats, it sometimes emits JSON where
-    /// the keys are actually fragments of conversational text rather than valid
-    /// parameter names. This heuristic catches such cases by looking for:
-    /// - Unusually long keys (>100 chars)
-    /// - Newlines in keys (never valid in JSON keys)
-    /// - Common LLM response phrases that indicate prose, not parameters
-    fn args_contain_prose_fragments(args: &serde_json::Map<String, serde_json::Value>) -> bool {
-        const PROSE_MARKERS: &[&str] = &[
-            "I'll", "Let me", "Here's", "I can", "I need", "First", "Now", "The ",
-        ];
-
-        args.keys().any(|key| {
-            key.len() > 100
-                || key.contains('\n')
-                || PROSE_MARKERS.iter().any(|marker| key.contains(marker))
-        })
-    }
-
     /// Process a streaming chunk and return completed tool calls if any.
     pub fn process_chunk(&mut self, chunk: &g3_providers::CompletionChunk) -> Vec<ToolCall> {
         let mut completed_tools = Vec::new();
 
         // Add text content to buffer and track code fence state
         if !chunk.content.is_empty() {
-            // Update code fence state based on new content
-            self.update_code_fence_state(&chunk.content);
-            
+            self.fence_tracker.process(&chunk.content);
             self.text_buffer.push_str(&chunk.content);
         }
 
-        // Handle native tool calls - return them immediately when received.
-        // This allows tools to be executed as soon as they're fully parsed,
-        // preventing duplicate tool calls from being accumulated.
+        // Handle native tool calls - return them immediately when received
         if let Some(ref tool_calls) = chunk.tool_calls {
             debug!("Received native tool calls: {:?}", tool_calls);
-
-            // Convert and return tool calls immediately
             for tool_call in tool_calls {
-                let converted_tool = ToolCall {
+                completed_tools.push(ToolCall {
                     tool: tool_call.tool.clone(),
                     args: tool_call.args.clone(),
-                };
-                completed_tools.push(converted_tool);
+                });
             }
         }
 
@@ -193,7 +356,7 @@ impl StreamingToolParser {
 
             // When stream finishes, find ALL JSON tool calls in the accumulated buffer
             if completed_tools.is_empty() && !self.text_buffer.is_empty() {
-                let all_tools = self.try_parse_all_json_tool_calls_from_buffer();
+                let all_tools = self.parse_all_json_tool_calls();
                 if !all_tools.is_empty() {
                     debug!(
                         "Found {} JSON tool calls in buffer at stream end",
@@ -204,98 +367,72 @@ impl StreamingToolParser {
             }
         }
 
-        // Fallback: Try to parse JSON tool calls from current chunk content if no native tool calls
-        // IMPORTANT: Skip JSON fallback parsing when inside a code fence to prevent
-        // false positives from JSON examples in code blocks.
-        if completed_tools.is_empty() && !chunk.content.is_empty() && !chunk.finished {
-            if !self.in_code_fence {
-                if let Some(json_tool) = self.try_parse_json_tool_call(&chunk.content) {
-                    completed_tools.push(json_tool);
-                }
+        // Fallback: Try to parse JSON tool calls from current chunk content if no native tool calls.
+        // Skip when inside a code fence to prevent false positives from JSON examples.
+        if completed_tools.is_empty()
+            && !chunk.content.is_empty()
+            && !chunk.finished
+            && !self.fence_tracker.is_in_fence()
+        {
+            if let Some(json_tool) = self.try_parse_streaming_json_tool_call() {
+                completed_tools.push(json_tool);
             }
         }
 
         completed_tools
     }
 
-    /// Fallback method to parse JSON tool calls from text content.
-    /// 
-    /// This method maintains state (`in_json_tool_call`, `json_tool_start`) to track
-    /// partial JSON tool calls across streaming chunks. When a pattern like `{"tool":`
-    /// is found on its own line, we enter "in JSON tool call" mode and wait for the
-    /// JSON to complete.
-    /// 
-    /// IMPORTANT: We must also detect when the JSON has been **invalidated** - i.e.,
-    /// when subsequent content makes it clear this isn't a real tool call. For example:
-    /// - `{"tool": "read_file` followed by `\nsome regular text` is NOT a tool call
-    /// - The newline followed by non-JSON text invalidates the partial JSON
-    fn try_parse_json_tool_call(&mut self, _content: &str) -> Option<ToolCall> {
-        // Pre-compute fence ranges to skip tool calls inside code fences
-        let fence_ranges = Self::find_code_fence_ranges(&self.text_buffer);
-        
-        // If we're not currently in a JSON tool call, look for the start
+    /// Try to parse a JSON tool call from the streaming buffer.
+    ///
+    /// Maintains state (`in_json_tool_call`, `json_tool_start`) to track
+    /// partial JSON tool calls across streaming chunks.
+    fn try_parse_streaming_json_tool_call(&mut self) -> Option<ToolCall> {
+        let fence_ranges = find_code_fence_ranges(&self.text_buffer);
+
+        // If not currently in a JSON tool call, look for the start
         if !self.in_json_tool_call {
-            // Only search in the unconsumed portion of the buffer to avoid
-            // re-parsing already-executed tool calls
             let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
-            // Use find_first_tool_call_start to find the FIRST tool call, not the last.
-            // This ensures we process tool calls in order when multiple arrive together.
-            if let Some(relative_pos) = Self::find_first_tool_call_start(unchecked_buffer) {
+            if let Some(relative_pos) = find_first_tool_call_start(unchecked_buffer) {
                 let pos = self.last_consumed_position + relative_pos;
-                
-                // Skip if this position is inside a code fence
-                if Self::is_position_in_fence_ranges(pos, &fence_ranges) {
+
+                // Skip if inside a code fence
+                if is_position_in_fence_ranges(pos, &fence_ranges) {
                     debug!("Skipping tool call at position {} - inside code fence", pos);
                     return None;
                 }
-                
-                debug!("Found JSON tool call pattern at position {} (relative: {})", pos, relative_pos);
+
+                debug!(
+                    "Found JSON tool call pattern at position {} (relative: {})",
+                    pos, relative_pos
+                );
                 self.in_json_tool_call = true;
                 self.json_tool_start = Some(pos);
             }
         }
 
-        // If we're in a JSON tool call, try to find the end and parse it
+        // If in a JSON tool call, try to find the end and parse it
         if self.in_json_tool_call {
             if let Some(start_pos) = self.json_tool_start {
                 let json_text = &self.text_buffer[start_pos..];
 
                 // Try to find a complete JSON object
-                if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
+                if let Some(end_pos) = find_json_object_end(json_text) {
                     let json_str = &json_text[..=end_pos];
                     debug!("Attempting to parse JSON tool call: {}", json_str);
 
-                    // Try to parse as a ToolCall
-                    if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
-                        // Validate that args is an object with reasonable keys
-                        if let Some(args_obj) = tool_call.args.as_object() {
-                            if Self::args_contain_prose_fragments(args_obj) {
-                                debug!(
-                                    "Detected malformed tool call with message-like keys, skipping"
-                                );
-                                self.in_json_tool_call = false;
-                                self.json_tool_start = None;
-                                return None;
-                            }
-
-                            debug!("Successfully parsed valid JSON tool call: {:?}", tool_call);
-                            self.in_json_tool_call = false;
-                            self.json_tool_start = None;
-                            return Some(tool_call);
-                        }
-                        debug!("Tool call args is not an object, skipping");
-                    } else {
-                        debug!("Failed to parse JSON tool call: {}", json_str);
+                    if let Some(tool_call) = self.try_parse_tool_call_json(json_str) {
+                        self.in_json_tool_call = false;
+                        self.json_tool_start = None;
+                        return Some(tool_call);
                     }
-                    // Reset and continue looking
+
+                    // Parse failed, reset and continue looking
                     self.in_json_tool_call = false;
                     self.json_tool_start = None;
                 }
-                
-                // If we didn't find a complete JSON object, check if the partial JSON
-                // has been invalidated by subsequent content (e.g., a newline followed
-                // by regular text when not inside a string).
-                if self.in_json_tool_call && Self::is_json_invalidated(json_text) {
+
+                // Check if the partial JSON has been invalidated
+                if self.in_json_tool_call && is_json_invalidated(json_text) {
                     debug!("JSON tool call invalidated by subsequent content, clearing state");
                     self.in_json_tool_call = false;
                     self.json_tool_start = None;
@@ -309,61 +446,64 @@ impl StreamingToolParser {
     }
 
     /// Parse ALL JSON tool calls from the accumulated text buffer.
-    /// This finds all complete tool calls, not just the last one.
-    fn try_parse_all_json_tool_calls_from_buffer(&self) -> Vec<ToolCall> {
+    fn parse_all_json_tool_calls(&self) -> Vec<ToolCall> {
         let mut tool_calls = Vec::new();
         let mut search_start = 0;
-        
-        // Pre-compute fence ranges to know which positions are inside code fences
-        // This is more efficient than re-scanning for each potential tool call
-        let fence_ranges = Self::find_code_fence_ranges(&self.text_buffer);
+        let fence_ranges = find_code_fence_ranges(&self.text_buffer);
 
         while search_start < self.text_buffer.len() {
             let search_text = &self.text_buffer[search_start..];
 
-            // Find the next tool call pattern
-            if let Some(relative_pos) = Self::find_first_tool_call_start(search_text) {
-                let abs_start = search_start + relative_pos;
-                let json_text = &self.text_buffer[abs_start..];
-                
-                // Skip if this position is inside a code fence
-                if Self::is_position_in_fence_ranges(abs_start, &fence_ranges) {
-                    // Move past this position and continue searching
-                    search_start = abs_start + 1;
-                    continue;
-                }
-
-                // Try to find a complete JSON object
-                if let Some(end_pos) = Self::find_complete_json_object_end(json_text) {
-                    let json_str = &json_text[..=end_pos];
-
-                    if let Ok(tool_call) = serde_json::from_str::<ToolCall>(json_str) {
-                        if let Some(args_obj) = tool_call.args.as_object() {
-                            if !Self::args_contain_prose_fragments(args_obj) {
-                                debug!(
-                                    "Found tool call at position {}: {:?}",
-                                    abs_start, tool_call.tool
-                                );
-                                tool_calls.push(tool_call);
-                            }
-                        }
-                    }
-                    // Move past this tool call
-                    search_start = abs_start + end_pos + 1;
-                } else {
-                    // Incomplete JSON, stop searching
-                    break;
-                }
-            } else {
-                // No more tool call patterns found
+            let Some(relative_pos) = find_first_tool_call_start(search_text) else {
                 break;
+            };
+
+            let abs_start = search_start + relative_pos;
+            let json_text = &self.text_buffer[abs_start..];
+
+            // Skip if inside a code fence
+            if is_position_in_fence_ranges(abs_start, &fence_ranges) {
+                search_start = abs_start + 1;
+                continue;
             }
+
+            // Try to find a complete JSON object
+            let Some(end_pos) = find_json_object_end(json_text) else {
+                break; // Incomplete JSON, stop searching
+            };
+
+            let json_str = &json_text[..=end_pos];
+            if let Some(tool_call) = self.try_parse_tool_call_json(json_str) {
+                debug!("Found tool call at position {}: {:?}", abs_start, tool_call.tool);
+                tool_calls.push(tool_call);
+            }
+
+            search_start = abs_start + end_pos + 1;
         }
 
         tool_calls
     }
 
-    /// Get the accumulated text content (excluding tool calls).
+    /// Try to parse a JSON string as a ToolCall, validating the args.
+    fn try_parse_tool_call_json(&self, json_str: &str) -> Option<ToolCall> {
+        let tool_call: ToolCall = serde_json::from_str(json_str).ok()?;
+
+        // Validate that args is an object with reasonable keys
+        let args_obj = tool_call.args.as_object()?;
+        if args_contain_prose_fragments(args_obj) {
+            debug!("Detected malformed tool call with message-like keys, skipping");
+            return None;
+        }
+
+        debug!("Successfully parsed valid JSON tool call: {:?}", tool_call);
+        Some(tool_call)
+    }
+
+    // ========================================================================
+    // Public Accessors
+    // ========================================================================
+
+    /// Get the accumulated text content.
     pub fn get_text_content(&self) -> &str {
         &self.text_buffer
     }
@@ -383,246 +523,43 @@ impl StreamingToolParser {
     }
 
     /// Check if the text buffer contains an incomplete JSON tool call.
-    /// This detects cases where the LLM started emitting a tool call but the stream ended
-    /// before the JSON was complete (truncated output).
     pub fn has_incomplete_tool_call(&self) -> bool {
-        // Only check the unconsumed portion of the buffer
         let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
-        if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) {
-            let json_text = &unchecked_buffer[start_pos..];
-            // If JSON is complete, it's not incomplete
-            if Self::find_complete_json_object_end(json_text).is_some() {
-                return false;
-            }
-            // If JSON has been invalidated by subsequent content, it's not a real tool call
-            if Self::is_json_invalidated(json_text) {
-                return false;
-            }
-            // Otherwise, it's a genuinely incomplete tool call
-            true
-        } else {
-            false
+        let Some(start_pos) = find_last_tool_call_start(unchecked_buffer) else {
+            return false;
+        };
+
+        let json_text = &unchecked_buffer[start_pos..];
+
+        // Complete or invalidated = not incomplete
+        if find_json_object_end(json_text).is_some() || is_json_invalidated(json_text) {
+            return false;
         }
+
+        true
     }
 
     /// Check if the text buffer contains an unexecuted tool call.
-    /// This detects cases where the LLM emitted a complete tool call JSON
-    /// but it wasn't parsed/executed (e.g., due to parsing issues).
     pub fn has_unexecuted_tool_call(&self) -> bool {
-        // Only check the unconsumed portion of the buffer
         let unchecked_buffer = &self.text_buffer[self.last_consumed_position..];
-        if let Some(start_pos) = Self::find_last_tool_call_start(unchecked_buffer) {
-            let json_text = &unchecked_buffer[start_pos..];
-            // If the JSON IS complete, it means there's an unexecuted tool call
-            if let Some(json_end) = Self::find_complete_json_object_end(json_text) {
-                let json_only = &json_text[..=json_end];
-                return serde_json::from_str::<serde_json::Value>(json_only).is_ok();
-            }
-        }
-        false
+        let Some(start_pos) = find_last_tool_call_start(unchecked_buffer) else {
+            return false;
+        };
+
+        let json_text = &unchecked_buffer[start_pos..];
+        let Some(json_end) = find_json_object_end(json_text) else {
+            return false;
+        };
+
+        let json_only = &json_text[..=json_end];
+        serde_json::from_str::<serde_json::Value>(json_only).is_ok()
     }
 
     /// Mark all tool calls up to the current buffer position as consumed/executed.
-    /// This prevents has_unexecuted_tool_call() from returning true for already-executed tools.
     pub fn mark_tool_calls_consumed(&mut self) {
         self.last_consumed_position = self.text_buffer.len();
     }
 
-    /// Check if a partial JSON tool call has been invalidated by subsequent content.
-    ///
-    /// Detects two invalidation cases:
-    /// 1. Unescaped newline inside a JSON string (invalid JSON)
-    /// 2. Newline followed by non-JSON prose (e.g., regular text, not `"`, `{`, `}`, etc.)
-    fn is_json_invalidated(json_text: &str) -> bool {
-        let mut in_string = false;
-        let mut escape_next = false;
-        let mut chars = json_text.char_indices().peekable();
-        
-        while let Some((_, ch)) = chars.next() {
-            if escape_next {
-                escape_next = false;
-                continue;
-            }
-            
-            match ch {
-                '\\' => escape_next = true,
-                '"' => in_string = !in_string,
-                // Unescaped newline inside a string is invalid JSON
-                '\n' if in_string => return true,
-                '\n' if !in_string => {
-                    // Skip whitespace after newline
-                    while let Some(&(_, next_ch)) = chars.peek() {
-                        if next_ch != ' ' && next_ch != '\t' {
-                            break
-                        }
-                        chars.next();
-                    }
-
-                    // Check if next char is valid JSON continuation
-                    if let Some(&(_, next_ch)) = chars.peek() {
-                        // Valid: ", {, }, [, ], :, ,, -, digits, t/f/n (true/false/null), newline
-                        let valid_json_char = matches!(
-                            next_ch,
-                            '"' | '{' | '}' | '[' | ']' | ':' | ',' | '-' | '0'..='9' | 't' | 'f' | 'n' | '\n'
-                        );
-                        if !valid_json_char {
-                            return true;
-                        }
-                    }
-                }
-                _ => {}
-            }
-        }
-
-        false
-    }
-
-    /// Find the end position (byte index) of a complete JSON object in the text.
-    /// Returns None if no complete JSON object is found.
-    pub fn find_complete_json_object_end(text: &str) -> Option<usize> {
-        let mut brace_count = 0;
-        let mut in_string = false;
-        let mut escape_next = false;
-        let mut found_start = false;
-
-        for (i, ch) in text.char_indices() {
-            if escape_next {
-                escape_next = false;
-                continue;
-            }
-
-            match ch {
-                '\\' => escape_next = true,
-                '"' => in_string = !in_string,
-                '{' if !in_string => {
-                    brace_count += 1;
-                    found_start = true;
-                }
-                '}' if !in_string => {
-                    brace_count -= 1;
-                    if brace_count == 0 && found_start {
-                        return Some(i); // Return the byte index of the closing brace
-                    }
-                }
-                _ => {}
-            }
-        }
-
-        None // No complete JSON object found
-    }
-
-    /// Find all code fence ranges in the given text.
-    ///
-    /// Returns a vector of (start, end) byte positions where code fences are.
-    /// Each range represents content INSIDE a fence (between ``` markers).
-    fn find_code_fence_ranges(text: &str) -> Vec<(usize, usize)> {
-        let mut ranges = Vec::new();
-        let mut in_fence = false;
-        let mut fence_start = 0;
-        let mut line_start = 0;
-        
-        for (i, ch) in text.char_indices() {
-            if ch == '\n' {
-                // Check if the line we just finished is a fence marker
-                let line = &text[line_start..i];
-                let trimmed = line.trim_start();
-                
-                if trimmed.starts_with("```") {
-                    let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
-                    if backtick_count >= 3 {
-                        if in_fence {
-                            // Closing fence - record the range
-                            // The range is from after the opening fence line to before this line
-                            ranges.push((fence_start, line_start));
-                            in_fence = false;
-                        } else {
-                            // Opening fence - mark the start (after this line)
-                            fence_start = i + 1; // +1 to skip the newline
-                            in_fence = true;
-                        }
-                    }
-                }
-                
-                line_start = i + 1;
-            }
-        }
-        
-        // If we ended while still in a fence, include everything to the end
-        if in_fence {
-            ranges.push((fence_start, text.len()));
-        }
-        
-        ranges
-    }
-    
-    /// Check if a position is inside any of the fence ranges.
-    fn is_position_in_fence_ranges(pos: usize, ranges: &[(usize, usize)]) -> bool {
-        ranges.iter().any(|(start, end)| pos >= *start && pos < *end)
-    }
-
-    /// Update code fence state based on new content.
-    ///
-    /// Tracks whether we're inside a markdown code fence (``` block).
-    /// Code fences toggle state when we see a line that starts with ```
-    /// (with optional leading whitespace).
-    ///
-    /// This is intentionally simple - we don't try to handle:
-    /// - Fences inside comments (// ```, # ```, etc.)
-    /// - Nested fences (which aren't valid markdown anyway)
-    /// - Indented code blocks (4 spaces)
-    ///
-    /// The simple approach handles 90%+ of real-world cases where LLMs
-    /// show JSON examples in code blocks.
-    fn update_code_fence_state(&mut self, content: &str) {
-        for ch in content.chars() {
-            if ch == '\n' {
-                // End of line - check if this line is a fence marker
-                self.check_and_toggle_fence();
-                self.current_line.clear();
-            } else {
-                self.current_line.push(ch);
-            }
-        }
-    }
-
-    /// Check if current_line is a code fence marker and toggle state if so.
-    ///
-    /// A fence marker is a line that starts with ``` (after optional whitespace).
-    /// The fence can have a language tag (```json, ```rust, etc.) which we ignore.
-    fn check_and_toggle_fence(&mut self) {
-        let trimmed = self.current_line.trim_start();
-        
-        // Must start with at least 3 backticks
-        if !trimmed.starts_with("```") {
-            return;
-        }
-        
-        // Count consecutive backticks
-        let backtick_count = trimmed.chars().take_while(|&c| c == '`').count();
-        if backtick_count < 3 {
-            return;
-        }
-        
-        // This is a fence marker - toggle state
-        self.in_code_fence = !self.in_code_fence;
-        debug!(
-            "Code fence toggled: in_code_fence={} (line: {:?})",
-            self.in_code_fence,
-            self.current_line
-        );
-    }
-
-    /// Reset the parser state for a new message.
-    pub fn reset(&mut self) {
-        self.text_buffer.clear();
-        self.last_consumed_position = 0;
-        self.message_stopped = false;
-        self.in_json_tool_call = false;
-        self.json_tool_start = None;
-        self.in_code_fence = false;
-        self.current_line = String::new();
-    }
-
     /// Get the current text buffer length (for position tracking).
     pub fn text_buffer_len(&self) -> usize {
         self.text_buffer.len()
@@ -637,8 +574,46 @@ impl StreamingToolParser {
     pub fn json_tool_start_position(&self) -> Option<usize> {
         self.json_tool_start
     }
+
+    /// Reset the parser state for a new message.
+    pub fn reset(&mut self) {
+        self.text_buffer.clear();
+        self.last_consumed_position = 0;
+        self.message_stopped = false;
+        self.in_json_tool_call = false;
+        self.json_tool_start = None;
+        self.fence_tracker.reset();
+    }
+
+    // ========================================================================
+    // Static Methods (for external use)
+    // ========================================================================
+
+    /// Find the starting position of the FIRST tool call pattern on its own line.
+    pub fn find_first_tool_call_start(text: &str) -> Option<usize> {
+        find_first_tool_call_start(text)
+    }
+
+    /// Find the starting position of the LAST tool call pattern on its own line.
+    pub fn find_last_tool_call_start(text: &str) -> Option<usize> {
+        find_last_tool_call_start(text)
+    }
+
+    /// Check if a position in text is "on its own line".
+    pub fn is_on_own_line(text: &str, pos: usize) -> bool {
+        is_on_own_line(text, pos)
+    }
+
+    /// Find the end position of a complete JSON object.
+    pub fn find_complete_json_object_end(text: &str) -> Option<usize> {
+        find_json_object_end(text)
+    }
 }
 
+// ============================================================================
+// Tests
+// ============================================================================
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -646,34 +621,27 @@ mod tests {
     #[test]
     fn test_find_complete_json_object_end_simple() {
         let text = r#"{"tool":"shell","args":{"command":"ls"}}"#;
-        assert_eq!(
-            StreamingToolParser::find_complete_json_object_end(text),
-            Some(text.len() - 1)
-        );
+        assert_eq!(find_json_object_end(text), Some(text.len() - 1));
     }
 
     #[test]
     fn test_find_complete_json_object_end_nested() {
         let text = r#"{"tool":"write","args":{"content":"{nested}"}}"#;
-        assert_eq!(
-            StreamingToolParser::find_complete_json_object_end(text),
-            Some(text.len() - 1)
-        );
+        assert_eq!(find_json_object_end(text), Some(text.len() - 1));
     }
 
     #[test]
     fn test_find_complete_json_object_end_incomplete() {
         let text = r#"{"tool":"shell","args":{"command":"ls""#;
-        assert_eq!(StreamingToolParser::find_complete_json_object_end(text), None);
+        assert_eq!(find_json_object_end(text), None);
     }
 
     #[test]
     fn test_tool_call_patterns() {
-        // Test that all patterns are detected
-        assert!(StreamingToolParser::find_first_tool_call_start(r#"{"tool":"test"}"#).is_some());
-        assert!(StreamingToolParser::find_first_tool_call_start(r#"{ "tool":"test"}"#).is_some());
-        assert!(StreamingToolParser::find_first_tool_call_start(r#"{"tool" :"test"}"#).is_some());
-        assert!(StreamingToolParser::find_first_tool_call_start(r#"{ "tool" :"test"}"#).is_some());
+        assert!(find_first_tool_call_start(r#"{"tool":"test"}"#).is_some());
+        assert!(find_first_tool_call_start(r#"{ "tool":"test"}"#).is_some());
+        assert!(find_first_tool_call_start(r#"{"tool" :"test"}"#).is_some());
+        assert!(find_first_tool_call_start(r#"{ "tool" :"test"}"#).is_some());
     }
 
     #[test]
@@ -692,11 +660,8 @@ mod tests {
 
     #[test]
     fn test_multiple_tool_calls_processed_in_order() {
-        // Test that when multiple tool calls arrive together, they are processed
-        // in order (first one first, not last one first)
         let mut parser = StreamingToolParser::new();
-        
-        // Simulate two tool calls arriving in the same chunk
+
         let content = r#"Some text before
 
 {"tool": "shell", "args": {"command": "first"}}
@@ -704,7 +669,7 @@ mod tests {
 {"tool": "shell", "args": {"command": "second"}}
 
 Some text after"#;
-        
+
         let chunk = g3_providers::CompletionChunk {
             content: content.to_string(),
             finished: true,
@@ -713,167 +678,129 @@ Some text after"#;
             stop_reason: None,
             tool_call_streaming: None,
         };
-        
-        let tools = parser.process_chunk(&chunk);
-        
-        // Should find both tool calls
-        assert_eq!(tools.len(), 2, "Expected 2 tool calls, got {}", tools.len());
-        
-        // First tool call should be "first", not "second"
-        assert_eq!(tools[0].tool, "shell");
-        assert_eq!(tools[0].args["command"], "first", 
-            "First tool call should have command 'first', got {:?}", tools[0].args);
-        
-        // Second tool call should be "second"
-        assert_eq!(tools[1].tool, "shell");
-        assert_eq!(tools[1].args["command"], "second",
-            "Second tool call should have command 'second', got {:?}", tools[1].args);
-    }
 
+        let tools = parser.process_chunk(&chunk);
+
+        assert_eq!(tools.len(), 2, "Expected 2 tool calls, got {}", tools.len());
+        assert_eq!(tools[0].args["command"], "first");
+        assert_eq!(tools[1].args["command"], "second");
+    }
 
     #[test]
     fn test_find_first_vs_last_tool_call() {
-        // Both tool calls are on their own lines
         let text = "{\"tool\": \"first\"}\n{\"tool\": \"second\"}";
-        
-        let first_pos = StreamingToolParser::find_first_tool_call_start(text);
-        let last_pos = StreamingToolParser::find_last_tool_call_start(text);
-        
-        assert!(first_pos.is_some(), "Should find first tool call");
-        assert!(last_pos.is_some(), "Should find last tool call");
-        assert!(first_pos.unwrap() < last_pos.unwrap(), 
-            "First position ({:?}) should be less than last position ({:?})", first_pos, last_pos);
+
+        let first_pos = find_first_tool_call_start(text);
+        let last_pos = find_last_tool_call_start(text);
+
+        assert!(first_pos.is_some());
+        assert!(last_pos.is_some());
+        assert!(first_pos.unwrap() < last_pos.unwrap());
     }
 
     #[test]
     fn test_inline_tool_call_ignored() {
-        // Tool call pattern inline with other text should NOT be detected
         let text = "Here is an example: {\"tool\": \"shell\"} in text";
-        assert!(StreamingToolParser::find_first_tool_call_start(text).is_none(),
-            "Inline tool call pattern should be ignored");
-        assert!(StreamingToolParser::find_last_tool_call_start(text).is_none(),
-            "Inline tool call pattern should be ignored");
+        assert!(find_first_tool_call_start(text).is_none());
+        assert!(find_last_tool_call_start(text).is_none());
     }
 
     #[test]
     fn test_standalone_tool_call_detected() {
-        // Tool call on its own line (at start of text) should be detected
         let text = r#"{"tool": "shell", "args": {"command": "ls"}}"#;
-        assert!(StreamingToolParser::find_first_tool_call_start(text).is_some(),
-            "Standalone tool call should be detected");
+        assert!(find_first_tool_call_start(text).is_some());
     }
 
     #[test]
     fn test_indented_tool_call_detected() {
-        // Tool call with leading whitespace should be detected
         let text = r#"  {"tool": "shell", "args": {"command": "ls"}}"#;
-        assert!(StreamingToolParser::find_first_tool_call_start(text).is_some(),
-            "Indented tool call should be detected");
+        assert!(find_first_tool_call_start(text).is_some());
     }
 
     #[test]
     fn test_tool_call_after_newline_detected() {
-        // Tool call after a newline should be detected
         let text = "Some prose here\n{\"tool\": \"shell\", \"args\": {}}";
-        let pos = StreamingToolParser::find_first_tool_call_start(text);
-        assert!(pos.is_some(), "Tool call after newline should be detected");
-        assert_eq!(pos.unwrap(), 16, "Should find tool call at position after newline");
+        let pos = find_first_tool_call_start(text);
+        assert!(pos.is_some());
+        assert_eq!(pos.unwrap(), 16);
     }
 
     #[test]
     fn test_inline_ignored_but_standalone_detected() {
-        // Mixed: inline on first line (ignored), standalone on second line (detected)
         let text = "Some text with {\"tool\": \"inline\"} here\n{\"tool\": \"standalone\", \"args\": {}}";
-        let pos = StreamingToolParser::find_first_tool_call_start(text);
-        assert!(pos.is_some(), "Should find the standalone tool call");
-        // The standalone one starts after the newline
-        assert!(pos.unwrap() > 30, "Should skip the inline pattern and find the standalone one");
+        let pos = find_first_tool_call_start(text);
+        assert!(pos.is_some());
+        assert!(pos.unwrap() > 30);
     }
 
     #[test]
     fn test_multiple_inline_patterns_all_ignored() {
-        // Multiple inline patterns on same line - all should be ignored
         let text = "Compare {\"tool\": \"a\"} with {\"tool\": \"b\"}";
-        assert!(StreamingToolParser::find_first_tool_call_start(text).is_none(),
-            "All inline patterns should be ignored");
+        assert!(find_first_tool_call_start(text).is_none());
     }
 
     #[test]
     fn test_is_on_own_line() {
-        // Test the is_on_own_line helper directly
         let text = "prefix {\"tool\":\n          {\"tool\":";
-        
-        // Position 0 is always on its own line
-        assert!(StreamingToolParser::is_on_own_line(text, 0));
-        
-        // Position 7 (after "prefix ") is NOT on its own line
-        assert!(!StreamingToolParser::is_on_own_line(text, 7));
-        
-        // Position after newline with only whitespace before pattern IS on its own line
+
+        assert!(is_on_own_line(text, 0));
+        assert!(!is_on_own_line(text, 7));
+
         let newline_pos = text.find('\n').unwrap();
-        assert!(StreamingToolParser::is_on_own_line(text, newline_pos + 11)); // 10 spaces before {
+        assert!(is_on_own_line(text, newline_pos + 11));
     }
 
     #[test]
     fn test_all_pattern_variants_require_own_line() {
-        // All whitespace variants should require their own line
         let patterns = [
             "text { \"tool\":\"x\"}",
             "text {\"tool\" :\"x\"}",
             "text { \"tool\" :\"x\"}",
         ];
         for pattern in patterns {
-            assert!(StreamingToolParser::find_first_tool_call_start(pattern).is_none(),
-                "Inline pattern '{}' should be ignored", pattern);
+            assert!(
+                find_first_tool_call_start(pattern).is_none(),
+                "Inline pattern '{}' should be ignored",
+                pattern
+            );
         }
     }
 
     #[test]
     fn test_find_code_fence_ranges_simple() {
         let text = "Before\n```\ncode\n```\nAfter";
-        let ranges = StreamingToolParser::find_code_fence_ranges(text);
-        
-        assert_eq!(ranges.len(), 1, "Should find one fence range");
-        
-        // The range should cover "code\n"
+        let ranges = find_code_fence_ranges(text);
+
+        assert_eq!(ranges.len(), 1);
         let (start, end) = ranges[0];
         let inside = &text[start..end];
-        assert!(inside.contains("code"), "Range should contain 'code', got: {:?}", inside);
+        assert!(inside.contains("code"));
     }
 
     #[test]
     fn test_find_code_fence_ranges_multiple() {
         let text = "First:\n```json\ncode1\n```\n\nSecond:\n```\ncode2\n```\nEnd";
-        let ranges = StreamingToolParser::find_code_fence_ranges(text);
-        
-        assert_eq!(ranges.len(), 2, "Should find two fence ranges, got {:?}", ranges);
+        let ranges = find_code_fence_ranges(text);
+        assert_eq!(ranges.len(), 2);
     }
 
     #[test]
     fn test_find_code_fence_ranges_with_tool_json() {
-        // Use a simple string without backticks to avoid raw string issues
         let text = "Example:\n```json\n{\"tool\": \"shell\", \"args\": {}}\n```\nDone.";
-        let ranges = StreamingToolParser::find_code_fence_ranges(text);
-        
-        assert_eq!(ranges.len(), 1, "Should find one fence range");
-        
-        // Find where the tool pattern would be
+        let ranges = find_code_fence_ranges(text);
+
+        assert_eq!(ranges.len(), 1);
+
         let tool_pos = text.find("{\"tool\"").unwrap();
-        
-        // The tool position should be inside the fence range
-        assert!(
-            StreamingToolParser::is_position_in_fence_ranges(tool_pos, &ranges),
-            "Tool pattern at {} should be inside fence range {:?}",
-            tool_pos, ranges
-        );
+        assert!(is_position_in_fence_ranges(tool_pos, &ranges));
     }
 
     #[test]
     fn test_is_position_in_fence_ranges() {
         let ranges = vec![(10, 20), (30, 40)];
-        assert!(!StreamingToolParser::is_position_in_fence_ranges(5, &ranges));
-        assert!(StreamingToolParser::is_position_in_fence_ranges(15, &ranges));
-        assert!(!StreamingToolParser::is_position_in_fence_ranges(25, &ranges));
-        assert!(StreamingToolParser::is_position_in_fence_ranges(35, &ranges));
+        assert!(!is_position_in_fence_ranges(5, &ranges));
+        assert!(is_position_in_fence_ranges(15, &ranges));
+        assert!(!is_position_in_fence_ranges(25, &ranges));
+        assert!(is_position_in_fence_ranges(35, &ranges));
     }
 }