refactoring

2025-12-26 15:16:12 +11:00
parent 7e59e181f7
commit 4c25e43ee4
3 changed files with 1551 additions and 1378 deletions
--- a/crates/g3-core/src/context_window.rs
+++ b/crates/g3-core/src/context_window.rs
@@ -0,0 +1,828 @@
+//! Context window management for conversation history and token tracking.
+//!
+//! This module handles:
+//! - Token counting and usage tracking
+//! - Conversation history management
+//! - Context thinning (reducing context size by saving large tool results to disk)
+//! - Summarization triggers
+
+use g3_providers::{Message, MessageRole, Usage};
+use serde::{Deserialize, Serialize};
+use tracing::{debug, warn};
+
+use crate::paths::get_thinned_dir;
+use crate::ToolCall;
+
+/// Selects how much of the conversation history a thinning pass covers.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ThinScope {
+    /// Incremental pass over the first third of the messages only.
+    FirstThird,
+    /// Aggressive pass over every message (a.k.a. "skinnify").
+    All,
+}
+
+impl ThinScope {
+    /// Past-tense verb used in the success summary shown to the user.
+    fn label(&self) -> &'static str {
+        match *self {
+            Self::FirstThird => "thinned",
+            Self::All => "skinnified",
+        }
+    }
+
+    /// Emoji that prefixes the success summary.
+    fn emoji(&self) -> &'static str {
+        match *self {
+            Self::FirstThird => "🥒",
+            Self::All => "🦴",
+        }
+    }
+
+    /// Prefix of the file names that off-loaded content is written under.
+    fn file_prefix(&self) -> &'static str {
+        match *self {
+            Self::FirstThird => "leaned",
+            Self::All => "skinny",
+        }
+    }
+
+    /// Gerund used in failure and "nothing to do" messages.
+    fn error_action(&self) -> &'static str {
+        match *self {
+            Self::FirstThird => "thinning",
+            Self::All => "skinnifying",
+        }
+    }
+}
+
+/// A pending edit to one message of the conversation history.
+///
+/// Modifications are collected first and applied afterwards, so the history
+/// is not mutated while it is being scanned.
+#[derive(Debug)]
+enum ThinModification {
+    /// Replace the entire message content.
+    ///
+    /// * `index` - position of the message in `conversation_history`
+    /// * `new_content` - text that replaces the message's current content
+    /// * `chars_saved` - savings attributed to this replacement, added to the
+    ///   total reported by the thinning pass
+    ReplaceContent { index: usize, new_content: String, chars_saved: usize },
+}
+
+/// Conversation history together with the token accounting for it.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ContextWindow {
+    /// Tokens attributed to the messages currently held in `conversation_history`.
+    pub used_tokens: u32,
+    /// Capacity of the window; the denominator for `percentage_used`.
+    pub total_tokens: u32,
+    /// Running total of tokens across all interactions (never reset by thinning).
+    pub cumulative_tokens: u32, // Track cumulative tokens across all interactions
+    /// Messages in the order they were added.
+    pub conversation_history: Vec<Message>,
+    /// 10%-step threshold recorded by the most recent incremental thinning pass.
+    pub last_thinning_percentage: u32, // Track the last percentage at which we thinned
+}
+
+impl ContextWindow {
+    /// Create an empty context window with a capacity of `total_tokens`.
+    ///
+    /// All counters start at zero and the history starts empty.
+    pub fn new(total_tokens: u32) -> Self {
+        let conversation_history: Vec<Message> = Vec::new();
+        Self {
+            total_tokens,
+            conversation_history,
+            used_tokens: 0,
+            cumulative_tokens: 0,
+            last_thinning_percentage: 0,
+        }
+    }
+
+    /// Append `message` to the history, estimating its token cost from its text.
+    ///
+    /// Delegates to `add_message_with_tokens` with no provider-supplied count.
+    pub fn add_message(&mut self, message: Message) {
+        self.add_message_with_tokens(message, None);
+    }
+
+    /// Append `message` to the history and charge its tokens to the window.
+    ///
+    /// Messages whose content is empty or whitespace-only are dropped (with a
+    /// warning) so they are never sent to the provider.
+    ///
+    /// # Arguments
+    /// * `message` - the message to store
+    /// * `tokens` - provider-reported token count; when `None` the count is
+    ///   estimated from the message text
+    ///
+    /// Both `used_tokens` and `cumulative_tokens` saturate at `u32::MAX`
+    /// instead of overflowing.
+    pub fn add_message_with_tokens(&mut self, message: Message, tokens: Option<u32>) {
+        // Skip messages with empty content to avoid API errors
+        if message.content.trim().is_empty() {
+            warn!("Skipping empty message to avoid API error");
+            return;
+        }
+
+        // Use provided token count if available, otherwise estimate
+        let token_count = tokens.unwrap_or_else(|| Self::estimate_tokens(&message.content));
+        // Saturate: a plain `+=` panics in debug builds and wraps in release builds.
+        self.used_tokens = self.used_tokens.saturating_add(token_count);
+        self.cumulative_tokens = self.cumulative_tokens.saturating_add(token_count);
+        self.conversation_history.push(message);
+
+        debug!(
+            "Added message with {} tokens (used: {}/{}, cumulative: {})",
+            token_count, self.used_tokens, self.total_tokens, self.cumulative_tokens
+        );
+    }
+
+    /// Record the usage reported by a provider response.
+    ///
+    /// NOTE: This only updates `cumulative_tokens` (total API usage tracking).
+    /// It does NOT update `used_tokens` because:
+    /// 1. prompt_tokens represents the ENTIRE context sent to the API (already tracked via add_message)
+    /// 2. completion_tokens will be tracked when the assistant message is added via add_message
+    ///
+    /// Adding total_tokens to `used_tokens` here would cause double/triple
+    /// counting and break the 80% threshold check.
+    ///
+    /// The cumulative counter saturates at `u32::MAX` instead of overflowing.
+    pub fn update_usage_from_response(&mut self, usage: &Usage) {
+        // Every response reports the whole prompt again, so this counter grows
+        // quickly; saturate rather than panic (debug) or wrap (release).
+        self.cumulative_tokens = self.cumulative_tokens.saturating_add(usage.total_tokens);
+
+        debug!(
+            "Updated cumulative tokens: {} (used: {}/{}, cumulative: {})",
+            usage.total_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
+        );
+    }
+
+    /// Heuristically estimate how many tokens `text` occupies.
+    ///
+    /// Text containing `{`, a code fence or `fn ` is treated as code/JSON at
+    /// roughly 3 bytes per token; anything else is treated as prose at roughly
+    /// 4 bytes per token. The result is padded by 10% and rounded up.
+    pub fn estimate_tokens(text: &str) -> u32 {
+        const CODE_MARKERS: [&str; 3] = ["{", "```", "fn "];
+
+        let looks_like_code = CODE_MARKERS.iter().any(|marker| text.contains(*marker));
+        let bytes_per_token: f32 = if looks_like_code { 3.0 } else { 4.0 };
+
+        let unpadded = (text.len() as f32 / bytes_per_token).ceil() as u32;
+        // 10% safety margin: over-estimating is cheaper than overrunning the window.
+        (unpadded as f32 * 1.1).ceil() as u32
+    }
+
+    /// Deprecated alias for `update_usage_from_response`; prefer calling that directly.
+    pub fn update_usage(&mut self, usage: &Usage) {
+        // Deprecated: Use update_usage_from_response instead
+        self.update_usage_from_response(usage);
+    }
+
+    /// Add `new_tokens` to the cumulative total (for streaming, when no provider
+    /// usage data is available).
+    ///
+    /// NOTE: This only updates `cumulative_tokens`, not `used_tokens`.
+    /// The assistant message will be added via add_message which tracks used_tokens.
+    ///
+    /// The cumulative counter saturates at `u32::MAX` instead of overflowing.
+    pub fn add_streaming_tokens(&mut self, new_tokens: u32) {
+        // Saturate: a plain `+=` panics in debug builds and wraps in release builds.
+        self.cumulative_tokens = self.cumulative_tokens.saturating_add(new_tokens);
+        debug!(
+            "Updated cumulative streaming tokens: {} (used: {}/{}, cumulative: {})",
+            new_tokens, self.used_tokens, self.total_tokens, self.cumulative_tokens
+        );
+    }
+
+    /// Share of the window currently in use, as a percentage (0.0 to 100.0+).
+    ///
+    /// A window with zero capacity reports 0.0 rather than dividing by zero.
+    pub fn percentage_used(&self) -> f32 {
+        match self.total_tokens {
+            0 => 0.0,
+            capacity => (self.used_tokens as f32 / capacity as f32) * 100.0,
+        }
+    }
+
+    /// Drop every non-system message and reset the thinning state.
+    ///
+    /// Used by the /clear command to start fresh. System messages (system
+    /// prompt, README, etc.) are kept in their original order, and
+    /// `used_tokens` is re-estimated from what remains.
+    pub fn clear_conversation(&mut self) {
+        self.conversation_history
+            .retain(|message| matches!(message.role, MessageRole::System));
+
+        let mut remaining_tokens = 0;
+        for message in &self.conversation_history {
+            remaining_tokens += Self::estimate_tokens(&message.content);
+        }
+        self.used_tokens = remaining_tokens;
+
+        self.last_thinning_percentage = 0;
+    }
+
+    /// Tokens still available before the window is full.
+    ///
+    /// Saturates at zero when `used_tokens` exceeds `total_tokens`.
+    pub fn remaining_tokens(&self) -> u32 {
+        self.total_tokens.saturating_sub(self.used_tokens)
+    }
+
+    /// Whether the conversation should be summarized now.
+    ///
+    /// True once usage reaches 80% of the window, or once more than 150,000
+    /// tokens are in use regardless of the window size.
+    pub fn should_summarize(&self) -> bool {
+        // Relative trigger: the window is nearly full.
+        if self.percentage_used() >= 80.0 {
+            return true;
+        }
+
+        // Absolute trigger: most models start having issues around 150k tokens,
+        // even when their advertised context is larger.
+        self.used_tokens > 150_000
+    }
+
+    /// Build the prompt that asks the model to summarize the conversation so far.
+    ///
+    /// The returned text is a fixed template; it does not read the
+    /// conversation history.
+    pub fn create_summary_prompt(&self) -> String {
+        "Please provide a comprehensive summary of our conversation so far. Include:
+
+1. **Main Topic/Goal**: What is the primary task or objective being worked on?
+2. **Key Decisions**: What important decisions have been made?
+3. **Actions Taken**: What specific actions, commands, or code changes have been completed?
+4. **Current State**: What is the current status of the work?
+5. **Important Context**: Any critical information, file paths, configurations, or constraints that should be remembered?
+6. **Pending Items**: What remains to be done or what was the user's last request?
+
+Format this as a detailed but concise summary that can be used to resume the conversation from scratch while maintaining full context.".to_string()
+    }
+
+    /// Replace the conversation history with a summary of it.
+    ///
+    /// The rebuilt history is, in order: the original first message (the
+    /// system prompt), the second message if it is a system message containing
+    /// "Project README" or "Agent Configuration", the summary as a system
+    /// message, and finally `latest_user_message` when provided.
+    ///
+    /// # Returns
+    /// The number of content bytes removed from the history (zero if the new
+    /// history is not smaller).
+    pub fn reset_with_summary(
+        &mut self,
+        summary: String,
+        latest_user_message: Option<String>,
+    ) -> usize {
+        let content_size =
+            |history: &[Message]| -> usize { history.iter().map(|m| m.content.len()).sum() };
+        let size_before = content_size(&self.conversation_history);
+
+        // Capture what must survive the reset before wiping the history.
+        let system_prompt = self.conversation_history.first().cloned();
+        let readme = self
+            .conversation_history
+            .get(1)
+            .filter(|msg| {
+                matches!(msg.role, MessageRole::System)
+                    && (msg.content.contains("Project README")
+                        || msg.content.contains("Agent Configuration"))
+            })
+            .cloned();
+
+        self.conversation_history.clear();
+        self.used_tokens = 0;
+
+        let summary_message = Message::new(
+            MessageRole::System,
+            format!("Previous conversation summary:\n\n{}", summary),
+        );
+        let user_message =
+            latest_user_message.map(|text| Message::new(MessageRole::User, text));
+
+        // The system prompt must come first (critical invariant); absent
+        // pieces are simply skipped.
+        let rebuilt = system_prompt
+            .into_iter()
+            .chain(readme)
+            .chain(Some(summary_message))
+            .chain(user_message);
+        for message in rebuilt {
+            self.add_message(message);
+        }
+
+        size_before.saturating_sub(content_size(&self.conversation_history))
+    }
+
+    /// Whether an incremental thinning pass is due.
+    ///
+    /// Thinning fires once per 10% step at 50%, 60%, 70% and 80% usage: it is
+    /// due when the current step is above the step recorded by the last pass.
+    pub fn should_thin(&self) -> bool {
+        let percent = self.percentage_used() as u32;
+        if percent < 50 {
+            return false;
+        }
+
+        // Round down to the nearest multiple of 10.
+        let step = percent - percent % 10;
+        step > self.last_thinning_percentage && step <= 80
+    }
+
+    /// Thin the context: replace large tool results and large tool-call
+    /// arguments with references to files holding the original text.
+    ///
+    /// # Arguments
+    /// * `session_id` - If provided, thinned content is saved to .g3/session/<session_id>/thinned/
+    /// * `scope` - Controls which messages to process (first third or all)
+    ///
+    /// # Returns
+    /// A tuple of (summary message, chars saved). If the output directory
+    /// cannot be created, the tuple carries the failure message and 0.
+    pub fn thin_context_with_scope(
+        &mut self,
+        session_id: Option<&str>,
+        scope: ThinScope,
+    ) -> (String, usize) {
+        let percent_before = self.percentage_used() as u32;
+
+        // Only incremental passes advance the threshold bookkeeping.
+        if matches!(scope, ThinScope::FirstThird) {
+            self.last_thinning_percentage = percent_before - percent_before % 10;
+        }
+
+        let message_count = self.conversation_history.len();
+        let end_index = match scope {
+            ThinScope::FirstThird => std::cmp::max(message_count / 3, 1),
+            ThinScope::All => message_count,
+        };
+
+        // Session directory when a session id is known, otherwise ~/tmp.
+        let output_dir = match Self::resolve_thinned_dir(session_id, scope) {
+            Ok(dir) => dir,
+            Err(failure) => return (failure, 0),
+        };
+
+        // Plan first, apply second: planning borrows the history immutably.
+        let planned =
+            self.collect_thin_modifications(end_index, &output_dir, scope.file_prefix());
+
+        let mut results_thinned = 0;
+        let mut calls_thinned = 0;
+        let mut total_saved = 0;
+
+        for ThinModification::ReplaceContent { index, new_content, chars_saved } in planned {
+            if let Some(msg) = self.conversation_history.get_mut(index) {
+                // Classify by the content being replaced: tool result vs. tool call.
+                if msg.content.starts_with("Tool result:") {
+                    results_thinned += 1;
+                } else {
+                    calls_thinned += 1;
+                }
+                msg.content = new_content;
+                total_saved += chars_saved;
+            }
+        }
+
+        // Message contents changed, so the token estimate is stale.
+        self.recalculate_tokens();
+
+        self.build_thin_result_message(
+            scope,
+            percent_before,
+            results_thinned,
+            calls_thinned,
+            total_saved,
+        )
+    }
+
+    /// Plan the thinning edits for the first `end_index` messages.
+    ///
+    /// The history itself is not mutated here, but the helpers this calls
+    /// write the off-loaded content to files under `tmp_dir`.
+    ///
+    /// Two kinds of message are considered:
+    /// * user messages starting with "Tool result:" that are longer than 500
+    ///   bytes and do not follow a TODO tool call;
+    /// * assistant messages, which are handed to `create_tool_call_modification`.
+    fn collect_thin_modifications(
+        &self,
+        end_index: usize,
+        tmp_dir: &str,
+        file_prefix: &str,
+    ) -> Vec<ThinModification> {
+        self.conversation_history
+            .iter()
+            .enumerate()
+            .take(end_index)
+            .filter_map(|(i, message)| match message.role {
+                MessageRole::User => {
+                    let is_large_tool_result = message.content.starts_with("Tool result:")
+                        && message.content.len() > 500;
+                    // TODO tool results are kept verbatim.
+                    if is_large_tool_result && !self.is_todo_tool_result(i) {
+                        Self::create_tool_result_modification(
+                            &message.content,
+                            i,
+                            tmp_dir,
+                            file_prefix,
+                        )
+                    } else {
+                        None
+                    }
+                }
+                MessageRole::Assistant => Self::create_tool_call_modification(
+                    &message.content,
+                    i,
+                    tmp_dir,
+                    file_prefix,
+                ),
+                _ => None,
+            })
+            .collect()
+    }
+
+    /// Backward-compatible wrapper for thin_context (first third only).
+    ///
+    /// Equivalent to `thin_context_with_scope(session_id, ThinScope::FirstThird)`;
+    /// returns the same (summary message, chars saved) tuple.
+    pub fn thin_context(&mut self, session_id: Option<&str>) -> (String, usize) {
+        self.thin_context_with_scope(session_id, ThinScope::FirstThird)
+    }
+
+    /// Backward-compatible wrapper for thin_context_all (entire history).
+    ///
+    /// Equivalent to `thin_context_with_scope(session_id, ThinScope::All)`;
+    /// returns the same (summary message, chars saved) tuple.
+    pub fn thin_context_all(&mut self, session_id: Option<&str>) -> (String, usize) {
+        self.thin_context_with_scope(session_id, ThinScope::All)
+    }
+
+    /// Pick, and create if necessary, the directory that receives thinned content.
+    ///
+    /// With a session id the directory comes from `get_thinned_dir`; without
+    /// one it falls back to `~/tmp`.
+    ///
+    /// # Errors
+    /// Returns a user-facing message when the directory cannot be created; the
+    /// underlying I/O error is logged as a warning.
+    fn resolve_thinned_dir(session_id: Option<&str>, scope: ThinScope) -> Result<String, String> {
+        match session_id {
+            Some(sid) => {
+                let thinned_dir = get_thinned_dir(sid);
+                std::fs::create_dir_all(&thinned_dir).map_err(|e| {
+                    warn!("Failed to create thinned directory: {}", e);
+                    format!(
+                        "⚠️  Context {} failed: could not create thinned directory",
+                        scope.error_action()
+                    )
+                })?;
+                Ok(thinned_dir.to_string_lossy().to_string())
+            }
+            None => {
+                let fallback_dir = shellexpand::tilde("~/tmp").to_string();
+                std::fs::create_dir_all(&fallback_dir).map_err(|e| {
+                    warn!("Failed to create ~/tmp directory: {}", e);
+                    format!(
+                        "⚠️  Context {} failed: could not create ~/tmp directory",
+                        scope.error_action()
+                    )
+                })?;
+                Ok(fallback_dir)
+            }
+        }
+    }
+
+    /// Whether the message at index `i` directly follows an assistant message
+    /// that invoked `todo_read` or `todo_write`.
+    ///
+    /// Returns false for the first message and for out-of-range indices.
+    fn is_todo_tool_result(&self, i: usize) -> bool {
+        // Both compact and space-after-colon JSON spellings are recognized.
+        const TODO_CALL_MARKERS: [&str; 4] = [
+            r#""tool":"todo_read""#,
+            r#""tool":"todo_write""#,
+            r#""tool": "todo_read""#,
+            r#""tool": "todo_write""#,
+        ];
+
+        let previous = i
+            .checked_sub(1)
+            .and_then(|prev_index| self.conversation_history.get(prev_index));
+
+        match previous {
+            Some(prev) if matches!(prev.role, MessageRole::Assistant) => TODO_CALL_MARKERS
+                .iter()
+                .any(|marker| prev.content.contains(*marker)),
+            _ => false,
+        }
+    }
+
+    /// Off-load a tool result to a file and plan its replacement.
+    ///
+    /// The full `content` is written to
+    /// `<tmp_dir>/<file_prefix>_tool_result_<unix-seconds>_<index>.txt` and the
+    /// message is to be replaced by a one-line pointer to that file.
+    ///
+    /// # Returns
+    /// `None` when the pointer text would not be shorter than the original
+    /// (nothing is written in that case) or when the file cannot be written;
+    /// otherwise the planned replacement with the number of bytes saved.
+    fn create_tool_result_modification(
+        content: &str,
+        index: usize,
+        tmp_dir: &str,
+        file_prefix: &str,
+    ) -> Option<ThinModification> {
+        let timestamp = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs();
+        let filename = format!("{}_tool_result_{}_{}.txt", file_prefix, timestamp, index);
+        let file_path = format!("{}/{}", tmp_dir, filename);
+
+        let original_len = content.len();
+        let new_content = format!("Tool result saved to {}", file_path);
+
+        // A long directory path can make the pointer longer than the message.
+        // A plain subtraction would then underflow, and the "thinned" message
+        // would grow, so bail out before touching the disk.
+        let chars_saved = original_len
+            .checked_sub(new_content.len())
+            .filter(|&saved| saved > 0)?;
+
+        if let Err(e) = std::fs::write(&file_path, content) {
+            warn!("Failed to write thinned content to {}: {}", file_path, e);
+            return None;
+        }
+
+        debug!(
+            "Thinned tool result {} ({} chars) to {}",
+            index, original_len, file_path
+        );
+
+        Some(ThinModification::ReplaceContent {
+            index,
+            new_content,
+            chars_saved,
+        })
+    }
+
+    /// Plan the thinning of a tool call embedded in an assistant message.
+    ///
+    /// Locates the first JSON object that opens with a `"tool"` key, parses it
+    /// as a `ToolCall`, and off-loads the large argument of `write_file`
+    /// (`content`) or `str_replace` (`diff`) calls. Text before and after the
+    /// JSON object is preserved.
+    ///
+    /// # Returns
+    /// `None` when no tool call is found, the JSON does not parse, or nothing
+    /// was large enough to thin.
+    fn create_tool_call_modification(
+        content: &str,
+        index: usize,
+        tmp_dir: &str,
+        file_prefix: &str,
+    ) -> Option<ThinModification> {
+        // Spellings are tried in this order; the first one present wins.
+        const TOOL_CALL_OPENERS: [&str; 4] = [
+            r#"{"tool":"#,
+            r#"{ "tool":"#,
+            r#"{"tool" :"#,
+            r#"{ "tool" :"#,
+        ];
+        let tool_call_start = TOOL_CALL_OPENERS
+            .iter()
+            .find_map(|opener| content.find(*opener))?;
+
+        // Split the message into: text before | JSON object | text after.
+        let (prefix, from_json) = content.split_at(tool_call_start);
+        let json_end = Self::find_json_end(from_json)?;
+        let (json_str, suffix) = from_json.split_at(json_end + 1);
+
+        let mut tool_call: ToolCall = serde_json::from_str(json_str).ok()?;
+
+        let thinned = if tool_call.tool == "write_file" {
+            Self::thin_write_file_args(&tool_call.args, index, tmp_dir, file_prefix)
+        } else if tool_call.tool == "str_replace" {
+            Self::thin_str_replace_args(&tool_call.args, index, tmp_dir, file_prefix)
+        } else {
+            None
+        };
+        let (chars_saved, new_args) = thinned?;
+        tool_call.args = new_args;
+
+        // Re-serialize the tool call and stitch the message back together.
+        let new_json = serde_json::to_string(&tool_call).ok()?;
+        let new_content = format!("{}{}{}", prefix, new_json, suffix);
+
+        Some(ThinModification::ReplaceContent {
+            index,
+            new_content,
+            chars_saved,
+        })
+    }
+
+    /// Thin `write_file` arguments by saving the `content` argument to a file.
+    ///
+    /// Only string `content` values longer than 500 bytes are thinned. The
+    /// text is written to
+    /// `<tmp_dir>/<file_prefix>_write_file_content_<unix-seconds>_<index>.txt`
+    /// and replaced in the arguments by a placeholder naming that file.
+    ///
+    /// # Returns
+    /// `(chars_saved, new_args)` if thinned, where `chars_saved` is the content
+    /// length minus the placeholder length. `None` when there is nothing to
+    /// thin, when the placeholder would not be shorter than the content, or
+    /// when the file cannot be written (logged as a warning).
+    fn thin_write_file_args(
+        args: &serde_json::Value,
+        index: usize,
+        tmp_dir: &str,
+        file_prefix: &str,
+    ) -> Option<(usize, serde_json::Value)> {
+        let args_obj = args.as_object()?;
+        let content_str = args_obj.get("content")?.as_str()?;
+        let content_len = content_str.len();
+
+        if content_len <= 500 {
+            return None;
+        }
+
+        let timestamp = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs();
+        let filename = format!("{}_write_file_content_{}_{}.txt", file_prefix, timestamp, index);
+        let file_path = format!("{}/{}", tmp_dir, filename);
+
+        // The placeholder stays in the context, so it counts against the
+        // savings; skip entirely if it would not make the message smaller.
+        let placeholder = format!("<content saved to {}>", file_path);
+        let chars_saved = content_len
+            .checked_sub(placeholder.len())
+            .filter(|&saved| saved > 0)?;
+
+        if let Err(e) = std::fs::write(&file_path, content_str) {
+            warn!("Failed to write thinned content to {}: {}", file_path, e);
+            return None;
+        }
+
+        let mut new_args = args_obj.clone();
+        new_args.insert("content".to_string(), serde_json::Value::String(placeholder));
+
+        debug!(
+            "Thinned write_file content {} ({} chars) to {}",
+            index, content_len, file_path
+        );
+
+        Some((chars_saved, serde_json::Value::Object(new_args)))
+    }
+
+    /// Thin `str_replace` arguments by saving the `diff` argument to a file.
+    ///
+    /// Only string `diff` values longer than 500 bytes are thinned. The text
+    /// is written to
+    /// `<tmp_dir>/<file_prefix>_str_replace_diff_<unix-seconds>_<index>.txt`
+    /// and replaced in the arguments by a placeholder naming that file.
+    ///
+    /// # Returns
+    /// `(chars_saved, new_args)` if thinned, where `chars_saved` is the diff
+    /// length minus the placeholder length. `None` when there is nothing to
+    /// thin, when the placeholder would not be shorter than the diff, or when
+    /// the file cannot be written (logged as a warning).
+    fn thin_str_replace_args(
+        args: &serde_json::Value,
+        index: usize,
+        tmp_dir: &str,
+        file_prefix: &str,
+    ) -> Option<(usize, serde_json::Value)> {
+        let args_obj = args.as_object()?;
+        let diff_str = args_obj.get("diff")?.as_str()?;
+        let diff_len = diff_str.len();
+
+        if diff_len <= 500 {
+            return None;
+        }
+
+        let timestamp = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs();
+        let filename = format!("{}_str_replace_diff_{}_{}.txt", file_prefix, timestamp, index);
+        let file_path = format!("{}/{}", tmp_dir, filename);
+
+        // The placeholder stays in the context, so it counts against the
+        // savings; skip entirely if it would not make the message smaller.
+        let placeholder = format!("<diff saved to {}>", file_path);
+        let chars_saved = diff_len
+            .checked_sub(placeholder.len())
+            .filter(|&saved| saved > 0)?;
+
+        if let Err(e) = std::fs::write(&file_path, diff_str) {
+            warn!("Failed to write thinned content to {}: {}", file_path, e);
+            return None;
+        }
+
+        let mut new_args = args_obj.clone();
+        new_args.insert("diff".to_string(), serde_json::Value::String(placeholder));
+
+        debug!(
+            "Thinned str_replace diff {} ({} chars) to {}",
+            index, diff_len, file_path
+        );
+
+        Some((chars_saved, serde_json::Value::Object(new_args)))
+    }
+
+    /// Compose the user-facing summary of a thinning pass.
+    ///
+    /// # Arguments
+    /// * `scope` - selects the emoji, verb and scope suffix of the message
+    /// * `current_percentage` - window usage measured before thinning
+    /// * `leaned_count` - number of tool results replaced
+    /// * `tool_call_leaned_count` - number of tool-call messages replaced
+    /// * `chars_saved` - total savings to report
+    ///
+    /// # Returns
+    /// `(message, chars_saved)`; when nothing was thinned the message says so
+    /// and the second element is 0.
+    fn build_thin_result_message(
+        &self,
+        scope: ThinScope,
+        current_percentage: u32,
+        leaned_count: usize,
+        tool_call_leaned_count: usize,
+        chars_saved: usize,
+    ) -> (String, usize) {
+        let emoji = scope.emoji();
+        let label = scope.label();
+        let scope_desc = if matches!(scope, ThinScope::All) {
+            " across entire history"
+        } else {
+            ""
+        };
+
+        // (any tool results thinned?, any tool calls thinned?)
+        let message = match (leaned_count > 0, tool_call_leaned_count > 0) {
+            (true, true) => format!(
+                "{} Context {} at {}%: {} tool results + {} tool calls{}, ~{} chars saved",
+                emoji, label, current_percentage, leaned_count, tool_call_leaned_count, scope_desc, chars_saved
+            ),
+            (true, false) => format!(
+                "{} Context {} at {}%: {} tool results{}, ~{} chars saved",
+                emoji, label, current_percentage, leaned_count, scope_desc, chars_saved
+            ),
+            (false, true) => format!(
+                "{} Context {} at {}%: {} tool calls{}, ~{} chars saved",
+                emoji, label, current_percentage, tool_call_leaned_count, scope_desc, chars_saved
+            ),
+            (false, false) => {
+                return (
+                    format!(
+                        "ℹ Context {} triggered at {}% but no large tool results or tool calls found{}",
+                        scope.error_action(), current_percentage, scope_desc
+                    ),
+                    0,
+                );
+            }
+        };
+
+        (message, chars_saved)
+    }
+
+    /// Re-estimate `used_tokens` from the messages currently in the history.
+    ///
+    /// Provider-reported counts are not retained per message, so every message
+    /// is re-costed with `estimate_tokens`.
+    fn recalculate_tokens(&mut self) {
+        let total: u32 = self
+            .conversation_history
+            .iter()
+            .map(|message| Self::estimate_tokens(&message.content))
+            .sum();
+        self.used_tokens = total;
+
+        debug!("Recalculated tokens after thinning: {} tokens", total);
+    }
+
+    /// Find the byte index of the `}` that closes the first top-level JSON
+    /// object in `json_str`.
+    ///
+    /// Braces inside string literals are ignored, and backslash escapes are
+    /// honoured only inside string literals.
+    ///
+    /// # Returns
+    /// `Some(index)` of the closing brace, or `None` when the object is never
+    /// closed or a `}` appears before any `{`.
+    pub fn find_json_end(json_str: &str) -> Option<usize> {
+        let mut depth: usize = 0;
+        let mut in_string = false;
+        let mut escaped = false;
+
+        for (i, ch) in json_str.char_indices() {
+            if in_string {
+                // Inside a string only the terminating quote matters, and an
+                // escaped character can never terminate the string.
+                if escaped {
+                    escaped = false;
+                } else if ch == '\\' {
+                    escaped = true;
+                } else if ch == '"' {
+                    in_string = false;
+                }
+                continue;
+            }
+
+            match ch {
+                '"' => in_string = true,
+                '{' => depth += 1,
+                '}' => {
+                    // A closing brace with nothing open means the input is
+                    // not a JSON object; do not guess at a later match.
+                    depth = depth.checked_sub(1)?;
+                    if depth == 0 {
+                        return Some(i);
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        None
+    }
+}
+
+// Unit tests for the pure parts of the context window: counters, thresholds,
+// token estimation, JSON scanning and `ThinScope` strings. Nothing here
+// touches the filesystem.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // A fresh window has zeroed counters and an empty history.
+    #[test]
+    fn test_new_context_window() {
+        let cw = ContextWindow::new(100_000);
+        assert_eq!(cw.used_tokens, 0);
+        assert_eq!(cw.total_tokens, 100_000);
+        assert_eq!(cw.cumulative_tokens, 0);
+        assert!(cw.conversation_history.is_empty());
+    }
+
+    #[test]
+    fn test_percentage_used() {
+        let mut cw = ContextWindow::new(100);
+        cw.used_tokens = 50;
+        // Compared with a tolerance because the percentage is an f32.
+        assert!((cw.percentage_used() - 50.0).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_remaining_tokens() {
+        let mut cw = ContextWindow::new(100);
+        cw.used_tokens = 30;
+        assert_eq!(cw.remaining_tokens(), 70);
+    }
+
+    // The percentage trigger fires at exactly 80%, not before.
+    #[test]
+    fn test_should_summarize_at_80_percent() {
+        let mut cw = ContextWindow::new(100);
+        cw.used_tokens = 79;
+        assert!(!cw.should_summarize());
+        cw.used_tokens = 80;
+        assert!(cw.should_summarize());
+    }
+
+    // The absolute trigger fires above 150k tokens even at ~15% usage.
+    #[test]
+    fn test_should_summarize_at_absolute_limit() {
+        let mut cw = ContextWindow::new(1_000_000);
+        cw.used_tokens = 150_001;
+        assert!(cw.should_summarize());
+    }
+
+    #[test]
+    fn test_should_thin_thresholds() {
+        let mut cw = ContextWindow::new(100);
+        
+        // Below 50% - should not thin
+        cw.used_tokens = 49;
+        assert!(!cw.should_thin());
+        
+        // At 50% - should thin (first time)
+        cw.used_tokens = 50;
+        assert!(cw.should_thin());
+        
+        // After thinning at 50%, shouldn't thin again until 60%
+        cw.last_thinning_percentage = 50;
+        cw.used_tokens = 55;
+        assert!(!cw.should_thin());
+        
+        // At 60% - should thin again
+        cw.used_tokens = 60;
+        assert!(cw.should_thin());
+    }
+
+    #[test]
+    fn test_estimate_tokens_regular_text() {
+        let text = "Hello world, this is a test.";
+        let tokens = ContextWindow::estimate_tokens(text);
+        // ~28 chars / 4 * 1.1 = ~8 tokens
+        assert!(tokens > 0 && tokens < 20);
+    }
+
+    #[test]
+    fn test_estimate_tokens_code() {
+        let code = "fn main() { println!(\"hello\"); }";
+        let tokens = ContextWindow::estimate_tokens(code);
+        // Code uses 3 chars per token estimate
+        assert!(tokens > 0);
+    }
+
+    // The returned value is the byte index of the matching closing brace.
+    #[test]
+    fn test_find_json_end() {
+        assert_eq!(ContextWindow::find_json_end("{}"), Some(1));
+        assert_eq!(ContextWindow::find_json_end(r#"{"a": 1}"#), Some(7));
+        assert_eq!(ContextWindow::find_json_end(r#"{"a": {"b": 2}}"#), Some(14));
+        assert_eq!(ContextWindow::find_json_end("{incomplete"), None);
+    }
+
+    #[test]
+    fn test_thin_scope_properties() {
+        assert_eq!(ThinScope::FirstThird.emoji(), "🥒");
+        assert_eq!(ThinScope::All.emoji(), "🦴");
+        assert_eq!(ThinScope::FirstThird.label(), "thinned");
+        assert_eq!(ThinScope::All.label(), "skinnified");
+    }
+}
--- a/crates/g3-core/src/lib.rs
+++ b/crates/g3-core/src/lib.rs
--- a/crates/g3-core/src/tool_definitions.rs
+++ b/crates/g3-core/src/tool_definitions.rs
@@ -0,0 +1,705 @@
+//! Tool definitions for the agent's available tools.
+//!
+//! This module contains the JSON schema definitions for all tools that can be
+//! used by the agent when interacting with LLM providers that support native
+//! tool calling.
+
+use g3_providers::Tool;
+use serde_json::json;
+
+/// Configuration for which optional tool sets to enable.
+///
+/// Every flag is `false` by default (via `Default`), which leaves only the core
+/// tools in the list returned by `create_tool_definitions`.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub struct ToolConfig {
+    /// Include the WebDriver browser automation tool set.
+    pub webdriver: bool,
+    /// Include the macOS Accessibility tool set.
+    pub macax: bool,
+    /// Include the computer control / vision-guided tool set.
+    pub computer_control: bool,
+}
+
+impl ToolConfig {
+    /// Build a configuration from explicit flags.
+    ///
+    /// The arguments map one-to-one onto the struct fields, in declaration order:
+    /// `webdriver`, `macax`, `computer_control`.
+    ///
+    /// This is a `const fn`, so it can also initialise `const`/`static` values.
+    #[must_use]
+    pub const fn new(webdriver: bool, macax: bool, computer_control: bool) -> Self {
+        Self {
+            webdriver,
+            macax,
+            computer_control,
+        }
+    }
+}
+
+/// Build the tool definitions advertised to native tool calling providers.
+///
+/// The core tools always come first; each optional set enabled in `config` is
+/// appended after them in the order webdriver, macax, computer control.
+pub fn create_tool_definitions(config: ToolConfig) -> Vec<Tool> {
+    // Pair every optional set's flag with the function that builds it, listed in
+    // the order the sets are appended.
+    let optional_sets: [(bool, fn() -> Vec<Tool>); 3] = [
+        (config.webdriver, create_webdriver_tools),
+        (config.macax, create_macax_tools),
+        (config.computer_control, create_computer_control_tools),
+    ];
+
+    let mut definitions = create_core_tools();
+    for (enabled, build_set) in optional_sets.iter() {
+        if *enabled {
+            definitions.extend(build_set());
+        }
+    }
+    definitions
+}
+
+/// Build the core tool set, which is included regardless of `ToolConfig`.
+fn create_core_tools() -> Vec<Tool> {
+    // Local constructor so each entry reads as name / description / schema.
+    let tool = |name: &str, description: &str, input_schema| Tool {
+        name: name.to_string(),
+        description: description.to_string(),
+        input_schema,
+    };
+    // Schema shared by the tools that take no arguments.
+    let no_args = || json!({ "type": "object", "properties": {}, "required": [] });
+
+    vec![
+        tool(
+            "shell",
+            "Execute shell commands",
+            json!({
+                "type": "object",
+                "properties": {
+                    "command": {
+                        "type": "string",
+                        "description": "The shell command to execute"
+                    }
+                },
+                "required": ["command"]
+            }),
+        ),
+        tool(
+            "background_process",
+            "Launch a long-running process in the background (e.g., game servers, dev servers). The process runs independently and logs are captured to a file. Use the regular 'shell' tool to read logs (cat/tail), check status (ps), or stop the process (kill). Returns the PID and log file path.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "name": {
+                        "type": "string",
+                        "description": "A unique name for this process (e.g., 'game_server', 'my_app'). Used to identify the process and its log file."
+                    },
+                    "command": {
+                        "type": "string",
+                        "description": "The shell command to execute in the background"
+                    },
+                    "working_dir": {
+                        "type": "string",
+                        "description": "Optional working directory. Defaults to current directory if not specified."
+                    }
+                },
+                "required": ["name", "command"]
+            }),
+        ),
+        tool(
+            "read_file",
+            "Read the contents of a file. For image files (png, jpg, jpeg, gif, bmp, tiff, webp), automatically extracts text using OCR. For text files, optionally read a specific character range.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "file_path": {
+                        "type": "string",
+                        "description": "The path to the file to read"
+                    },
+                    "start": {
+                        "type": "integer",
+                        "description": "Starting character position (0-indexed, inclusive). If omitted, reads from beginning."
+                    },
+                    "end": {
+                        "type": "integer",
+                        "description": "Ending character position (0-indexed, EXCLUSIVE). If omitted, reads to end of file."
+                    }
+                },
+                "required": ["file_path"]
+            }),
+        ),
+        tool(
+            "read_image",
+            "Read one or more image files and send them to the LLM for visual analysis. Supports PNG, JPEG, GIF, and WebP formats. Use this when you need to visually inspect images (e.g., find sprites, analyze UI, read diagrams). The images will be included in your next response for analysis.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "file_paths": {
+                        "type": "array",
+                        "items": { "type": "string" },
+                        "description": "Array of paths to image files to read"
+                    }
+                },
+                "required": ["file_paths"]
+            }),
+        ),
+        tool(
+            "write_file",
+            "Write content to a file (creates or overwrites). You MUST provide all arguments",
+            json!({
+                "type": "object",
+                "properties": {
+                    "file_path": {
+                        "type": "string",
+                        "description": "The path to the file to write"
+                    },
+                    "content": {
+                        "type": "string",
+                        "description": "The content to write to the file"
+                    }
+                },
+                "required": ["file_path", "content"]
+            }),
+        ),
+        tool(
+            "str_replace",
+            "Apply a unified diff to a file. Supports multiple hunks and context lines. Optionally constrain the search to a [start, end) character range (0-indexed; end is EXCLUSIVE). Useful to disambiguate matches or limit scope in large files.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "file_path": {
+                        "type": "string",
+                        "description": "The path to the file to edit"
+                    },
+                    "diff": {
+                        "type": "string",
+                        "description": "A unified diff showing what to replace. Supports @@ hunk headers, context lines, and multiple hunks (---/+++ headers optional for minimal diffs)."
+                    },
+                    "start": {
+                        "type": "integer",
+                        "description": "Starting character position in the file (0-indexed, inclusive). If omitted, searches from beginning."
+                    },
+                    "end": {
+                        "type": "integer",
+                        "description": "Ending character position in the file (0-indexed, EXCLUSIVE - character at this position is NOT included). If omitted, searches to end of file."
+                    }
+                },
+                "required": ["file_path", "diff"]
+            }),
+        ),
+        tool(
+            "final_output",
+            "Signal task completion with a detailed summary",
+            json!({
+                "type": "object",
+                "properties": {
+                    "summary": {
+                        "type": "string",
+                        "description": "A detailed summary in markdown of what was accomplished"
+                    }
+                },
+                "required": ["summary"]
+            }),
+        ),
+        // NOTE(review): the description points at a `list_windows` tool that is not
+        // defined in this module - confirm it is provided elsewhere.
+        tool(
+            "take_screenshot",
+            "Capture a screenshot of a specific application window. You MUST specify the window_id parameter with the application name (e.g., 'Safari', 'Terminal', 'Google Chrome'). The tool will automatically use the native screencapture command with the application's window ID for a clean capture. Use list_windows first to identify available windows.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Filename for the screenshot (e.g., 'safari.png'). If a relative path is provided, the screenshot will be saved to ~/tmp or $TMPDIR. Use an absolute path to save elsewhere."
+                    },
+                    "window_id": {
+                        "type": "string",
+                        "description": "REQUIRED: Application name to capture (e.g., 'Safari', 'Terminal', 'Google Chrome'). The tool will capture the frontmost window of that application using its native window ID."
+                    },
+                    "region": {
+                        "type": "object",
+                        "properties": {
+                            "x": { "type": "integer" },
+                            "y": { "type": "integer" },
+                            "width": { "type": "integer" },
+                            "height": { "type": "integer" }
+                        }
+                    }
+                },
+                "required": ["path", "window_id"]
+            }),
+        ),
+        // NOTE(review): the `path` description mentions a `region` argument, but this
+        // schema declares no such property and no `required` list - confirm intent.
+        tool(
+            "extract_text",
+            "Extract text from an image file using OCR. For extracting text from a specific window, use vision_find_text instead which automatically handles window capture.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Path to image file (optional if region is provided)"
+                    }
+                }
+            }),
+        ),
+        tool(
+            "todo_read",
+            "Read your current TODO list from todo.g3.md file in the session directory. Shows what tasks are planned and their status. Call this at the start of multi-step tasks to check for existing plans, and during execution to review progress before updating. TODO lists are scoped to the current session.",
+            no_args(),
+        ),
+        tool(
+            "todo_write",
+            "Create or update your TODO list in todo.g3.md file with a complete task plan. Use markdown checkboxes: - [ ] for pending, - [x] for complete. This tool replaces the entire file content, so always call todo_read first to preserve existing content. Essential for multi-step tasks. TODO lists are scoped to the current session.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "content": {
+                        "type": "string",
+                        "description": "The TODO list content to save. Use markdown checkbox format: - [ ] for incomplete tasks, - [x] for completed tasks. Support nested tasks with indentation."
+                    }
+                },
+                "required": ["content"]
+            }),
+        ),
+        tool(
+            "code_coverage",
+            "Generate a code coverage report for the entire workspace using cargo llvm-cov. This runs all tests with coverage instrumentation and returns a summary of coverage statistics. Requires llvm-tools-preview and cargo-llvm-cov to be installed (they will be auto-installed if missing).",
+            no_args(),
+        ),
+        tool(
+            "code_search",
+            "Syntax-aware code search that understands code structure, not just text. Finds actual functions, classes, methods, and other code constructs - ignores matches in comments and strings. Much more accurate than grep for code searches. Supports batch searches (up to 20 parallel) with structured results and context lines. Languages: Rust, Python, JavaScript, TypeScript, Go, Java, C, C++, Kotlin. Uses tree-sitter query syntax.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "searches": {
+                        "type": "array",
+                        "maxItems": 20,
+                        "items": {
+                            "type": "object",
+                            "properties": {
+                                "name": { "type": "string", "description": "Label for this search." },
+                                "query": { "type": "string", "description": "tree-sitter query in S-expression format (e.g., \"(function_item name: (identifier) @name)\")" },
+                                "language": { "type": "string", "enum": ["rust", "python", "javascript", "typescript", "go", "java", "c", "cpp", "kotlin"], "description": "Programming language to search." },
+                                "paths": { "type": "array", "items": { "type": "string" }, "description": "Paths/dirs to search. Defaults to current dir if empty." },
+                                "context_lines": { "type": "integer", "minimum": 0, "maximum": 20, "default": 0, "description": "Lines of context to include around each match." }
+                            },
+                            "required": ["name", "query", "language"]
+                        }
+                    },
+                    "max_concurrency": { "type": "integer", "minimum": 1, "default": 4 },
+                    "max_matches_per_search": { "type": "integer", "minimum": 1, "default": 500 }
+                },
+                "required": ["searches"]
+            }),
+        ),
+    ]
+}
+
+/// Build the WebDriver browser automation tool set (the `webdriver_*` tools).
+fn create_webdriver_tools() -> Vec<Tool> {
+    // Local constructor so each entry reads as name / description / schema.
+    let tool = |name: &str, description: &str, input_schema| Tool {
+        name: name.to_string(),
+        description: description.to_string(),
+        input_schema,
+    };
+    // Schema shared by the tools that take no arguments.
+    let no_args = || json!({ "type": "object", "properties": {}, "required": [] });
+
+    vec![
+        tool(
+            "webdriver_start",
+            "Start a Safari WebDriver session for browser automation. Must be called before any other webdriver tools. Requires Safari's 'Allow Remote Automation' to be enabled in Develop menu.",
+            no_args(),
+        ),
+        tool(
+            "webdriver_navigate",
+            "Navigate to a URL in the browser",
+            json!({
+                "type": "object",
+                "properties": {
+                    "url": {
+                        "type": "string",
+                        "description": "The URL to navigate to (must include protocol, e.g., https://)"
+                    }
+                },
+                "required": ["url"]
+            }),
+        ),
+        tool(
+            "webdriver_get_url",
+            "Get the current URL of the browser",
+            no_args(),
+        ),
+        tool(
+            "webdriver_get_title",
+            "Get the title of the current page",
+            no_args(),
+        ),
+        tool(
+            "webdriver_find_element",
+            "Find an element on the page by CSS selector and return its text content",
+            json!({
+                "type": "object",
+                "properties": {
+                    "selector": {
+                        "type": "string",
+                        "description": "CSS selector to find the element (e.g., 'h1', '.class-name', '#id')"
+                    }
+                },
+                "required": ["selector"]
+            }),
+        ),
+        tool(
+            "webdriver_find_elements",
+            "Find all elements matching a CSS selector and return their text content",
+            json!({
+                "type": "object",
+                "properties": {
+                    "selector": {
+                        "type": "string",
+                        "description": "CSS selector to find elements"
+                    }
+                },
+                "required": ["selector"]
+            }),
+        ),
+        tool(
+            "webdriver_click",
+            "Click an element on the page",
+            json!({
+                "type": "object",
+                "properties": {
+                    "selector": {
+                        "type": "string",
+                        "description": "CSS selector for the element to click"
+                    }
+                },
+                "required": ["selector"]
+            }),
+        ),
+        tool(
+            "webdriver_send_keys",
+            "Type text into an input element",
+            json!({
+                "type": "object",
+                "properties": {
+                    "selector": {
+                        "type": "string",
+                        "description": "CSS selector for the input element"
+                    },
+                    "text": {
+                        "type": "string",
+                        "description": "Text to type into the element"
+                    },
+                    "clear_first": {
+                        "type": "boolean",
+                        "description": "Whether to clear the element before typing (default: true)"
+                    }
+                },
+                "required": ["selector", "text"]
+            }),
+        ),
+        tool(
+            "webdriver_execute_script",
+            "Execute JavaScript code in the browser and return the result",
+            json!({
+                "type": "object",
+                "properties": {
+                    "script": {
+                        "type": "string",
+                        "description": "JavaScript code to execute (use 'return' to return a value)"
+                    }
+                },
+                "required": ["script"]
+            }),
+        ),
+        tool(
+            "webdriver_get_page_source",
+            "Get the rendered HTML source of the current page. Returns the current DOM state after JavaScript execution.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "max_length": {
+                        "type": "integer",
+                        "description": "Maximum length of HTML to return (default: 10000, use 0 for no truncation)"
+                    },
+                    "save_to_file": {
+                        "type": "string",
+                        "description": "Optional file path to save the HTML instead of returning it inline"
+                    }
+                },
+                "required": []
+            }),
+        ),
+        tool(
+            "webdriver_screenshot",
+            "Take a screenshot of the browser window",
+            json!({
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Path where to save the screenshot (e.g., '/tmp/screenshot.png')"
+                    }
+                },
+                "required": ["path"]
+            }),
+        ),
+        tool(
+            "webdriver_back",
+            "Navigate back in browser history",
+            no_args(),
+        ),
+        tool(
+            "webdriver_forward",
+            "Navigate forward in browser history",
+            no_args(),
+        ),
+        tool(
+            "webdriver_refresh",
+            "Refresh the current page",
+            no_args(),
+        ),
+        tool(
+            "webdriver_quit",
+            "Close the browser and end the WebDriver session",
+            no_args(),
+        ),
+    ]
+}
+
+/// Build the macOS Accessibility tool set (the `macax_*` tools plus
+/// `extract_text_with_boxes`).
+fn create_macax_tools() -> Vec<Tool> {
+    // Local constructor so each entry reads as name / description / schema.
+    let tool = |name: &str, description: &str, input_schema| Tool {
+        name: name.to_string(),
+        description: description.to_string(),
+        input_schema,
+    };
+    // Schema shared by the tools that take no arguments.
+    let no_args = || json!({ "type": "object", "properties": {}, "required": [] });
+
+    vec![
+        tool(
+            "macax_list_apps",
+            "List all running applications that can be controlled via macOS Accessibility API",
+            no_args(),
+        ),
+        tool(
+            "macax_get_frontmost_app",
+            "Get the name of the currently active (frontmost) application",
+            no_args(),
+        ),
+        tool(
+            "macax_activate_app",
+            "Bring an application to the front (activate it)",
+            json!({
+                "type": "object",
+                "properties": {
+                    "app_name": {
+                        "type": "string",
+                        "description": "Name of the application to activate (e.g., 'Safari', 'TextEdit')"
+                    }
+                },
+                "required": ["app_name"]
+            }),
+        ),
+        tool(
+            "macax_press_key",
+            "Press a keyboard key or shortcut in an application (e.g., Cmd+S to save)",
+            json!({
+                "type": "object",
+                "properties": {
+                    "app_name": {
+                        "type": "string",
+                        "description": "Name of the application"
+                    },
+                    "key": {
+                        "type": "string",
+                        "description": "Key to press (e.g., 's', 'return', 'tab')"
+                    },
+                    "modifiers": {
+                        "type": "array",
+                        "items": { "type": "string" },
+                        "description": "Modifier keys (e.g., ['command', 'shift'])"
+                    }
+                },
+                "required": ["app_name", "key"]
+            }),
+        ),
+        tool(
+            "macax_type_text",
+            "Type arbitrary text into the currently focused element in an application (supports unicode, emojis, etc.)",
+            json!({
+                "type": "object",
+                "properties": {
+                    "app_name": {
+                        "type": "string",
+                        "description": "Name of the application"
+                    },
+                    "text": {
+                        "type": "string",
+                        "description": "Text to type (can include unicode, emojis, special characters)"
+                    }
+                },
+                "required": ["app_name", "text"]
+            }),
+        ),
+        tool(
+            "extract_text_with_boxes",
+            "Extract all text from an image file with bounding box coordinates for each text element. Returns JSON array with text, position (x, y), size (width, height), and confidence for each detected text. Uses Apple Vision Framework for precise sub-pixel accuracy.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "path": {
+                        "type": "string",
+                        "description": "Path to image file to extract text from"
+                    },
+                    "app_name": {
+                        "type": "string",
+                        "description": "Optional: Name of application to screenshot first (e.g., 'Safari', 'Things3'). If provided, takes screenshot of app before extracting text."
+                    }
+                },
+                "required": ["path"]
+            }),
+        ),
+    ]
+}
+
+/// Build the computer control / vision-guided tool set (the `vision_*` tools).
+fn create_computer_control_tools() -> Vec<Tool> {
+    // Local constructor so each entry reads as name / description / schema.
+    let tool = |name: &str, description: &str, input_schema| Tool {
+        name: name.to_string(),
+        description: description.to_string(),
+        input_schema,
+    };
+
+    vec![
+        tool(
+            "vision_find_text",
+            "Find text in a specific application window and return its location with bounding box coordinates (x, y, width, height) and confidence score. Useful for locating UI elements. Uses Apple Vision Framework for precise sub-pixel accuracy.",
+            json!({
+                "type": "object",
+                "properties": {
+                    "app_name": {
+                        "type": "string",
+                        "description": "Name of the application to search in (e.g., 'Things3', 'Safari', 'TextEdit')"
+                    },
+                    "text": {
+                        "type": "string",
+                        "description": "The text to search for on screen"
+                    }
+                },
+                "required": ["app_name", "text"]
+            }),
+        ),
+        tool(
+            "vision_click_text",
+            "Find text in a specific application window and click on it (useful for clicking buttons, links, menu items)",
+            json!({
+                "type": "object",
+                "properties": {
+                    "app_name": {
+                        "type": "string",
+                        "description": "Name of the application (e.g., 'Things3', 'Safari', 'TextEdit')"
+                    },
+                    "text": {
+                        "type": "string",
+                        "description": "The text to click on (e.g., 'Submit', 'OK', 'Cancel', '+')"
+                    }
+                },
+                "required": ["app_name", "text"]
+            }),
+        ),
+        tool(
+            "vision_click_near_text",
+            "Find text in a specific application window and click near it (useful for clicking text fields next to labels)",
+            json!({
+                "type": "object",
+                "properties": {
+                    "app_name": {
+                        "type": "string",
+                        "description": "Name of the application (e.g., 'Things3', 'Safari', 'TextEdit')"
+                    },
+                    "text": {
+                        "type": "string",
+                        "description": "The label text to find (e.g., 'Name:', 'Email:', 'Task:')"
+                    },
+                    "direction": {
+                        "type": "string",
+                        "enum": ["right", "below", "left", "above"],
+                        "description": "Direction to click relative to the text (default: right)"
+                    },
+                    "distance": {
+                        "type": "integer",
+                        "description": "Distance in pixels from the text (default: 50)"
+                    }
+                },
+                "required": ["app_name", "text"]
+            }),
+        ),
+    ]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Every tool definition, with all optional sets switched on.
+    fn all_tools() -> Vec<Tool> {
+        create_tool_definitions(ToolConfig::new(true, true, true))
+    }
+
+    #[test]
+    fn test_core_tools_count() {
+        let tools = create_core_tools();
+        // Should have the core tools: shell, background_process, read_file, read_image,
+        // write_file, str_replace, final_output, take_screenshot, extract_text,
+        // todo_read, todo_write, code_coverage, code_search
+        assert_eq!(tools.len(), 13);
+    }
+
+    #[test]
+    fn test_webdriver_tools_count() {
+        let tools = create_webdriver_tools();
+        // 15 webdriver tools
+        assert_eq!(tools.len(), 15);
+    }
+
+    #[test]
+    fn test_macax_tools_count() {
+        let tools = create_macax_tools();
+        // 6 macax tools
+        assert_eq!(tools.len(), 6);
+    }
+
+    #[test]
+    fn test_computer_control_tools_count() {
+        let tools = create_computer_control_tools();
+        // 3 vision tools
+        assert_eq!(tools.len(), 3);
+    }
+
+    #[test]
+    fn test_create_tool_definitions_core_only() {
+        let config = ToolConfig::default();
+        let tools = create_tool_definitions(config);
+        assert_eq!(tools.len(), 13);
+    }
+
+    #[test]
+    fn test_create_tool_definitions_all_enabled() {
+        // 13 core + 15 webdriver + 6 macax + 3 computer_control = 37
+        assert_eq!(all_tools().len(), 37);
+    }
+
+    #[test]
+    fn test_create_tool_definitions_single_flag() {
+        // Each flag must add exactly its own set on top of the 13 core tools.
+        let cases = [
+            (ToolConfig::new(true, false, false), 13 + 15),
+            (ToolConfig::new(false, true, false), 13 + 6),
+            (ToolConfig::new(false, false, true), 13 + 3),
+        ];
+        for (config, expected) in cases.iter() {
+            assert_eq!(
+                create_tool_definitions(*config).len(),
+                *expected,
+                "unexpected tool count for {:?}",
+                config
+            );
+        }
+    }
+
+    #[test]
+    fn test_tool_has_required_fields() {
+        // Validate the optional tool sets as well, not only the core tools.
+        for tool in all_tools() {
+            assert!(!tool.name.is_empty(), "Tool name should not be empty");
+            assert!(
+                !tool.description.is_empty(),
+                "Tool '{}' description should not be empty",
+                tool.name
+            );
+            assert!(
+                tool.input_schema.is_object(),
+                "Tool '{}' input_schema should be an object",
+                tool.name
+            );
+        }
+    }
+
+    #[test]
+    fn test_tool_names_are_unique() {
+        // Tools are identified by name, so no two sets may define the same one.
+        let tools = all_tools();
+        let mut seen = std::collections::HashSet::new();
+        for tool in &tools {
+            assert!(
+                seen.insert(tool.name.as_str()),
+                "Duplicate tool name: {}",
+                tool.name
+            );
+        }
+    }
+}